From cc11892672de320e42e6a3d2ef6173963babecfa Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Tue, 6 May 2025 17:44:53 -0700 Subject: [PATCH 01/25] Add "final" to method parameters where they were missing. --- pom.xml | 14 +++++++------- .../quantilescommon/DoublesSortedView.java | 4 ++-- .../quantilescommon/FloatsSortedView.java | 4 ++-- .../quantilescommon/LongsSortedView.java | 4 ++-- .../quantilescommon/PartitioningFeature.java | 4 ++-- .../quantilescommon/QuantilesDoublesAPI.java | 12 ++++++------ .../quantilescommon/QuantilesFloatsAPI.java | 12 ++++++------ .../quantilescommon/QuantilesGenericAPI.java | 12 ++++++------ .../quantilescommon/QuantilesLongsAPI.java | 12 ++++++------ .../theta/ConcurrentSharedThetaSketch.java | 2 +- 10 files changed, 40 insertions(+), 40 deletions(-) diff --git a/pom.xml b/pom.xml index 84e032947..a9858526a 100644 --- a/pom.xml +++ b/pom.xml @@ -94,11 +94,11 @@ under the License. 3.6.3 - 21 - --enable-preview + 22 + ${java.version} ${java.version} - -Xmx4g -Duser.language=en -Duser.country=US -Dfile.encoding=UTF-8 ${jvm-ffm-flag} + -Xmx4g -Duser.language=en -Duser.country=US -Dfile.encoding=UTF-8 ${jvm-arguments} UTF-8 ${charset.encoding} ${charset.encoding} @@ -164,7 +164,7 @@ under the License. ${maven-compiler-plugin.version} - ${jvm-ffm-flag} + ${jvm-arguments} @@ -190,7 +190,7 @@ under the License. - [21,) + [22,) [${maven.version},4.0.0) @@ -239,7 +239,7 @@ under the License. public all,-missing - ${jvm-ffm-flag} + ${jvm-arguments} @@ -285,7 +285,7 @@ under the License. 
maven-surefire-plugin ${maven-surefire-failsafe-plugins.version} - ${jvm-ffm-flag} + ${jvm-arguments} false false true diff --git a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java index 1427f6279..47bad1c67 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java @@ -60,7 +60,7 @@ public interface DoublesSortedView extends SortedView { * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. */ - default double[] getCDF(double[] splitPoints, QuantileSearchCriteria searchCrit) { + default double[] getCDF(final double[] splitPoints, final QuantileSearchCriteria searchCrit) { QuantilesUtil.checkDoublesSplitPointsOrder(splitPoints); final int len = splitPoints.length + 1; final double[] buckets = new double[len]; @@ -129,7 +129,7 @@ default double[] getCDF(double[] splitPoints, QuantileSearchCriteria searchCrit) * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. 
*/ - default double[] getPMF(double[] splitPoints, QuantileSearchCriteria searchCrit) { + default double[] getPMF(final double[] splitPoints, final QuantileSearchCriteria searchCrit) { final double[] buckets = getCDF(splitPoints, searchCrit); final int len = buckets.length; for (int i = len; i-- > 1; ) { diff --git a/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java index eec699d94..0667a6748 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java @@ -60,7 +60,7 @@ public interface FloatsSortedView extends SortedView { * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. */ - default double[] getCDF(float[] splitPoints, QuantileSearchCriteria searchCrit) { + default double[] getCDF(final float[] splitPoints, final QuantileSearchCriteria searchCrit) { QuantilesUtil.checkFloatsSplitPointsOrder(splitPoints); final int len = splitPoints.length + 1; final double[] buckets = new double[len]; @@ -129,7 +129,7 @@ default double[] getCDF(float[] splitPoints, QuantileSearchCriteria searchCrit) * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. 
*/ - default double[] getPMF(float[] splitPoints, QuantileSearchCriteria searchCrit) { + default double[] getPMF(final float[] splitPoints, final QuantileSearchCriteria searchCrit) { final double[] buckets = getCDF(splitPoints, searchCrit); final int len = buckets.length; for (int i = len; i-- > 1; ) { diff --git a/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java index e7e3521c7..eaeceeb92 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java @@ -60,7 +60,7 @@ public interface LongsSortedView extends SortedView { * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. */ - default double[] getCDF(long[] splitPoints, QuantileSearchCriteria searchCrit) { + default double[] getCDF(final long[] splitPoints, final QuantileSearchCriteria searchCrit) { QuantilesUtil.checkLongsSplitPointsOrder(splitPoints); final int len = splitPoints.length + 1; final double[] buckets = new double[len]; @@ -129,7 +129,7 @@ default double[] getCDF(long[] splitPoints, QuantileSearchCriteria searchCrit) { * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. 
*/ - default double[] getPMF(long[] splitPoints, QuantileSearchCriteria searchCrit) { + default double[] getPMF(final long[] splitPoints, final QuantileSearchCriteria searchCrit) { final double[] buckets = getCDF(splitPoints, searchCrit); final int len = buckets.length; for (int i = len; i-- > 1; ) { diff --git a/src/main/java/org/apache/datasketches/quantilescommon/PartitioningFeature.java b/src/main/java/org/apache/datasketches/quantilescommon/PartitioningFeature.java index 5672c2a02..82b293b3a 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/PartitioningFeature.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/PartitioningFeature.java @@ -53,7 +53,7 @@ public interface PartitioningFeature { * * @return an instance of {@link GenericPartitionBoundaries GenericPartitionBoundaries}. */ - default GenericPartitionBoundaries getPartitionBoundariesFromNumParts(int numEquallySizedParts) { + default GenericPartitionBoundaries getPartitionBoundariesFromNumParts(final int numEquallySizedParts) { return getPartitionBoundariesFromNumParts(numEquallySizedParts, INCLUSIVE); } @@ -106,7 +106,7 @@ GenericPartitionBoundaries getPartitionBoundariesFromNumParts( * * @return an instance of {@link GenericPartitionBoundaries GenericPartitionBoundaries}. 
*/ - default GenericPartitionBoundaries getPartitionBoundariesFromPartSize(long nominalPartSizeItems) { + default GenericPartitionBoundaries getPartitionBoundariesFromPartSize(final long nominalPartSizeItems) { return getPartitionBoundariesFromPartSize(nominalPartSizeItems, INCLUSIVE); } diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java index 8c4f6620f..09cfd6071 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java @@ -35,7 +35,7 @@ public interface QuantilesDoublesAPI extends QuantilesAPI { * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. */ - default double[] getCDF(double[] splitPoints) { + default double[] getCDF(final double[] splitPoints) { return getCDF(splitPoints, INCLUSIVE); } @@ -98,7 +98,7 @@ default double[] getCDF(double[] splitPoints) { * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. */ - default double[] getPMF(double[] splitPoints) { + default double[] getPMF(final double[] splitPoints) { return getPMF(splitPoints, INCLUSIVE); } @@ -150,7 +150,7 @@ default double[] getPMF(double[] splitPoints) { * @return the approximate quantile given the normalized rank. * @throws IllegalArgumentException if sketch is empty. */ - default double getQuantile(double rank) { + default double getQuantile(final double rank) { return getQuantile(rank, INCLUSIVE); } @@ -207,7 +207,7 @@ default double getQuantile(double rank) { * @return an array of quantiles corresponding to the given array of normalized ranks. * @throws IllegalArgumentException if sketch is empty. 
*/ - default double[] getQuantiles(double[] ranks) { + default double[] getQuantiles(final double[] ranks) { return getQuantiles(ranks, INCLUSIVE); } @@ -230,7 +230,7 @@ default double[] getQuantiles(double[] ranks) { * @return the normalized rank corresponding to the given quantile * @throws IllegalArgumentException if sketch is empty. */ - default double getRank(double quantile) { + default double getRank(final double quantile) { return getRank(quantile, INCLUSIVE); } @@ -251,7 +251,7 @@ default double getRank(double quantile) { * @return an array of normalized ranks corresponding to the given array of quantiles. * @throws IllegalArgumentException if sketch is empty. */ - default double[] getRanks(double[] quantiles) { + default double[] getRanks(final double[] quantiles) { return getRanks(quantiles, INCLUSIVE); } diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java index 8b8a91bdd..2713e1b02 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java @@ -34,7 +34,7 @@ public interface QuantilesFloatsAPI extends QuantilesAPI { * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. */ - default double[] getCDF(float[] splitPoints) { + default double[] getCDF(final float[] splitPoints) { return getCDF(splitPoints, INCLUSIVE); } @@ -97,7 +97,7 @@ default double[] getCDF(float[] splitPoints) { * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. 
*/ - default double[] getPMF(float[] splitPoints) { + default double[] getPMF(final float[] splitPoints) { return getPMF(splitPoints, INCLUSIVE); } @@ -149,7 +149,7 @@ default double[] getPMF(float[] splitPoints) { * @return the approximate quantile given the normalized rank. * @throws IllegalArgumentException if sketch is empty. */ - default float getQuantile(double rank) { + default float getQuantile(final double rank) { return getQuantile(rank, INCLUSIVE); } @@ -206,7 +206,7 @@ default float getQuantile(double rank) { * @return an array of quantiles corresponding to the given array of normalized ranks. * @throws IllegalArgumentException if sketch is empty. */ - default float[] getQuantiles(double[] ranks) { + default float[] getQuantiles(final double[] ranks) { return getQuantiles(ranks, INCLUSIVE); } @@ -229,7 +229,7 @@ default float[] getQuantiles(double[] ranks) { * @return the normalized rank corresponding to the given quantile. * @throws IllegalArgumentException if sketch is empty. */ - default double getRank(float quantile) { + default double getRank(final float quantile) { return getRank(quantile, INCLUSIVE); } @@ -250,7 +250,7 @@ default double getRank(float quantile) { * @return an array of normalized ranks corresponding to the given array of quantiles. * @throws IllegalArgumentException if sketch is empty. 
*/ - default double[] getRanks(float[] quantiles) { + default double[] getRanks(final float[] quantiles) { return getRanks(quantiles, INCLUSIVE); } diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java index c6a05ffa1..d1592e244 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java @@ -38,7 +38,7 @@ public interface QuantilesGenericAPI extends QuantilesAPI, PartitioningFeatur * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. */ - default double[] getCDF(T[] splitPoints) { + default double[] getCDF(final T[] splitPoints) { return getCDF(splitPoints, INCLUSIVE); } @@ -118,7 +118,7 @@ default int getMaxPartitions() { * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. */ - default double[] getPMF(T[] splitPoints) { + default double[] getPMF(final T[] splitPoints) { return getPMF(splitPoints, INCLUSIVE); } @@ -170,7 +170,7 @@ default double[] getPMF(T[] splitPoints) { * @return the approximate quantile given the normalized rank. * @throws IllegalArgumentException if sketch is empty. */ - default T getQuantile(double rank) { + default T getQuantile(final double rank) { return getQuantile(rank, INCLUSIVE); } @@ -227,7 +227,7 @@ default T getQuantile(double rank) { * @return an array of quantiles corresponding to the given array of normalized ranks. * @throws IllegalArgumentException if sketch is empty. 
*/ - default T[] getQuantiles(double[] ranks) { + default T[] getQuantiles(final double[] ranks) { return getQuantiles(ranks, INCLUSIVE); } @@ -250,7 +250,7 @@ default T[] getQuantiles(double[] ranks) { * @return the normalized rank corresponding to the given quantile. * @throws IllegalArgumentException if sketch is empty. */ - default double getRank(T quantile) { + default double getRank(final T quantile) { return getRank(quantile, INCLUSIVE); } @@ -271,7 +271,7 @@ default double getRank(T quantile) { * @return an array of normalized ranks corresponding to the given array of quantiles. * @throws IllegalArgumentException if sketch is empty. */ - default double[] getRanks(T[] quantiles) { + default double[] getRanks(final T[] quantiles) { return getRanks(quantiles, INCLUSIVE); } diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java index fb1ca5817..51802df71 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java @@ -35,7 +35,7 @@ public interface QuantilesLongsAPI extends QuantilesAPI { * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. */ - default double[] getCDF(long[] splitPoints) { + default double[] getCDF(final long[] splitPoints) { return getCDF(splitPoints, INCLUSIVE); } @@ -98,7 +98,7 @@ default double[] getCDF(long[] splitPoints) { * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. 
*/ - default double[] getPMF(long[] splitPoints) { + default double[] getPMF(final long[] splitPoints) { return getPMF(splitPoints, INCLUSIVE); } @@ -150,7 +150,7 @@ default double[] getPMF(long[] splitPoints) { * @return the approximate quantile given the normalized rank. * @throws IllegalArgumentException if sketch is empty. */ - default long getQuantile(double rank) { + default long getQuantile(final double rank) { return getQuantile(rank, INCLUSIVE); } @@ -207,7 +207,7 @@ default long getQuantile(double rank) { * @return an array of quantiles corresponding to the given array of normalized ranks. * @throws IllegalArgumentException if sketch is empty. */ - default long[] getQuantiles(double[] ranks) { + default long[] getQuantiles(final double[] ranks) { return getQuantiles(ranks, INCLUSIVE); } @@ -230,7 +230,7 @@ default long[] getQuantiles(double[] ranks) { * @return the normalized rank corresponding to the given quantile * @throws IllegalArgumentException if sketch is empty. */ - default double getRank(long quantile) { + default double getRank(final long quantile) { return getRank(quantile, INCLUSIVE); } @@ -251,7 +251,7 @@ default double getRank(long quantile) { * @return an array of normalized ranks corresponding to the given array of quantiles. * @throws IllegalArgumentException if sketch is empty. 
*/ - default double[] getRanks(long[] quantiles) { + default double[] getRanks(final long[] quantiles) { return getRanks(quantiles, INCLUSIVE); } diff --git a/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java b/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java index cdc843f8b..1bbdcc12a 100644 --- a/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java +++ b/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java @@ -36,7 +36,7 @@ interface ConcurrentSharedThetaSketch extends MemoryStatus { long NOT_SINGLE_HASH = -1L; double MIN_ERROR = 0.0000001; - static long computeExactLimit(long k, double error) { + static long computeExactLimit(final long k, final double error) { return 2 * Math.min(k, (long) Math.ceil(1.0 / Math.pow(Math.max(error,MIN_ERROR), 2.0))); } From b9a626194d19effefbf00ea9aedbc6698050ef4e Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Wed, 7 May 2025 15:52:41 -0700 Subject: [PATCH 02/25] revert to jdk21 for the time being. --- pom.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index a9858526a..5ea3a7ac4 100644 --- a/pom.xml +++ b/pom.xml @@ -94,8 +94,8 @@ under the License. 3.6.3 - 22 - + 21 + --enable-preview ${java.version} ${java.version} -Xmx4g -Duser.language=en -Duser.country=US -Dfile.encoding=UTF-8 ${jvm-arguments} @@ -190,7 +190,7 @@ under the License. - [22,) + [21,) [${maven.version},4.0.0) From 127b667f5658b12f920ff2c51809de67199534d2 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 9 May 2025 14:57:04 -0700 Subject: [PATCH 03/25] Set pom to java 24. --- pom.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 5ea3a7ac4..bcc76b53b 100644 --- a/pom.xml +++ b/pom.xml @@ -94,8 +94,8 @@ under the License. 
3.6.3 - 21 - --enable-preview + 24 + ${java.version} ${java.version} -Xmx4g -Duser.language=en -Duser.country=US -Dfile.encoding=UTF-8 ${jvm-arguments} @@ -190,7 +190,7 @@ under the License. - [21,) + [22,) [${maven.version},4.0.0) From b0addfe7036316f19a6772fc2bc60eee7636206d Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 9 May 2025 15:51:01 -0700 Subject: [PATCH 04/25] The XxHash function was removed from datasketches-memory and moved here. Currently the only usage of this is in filters.bloomfilter. The original dependency on net.openhft.hashing.LongHashFunction has been removed as it is obsolete and uses sun.misc.unsafe. --- .../org/apache/datasketches/hash/XxHash.java | 178 ++- .../hash/XxHash64LoopingTest.java | 1082 +++++++++++++++++ .../datasketches/hash/XxHash64Test.java | 177 +++ .../apache/datasketches/hash/XxHashTest.java | 44 - 4 files changed, 1420 insertions(+), 61 deletions(-) create mode 100644 src/test/java/org/apache/datasketches/hash/XxHash64LoopingTest.java create mode 100644 src/test/java/org/apache/datasketches/hash/XxHash64Test.java delete mode 100644 src/test/java/org/apache/datasketches/hash/XxHashTest.java diff --git a/src/main/java/org/apache/datasketches/hash/XxHash.java b/src/main/java/org/apache/datasketches/hash/XxHash.java index a93d4d348..e0d6947d5 100644 --- a/src/main/java/org/apache/datasketches/hash/XxHash.java +++ b/src/main/java/org/apache/datasketches/hash/XxHash.java @@ -19,41 +19,185 @@ package org.apache.datasketches.hash; -import org.apache.datasketches.memory.Memory; +import static org.apache.datasketches.memory.internal.XxHash64.hash; +import static org.apache.datasketches.memory.internal.XxHash64.hashBytes; +import static org.apache.datasketches.memory.internal.XxHash64.hashChars; +import static org.apache.datasketches.memory.internal.XxHash64.hashDoubles; +import static org.apache.datasketches.memory.internal.XxHash64.hashFloats; +import static org.apache.datasketches.memory.internal.XxHash64.hashInts; +import 
static org.apache.datasketches.memory.internal.XxHash64.hashLongs; +import static org.apache.datasketches.memory.internal.XxHash64.hashShorts; /** * The XxHash is a fast, non-cryptographic, 64-bit hash function that has * excellent avalanche and 2-way bit independence properties. + * This java version adapted the C++ version and the OpenHFT/Zero-Allocation-Hashing implementation + * referenced below as inspiration. * - *

This class wraps the - * Memory Component XxHash - * implementation. + *

The C++ source repository: + * + * https://github.com/Cyan4973/xxHash. It has a BSD 2-Clause License: + * + * http://www.opensource.org/licenses/bsd-license.php. See LICENSE. + * + *

Portions of this code were adapted from + * + * OpenHFT/Zero-Allocation-Hashing, which has an Apache 2 license as does this site. See LICENSE. * * @author Lee Rhodes */ -public class XxHash { +public final class XxHash { + + private XxHash() { /* singleton */ } + + /** + * Hash the given arr starting at the given offset and continuing for the given length using the + * given seed. + * @param arr the given array + * @param offsetBytes starting at this offset + * @param lengthBytes continuing for this length + * @param seed the given seed + * @return the hash + */ + public static long hashByteArr( + final byte[] arr, + final int offsetBytes, + final int lengthBytes, + final long seed) { + return hashBytes(arr, offsetBytes, lengthBytes, seed); + } + + /** + * Hash the given arr starting at the given offset and continuing for the given length using the + * given seed. + * @param arr the given array + * @param offsetShorts starting at this offset + * @param lengthShorts continuing for this length + * @param seed the given seed + * @return the hash + */ + public static long hashShortArr( + final short[] arr, + final int offsetShorts, + final int lengthShorts, + final long seed) { + return hashShorts(arr, offsetShorts, lengthShorts, seed); + } + + /** + * Hash the given arr starting at the given offset and continuing for the given length using the + * given seed. + * @param arr the given array + * @param offsetChars starting at this offset + * @param lengthChars continuing for this length + * @param seed the given seed + * @return the hash + */ + public static long hashCharArr( + final char[] arr, + final int offsetChars, + final int lengthChars, + final long seed) { + return hashChars(arr, offsetChars, lengthChars, seed); + } + + /** + * Hash the given arr starting at the given offset and continuing for the given length using the + * given seed. 
+ * @param arr the given array + * @param offsetInts starting at this offset + * @param lengthInts continuing for this length + * @param seed the given seed + * @return the hash + */ + public static long hashIntArr( + final int[] arr, + final int offsetInts, + final int lengthInts, + final long seed) { + return hashInts(arr, offsetInts, lengthInts, seed); + } /** - * Compute the hash of the given Memory object. - * @param mem The given Memory object - * @param offsetBytes Starting at this offset in bytes - * @param lengthBytes Continuing for this number of bytes - * @param seed use this seed for the hash function - * @return return the resulting 64-bit hash value. + * Hash the given arr starting at the given offset and continuing for the given length using the + * given seed. + * @param arr the given array + * @param offsetLongs starting at this offset + * @param lengthLongs continuing for this length + * @param seed the given seed + * @return the hash */ - public static long hash(final Memory mem, final long offsetBytes, final long lengthBytes, + public static long hashLongArr( + final long[] arr, + final int offsetLongs, + final int lengthLongs, final long seed) { - return mem.xxHash64(offsetBytes, lengthBytes, seed); + return hashLongs(arr, offsetLongs, lengthLongs, seed); } /** - * Returns a 64-bit hash. - * @param in a long + * Returns a 64-bit hash from a single long. This method has been optimized for speed when only + * a single hash of a long is required. + * @param in A long. * @param seed A long valued seed. + * @return the hash. + */ + public static long hashLong( + final long in, + final long seed) { + return hash(in, seed); + } + + /** + * Hash the given arr starting at the given offset and continuing for the given length using the + * given seed. 
+ * @param arr the given array + * @param offsetFloats starting at this offset + * @param lengthFloats continuing for this length + * @param seed the given seed * @return the hash */ - public static long hash(final long in, final long seed) { - return org.apache.datasketches.memory.XxHash.hashLong(in, seed); + public static long hashFloatArr( + final float[] arr, + final int offsetFloats, + final int lengthFloats, + final long seed) { + return hashFloats(arr, offsetFloats, lengthFloats, seed); + } + + /** + * Hash the given arr starting at the given offset and continuing for the given length using the + * given seed. + * @param arr the given array + * @param offsetDoubles starting at this offset + * @param lengthDoubles continuing for this length + * @param seed the given seed + * @return the hash + */ + public static long hashDoubleArr( + final double[] arr, + final int offsetDoubles, + final int lengthDoubles, + final long seed) { + return hashDoubles(arr, offsetDoubles, lengthDoubles, seed); + } + + /** + * Hash the given arr starting at the given offset and continuing for the given length using the + * given seed. + * @param str the given string + * @param offsetChars starting at this offset + * @param lengthChars continuing for this length + * @param seed the given seed + * @return the hash + */ + public static long hashString( + final String str, + final int offsetChars, + final int lengthChars, + final long seed) { + return org.apache.datasketches.memory.internal.XxHash64.hashString(str, offsetChars, lengthChars, seed); } } + diff --git a/src/test/java/org/apache/datasketches/hash/XxHash64LoopingTest.java b/src/test/java/org/apache/datasketches/hash/XxHash64LoopingTest.java new file mode 100644 index 000000000..4ae2b4956 --- /dev/null +++ b/src/test/java/org/apache/datasketches/hash/XxHash64LoopingTest.java @@ -0,0 +1,1082 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.hash; + +import static org.testng.Assert.assertEquals; + +import org.apache.datasketches.memory.WritableMemory; +import org.testng.annotations.Test; + +/** + * @author Lee Rhodes + */ +public class XxHash64LoopingTest { + + /* + * This test is adapted from + * + * OpenHFT/Zero-Allocation-Hashing to test hash compatibility with that implementation. + * See LICENSE. + */ + @Test + public void testWithSeed() { + long seed = 42L; + for (int i = 0; i < 1025; i++) { + byte[] byteArr = new byte[i]; + for (int j = 0; j < byteArr.length; j++) { byteArr[j] = (byte) j; } + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + long hash = wmem.xxHash64(0, byteArr.length, seed); + assertEquals(hash, HASHES_OF_LOOPING_BYTES_WITH_SEED_42[i]); + } + } + + /*This data is from + * + * OpenHFT/Zero-Allocation-Hashing to test hash compatibility with that implementation. + * See LICENSE. 
+ */ + private static final long[] HASHES_OF_LOOPING_BYTES_WITH_SEED_42 = { + -7444071767201028348L, + -8959994473701255385L, + 7116559933691734543L, + 6019482000716350659L, + -6625277557348586272L, + -5507563483608914162L, + 1540412690865189709L, + 4522324563441226749L, + -7143238906056518746L, + -7989831429045113014L, + -7103973673268129917L, + -2319060423616348937L, + -7576144055863289344L, + -8903544572546912743L, + 6376815151655939880L, + 5913754614426879871L, + 6466567997237536608L, + -869838547529805462L, + -2416009472486582019L, + -3059673981515537339L, + 4211239092494362041L, + 1414635639471257331L, + 166863084165354636L, + -3761330575439628223L, + 3524931906845391329L, + 6070229753198168844L, + -3740381894759773016L, + -1268276809699008557L, + 1518581707938531581L, + 7988048690914090770L, + -4510281763783422346L, + -8988936099728967847L, + -8644129751861931918L, + 2046936095001747419L, + 339737284852751748L, + -8493525091666023417L, + -3962890767051635164L, + -5799948707353228709L, + -6503577434416464161L, + 7718729912902936653L, + 191197390694726650L, + -2677870679247057207L, + 20411540801847004L, + 2738354376741059902L, + -3754251900675510347L, + -3208495075154651980L, + 5505877218642938179L, + 6710910171520780908L, + -9060809096139575515L, + 6936438027860748388L, + -6675099569841255629L, + -5358120966884144380L, + -4970515091611332076L, + -1810965683604454696L, + -516197887510505242L, + 1240864593087756274L, + 6033499571835033332L, + 7223146028771530185L, + 909128106589125206L, + 1567720774747329341L, + -1867353301780159863L, + 4655107429511759333L, + 5356891185236995950L, + 182631115370802890L, + -3582744155969569138L, + 595148673029792797L, + 495183136068540256L, + 5536689004903505647L, + -8472683670935785889L, + -4335021702965928166L, + 7306662983232020244L, + 4285260837125010956L, + 8288813008819191181L, + -3442351913745287612L, + 4883297703151707194L, + 9135546183059994964L, + 123663780425483012L, + 509606241253238381L, + 5940344208569311369L, + 
-2650142344608291176L, + 3232776678942440459L, + -922581627593772181L, + 7617977317085633049L, + 7154902266379028518L, + -5806388675416795571L, + 4368003766009575737L, + -2922716024457242064L, + 4771160713173250118L, + 3275897444752647349L, + -297220751499763878L, + 5095659287766176401L, + 1181843887132908826L, + 9058283605301070357L, + 3984713963471276643L, + 6050484112980480005L, + 1551535065359244224L, + 565337293533335618L, + 7412521035272884309L, + -4735469481351389369L, + 6998597101178745656L, + -9107075101236275961L, + 5879828914430779796L, + 6034964979406620806L, + 5666406915264701514L, + -4666218379625258428L, + 2749972203764815656L, + -782986256139071446L, + 6830581400521008570L, + 2588852022632995043L, + -5484725487363818922L, + -3319556935687817112L, + 6481961252981840893L, + 2204492445852963006L, + -5301091763401031066L, + -2615065677047206256L, + -6769817545131782460L, + -8421640685322953142L, + -3669062629317949176L, + -9167016978640750490L, + 2783671191687959562L, + -7599469568522039782L, + -7589134103255480011L, + -5932706841188717592L, + -8689756354284562694L, + -3934347391198581249L, + -1344748563236040701L, + 2172701592984478834L, + -5322052340624064417L, + -8493945390573620511L, + 3349021988137788403L, + -1806262525300459538L, + -8091524448239736618L, + 4022306289903960690L, + -8346915997379834224L, + -2106001381993805461L, + -5784123934724688161L, + 6775158099649720388L, + -3869682756870293568L, + 4356490186652082006L, + 8469371446702290916L, + -2972961082318458602L, + -7188106622222784561L, + -4961006366631572412L, + 3199991182014172900L, + 2917435868590434179L, + 8385845305547872127L, + 7706824402560674655L, + -1587379863634865277L, + -4212156212298809650L, + -1305209322000720233L, + -7866728337506665880L, + 8195089740529247049L, + -4876930125798534239L, + 798222697981617129L, + -2441020897729372845L, + -3926158482651178666L, + -1254795122048514130L, + 5192463866522217407L, + -5426289318796042964L, + -3267454004443530826L, + 
471043133625225785L, + -660956397365869974L, + -6149209189144999161L, + -2630977660039166559L, + 8512219789663151219L, + -3309844068134074620L, + -6211275327487847132L, + -2130171729366885995L, + 6569302074205462321L, + 4855778342281619706L, + 3867211421508653033L, + -3002480002418725542L, + -8297543107467502696L, + 8049642289208775831L, + -5439825716055425635L, + 7251760070798756432L, + -4774526021749797528L, + -3892389575184442548L, + 5162451061244344424L, + 6000530226398686578L, + -5713092252241819676L, + 8740913206879606081L, + -8693282419677309723L, + 1576205127972543824L, + 5760354502610401246L, + 3173225529903529385L, + 1785166236732849743L, + -1024443476832068882L, + -7389053248306187459L, + 1171021620017782166L, + 1471572212217428724L, + 7720766400407679932L, + -8844781213239282804L, + -7030159830170200877L, + 2195066352895261150L, + 1343620937208608634L, + 9178233160016731645L, + -757883447602665223L, + 3303032934975960867L, + -3685775162104101116L, + -4454903657585596656L, + -5721532367620482629L, + 8453227136542829644L, + 5397498317904798888L, + 7820279586106842836L, + -2369852356421022546L, + 3910437403657116169L, + 6072677490463894877L, + -2651044781586183960L, + 5173762670440434510L, + -2970017317595590978L, + -1024698859439768763L, + -3098335260967738522L, + -1983156467650050768L, + -8132353894276010246L, + -1088647368768943835L, + -3942884234250555927L, + 7169967005748210436L, + 2870913702735953746L, + -2207022373847083021L, + 1104181306093040609L, + 5026420573696578749L, + -5874879996794598513L, + -4777071762424874671L, + -7506667858329720470L, + -2926679936584725232L, + -5530649174168373609L, + 5282408526788020384L, + 3589529249264153135L, + -6220724706210580398L, + -7141769650716479812L, + 5142537361821482047L, + -7029808662366864423L, + -6593520217660744466L, + 1454581737122410695L, + -139542971769349865L, + 1727752089112067235L, + -775001449688420017L, + -5011311035350652032L, + -8671171179275033159L, + -2850915129917664667L, + 
-5258897903906998781L, + -6954153088230718761L, + -4070351752166223959L, + -6902592976462171099L, + -7850366369290661391L, + -4562443925864904705L, + 3186922928616271015L, + 2208521081203400591L, + -2727824999830592777L, + -3817861137262331295L, + 2236720618756809066L, + -4888946967413746075L, + -446884183491477687L, + -43021963625359034L, + -5857689226703189898L, + -2156533592262354883L, + -2027655907961967077L, + 7151844076490292500L, + -5029149124756905464L, + 526404452686156976L, + 8741076980297445408L, + 7962851518384256467L, + -105985852299572102L, + -2614605270539434398L, + -8265006689379110448L, + 8158561071761524496L, + -6923530157382047308L, + 5551949335037580397L, + 565709346370307061L, + -4780869469938333359L, + 6931895917517004830L, + 565234767538051407L, + -8663136372880869656L, + 1427340323685448983L, + 6492705666640232290L, + 1481585578088475369L, + -1712711110946325531L, + 3281685342714380741L, + 6441384790483098576L, + -1073539554682358394L, + 5704050067194788964L, + -5495724689443043319L, + -5425043165837577535L, + 8349736730194941321L, + -4123620508872850061L, + 4687874980541143573L, + -468891940172550975L, + -3212254545038049829L, + -6830802881920725628L, + 9033050533972480988L, + 4204031879107709260L, + -677513987701096310L, + -3286978557209370155L, + 1644111582609113135L, + 2040089403280131741L, + 3323690950628902653L, + -7686964480987925756L, + -4664519769497402737L, + 3358384147145476542L, + -4699919744264452277L, + -4795197464927839170L, + 5051607253379734527L, + -8987703459734976898L, + 8993686795574431834L, + -2688919474688811047L, + 375938183536293311L, + 1049459889197081920L, + -1213022037395838295L, + 4932989235110984138L, + -6647247877090282452L, + -7698817539128166242L, + -3264029336002462659L, + 6487828018122309795L, + -2660821091484592878L, + 7104391069028909121L, + -1765840012354703384L, + 85428166783788931L, + -6732726318028261938L, + 7566202549055682933L, + 229664898114413280L, + -1474237851782211353L, + -1571058880058007603L, 
+ -7926453582850712144L, + 2487148368914275243L, + 8740031015380673473L, + 1908345726881363169L, + -2510061320536523178L, + 7854780026906019630L, + -6023415596650016493L, + -6264841978089051107L, + 4024998278016087488L, + -4266288992025826072L, + -3222176619422665563L, + -1999258726038299316L, + 1715270077442385636L, + 6764658837948099754L, + -8646962299105812577L, + -51484064212171546L, + -1482515279051057493L, + -8663965522608868414L, + -256555202123523670L, + 1973279596140303801L, + -7280796173024508575L, + -5691760367231354704L, + -5915786562256300861L, + -3697715074906156565L, + 3710290115318541949L, + 6796151623958134374L, + -935299482515386356L, + -7078378973978660385L, + 5379481350768846927L, + -9011221735308556302L, + 5936568631579608418L, + -6060732654964511813L, + -4243141607840017809L, + 3198488845875349355L, + -7809288876010447646L, + 4371587872421472389L, + -1304197371105522943L, + 7389861473143460103L, + -1892352887992004024L, + 2214828764044713398L, + 6347546952883613388L, + 1275694314105480954L, + -5262663163358903733L, + 1524757505892047607L, + 1474285098416162746L, + -7976447341881911786L, + 4014100291977623265L, + 8994982266451461043L, + -7737118961020539453L, + -2303955536994331092L, + 1383016539349937136L, + 1771516393548245271L, + -5441914919967503849L, + 5449813464890411403L, + -3321280356474552496L, + 4084073849712624363L, + 4290039323210935932L, + 2449523715173349652L, + 7494827882138362156L, + 9035007221503623051L, + 5722056230130603177L, + -5443061851556843748L, + -7554957764207092109L, + 447883090204372074L, + 533916651576859197L, + -3104765246501904165L, + -4002281505194601516L, + -8402008431255610992L, + -408273018037005304L, + 214196458752109430L, + 6458513309998070914L, + 2665048360156607904L, + 96698248584467992L, + -3238403026096269033L, + 6759639479763272920L, + -4231971627796170796L, + -2149574977639731179L, + -1437035755788460036L, + -6000005629185669767L, + 145244292800946348L, + -3056352941404947199L, + 3748284277779018970L, 
+ 7328354565489106580L, + -2176895260373660284L, + 3077983936372755601L, + 1215485830019410079L, + 683050801367331140L, + -3173237622987755212L, + -1951990779107873701L, + -4714366021269652421L, + 4934690664256059008L, + 1674823104333774474L, + -3974408282362828040L, + 2001478896492417760L, + -4115105568354384199L, + -2039694725495941666L, + -587763432329933431L, + -391276713546911316L, + -5543400904809469053L, + 1882564440421402418L, + -4991793588968693036L, + 3454088185914578321L, + 2290855447126188424L, + 3027910585026909453L, + 2136873580213167431L, + -6243562989966916730L, + 5887939953208193029L, + -3491821629467655741L, + -3138303216306660662L, + 8572629205737718669L, + 4154439973110146459L, + 5542921963475106759L, + -2025215496720103521L, + -4047933760493641640L, + -169455456138383823L, + -1164572689128024473L, + -8551078127234162906L, + -7247713218016599028L, + 8725299775220778242L, + 6263466461599623132L, + 7931568057263751768L, + 7365493014712655238L, + -7343740914722477108L, + 8294118602089088477L, + 7677867223984211483L, + -7052188421655969232L, + -3739992520633991431L, + 772835781531324307L, + 881441588914692737L, + 6321450879891466401L, + 5682516032668315027L, + 8493068269270840662L, + -3895212467022280567L, + -3241911302335746277L, + -7199586338775635848L, + -4606922569968527974L, + -806850906331637768L, + 2433670352784844513L, + -5787982146811444512L, + 7852193425348711165L, + 8669396209073850051L, + -6898875695148963118L, + 6523939610287206782L, + -8084962379210153174L, + 8159432443823995836L, + -2631068535470883494L, + -338649779993793113L, + 6514650029997052016L, + 3926259678521802094L, + 5443275905907218528L, + 7312187582713433551L, + -2993773587362997676L, + -1068335949405953411L, + 4499730398606216151L, + 8538015793827433712L, + -4057209365270423575L, + -1504284818438273559L, + -6460688570035010846L, + 1765077117408991117L, + 8278320303525164177L, + 8510128922449361533L, + 1305722765578569816L, + 7250861238779078656L, + -576624504295396147L, + 
-4363714566147521011L, + -5932111494795524073L, + 1837387625936544674L, + -4186755953373944712L, + -7657073597826358867L, + 140408487263951108L, + 5578463635002659628L, + 3400326044813475885L, + -6092804808386714986L, + -2410324417287268694L, + 3222007930183458970L, + 4932471983280850419L, + 3554114546976144528L, + -7216067928362857082L, + -6115289896923351748L, + -6769646077108881947L, + 4263895947722578066L, + 2939136721007694271L, + 1426030606447416658L, + -1316192446807442076L, + 5366182640480055129L, + 6527003877470258527L, + 5849680119000207603L, + 5263993237214222328L, + -6936533648789185663L, + -9063642143790846605L, + 3795892210758087672L, + 4987213125282940176L, + 2505500970421590750L, + -1014022559552365387L, + -3574736245968367770L, + 1180676507127340259L, + -2261908445207512503L, + -8416682633172243509L, + 1114990703652673283L, + 7753746660364401380L, + 1874908722469707905L, + 2033421444403047677L, + 21412168602505589L, + 385957952615286205L, + 2053171460074727107L, + 1915131899400103774L, + 6680879515029368390L, + 568807208929724162L, + -6211541450459087674L, + -5026690733412145448L, + 1384781941404886235L, + -98027820852587266L, + 1806580495924249669L, + 6322077317403503963L, + 9078162931419569939L, + -2809061215428363978L, + 7697867577577415733L, + -5270063855897737274L, + 5649864555290587388L, + -6970990547695444247L, + 579684606137331754L, + 3871931565451195154L, + 2030008578322050218L, + -5012357307111799829L, + -2271365921756144065L, + 4551962665158074190L, + -3385474923040271312L, + -7647625164191633577L, + 6634635380316963029L, + -5201190933687061585L, + 8864818738548593973L, + 2855828214210882907L, + 9154512990734024165L, + -6945306719789457786L, + 1200243352799481087L, + 875998327415853787L, + 1275313054449881011L, + -6105772045375948736L, + -2926927684328291437L, + 9200050852144954779L, + 5188726645765880663L, + 5197037323312705176L, + 3434926231010121611L, + -5054013669361906544L, + 2582959199749224670L, + -6053757512723474059L, + 
-5016308176846054473L, + -2509827316698626133L, + 7700343644503853204L, + -1997627249894596731L, + 3993168688325352290L, + -8181743677541277704L, + 3719056119682565597L, + -7264411659282947790L, + 7177028972346484464L, + -5460831176884283278L, + 1799904662416293978L, + -6549616005092764514L, + 5472403994001122052L, + 8683463751708388502L, + -7873363037838316398L, + 689134758256487260L, + -1287443614028696450L, + 4452712919702709507L, + 762909374167538893L, + 6594302592326281411L, + 1183786629674781984L, + 5021847859620133476L, + -2490098069181538915L, + 5105145136026716679L, + 4437836948098585718L, + 1987270426215858862L, + 6170312798826946249L, + 634297557126003407L, + -1672811625495999581L, + 6282971595586218191L, + 4549149305727581687L, + -5652165370435317782L, + 1064501550023753890L, + -5334885527127139723L, + -6904378001629481237L, + -1807576691784201230L, + -205688432992053911L, + 7621619053293393289L, + 6258649161313982470L, + -1111634238359342096L, + -8044260779481691987L, + 400270655839010807L, + -7806833581382890725L, + -2970563349459508036L, + -7392591524816802798L, + 2918924613160219805L, + -6444161627929149002L, + 6096497501321778876L, + -1477975665655830038L, + 1690651307597306138L, + -2364076888826085362L, + -6521987420014905821L, + -4419193480146960582L, + 3538587780233092477L, + 8374665961716940404L, + 7492412312405424500L, + 6311662249091276767L, + -1240235198282023566L, + 5478559631401166447L, + 3476714419313462133L, + 377427285984503784L, + 2570472638778991109L, + -2741381313777447835L, + -7123472905503039596L, + 2493658686946955193L, + 1024677789035847585L, + -2916713904339582981L, + -4532003852004642304L, + -2202143560366234111L, + 5832267856442755135L, + -261740607772957384L, + 239435959690278014L, + 5755548341947719409L, + 6138795458221887696L, + -7709506987360146385L, + -6657487758065140444L, + -7006376793203657499L, + 6544409861846502033L, + 3171929352014159247L, + 1051041925048792869L, + 2617300158375649749L, + 952652799620095175L, + 
-576661730162168147L, + -1634191369221345988L, + 4833656816115993519L, + 647566759700005786L, + 2473810683785291822L, + 3005977181064745326L, + -3321881966853149523L, + 7595337666427588699L, + 6004093624251057224L, + -563917505657690279L, + 6117428527147449302L, + -6287297509522976113L, + -4527219334756214406L, + 742626429298092489L, + 3057351806086972041L, + 645967551210272605L, + -4428701157828864227L, + 3236379103879435414L, + -8477089892132066300L, + -6127365537275859058L, + -4052490484706946358L, + -8004854976625046469L, + -3679456917426613424L, + -8212793762082595299L, + -818288739465424130L, + 1358812099481667095L, + 7835987612195254310L, + -3663247409614323059L, + -2931105150130396604L, + 7296136776835614792L, + -2014557408985889628L, + 7267662411237959788L, + 3699280615819277743L, + -212010675469091396L, + -6518374332458360120L, + 145026010541628849L, + 1879297324213501001L, + -7146296067751816833L, + -5002958800391379931L, + 6060682439924517608L, + -432234782921170964L, + -6669688947353256956L, + 7728943532792041267L, + 830911367341171721L, + 3396934884314289432L, + -779464156662780749L, + 2330041851883352285L, + -4783350380736276693L, + -5758476056890049254L, + -7551552301614791791L, + 1253334187723911710L, + -2685018208308798978L, + 5379636036360946454L, + 6154668487114681217L, + -8641287462255458898L, + 4676087643800649558L, + -2405142641398691475L, + 1088685126864246881L, + 6431149082338374041L, + -607357695335069155L, + -720970692129524140L, + 2648766932394044468L, + 8408344790179354573L, + -6193808387735667350L, + 7722524628524697419L, + -6975433852560238120L, + -2925851029234475295L, + -4274458387165211028L, + -8355836377702147319L, + 5278146397877332061L, + 8502098812383680707L, + 2292836642336580326L, + -6127608082651070062L, + 2222301962240611208L, + -1930887695854799378L, + 7640503480494894592L, + 1162652186586436094L, + -1918002592943761683L, + 7648998601717261840L, + -8472603250832757057L, + -988877663117552456L, + 2368458128168026494L, + 
-6480813811998475245L, + -5896967824416018967L, + -2593783161701820446L, + 6950098417530252598L, + 6362589545555771236L, + 7981389665448567125L, + 3954017080198558850L, + 1626078615050230622L, + 6650159066527969109L, + 697345338922935394L, + -1226816215461768626L, + 8740408765973837440L, + -4194155864629568323L, + 7016680023232424746L, + 6043281358142429469L, + -4201005667174376809L, + 1216727117859013155L, + 6367202436544203935L, + 35414869396444636L, + 3715622794033998412L, + 488654435687670554L, + -2503747297224687460L, + 3147101919441470388L, + -8248611218693190922L, + 970697264481229955L, + 3411465763826851418L, + 9117405004661599969L, + -5204346498331519734L, + -19637460819385174L, + -5039124225167977219L, + 2990108874601696668L, + -2623857460235459202L, + 4256291692861397446L, + 6724147860870760443L, + 3558616688507246537L, + 6487680097936412800L, + -6470792832935928161L, + 4314814550912237614L, + -1292878983006062345L, + 6791915152630414174L, + 5971652079925815310L, + 2557529546662864312L, + 466175054322801580L, + -585216717310746872L, + -2486640422147349036L, + 7212029603994220134L, + 3958995069888972500L, + 4950471855791412790L, + -3721948842035712763L, + -6184503487488243051L, + 4079570444585775332L, + -3952156172546996872L, + 4543894231118208322L, + -1739995588466209963L, + 9155948355455935530L, + 5821980345462207860L, + -2431287667309520417L, + -3890108130519441316L, + -558124689277030490L, + 6079823537335801717L, + 5409742395192364262L, + -2329885777717160453L, + -7332804342513677651L, + 1466490574975950555L, + -420549419907427929L, + -5249909814389692516L, + -5145692168206210661L, + 5934113980649113921L, + 3241618428555359661L, + -6622110266160980250L, + 5048250878669516223L, + 5747219637359976174L, + 2975906212588223728L, + 5730216838646273215L, + -176713127129024690L, + 6734624279336671146L, + 5127866734316017180L, + 7111761230887705595L, + 3457811808274317235L, + 3362961434604932375L, + -1877869936854991246L, + 7171428594877765665L, + 
-8252167178400462374L, + -6306888185035821047L, + -6684702191247683887L, + -7754928454824190529L, + -1902605599135704386L, + -4037319846689421239L, + 8493746058123583457L, + -8156648963857047193L, + 2051510355149839497L, + -1256416624177218909L, + -3344927996254072010L, + -1838853051925943568L, + 316927471680974556L, + -1502257066700798003L, + -5836095610125837606L, + -1594125583615895424L, + 1442211486559637962L, + -144295071206619569L, + 5159850900959273410L, + 4589139881166423678L, + -7038726987463097509L, + 2886082400772974595L, + 2780759114707171916L, + 5694649587906297495L, + 1260349041268169667L, + 4921517488271434890L, + 644696475796073018L, + 6262811963753436289L, + -6128198676595868773L, + -3625352083004760261L, + -8751453332943236675L, + 8749249479868749221L, + -2450808199545048250L, + -6517435817046180917L, + -3433321727429234998L, + -2591586258908763451L, + 3847750870868804507L, + 6603614438546398643L, + -7598682191291031287L, + 8710261565627204971L, + 4753389483755344355L, + -4645333069458786881L, + -6742695046613492214L, + 643070478568866643L, + -7543096104151965610L, + 7171495384655926161L, + 595063872610714431L, + 3292310150781130424L, + 4326847806055440904L, + -4580020566072794152L, + 3142286571820373678L, + 5530356537440155930L, + 546372639737516181L, + 7401214477400367500L, + 7406531960402873109L, + 3287639667219172570L, + 4977301681213633671L, + 5253257820925174498L, + 2906216636104297878L, + 6142955758238347523L, + -3498651268741727235L, + -5875053958265588593L, + 3896719087169993883L, + -910904726885775073L, + 380107493197368177L, + -4993591912695447004L, + 2970487257212582761L, + 2551762717569548774L, + 953061649962736812L, + 8949739538606589463L, + -2962839167079475801L, + -1375673191272573835L, + 3761793818361866390L, + -389577789190726878L, + 5661262051502180269L, + -6558556411143987683L, + -702798336372315031L, + -336662820551371779L, + 998576401126580155L, + -5945021269112582755L, + 6108533925730179871L, + 2207095297001999618L, + 
-9042779159998880435L, + -6177868444342118372L, + 6775965402605895077L, + -3788428885163306576L, + 7790055010527190387L, + 3581587652196995358L, + -6176354155561607694L, + -5859381340906321207L, + 395898765763528395L, + 8132967590863909348L, + -3329092504090544483L, + -6785855381158040247L, + 1497218517051796750L, + -5352392845588925911L, + -6271364901230559194L, + 2314830370653350118L, + -7617588269001325450L, + 1423166885758213795L, + 8538612578307869519L, + -61918791718295474L, + -8177103503192338593L, + -4740086042584326695L, + 3677931948215558698L, + 6558856291580149558L, + 2674975452453336335L, + 5133796555646930522L, + 5139252693299337100L, + 7949476871295347205L, + 4407815324662880678L, + -3758305875280581215L, + 6066309507576587415L, + -7368508486398350973L, + -3181640264332856492L, + 6905100869343314145L, + 3677177673848733417L, + 8862933624870506941L, + -8575223195813810568L, + 9178470351355678144L, + 4677809017145408358L, + -1194833416287894989L, + 3436364743255571183L, + -5204770725795363579L, + 560599448536335263L, + -3192077522964776200L, + -751575299648803575L, + 6334581746534596579L, + -8358187891202563300L, + -1462480609823525055L, + 5605961062646987941L, + 4968399805931440889L, + 7968693270782626653L, + -5868205923557518188L, + 1830234928743560617L, + -8435261076693154407L, + 2138416970728681332L, + 8088740745199685138L, + 806532400344230520L, + 1800590379902909333L, + -8909128842071238901L, + -7357495566969170860L, + 3679766664126940553L, + 2060050474865839094L, + 2363972840121763414L, + 525695004292982714L, + -1224842191746529593L, + 7011317848855545003L, + -6337167558180299938L, + -5184688833363785939L, + -8426673387248359061L, + -5035438815930785229L, + 3521810320608058994L, + 4803742557254962242L, + 6623527039545786598L, + -1221475882122634738L, + -3344794405518401087L, + 6510298498414053658L, + 2844753907937720338L, + 90502309714994895L, + -750403235344282494L, + -4825474181021465833L, + -3405519947983849510L, + 3503875590944089793L, + 
7286294700691822468L, + 7828126881500292486L, + 8437899353709338096L, + 136052254470293480L, + 1113259077339995086L, + -8244887265606191121L, + 8089569503800461649L, + -1429698194850157567L, + 1575595674002364989L, + 3576095286627428675L, + -7653655285807569222L, + -6053506977362539111L, + -3923855345805787169L, + -8001149080454232377L, + -4382867706931832271L, + 4212860258835896297L, + 4207674254247034014L, + 5519424058779519159L, + -754483042161434654L, + 1434113479814210082L, + -6416645032698336896L, + 5624329676066514819L, + -8229557208322175959L, + 3922640911653270376L, + 7826932478782081910L, + -4862787164488635842L, + 1449234668827944573L, + -1781657689570106327L, + 5442827552725289699L, + 3589862161007644641L, + 4787115581650652778L, + -3512152721942525726L, + -6750103117958685206L, + 5012970446659949261L, + 6797752795961689017L, + 5086454597639943700L, + -7616068364979994076L, + 1492846825433110217L, + 2967476304433704510L, + -8413824338284112078L, + -1319049442043273974L, + -1756090916806844109L, + -9061091728950139525L, + -6864767830358160810L, + 4879532090226251157L, + 5528644708740739488L + }; +} diff --git a/src/test/java/org/apache/datasketches/hash/XxHash64Test.java b/src/test/java/org/apache/datasketches/hash/XxHash64Test.java new file mode 100644 index 000000000..cda076131 --- /dev/null +++ b/src/test/java/org/apache/datasketches/hash/XxHash64Test.java @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.hash; + +import static org.apache.datasketches.memory.XxHash.*; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import java.util.Random; +import java.util.concurrent.ThreadLocalRandom; + +import org.apache.datasketches.memory.Resource; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.WritableMemory; +import org.testng.annotations.Test; + +/** + * @author Lee Rhodes + */ +public class XxHash64Test { + + @Test + public void offsetChecks() { + long seed = 12345; + int blocks = 6; + int cap = blocks * 16; + + long hash; + + WritableMemory wmem = WritableMemory.allocate(cap); + for (int i = 0; i < cap; i++) { wmem.putByte(i, (byte)(-128 + i)); } + + for (int offset = 0; offset < 16; offset++) { + int arrLen = cap - offset; + hash = wmem.xxHash64(offset, arrLen, seed); + assertTrue(hash != 0); + } + } + + @Test + public void byteArrChecks() { + long seed = 0; + int offset = 0; + int bytes = 16; + + for (int j = 1; j < bytes; j++) { + byte[] in = new byte[bytes]; + + WritableMemory wmem = WritableMemory.writableWrap(in); + for (int i = 0; i < j; i++) { wmem.putByte(i, (byte) (-128 + i)); } + + long hash =wmem.xxHash64(offset, bytes, seed); + assertTrue(hash != 0); + } + } + + /* + * This test is adapted from + * + * OpenHFT/Zero-Allocation-Hashing to test hash compatibility with that implementation. + * It is licensed under Apache License, version 2.0. See LICENSE. 
+ */ + @Test + public void collisionTest() { + WritableMemory wmem = WritableMemory.allocate(128); + wmem.putLong(0, 1); + wmem.putLong(16, 42); + wmem.putLong(32, 2); + long h1 = wmem.xxHash64(0, wmem.getCapacity(), 0); + + wmem.putLong(0, 1L + 0xBA79078168D4BAFL); + wmem.putLong(32, 2L + 0x9C90005B80000000L); + long h2 = wmem.xxHash64(0, wmem.getCapacity(), 0); + assertEquals(h1, h2); + + wmem.putLong(0, 1L + (0xBA79078168D4BAFL * 2)); + wmem.putLong(32, 2L + (0x392000b700000000L)); //= (0x9C90005B80000000L * 2) fix overflow false pos + + long h3 = wmem.xxHash64(0, wmem.getCapacity(), 0); + assertEquals(h2, h3); + } + +// This test had to be disabled because the net.openhft.hashing.LongHashFunction is obsolete and depends on sun.misc.unsafe. +// /** +// * This simple test compares the output of {@link Resource#xxHash64(long, long, long)} with the +// * output of {@link net.openhft.hashing.LongHashFunction}, that itself is tested against the +// * reference implementation in C. This increases confidence that the xxHash function implemented +// * in this package is in fact the same xxHash function implemented in C. 
+// * +// * @author Roman Leventov +// * @author Lee Rhodes +// */ +// @Test +// public void testXxHash() { +// Random random = ThreadLocalRandom.current(); +// for (int len = 0; len < 100; len++) { +// byte[] bytes = new byte[len]; +// for (int i = 0; i < 10; i++) { +// long zahXxHash = LongHashFunction.xx().hashBytes(bytes); +// long memoryXxHash = Memory.wrap(bytes).xxHash64(0, len, 0); +// assertEquals(memoryXxHash, zahXxHash); +// random.nextBytes(bytes); +// } +// } +// } + + private static final byte[] barr = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + + @Test + public void testArrHashes() { + WritableMemory wmem = WritableMemory.writableWrap(barr); + long hash0 = wmem.xxHash64(8, 8, 0); + long hash1 = hashByteArr(barr, 8, 8, 0); + assertEquals(hash1, hash0); + + char[] carr = new char[8]; + wmem.getCharArray(0, carr, 0, 8); + hash1 = hashCharArr(carr, 4, 4, 0); + assertEquals(hash1, hash0); + + short[] sarr = new short[8]; + wmem.getShortArray(0, sarr, 0, 8); + hash1 = hashShortArr(sarr, 4, 4, 0); + assertEquals(hash1, hash0); + + int[] iarr = new int[4]; + wmem.getIntArray(0, iarr, 0, 4); + hash1 = hashIntArr(iarr, 2, 2, 0); + assertEquals(hash1, hash0); + + float[] farr = new float[4]; + wmem.getFloatArray(0, farr, 0, 4); + hash1 = hashFloatArr(farr, 2, 2, 0); + assertEquals(hash1, hash0); + + long[] larr = new long[2]; + wmem.getLongArray(0, larr, 0, 2); + hash1 = hashLongArr(larr, 1, 1, 0); + long in = wmem.getLong(8); + long hash2 = hashLong(in, 00); //tests the single long hash + assertEquals(hash1, hash0); + assertEquals(hash2, hash0); + + double[] darr = new double[2]; + wmem.getDoubleArray(0, darr, 0, 2); + hash1 = hashDoubleArr(darr, 1, 1, 0); + assertEquals(hash1, hash0); + } + + @Test + public void testString() { + String s = "Now is the time for all good men to come to the aid of their country."; + char[] arr = s.toCharArray(); + long hash0 = hashString(s, 0, s.length(), 0); + long hash1 = hashCharArr(arr, 0, arr.length, 0); + 
assertEquals(hash1, hash0); + } + +} diff --git a/src/test/java/org/apache/datasketches/hash/XxHashTest.java b/src/test/java/org/apache/datasketches/hash/XxHashTest.java deleted file mode 100644 index 4ad433b2d..000000000 --- a/src/test/java/org/apache/datasketches/hash/XxHashTest.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.hash; - -import static org.testng.Assert.assertEquals; - -import org.testng.annotations.Test; - -import org.apache.datasketches.memory.Memory; - -/** - * @author Lee Rhodes - */ -public class XxHashTest { - - @Test - public void longCheck() { - long seed = 0; - long hash1 = XxHash.hash(123L, seed); - long[] arr = new long[1]; - arr[0] = 123L; - Memory mem = Memory.wrap(arr); - long hash2 = XxHash.hash(mem, 0, 8, 0); - assertEquals(hash2, hash1); - } - -} From 256ee592d88ca3b0125f632845a00947553d775d Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Sat, 10 May 2025 16:57:57 -0700 Subject: [PATCH 05/25] Disabled workflows. 
filters.BloomFilter, common.Util uses XxHash now in o.a.datasketches.hash POM aligned with ds-memory pom --- .github/workflows/auto-jdk-matrix.yml | 14 +++++++------- .github/workflows/auto-os-matrix.yml | 14 +++++++------- .github/workflows/check_cpp_files.yml | 14 +++++++------- .github/workflows/codeql-analysis.yml | 14 +++++++------- .github/workflows/javadoc.yml | 4 ++-- pom.xml | 8 +++++--- .../filters/bloomfilter/BloomFilter.java | 2 +- .../java/org/apache/datasketches/tuple/Util.java | 4 ++-- 8 files changed, 38 insertions(+), 36 deletions(-) diff --git a/.github/workflows/auto-jdk-matrix.yml b/.github/workflows/auto-jdk-matrix.yml index 0afbaf065..176d1fd6e 100644 --- a/.github/workflows/auto-jdk-matrix.yml +++ b/.github/workflows/auto-jdk-matrix.yml @@ -1,13 +1,13 @@ name: Auto JDK Matrix Test & Install on: - push: - paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] - branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] - pull_request: - paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] - # The branches below must be a subset of the branches above - branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] +# push: +# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] +# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] +# pull_request: +# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] +# # The branches below must be a subset of the branches above +# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] workflow_dispatch: env: diff --git a/.github/workflows/auto-os-matrix.yml b/.github/workflows/auto-os-matrix.yml index 83ecc1ffe..413b7a957 100644 --- a/.github/workflows/auto-os-matrix.yml +++ b/.github/workflows/auto-os-matrix.yml @@ -1,13 +1,13 @@ name: Auto OS Matrix Test & Install on: - push: - paths-ignore: [ '**/*.html', 
'**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] - branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] - pull_request: - paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] - # The branches below must be a subset of the branches above - branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] +# push: +# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] +# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] +# pull_request: +# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] +# # The branches below must be a subset of the branches above +# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] workflow_dispatch: env: diff --git a/.github/workflows/check_cpp_files.yml b/.github/workflows/check_cpp_files.yml index 243eda985..778859d0d 100644 --- a/.github/workflows/check_cpp_files.yml +++ b/.github/workflows/check_cpp_files.yml @@ -1,13 +1,13 @@ name: CPP SerDe Compatibility Test on: - push: - paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] - branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] - pull_request: - paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] - # The branches below must be a subset of the branches above - branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] +# push: +# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] +# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] +# pull_request: +# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] +# # The branches below must be a subset of the branches above +# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] workflow_dispatch: jobs: diff --git 
a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index bb42fe345..f3fde1de0 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -1,13 +1,13 @@ name: "CodeQL" on: - push: - paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] - branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] - pull_request: - paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] - # The branches below must be a subset of the branches above - branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] +# push: +# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] +# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] +# pull_request: +# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] +# # The branches below must be a subset of the branches above +# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] workflow_dispatch: jobs: diff --git a/.github/workflows/javadoc.yml b/.github/workflows/javadoc.yml index 4862d64e4..977c87b27 100644 --- a/.github/workflows/javadoc.yml +++ b/.github/workflows/javadoc.yml @@ -1,8 +1,8 @@ name: JavaDoc on: - push: - branches: main +# push: +# branches: main workflow_dispatch: jobs: diff --git a/pom.xml b/pom.xml index bcc76b53b..8da8c5777 100644 --- a/pom.xml +++ b/pom.xml @@ -86,7 +86,7 @@ under the License. 6.0.0 - 7.10.2 + 7.11.0 generate_java_files check_cpp_files @@ -114,9 +114,9 @@ under the License. 3.4.2 3.11.2 3.1.1 - 3.2.0 3.3.1 - 3.5.2 + + 3.5.2 3.2.0 4.9.10 @@ -340,6 +340,7 @@ under the License. **/*.sk LICENSE NOTICE + **/*.code-workspace @@ -668,5 +669,6 @@ under the License. 
+ diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java index 7c166a29d..a56a5eeef 100644 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java +++ b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java @@ -26,11 +26,11 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; +import org.apache.datasketches.hash.XxHash; import org.apache.datasketches.memory.Buffer; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableBuffer; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.memory.XxHash; /** * A Bloom filter is a data structure that can be used for probabilistic diff --git a/src/main/java/org/apache/datasketches/tuple/Util.java b/src/main/java/org/apache/datasketches/tuple/Util.java index 92193ca56..bda6e7c25 100644 --- a/src/main/java/org/apache/datasketches/tuple/Util.java +++ b/src/main/java/org/apache/datasketches/tuple/Util.java @@ -22,8 +22,8 @@ import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.datasketches.common.Util.ceilingPowerOf2; import static org.apache.datasketches.hash.MurmurHash3.hash; -import static org.apache.datasketches.memory.XxHash.hashCharArr; -import static org.apache.datasketches.memory.XxHash.hashString; +import static org.apache.datasketches.hash.XxHash.hashCharArr; +import static org.apache.datasketches.hash.XxHash.hashString; import java.lang.reflect.Array; From d90e5a572d36264a0ee11ca9db96c3acb0e83ced Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Sat, 10 May 2025 17:20:42 -0700 Subject: [PATCH 06/25] Upadate ds-java dependency on ds-memory to local ds-memory-6.1.0-SNAPSHOT. 
--- pom.xml | 4 ++-- src/test/java/org/apache/datasketches/hash/XxHash64Test.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 8da8c5777..01d0cc65c 100644 --- a/pom.xml +++ b/pom.xml @@ -83,7 +83,7 @@ under the License. - 6.0.0 + 6.1.0-SNAPSHOT 7.11.0 @@ -126,7 +126,7 @@ under the License. 4.3.0 - 0.8.12 + 0.8.13 2.18.0 diff --git a/src/test/java/org/apache/datasketches/hash/XxHash64Test.java b/src/test/java/org/apache/datasketches/hash/XxHash64Test.java index cda076131..8e6aeef0e 100644 --- a/src/test/java/org/apache/datasketches/hash/XxHash64Test.java +++ b/src/test/java/org/apache/datasketches/hash/XxHash64Test.java @@ -19,7 +19,7 @@ package org.apache.datasketches.hash; -import static org.apache.datasketches.memory.XxHash.*; +import static org.apache.datasketches.hash.XxHash.*; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; From 4572611e6e3a390dbf43b0af75e15f973bb3c9a7 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Mon, 12 May 2025 10:18:43 -0700 Subject: [PATCH 07/25] Fix imports --- .../apache/datasketches/hash/XxHash64Test.java | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/test/java/org/apache/datasketches/hash/XxHash64Test.java b/src/test/java/org/apache/datasketches/hash/XxHash64Test.java index 8e6aeef0e..4b9c31ab9 100644 --- a/src/test/java/org/apache/datasketches/hash/XxHash64Test.java +++ b/src/test/java/org/apache/datasketches/hash/XxHash64Test.java @@ -19,15 +19,18 @@ package org.apache.datasketches.hash; -import static org.apache.datasketches.hash.XxHash.*; +import static org.apache.datasketches.hash.XxHash.hashByteArr; +import static org.apache.datasketches.hash.XxHash.hashCharArr; +import static org.apache.datasketches.hash.XxHash.hashDoubleArr; +import static org.apache.datasketches.hash.XxHash.hashFloatArr; +import static org.apache.datasketches.hash.XxHash.hashIntArr; +import static 
org.apache.datasketches.hash.XxHash.hashLong; +import static org.apache.datasketches.hash.XxHash.hashLongArr; +import static org.apache.datasketches.hash.XxHash.hashShortArr; +import static org.apache.datasketches.hash.XxHash.hashString; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; -import java.util.Random; -import java.util.concurrent.ThreadLocalRandom; - -import org.apache.datasketches.memory.Resource; -import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; import org.testng.annotations.Test; From d948ce71ff976ae60268fea6f69cb01280ed0ddb Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 16 May 2025 09:59:26 -0700 Subject: [PATCH 08/25] Update .asf.yaml --- .asf.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.asf.yaml b/.asf.yaml index a149bf396..57e258daa 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -45,11 +45,11 @@ github: edit_comment_discussion: "Re: [D] {title} ({repository})" delete_comment_discussion: "Re: [D] {title} ({repository})" - notifications: - commits: commits@dataskethces.apache.org - issues: dev@dataskethces.apache.org - discussions: dev@dataskethces.apache.org - pullrequests_status: dev@dataskethces.apache.org - pullrequests_comment: dev@dataskethces.apache.org - # Send dependabot PRs to commits@ instead - pullrequests_bot_dependabot: commits@dataskethces.apache.org +notifications: + commits: commits@datasketches.apache.org + issues: dev@datasketches.apache.org + discussions: dev@datasketches.apache.org + pullrequests_status: dev@datasketches.apache.org + pullrequests_comment: dev@datasketches.apache.org + # Send dependabot PRs to commits@ instead + pullrequests_bot_dependabot: commits@datasketches.apache.org From 6008cb35f7dc075d24d0a47af687f15b36a79eee Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 23 May 2025 16:04:37 -0700 Subject: [PATCH 09/25] Theta rework, phase 1, no testing yet! 
--- .../org/apache/datasketches/common/Util.java | 112 +- .../org/apache/datasketches/theta2/AnotB.java | 205 + .../apache/datasketches/theta2/AnotBimpl.java | 241 + .../datasketches/theta2/BitPacking.java | 6292 +++++++++++++++++ .../BytesCompactCompressedHashIterator.java | 93 + .../theta2/BytesCompactHashIterator.java | 53 + .../theta2/CompactOperations.java | 388 + .../datasketches/theta2/CompactSketch.java | 478 ++ .../ConcurrentBackgroundThetaPropagation.java | 110 + .../ConcurrentDirectQuickSelectSketch.java | 270 + .../ConcurrentHeapQuickSelectSketch.java | 266 + .../theta2/ConcurrentHeapThetaBuffer.java | 223 + .../theta2/ConcurrentPropagationService.java | 72 + .../theta2/ConcurrentSharedThetaSketch.java | 187 + .../theta2/DirectCompactCompressedSketch.java | 142 + .../theta2/DirectCompactSketch.java | 174 + .../theta2/DirectQuickSelectSketch.java | 339 + .../theta2/DirectQuickSelectSketchR.java | 284 + .../theta2/EmptyCompactSketch.java | 147 + .../theta2/ForwardCompatibility.java | 164 + .../datasketches/theta2/HashIterator.java | 40 + .../datasketches/theta2/HeapAlphaSketch.java | 601 ++ .../theta2/HeapCompactHashIterator.java | 41 + .../theta2/HeapCompactSketch.java | 158 + .../datasketches/theta2/HeapHashIterator.java | 54 + .../theta2/HeapQuickSelectSketch.java | 326 + .../datasketches/theta2/HeapUpdateSketch.java | 139 + .../datasketches/theta2/Intersection.java | 218 + .../datasketches/theta2/IntersectionImpl.java | 561 ++ .../MemoryCompactCompressedHashIterator.java | 108 + .../theta2/MemoryHashIterator.java | 62 + .../datasketches/theta2/PreambleUtil.java | 533 ++ .../apache/datasketches/theta2/Rebuilder.java | 175 + .../datasketches/theta2/SetOperation.java | 259 + .../theta2/SetOperationBuilder.java | 275 + .../datasketches/theta2/SingleItemSketch.java | 413 ++ .../apache/datasketches/theta2/Sketch.java | 695 ++ .../org/apache/datasketches/theta2/Union.java | 231 + .../apache/datasketches/theta2/UnionImpl.java | 365 + 
.../theta2/UpdateReturnState.java | 79 + .../datasketches/theta2/UpdateSketch.java | 469 ++ .../theta2/UpdateSketchBuilder.java | 493 ++ .../WrappedCompactCompressedSketch.java | 111 + .../theta2/WrappedCompactSketch.java | 159 + .../datasketches/theta2/package-info.java | 27 + .../thetacommon/HashOperations.java | 104 + 46 files changed, 16934 insertions(+), 2 deletions(-) create mode 100644 src/main/java/org/apache/datasketches/theta2/AnotB.java create mode 100644 src/main/java/org/apache/datasketches/theta2/AnotBimpl.java create mode 100644 src/main/java/org/apache/datasketches/theta2/BitPacking.java create mode 100644 src/main/java/org/apache/datasketches/theta2/BytesCompactCompressedHashIterator.java create mode 100644 src/main/java/org/apache/datasketches/theta2/BytesCompactHashIterator.java create mode 100644 src/main/java/org/apache/datasketches/theta2/CompactOperations.java create mode 100644 src/main/java/org/apache/datasketches/theta2/CompactSketch.java create mode 100644 src/main/java/org/apache/datasketches/theta2/ConcurrentBackgroundThetaPropagation.java create mode 100644 src/main/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketch.java create mode 100644 src/main/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketch.java create mode 100644 src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java create mode 100644 src/main/java/org/apache/datasketches/theta2/ConcurrentPropagationService.java create mode 100644 src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java create mode 100644 src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java create mode 100644 src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java create mode 100644 src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java create mode 100644 src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java create mode 100644 
src/main/java/org/apache/datasketches/theta2/EmptyCompactSketch.java create mode 100644 src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java create mode 100644 src/main/java/org/apache/datasketches/theta2/HashIterator.java create mode 100644 src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java create mode 100644 src/main/java/org/apache/datasketches/theta2/HeapCompactHashIterator.java create mode 100644 src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java create mode 100644 src/main/java/org/apache/datasketches/theta2/HeapHashIterator.java create mode 100644 src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java create mode 100644 src/main/java/org/apache/datasketches/theta2/HeapUpdateSketch.java create mode 100644 src/main/java/org/apache/datasketches/theta2/Intersection.java create mode 100644 src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java create mode 100644 src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java create mode 100644 src/main/java/org/apache/datasketches/theta2/MemoryHashIterator.java create mode 100644 src/main/java/org/apache/datasketches/theta2/PreambleUtil.java create mode 100644 src/main/java/org/apache/datasketches/theta2/Rebuilder.java create mode 100644 src/main/java/org/apache/datasketches/theta2/SetOperation.java create mode 100644 src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java create mode 100644 src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java create mode 100644 src/main/java/org/apache/datasketches/theta2/Sketch.java create mode 100644 src/main/java/org/apache/datasketches/theta2/Union.java create mode 100644 src/main/java/org/apache/datasketches/theta2/UnionImpl.java create mode 100644 src/main/java/org/apache/datasketches/theta2/UpdateReturnState.java create mode 100644 src/main/java/org/apache/datasketches/theta2/UpdateSketch.java create mode 100644 
src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java create mode 100644 src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java create mode 100644 src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java create mode 100644 src/main/java/org/apache/datasketches/theta2/package-info.java diff --git a/src/main/java/org/apache/datasketches/common/Util.java b/src/main/java/org/apache/datasketches/common/Util.java index 19a8ee614..969cdc389 100644 --- a/src/main/java/org/apache/datasketches/common/Util.java +++ b/src/main/java/org/apache/datasketches/common/Util.java @@ -24,9 +24,11 @@ import static java.lang.Math.log; import static java.lang.Math.pow; import static java.lang.Math.round; -import static java.util.Arrays.fill; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import java.lang.foreign.MemorySegment; import java.util.Comparator; +import java.util.Objects; /** * Common utility functions. @@ -257,7 +259,7 @@ public static String characterPad(final String s, final int fieldLength, final c final int sLen = s.length(); if (sLen < fieldLength) { final char[] cArr = new char[fieldLength - sLen]; - fill(cArr, padChar); + java.util.Arrays.fill(cArr, padChar); final String addstr = String.valueOf(cArr); return (postpend) ? s.concat(addstr) : addstr.concat(s); } @@ -798,4 +800,110 @@ public static boolean le(final Object item1, final Object item2, final Compa return c.compare((T)item1, (T)item2) <= 0; } + //MemorySegment related + + /** + * Returns true if the two given MemorySegments refer to the same backing resource, + * which is either an off-heap memory location and size, or the same on-heap array object. + * + *

If both segments are off-heap, they must have the same starting address and the same size.

+ * + *

For on-heap segments, both segments must be based on or derived from the same array object and neither segment + * can be read-only.

+ * + * @param seg1 The first given MemorySegment + * @param seg2 The second given MemorySegment + * @return true if both MemorySegments are determined to be the same backing memory. + */ + public static boolean isSameResource(final MemorySegment seg1, final MemorySegment seg2) { + Objects.requireNonNull(seg1, "seg1 must not be null."); + Objects.requireNonNull(seg2, "seg2 must not be null."); + if (!seg1.scope().isAlive() || !seg2.scope().isAlive()) { + throw new IllegalArgumentException("Both arguments must be alive."); + } + final boolean seg1Native = seg1.isNative(); + final boolean seg2Native = seg2.isNative(); + if (seg1Native ^ seg2Native) { return false; } + if (seg1Native && seg2Native) { //both off heap + return (seg1.address() == seg2.address()) && (seg1.byteSize() == seg2.byteSize()); + } + //both on heap + if (seg1.isReadOnly() || seg2.isReadOnly()) { + throw new IllegalArgumentException("Cannot determine 'isSameBackingMemory(..)' on heap if either MemorySegment is Read-only."); + } + return (seg1.heapBase().orElse(null) == seg2.heapBase().orElse(null)); + } + + /** + * Request a new heap MemorySegment with the given capacityBytes. + * + *

The returned MemorySegment will be constructed from a long[] array. + * As a result, it will be on-heap and have a memory alignment of 8. + * If the requested capacity is not divisible by eight, the returned size + * will be rolled up to the next multiple of eight.

+ * + * @param capacityBytes The new capacity being requested. + * @return a new MemorySegment with the requested capacity. + */ + public static MemorySegment newHeapSegment(final int capacityBytes) { + if (capacityBytes < 0) { + throw new IllegalArgumentException("Requested capacity must be positive."); + } + final long[] array = ((capacityBytes & 0x7) == 0) ? new long[capacityBytes >>> 3] : new long[(capacityBytes >>> 3) + 1]; + return MemorySegment.ofArray(array); + } + + /** + * Clears all bytes of this MemorySegment to zero. + * @param seg the given MemorySegment + */ + public static void clear(final MemorySegment seg) { + seg.fill((byte)0); + } + + /** + * Clears a portion of this MemorySegment to zero. + * @param seg the given MemorySegment + * @param offsetBytes offset bytes relative to this MemorySegment start + * @param lengthBytes the length in bytes + */ + public static void clear(final MemorySegment seg, final long offsetBytes, final long lengthBytes) { + final MemorySegment slice = seg.asSlice(offsetBytes, lengthBytes); + slice.fill((byte)0); + } + + /** + * Fills a portion of this MemorySegment with the given byte value. + * @param seg the given MemorySegment + * @param offsetBytes offset bytes relative to this MemorySegment start + * @param lengthBytes the length in bytes + * @param value the given byte value + */ + public static void fill(final MemorySegment seg, final long offsetBytes, final long lengthBytes, final byte value) { + final MemorySegment slice = seg.asSlice(offsetBytes, lengthBytes); + slice.fill(value); + } + + /** + * Clears the bits defined by the bitMask + * @param seg the given MemorySegment + * @param offsetBytes offset bytes relative to this Memory start. 
+ * @param bitMask the bits set to one will be cleared + */ + public static void clearBits(final MemorySegment seg, final long offsetBytes, final byte bitMask) { + final byte b = seg.get(JAVA_BYTE, offsetBytes); + seg.set(JAVA_BYTE, offsetBytes, (byte)(b & ~bitMask)); + } + + /** + * Sets the bits defined by the bitMask + * @param seg the given MemorySegment + * @param offsetBytes offset bytes relative to this Memory start + * @param bitMask the bits set to one will be set + */ + public static void setBits(final MemorySegment seg, final long offsetBytes, final byte bitMask) { + final byte b = seg.get(JAVA_BYTE, offsetBytes); + seg.set(JAVA_BYTE, offsetBytes, (byte)(b | bitMask)); + } + } diff --git a/src/main/java/org/apache/datasketches/theta2/AnotB.java b/src/main/java/org/apache/datasketches/theta2/AnotB.java new file mode 100644 index 000000000..72f7e89b2 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/AnotB.java @@ -0,0 +1,205 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.Family; + +/** + * Computes a set difference, A-AND-NOT-B, of two theta sketches. 
+ * This class includes both stateful and stateless operations. + * + *

The stateful operation is as follows:

+ *

+ * AnotB anotb = SetOperationBuilder.buildAnotB();
+ *
+ * anotb.setA(Sketch skA); //The first argument.
+ * anotb.notB(Sketch skB); //The second (subtraction) argument.
+ * anotb.notB(Sketch skC); // ...any number of additional subtractions...
+ * anotb.getResult(false); //Get an interim result.
+ * anotb.notB(Sketch skD); //Additional subtractions.
+ * anotb.getResult(true);  //Final result and resets the AnotB operator.
+ * 
+ * + *

The stateless operation is as follows:

+ *

+ * AnotB anotb = SetOperationBuilder.buildAnotB();
+ *
+ * CompactSketch csk = anotb.aNotB(Sketch skA, Sketch skB);
+ * 
+ * + *

Calling the setA operation a second time essentially clears the internal state and loads + * the new sketch.

+ * + *

The stateless and stateful operations are independent of each other with the exception of + * sharing the same update hash seed loaded as the default seed or specified by the user as an + * argument to the builder.

+ * + * @author Lee Rhodes + */ +public abstract class AnotB extends SetOperation { + + /** + * Constructor + */ + AnotB() {} + + @Override + public Family getFamily() { + return Family.A_NOT_B; + } + + /** + * This is part of a multistep, stateful AnotB operation and sets the given Theta sketch as the + * first argument A of A-AND-NOT-B. This overwrites the internal state of this + * AnotB operator with the contents of the given sketch. + * This sets the stage for multiple following notB steps. + * + *

An input argument of null will throw an exception.

+ * + *

Rationale: In mathematics a "null set" is a set with no members, which we call an empty set. + * That is distinctly different from the java null, which represents a nonexistent object. + * In most cases it is a programming error due to some object that was not properly initialized. + * With a null as the first argument, we cannot know what the user's intent is. + * Since it is very likely that a null is a programming error, we throw an exception.

+ * + *

An empty input argument will set the internal state to empty.

+ * + *

Rationale: An empty set is a mathematically legal concept. Although it makes any subsequent, + * valid argument for B irrelevant, we must allow this and assume the user knows what they are + * doing.

+ * + *

Performing {@link #getResult(boolean)} just after this step will return a compact form of + * the given argument.

+ * + * @param skA The incoming sketch for the first argument, A. + */ + public abstract void setA(Sketch skA); + + /** + * This is part of a multistep, stateful AnotB operation and sets the given Theta sketch as the + * second (or n+1th) argument B of A-AND-NOT-B. + * Performs an AND NOT operation with the existing internal state of this AnotB operator. + * + *

An input argument of null or empty is ignored.

+ * + *

Rationale: A null for the second or following arguments is more tolerable because + * A NOT null is still A even if we don't know exactly what the null represents. It + * clearly does not have any content that overlaps with A. Also, because this can be part of + * a multistep operation with multiple notB steps, other following steps can still produce + * a valid result.

+ * + *

Use {@link #getResult(boolean)} to obtain the result.

+ * + * @param skB The incoming Theta sketch for the second (or following) argument B. + */ + public abstract void notB(Sketch skB); + + /** + * Gets the result of the multistep, stateful operation AnotB that have been executed with calls + * to {@link #setA(Sketch)} and ({@link #notB(Sketch)} or + * {@link #notB(org.apache.datasketches.theta2.Sketch)}). + * + * @param reset If true, clears this operator to the empty state after this result is + * returned. Set this to false if you wish to obtain an intermediate result. + * + * @return the result of this operation as an ordered, on-heap {@link CompactSketch}. + */ + public abstract CompactSketch getResult(boolean reset); + + /** + * Gets the result of the multistep, stateful operation AnotB that have been executed with calls + * to {@link #setA(Sketch)} and ({@link #notB(Sketch)} or + * {@link #notB(org.apache.datasketches.theta2.Sketch)}). + * + * @param dstOrdered If true, the result will be an ordered {@link CompactSketch}. + * See Destination Ordered. + * + * @param dstSeg if not null the given MemorySegment will be the target location of the result. + * See Destination MemorySegment. + * + * @param reset If true, clears this operator to the empty state after this result is + * returned. Set this to false if you wish to obtain an intermediate result. + * + * @return the result of this operation as a {@link CompactSketch} in the given dstMem. + */ + public abstract CompactSketch getResult(boolean dstOrdered, MemorySegment dstSeg, boolean reset); + + /** + * Perform A-and-not-B set operation on the two given sketches and return the result as an + * ordered CompactSketch on the heap. + * + *

This is a stateless operation and has no impact on the internal state of this operator. + * Thus, this is not an accumulating update and does not interact with the {@link #setA(Sketch)}, + * {@link #notB(Sketch)}, {@link #getResult(boolean)}, or + * {@link #getResult(boolean, MemorySegment, boolean)} methods.

+ * + *

If either argument is null an exception is thrown.

+ * + *

Rationale: In mathematics a "null set" is a set with no members, which we call an empty set. + * That is distinctly different from the java null, which represents a nonexistent object. + * In most cases null is a programming error due to a non-initialized object.

+ * + *

With a null as the first argument we cannot know what the user's intent is and throw an + * exception. With a null as the second argument for this method we must return a result and + * there are no possible following arguments for the second argument, so we throw an + * exception.

+ * + * @param skA The incoming sketch for the first argument. It must not be null. + * @param skB The incoming sketch for the second argument. It must not be null. + * @return an ordered CompactSketch on the heap + */ + public CompactSketch aNotB(final Sketch skA, final Sketch skB) { + return aNotB(skA, skB, true, null); + } + + /** + * Perform A-and-not-B set operation on the two given sketches and return the result as a + * CompactSketch. + * + *

This is a stateless operation and has no impact on the internal state of this operator. + * Thus, this is not an accumulating update and does not interact with the {@link #setA(Sketch)}, + * {@link #notB(Sketch)}, {@link #getResult(boolean)}, or + * {@link #getResult(boolean, MemorySegment, boolean)} methods.

+ * + *

If either argument is null an exception is thrown.

+ * + *

Rationale: In mathematics a "null set" is a set with no members, which we call an empty set. + * That is distinctly different from the java null, which represents a nonexistent object. + * In most cases null is a programming error due to a non-initialized object.

+ * + *

With a null as the first argument we cannot know what the user's intent is and throw an + * exception. With a null as the second argument for this method we must return a result and + * there are no possible following arguments for the second argument, so we throw an + * exception.

+ * + * @param skA The incoming sketch for the first argument. It must not be null. + * @param skB The incoming sketch for the second argument. It must not be null. + * @param dstOrdered + * See Destination Ordered. + * @param dstSeg + * See Destination MemorySegment. + * @return the result as a CompactSketch. + */ + public abstract CompactSketch aNotB(Sketch skA, Sketch skB, boolean dstOrdered, + MemorySegment dstSeg); + +} diff --git a/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java b/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java new file mode 100644 index 000000000..4931bb680 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java @@ -0,0 +1,241 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import static org.apache.datasketches.common.Util.exactLog2OfLong; +import static org.apache.datasketches.thetacommon.HashOperations.checkThetaCorruption; +import static org.apache.datasketches.thetacommon.HashOperations.continueCondition; +import static org.apache.datasketches.thetacommon.HashOperations.hashSearch; +import static org.apache.datasketches.thetacommon.HashOperations.hashSearchOrInsert; +import static org.apache.datasketches.thetacommon.HashOperations.minLgHashTableSize; + +import java.lang.foreign.MemorySegment; +import java.util.Arrays; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * Implements the A-and-not-B operations. + * @author Lee Rhodes + * @author Kevin Lang + */ +final class AnotBimpl extends AnotB { + private final short seedHash_; + private boolean empty_; + private long thetaLong_; + private long[] hashArr_ = new long[0]; //compact array w curCount_ entries + private int curCount_; + + /** + * Construct a new AnotB SetOperation on the java heap. Called by SetOperation.Builder. + * + * @param seed See seed + */ + AnotBimpl(final long seed) { + this(ThetaUtil.computeSeedHash(seed)); + } + + /** + * Construct a new AnotB SetOperation on the java heap. + * + * @param seedHash 16 bit hash of the chosen update seed. 
+ */ + private AnotBimpl(final short seedHash) { + seedHash_ = seedHash; + reset(); + } + + @Override + public void setA(final Sketch skA) { + if (skA == null) { + reset(); + throw new SketchesArgumentException("The input argument A must not be null"); + } + if (skA.isEmpty()) { + reset(); + return; + } + //skA is not empty + ThetaUtil.checkSeedHashes(seedHash_, skA.getSeedHash()); + + //process A + hashArr_ = getHashArrA(skA); + empty_ = false; + thetaLong_ = skA.getThetaLong(); + curCount_ = hashArr_.length; + } + + @Override + public void notB(final Sketch skB) { + if (empty_ || skB == null || skB.isEmpty()) { return; } + //local and skB is not empty + ThetaUtil.checkSeedHashes(seedHash_, skB.getSeedHash()); + + thetaLong_ = Math.min(thetaLong_, skB.getThetaLong()); + + //process B + hashArr_ = getResultHashArr(thetaLong_, curCount_, hashArr_, skB); + curCount_ = hashArr_.length; + empty_ = curCount_ == 0 && thetaLong_ == Long.MAX_VALUE; + } + + @Override + public CompactSketch getResult(final boolean reset) { + return getResult(true, null, reset); + } + + @Override + public CompactSketch getResult(final boolean dstOrdered, final MemorySegment dstSeg, + final boolean reset) { + final CompactSketch result = CompactOperations.componentsToCompact( + thetaLong_, curCount_, seedHash_, empty_, true, false, dstOrdered, dstSeg, hashArr_.clone()); + if (reset) { reset(); } + return result; + } + + @Override + public CompactSketch aNotB(final Sketch skA, final Sketch skB, final boolean dstOrdered, + final MemorySegment dstSeg) { + if (skA == null || skB == null) { + throw new SketchesArgumentException("Neither argument may be null"); + } + //Both skA & skB are not null + + final long minThetaLong = Math.min(skA.getThetaLong(), skB.getThetaLong()); + + if (skA.isEmpty()) { return skA.compact(dstOrdered, dstSeg); } + //A is not Empty + ThetaUtil.checkSeedHashes(skA.getSeedHash(), seedHash_); + + if (skB.isEmpty()) { + return skA.compact(dstOrdered, dstSeg); + } + 
ThetaUtil.checkSeedHashes(skB.getSeedHash(), seedHash_); + //Both skA & skB are not empty + + //process A + final long[] hashArrA = getHashArrA(skA); + final int countA = hashArrA.length; + + //process B + final long[] hashArrOut = getResultHashArr(minThetaLong, countA, hashArrA, skB); //out is clone + final int countOut = hashArrOut.length; + final boolean empty = countOut == 0 && minThetaLong == Long.MAX_VALUE; + + final CompactSketch result = CompactOperations.componentsToCompact( + minThetaLong, countOut, seedHash_, empty, true, false, dstOrdered, dstSeg, hashArrOut); + return result; + } + + @Override + int getRetainedEntries() { + return curCount_; + } + + //restricted + + private static long[] getHashArrA(final Sketch skA) { //returns a new array + //Get skA cache as array + final CompactSketch cskA = skA.compact(false, null); //sorting not required + final long[] hashArrA = cskA.getCache().clone(); + return hashArrA; + } + + private static long[] getResultHashArr( //returns a new array + final long minThetaLong, + final int countA, + final long[] hashArrA, + final Sketch skB) { + + // Rebuild or get hashtable of skB + final long[] hashTableB; //read only + if (skB instanceof CompactSketch) { + hashTableB = convertToHashTable(skB, minThetaLong, ThetaUtil.REBUILD_THRESHOLD); + } else { + hashTableB = skB.getCache(); + } + + //build temporary result arrays of skA + final long[] tmpHashArrA = new long[countA]; + + //search for non matches and build temp arrays + final int lgHTBLen = exactLog2OfLong(hashTableB.length); + int nonMatches = 0; + for (int i = 0; i < countA; i++) { + final long hash = hashArrA[i]; + if (hash != 0 && hash < minThetaLong) { //only allows hashes of A < minTheta + final int index = hashSearch(hashTableB, lgHTBLen, hash); + if (index == -1) { + tmpHashArrA[nonMatches] = hash; + nonMatches++; + } + } + } + return Arrays.copyOfRange(tmpHashArrA, 0, nonMatches); + } + + private static long[] convertToHashTable( + final Sketch sketch, + final 
long thetaLong, + final double rebuildThreshold) { + final int lgArrLongs = minLgHashTableSize(sketch.getRetainedEntries(true), rebuildThreshold); + final int arrLongs = 1 << lgArrLongs; + final long[] hashTable = new long[arrLongs]; + checkThetaCorruption(thetaLong); + final HashIterator it = sketch.iterator(); + while (it.next()) { + final long hash = it.get(); + if (continueCondition(thetaLong, hash) ) { + continue; + } + hashSearchOrInsert(hashTable, lgArrLongs, hash); + } + return hashTable; + } + + private void reset() { + thetaLong_ = Long.MAX_VALUE; + empty_ = true; + hashArr_ = new long[0]; + curCount_ = 0; + } + + @Override + long[] getCache() { + return hashArr_.clone(); + } + + @Override + short getSeedHash() { + return seedHash_; + } + + @Override + long getThetaLong() { + return thetaLong_; + } + + @Override + boolean isEmpty() { + return empty_; + } + +} diff --git a/src/main/java/org/apache/datasketches/theta2/BitPacking.java b/src/main/java/org/apache/datasketches/theta2/BitPacking.java new file mode 100644 index 000000000..e2b6be2fd --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/BitPacking.java @@ -0,0 +1,6292 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import org.apache.datasketches.common.SketchesArgumentException; + +/** + * Used as part of Theta compression. + */ +public class BitPacking { + + /** + * The bit packing operation + * @param value the value to pack + * @param bits number of bits to pack + * @param buffer the output byte array buffer + * @param bufOffset the byte offset in the buffer + * @param bitOffset the bit offset + */ + public static void packBits(final long value, int bits, final byte[] buffer, int bufOffset, final int bitOffset) { + if (bitOffset > 0) { + final int chunkBits = 8 - bitOffset; + final int mask = (1 << chunkBits) - 1; + if (bits < chunkBits) { + buffer[bufOffset] |= (value << (chunkBits - bits)) & mask; + return; + } + buffer[bufOffset++] |= (value >>> (bits - chunkBits)) & mask; + bits -= chunkBits; + } + while (bits >= 8) { + buffer[bufOffset++] = (byte)(value >>> (bits - 8)); + bits -= 8; + } + if (bits > 0) { + buffer[bufOffset] = (byte)(value << (8 - bits)); + } + } + + /** + * The unpacking operation + * @param value the output array + * @param index index of the value array + * @param bits the number of bits to unpack + * @param buffer the input packed buffer + * @param bufOffset the buffer offset + * @param bitOffset the bit offset + */ + public static void unpackBits(final long[] value, final int index, int bits, final byte[] buffer, + int bufOffset,final int bitOffset) { + final int availBits = 8 - bitOffset; + final int chunkBits = availBits <= bits ? availBits : bits; + final int mask = (1 << chunkBits) - 1; + value[index] = (buffer[bufOffset] >>> (availBits - chunkBits)) & mask; + bufOffset += availBits == chunkBits ? 
1 : 0; + bits -= chunkBits; + while (bits >= 8) { + value[index] <<= 8; + value[index] |= (Byte.toUnsignedLong(buffer[bufOffset++])); + bits -= 8; + } + if (bits > 0) { + value[index] <<= bits; + value[index] |= Byte.toUnsignedLong(buffer[bufOffset]) >>> (8 - bits); + } + } + + // pack given number of bits from a block of 8 64-bit values into bytes + // we don't need 0 and 64 bits + // we assume that higher bits (which we are not packing) are zeros + // this assumption allows to avoid masking operations + + static void packBitsBlock8(final long[] values, final int i, final byte[] buf, final int off, final int bits) { + switch (bits) { + case 1: packBits1(values, i, buf, off); break; + case 2: packBits2(values, i, buf, off); break; + case 3: packBits3(values, i, buf, off); break; + case 4: packBits4(values, i, buf, off); break; + case 5: packBits5(values, i, buf, off); break; + case 6: packBits6(values, i, buf, off); break; + case 7: packBits7(values, i, buf, off); break; + case 8: packBits8(values, i, buf, off); break; + case 9: packBits9(values, i, buf, off); break; + case 10: packBits10(values, i, buf, off); break; + case 11: packBits11(values, i, buf, off); break; + case 12: packBits12(values, i, buf, off); break; + case 13: packBits13(values, i, buf, off); break; + case 14: packBits14(values, i, buf, off); break; + case 15: packBits15(values, i, buf, off); break; + case 16: packBits16(values, i, buf, off); break; + case 17: packBits17(values, i, buf, off); break; + case 18: packBits18(values, i, buf, off); break; + case 19: packBits19(values, i, buf, off); break; + case 20: packBits20(values, i, buf, off); break; + case 21: packBits21(values, i, buf, off); break; + case 22: packBits22(values, i, buf, off); break; + case 23: packBits23(values, i, buf, off); break; + case 24: packBits24(values, i, buf, off); break; + case 25: packBits25(values, i, buf, off); break; + case 26: packBits26(values, i, buf, off); break; + case 27: packBits27(values, i, buf, off); 
break; + case 28: packBits28(values, i, buf, off); break; + case 29: packBits29(values, i, buf, off); break; + case 30: packBits30(values, i, buf, off); break; + case 31: packBits31(values, i, buf, off); break; + case 32: packBits32(values, i, buf, off); break; + case 33: packBits33(values, i, buf, off); break; + case 34: packBits34(values, i, buf, off); break; + case 35: packBits35(values, i, buf, off); break; + case 36: packBits36(values, i, buf, off); break; + case 37: packBits37(values, i, buf, off); break; + case 38: packBits38(values, i, buf, off); break; + case 39: packBits39(values, i, buf, off); break; + case 40: packBits40(values, i, buf, off); break; + case 41: packBits41(values, i, buf, off); break; + case 42: packBits42(values, i, buf, off); break; + case 43: packBits43(values, i, buf, off); break; + case 44: packBits44(values, i, buf, off); break; + case 45: packBits45(values, i, buf, off); break; + case 46: packBits46(values, i, buf, off); break; + case 47: packBits47(values, i, buf, off); break; + case 48: packBits48(values, i, buf, off); break; + case 49: packBits49(values, i, buf, off); break; + case 50: packBits50(values, i, buf, off); break; + case 51: packBits51(values, i, buf, off); break; + case 52: packBits52(values, i, buf, off); break; + case 53: packBits53(values, i, buf, off); break; + case 54: packBits54(values, i, buf, off); break; + case 55: packBits55(values, i, buf, off); break; + case 56: packBits56(values, i, buf, off); break; + case 57: packBits57(values, i, buf, off); break; + case 58: packBits58(values, i, buf, off); break; + case 59: packBits59(values, i, buf, off); break; + case 60: packBits60(values, i, buf, off); break; + case 61: packBits61(values, i, buf, off); break; + case 62: packBits62(values, i, buf, off); break; + case 63: packBits63(values, i, buf, off); break; + default: throw new SketchesArgumentException("wrong number of bits in packBitsBlock8: " + bits); + } + } + + static void unpackBitsBlock8(final long[] 
values, final int i, final byte[] buf, final int off, final int bits) { + switch (bits) { + case 1: unpackBits1(values, i, buf, off); break; + case 2: unpackBits2(values, i, buf, off); break; + case 3: unpackBits3(values, i, buf, off); break; + case 4: unpackBits4(values, i, buf, off); break; + case 5: unpackBits5(values, i, buf, off); break; + case 6: unpackBits6(values, i, buf, off); break; + case 7: unpackBits7(values, i, buf, off); break; + case 8: unpackBits8(values, i, buf, off); break; + case 9: unpackBits9(values, i, buf, off); break; + case 10: unpackBits10(values, i, buf, off); break; + case 11: unpackBits11(values, i, buf, off); break; + case 12: unpackBits12(values, i, buf, off); break; + case 13: unpackBits13(values, i, buf, off); break; + case 14: unpackBits14(values, i, buf, off); break; + case 15: unpackBits15(values, i, buf, off); break; + case 16: unpackBits16(values, i, buf, off); break; + case 17: unpackBits17(values, i, buf, off); break; + case 18: unpackBits18(values, i, buf, off); break; + case 19: unpackBits19(values, i, buf, off); break; + case 20: unpackBits20(values, i, buf, off); break; + case 21: unpackBits21(values, i, buf, off); break; + case 22: unpackBits22(values, i, buf, off); break; + case 23: unpackBits23(values, i, buf, off); break; + case 24: unpackBits24(values, i, buf, off); break; + case 25: unpackBits25(values, i, buf, off); break; + case 26: unpackBits26(values, i, buf, off); break; + case 27: unpackBits27(values, i, buf, off); break; + case 28: unpackBits28(values, i, buf, off); break; + case 29: unpackBits29(values, i, buf, off); break; + case 30: unpackBits30(values, i, buf, off); break; + case 31: unpackBits31(values, i, buf, off); break; + case 32: unpackBits32(values, i, buf, off); break; + case 33: unpackBits33(values, i, buf, off); break; + case 34: unpackBits34(values, i, buf, off); break; + case 35: unpackBits35(values, i, buf, off); break; + case 36: unpackBits36(values, i, buf, off); break; + case 37: 
unpackBits37(values, i, buf, off); break; + case 38: unpackBits38(values, i, buf, off); break; + case 39: unpackBits39(values, i, buf, off); break; + case 40: unpackBits40(values, i, buf, off); break; + case 41: unpackBits41(values, i, buf, off); break; + case 42: unpackBits42(values, i, buf, off); break; + case 43: unpackBits43(values, i, buf, off); break; + case 44: unpackBits44(values, i, buf, off); break; + case 45: unpackBits45(values, i, buf, off); break; + case 46: unpackBits46(values, i, buf, off); break; + case 47: unpackBits47(values, i, buf, off); break; + case 48: unpackBits48(values, i, buf, off); break; + case 49: unpackBits49(values, i, buf, off); break; + case 50: unpackBits50(values, i, buf, off); break; + case 51: unpackBits51(values, i, buf, off); break; + case 52: unpackBits52(values, i, buf, off); break; + case 53: unpackBits53(values, i, buf, off); break; + case 54: unpackBits54(values, i, buf, off); break; + case 55: unpackBits55(values, i, buf, off); break; + case 56: unpackBits56(values, i, buf, off); break; + case 57: unpackBits57(values, i, buf, off); break; + case 58: unpackBits58(values, i, buf, off); break; + case 59: unpackBits59(values, i, buf, off); break; + case 60: unpackBits60(values, i, buf, off); break; + case 61: unpackBits61(values, i, buf, off); break; + case 62: unpackBits62(values, i, buf, off); break; + case 63: unpackBits63(values, i, buf, off); break; + default: throw new SketchesArgumentException("wrong number of bits unpackBitsBlock8: " + bits); + } + } + + static void packBits1(final long[] values, final int i, final byte[] buf, final int off) { + buf[off] = (byte) (values[i + 0] << 7); + buf[off] |= values[i + 1] << 6; + buf[off] |= values[i + 2] << 5; + buf[off] |= values[i + 3] << 4; + buf[off] |= values[i + 4] << 3; + buf[off] |= values[i + 5] << 2; + buf[off] |= values[i + 6] << 1; + buf[off] |= values[i + 7]; + } + + static void packBits2(final long[] values, final int i, final byte[] buf, int off) { + buf[off] 
= (byte) (values[i + 0] << 6); + buf[off] |= values[i + 1] << 4; + buf[off] |= values[i + 2] << 2; + buf[off++] |= values[i + 3]; + + buf[off] = (byte) (values[i + 4] << 6); + buf[off] |= values[i + 5] << 4; + buf[off] |= values[i + 6] << 2; + buf[off] |= values[i + 7]; + } + + static void packBits3(final long[] values, final int i, final byte[] buf, int off) { + buf[off] = (byte) (values[i + 0] << 5); + buf[off] |= values[i + 1] << 2; + buf[off++] |= values[i + 2] >>> 1; + + buf[off] = (byte) (values[i + 2] << 7); + buf[off] |= values[i + 3] << 4; + buf[off] |= values[i + 4] << 1; + buf[off++] |= values[i + 5] >>> 2; + + buf[off] = (byte) (values[i + 5] << 6); + buf[off] |= values[i + 6] << 3; + buf[off] |= values[i + 7]; + } + + static void packBits4(final long[] values, final int i, final byte[] buf, int off) { + buf[off] = (byte) (values[i + 0] << 4); + buf[off++] |= values[i + 1]; + + buf[off] = (byte) (values[i + 2] << 4); + buf[off++] |= values[i + 3]; + + buf[off] = (byte) (values[i + 4] << 4); + buf[off++] |= values[i + 5]; + + buf[off] = (byte) (values[i + 6] << 4); + buf[off] |= values[i + 7]; + } + + static void packBits5(final long[] values, final int i, final byte[] buf, int off) { + buf[off] = (byte) (values[i + 0] << 3); + buf[off++] |= values[i + 1] >>> 2; + + buf[off] = (byte) (values[i + 1] << 6); + buf[off] |= values[i + 2] << 1; + buf[off++] |= values[i + 3] >>> 4; + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 1; + + buf[off] = (byte) (values[i + 4] << 7); + buf[off] |= values[i + 5] << 2; + buf[off++] |= values[i + 6] >>> 3; + + buf[off] = (byte) (values[i + 6] << 5); + buf[off] |= values[i + 7]; + } + + static void packBits6(final long[] values, final int i, final byte[] buf, int off) { + buf[off] = (byte) (values[i + 0] << 2); + buf[off++] |= values[i + 1] >>> 4; + + buf[off] = (byte) (values[i + 1] << 4); + buf[off++] |= values[i + 2] >>> 2; + + buf[off] = (byte) (values[i + 2] << 6); + buf[off++] |= values[i 
+ 3]; + + buf[off] = (byte) (values[i + 4] << 2); + buf[off++] |= values[i + 5] >>> 4; + + buf[off] = (byte) (values[i + 5] << 4); + buf[off++] |= values[i + 6] >>> 2; + + buf[off] = (byte) (values[i + 6] << 6); + buf[off] |= values[i + 7]; + } + + static void packBits7(final long[] values, final int i, final byte[] buf, int off) { + buf[off] = (byte) (values[i + 0] << 1); + buf[off++] |= values[i + 1] >>> 6; + + buf[off] = (byte) (values[i + 1] << 2); + buf[off++] |= values[i + 2] >>> 5; + + buf[off] = (byte) (values[i + 2] << 3); + buf[off++] |= values[i + 3] >>> 4; + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 3; + + buf[off] = (byte) (values[i + 4] << 5); + buf[off++] |= values[i + 5] >>> 2; + + buf[off] = (byte) (values[i + 5] << 6); + buf[off++] |= values[i + 6] >>> 1; + + buf[off] = (byte) (values[i + 6] << 7); + buf[off] |= values[i + 7]; + } + + static void packBits8(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0]); + buf[off++] = (byte) (values[i + 1]); + buf[off++] = (byte) (values[i + 2]); + buf[off++] = (byte) (values[i + 3]); + buf[off++] = (byte) (values[i + 4]); + buf[off++] = (byte) (values[i + 5]); + buf[off++] = (byte) (values[i + 6]); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits9(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 1); + + buf[off] = (byte) (values[i + 0] << 7); + buf[off++] |= values[i + 1] >>> 2; + + buf[off] = (byte) (values[i + 1] << 6); + buf[off++] |= values[i + 2] >>> 3; + + buf[off] = (byte) (values[i + 2] << 5); + buf[off++] |= values[i + 3] >>> 4; + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 5; + + buf[off] = (byte) (values[i + 4] << 3); + buf[off++] |= values[i + 5] >>> 6; + + buf[off] = (byte) (values[i + 5] << 2); + buf[off++] |= values[i + 6] >>> 7; + + buf[off] = (byte) (values[i + 6] << 1); + buf[off++] |= values[i + 7] >>> 8; 
+ + buf[off] = (byte) (values[i + 7]); + } + + static void packBits10(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 2); + + buf[off] = (byte) (values[i + 0] << 6); + buf[off++] |= values[i + 1] >>> 4; + + buf[off] = (byte) (values[i + 1] << 4); + buf[off++] |= values[i + 2] >>> 6; + + buf[off] = (byte) (values[i + 2] << 2); + buf[off++] |= values[i + 3] >>> 8; + + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 2); + + buf[off] = (byte) (values[i + 4] << 6); + buf[off++] |= values[i + 5] >>> 4; + + buf[off] = (byte) (values[i + 5] << 4); + buf[off++] |= values[i + 6] >>> 6; + + buf[off] = (byte) (values[i + 6] << 2); + buf[off++] |= values[i + 7] >>> 8; + + buf[off] = (byte) (values[i + 7]); + } + + static void packBits11(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 3); + + buf[off] = (byte) (values[i + 0] << 5); + buf[off++] |= values[i + 1] >>> 6; + + buf[off] = (byte) (values[i + 1] << 2); + buf[off++] |= values[i + 2] >>> 9; + + buf[off++] = (byte) (values[i + 2] >>> 1); + + buf[off] = (byte) (values[i + 2] << 7); + buf[off++] |= values[i + 3] >>> 4; + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 7; + + buf[off] = (byte) (values[i + 4] << 1); + buf[off++] |= values[i + 5] >>> 10; + + buf[off++] = (byte) (values[i + 5] >>> 2); + + buf[off] = (byte) (values[i + 5] << 6); + buf[off++] |= values[i + 6] >>> 5; + + buf[off] = (byte) (values[i + 6] << 3); + buf[off++] |= values[i + 7] >>> 8; + + buf[off] = (byte) (values[i + 7]); + } + + static void packBits12(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 4); + + buf[off] = (byte) (values[i + 0] << 4); + buf[off++] |= values[i + 1] >>> 8; + + buf[off++] = (byte) (values[i + 1]); + + buf[off++] = (byte) (values[i + 2] >>> 4); + + buf[off] = (byte) (values[i + 2] << 4); + buf[off++] |= 
values[i + 3] >>> 8; + + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 4); + + buf[off] = (byte) (values[i + 4] << 4); + buf[off++] |= values[i + 5] >>> 8; + + buf[off++] = (byte) (values[i + 5]); + + buf[off++] = (byte) (values[i + 6] >>> 4); + + buf[off] = (byte) (values[i + 6] << 4); + buf[off++] |= values[i + 7] >>> 8; + + buf[off] = (byte) (values[i + 7]); + } + + static void packBits13(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 5); + + buf[off] = (byte) (values[i + 0] << 3); + buf[off++] |= values[i + 1] >>> 10; + + buf[off++] = (byte) (values[i + 1] >>> 2); + + buf[off] = (byte) (values[i + 1] << 6); + buf[off++] |= values[i + 2] >>> 7; + + buf[off] = (byte) (values[i + 2] << 1); + buf[off++] |= values[i + 3] >>> 12; + + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 9; + + buf[off++] = (byte) (values[i + 4] >>> 1); + + buf[off] = (byte) (values[i + 4] << 7); + buf[off++] |= values[i + 5] >>> 6; + + buf[off] = (byte) (values[i + 5] << 2); + buf[off++] |= values[i + 6] >>> 11; + + buf[off++] = (byte) (values[i + 6] >>> 3); + + buf[off] = (byte) (values[i + 6] << 5); + buf[off++] |= values[i + 7] >>> 8; + + buf[off] = (byte) (values[i + 7]); + } + + static void packBits14(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 6); + + buf[off] = (byte) (values[i + 0] << 2); + buf[off++] |= values[i + 1] >>> 12; + + buf[off++] = (byte) (values[i + 1] >>> 4); + + buf[off] = (byte) (values[i + 1] << 4); + buf[off++] |= values[i + 2] >>> 10; + + buf[off++] = (byte) (values[i + 2] >>> 2); + + buf[off] = (byte) (values[i + 2] << 6); + buf[off++] |= values[i + 3] >>> 8; + + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 6); + + buf[off] = (byte) (values[i + 4] << 2); + buf[off++] |= values[i + 5] >>> 12; + + buf[off++] = 
(byte) (values[i + 5] >>> 4); + + buf[off] = (byte) (values[i + 5] << 4); + buf[off++] |= values[i + 6] >>> 10; + + buf[off++] = (byte) (values[i + 6] >>> 2); + + buf[off] = (byte) (values[i + 6] << 6); + buf[off++] |= values[i + 7] >>> 8; + + buf[off] = (byte) (values[i + 7]); + } + + static void packBits15(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 7); + + buf[off] = (byte) (values[i + 0] << 1); + buf[off++] |= values[i + 1] >>> 14; + + buf[off++] = (byte) (values[i + 1] >>> 6); + + buf[off] = (byte) (values[i + 1] << 2); + buf[off++] |= values[i + 2] >>> 13; + + buf[off++] = (byte) (values[i + 2] >>> 5); + + buf[off] = (byte) (values[i + 2] << 3); + buf[off++] |= values[i + 3] >>> 12; + + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 11; + + buf[off++] = (byte) (values[i + 4] >>> 3); + + buf[off] = (byte) (values[i + 4] << 5); + buf[off++] |= values[i + 5] >>> 10; + + buf[off++] = (byte) (values[i + 5] >>> 2); + + buf[off] = (byte) (values[i + 5] << 6); + buf[off++] |= values[i + 6] >>> 9; + + buf[off++] = (byte) (values[i + 6] >>> 1); + + buf[off] = (byte) (values[i + 6] << 7); + buf[off++] |= values[i + 7] >>> 8; + + buf[off] = (byte) (values[i + 7]); + } + + static void packBits16(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 8); + buf[off++] = (byte) (values[i + 0]); + + buf[off++] = (byte) (values[i + 1] >>> 8); + buf[off++] = (byte) (values[i + 1]); + + buf[off++] = (byte) (values[i + 2] >>> 8); + buf[off++] = (byte) (values[i + 2]); + + buf[off++] = (byte) (values[i + 3] >>> 8); + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 8); + buf[off++] = (byte) (values[i + 4]); + + buf[off++] = (byte) (values[i + 5] >>> 8); + buf[off++] = (byte) (values[i + 5]); + + buf[off++] = (byte) (values[i + 6] >>> 8); + buf[off++] = (byte) (values[i + 
6]); + + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits17(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 9); + + buf[off++] = (byte) (values[i + 0] >>> 1); + + buf[off] = (byte) (values[i + 0] << 7); + buf[off++] |= values[i + 1] >>> 10; + + buf[off++] = (byte) (values[i + 1] >>> 2); + + buf[off] = (byte) (values[i + 1] << 6); + buf[off++] |= values[i + 2] >>> 11; + + buf[off++] = (byte) (values[i + 2] >>> 3); + + buf[off] = (byte) (values[i + 2] << 5); + buf[off++] |= values[i + 3] >>> 12; + + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 13; + + buf[off++] = (byte) (values[i + 4] >>> 5); + + buf[off] = (byte) (values[i + 4] << 3); + buf[off++] |= values[i + 5] >>> 14; + + buf[off++] = (byte) (values[i + 5] >>> 6); + + buf[off] = (byte) (values[i + 5] << 2); + buf[off++] |= values[i + 6] >>> 15; + + buf[off++] = (byte) (values[i + 6] >>> 7); + + buf[off] = (byte) (values[i + 6] << 1); + buf[off++] |= values[i + 7] >>> 16; + + buf[off++] = (byte) (values[i + 7] >>> 8); + + buf[off] = (byte) (values[i + 7]); + } + + static void packBits18(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 10); + + buf[off++] = (byte) (values[i + 0] >>> 2); + + buf[off] = (byte) (values[i + 0] << 6); + buf[off++] |= values[i + 1] >>> 12; + + buf[off++] = (byte) (values[i + 1] >>> 4); + + buf[off] = (byte) (values[i + 1] << 4); + buf[off++] |= values[i + 2] >>> 14; + + buf[off++] = (byte) (values[i + 2] >>> 6); + + buf[off] = (byte) (values[i + 2] << 2); + buf[off++] |= values[i + 3] >>> 16; + + buf[off++] = (byte) (values[i + 3] >>> 8); + + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 10); + + buf[off++] = (byte) (values[i + 4] >>> 2); + + buf[off] = (byte) (values[i + 4] << 6); + buf[off++] |= values[i + 
5] >>> 12; + + buf[off++] = (byte) (values[i + 5] >>> 4); + + buf[off] = (byte) (values[i + 5] << 4); + buf[off++] |= values[i + 6] >>> 14; + + buf[off++] = (byte) (values[i + 6] >>> 6); + + buf[off] = (byte) (values[i + 6] << 2); + buf[off++] |= values[i + 7] >>> 16; + + buf[off++] = (byte) (values[i + 7] >>> 8); + + buf[off] = (byte) (values[i + 7]); + } + + static void packBits19(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 11); + + buf[off++] = (byte) (values[i + 0] >>> 3); + + buf[off] = (byte) (values[i + 0] << 5); + buf[off++] |= values[i + 1] >>> 14; + + buf[off++] = (byte) (values[i + 1] >>> 6); + + buf[off] = (byte) (values[i + 1] << 2); + buf[off++] |= values[i + 2] >>> 17; + + buf[off++] = (byte) (values[i + 2] >>> 9); + + buf[off++] = (byte) (values[i + 2] >>> 1); + + buf[off] = (byte) (values[i + 2] << 7); + buf[off++] |= values[i + 3] >>> 12; + + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 15; + + buf[off++] |= values[i + 4] >>> 7; + + buf[off] = (byte) (values[i + 4] << 1); + buf[off++] |= values[i + 5] >>> 18; + + buf[off++] = (byte) (values[i + 5] >>> 10); + + buf[off++] = (byte) (values[i + 5] >>> 2); + + buf[off] = (byte) (values[i + 5] << 6); + buf[off++] |= values[i + 6] >>> 13; + + buf[off++] = (byte) (values[i + 6] >>> 5); + + buf[off] = (byte) (values[i + 6] << 3); + buf[off++] |= values[i + 7] >>> 16; + + buf[off++] = (byte) (values[i + 7] >>> 8); + + buf[off] = (byte) (values[i + 7]); + } + + static void packBits20(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 12); + + buf[off++] = (byte) (values[i + 0] >>> 4); + + buf[off] = (byte) (values[i + 0] << 4); + buf[off++] |= values[i + 1] >>> 16; + + buf[off++] = (byte) (values[i + 1] >>> 8); + + buf[off++] = (byte) (values[i + 1]); + + buf[off++] = (byte) (values[i + 2] >>> 12); + + buf[off++] = (byte) 
(values[i + 2] >>> 4); + + buf[off] = (byte) (values[i + 2] << 4); + buf[off++] |= values[i + 3] >>> 16; + + buf[off++] = (byte) (values[i + 3] >>> 8); + + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 12); + + buf[off++] = (byte) (values[i + 4] >>> 4); + + buf[off] = (byte) (values[i + 4] << 4); + buf[off++] |= values[i + 5] >>> 16; + + buf[off++] = (byte) (values[i + 5] >>> 8); + + buf[off++] = (byte) (values[i + 5]); + + buf[off++] = (byte) (values[i + 6] >>> 12); + + buf[off++] = (byte) (values[i + 6] >>> 4); + + buf[off] = (byte) (values[i + 6] << 4); + buf[off++] |= values[i + 7] >>> 16; + + buf[off++] = (byte) (values[i + 7] >>> 8); + + buf[off] = (byte) (values[i + 7]); + } + + static void packBits21(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 13); + + buf[off++] = (byte) (values[i + 0] >>> 5); + + buf[off] = (byte) (values[i + 0] << 3); + buf[off++] |= values[i + 1] >>> 18; + + buf[off++] = (byte) (values[i + 1] >>> 10); + + buf[off++] = (byte) (values[i + 1] >>> 2); + + buf[off] = (byte) (values[i + 1] << 6); + buf[off++] |= values[i + 2] >>> 15; + + buf[off++] = (byte) (values[i + 2] >>> 7); + + buf[off] = (byte) (values[i + 2] << 1); + buf[off++] |= values[i + 3] >>> 20; + + buf[off++] = (byte) (values[i + 3] >>> 12); + + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 17; + + buf[off++] = (byte) (values[i + 4] >>> 9); + + buf[off++] = (byte) (values[i + 4] >>> 1); + + buf[off] = (byte) (values[i + 4] << 7); + buf[off++] |= values[i + 5] >>> 14; + + buf[off++] = (byte) (values[i + 5] >>> 6); + + buf[off] = (byte) (values[i + 5] << 2); + buf[off++] |= values[i + 6] >>> 19; + + buf[off++] = (byte) (values[i + 6] >>> 11); + + buf[off++] = (byte) (values[i + 6] >>> 3); + + buf[off] = (byte) (values[i + 6] << 5); + buf[off++] |= values[i + 7] >>> 16; + + buf[off++] = (byte) (values[i + 7] >>> 
8); + + buf[off] = (byte) (values[i + 7]); + } + + static void packBits22(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 14); + + buf[off++] = (byte) (values[i + 0] >>> 6); + + buf[off] = (byte) (values[i + 0] << 2); + buf[off++] |= values[i + 1] >>> 20; + + buf[off++] = (byte) (values[i + 1] >>> 12); + + buf[off++] = (byte) (values[i + 1] >>> 4); + + buf[off] = (byte) (values[i + 1] << 4); + buf[off++] |= values[i + 2] >>> 18; + + buf[off++] = (byte) (values[i + 2] >>> 10); + + buf[off++] = (byte) (values[i + 2] >>> 2); + + buf[off] = (byte) (values[i + 2] << 6); + buf[off++] |= values[i + 3] >>> 16; + + buf[off++] = (byte) (values[i + 3] >>> 8); + + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 14); + + buf[off++] = (byte) (values[i + 4] >>> 6); + + buf[off] = (byte) (values[i + 4] << 2); + buf[off++] |= values[i + 5] >>> 20; + + buf[off++] = (byte) (values[i + 5] >>> 12); + + buf[off++] = (byte) (values[i + 5] >>> 4); + + buf[off] = (byte) (values[i + 5] << 4); + buf[off++] |= values[i + 6] >>> 18; + + buf[off++] = (byte) (values[i + 6] >>> 10); + + buf[off++] = (byte) (values[i + 6] >>> 2); + + buf[off] = (byte) (values[i + 6] << 6); + buf[off++] |= values[i + 7] >>> 16; + + buf[off++] = (byte) (values[i + 7] >>> 8); + + buf[off] = (byte) (values[i + 7]); + } + + static void packBits23(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 15); + + buf[off++] = (byte) (values[i + 0] >>> 7); + + buf[off] = (byte) (values[i + 0] << 1); + buf[off++] |= values[i + 1] >>> 22; + + buf[off++] = (byte) (values[i + 1] >>> 14); + + buf[off++] = (byte) (values[i + 1] >>> 6); + + buf[off] = (byte) (values[i + 1] << 2); + buf[off++] |= values[i + 2] >>> 21; + + buf[off++] = (byte) (values[i + 2] >>> 13); + + buf[off++] = (byte) (values[i + 2] >>> 5); + + buf[off] = (byte) (values[i + 2] << 3); + buf[off++] |= values[i + 3] >>> 20; + + 
buf[off++] = (byte) (values[i + 3] >>> 12); + + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 19; + + buf[off++] = (byte) (values[i + 4] >>> 11); + + buf[off++] = (byte) (values[i + 4] >>> 3); + + buf[off] = (byte) (values[i + 4] << 5); + buf[off++] |= values[i + 5] >>> 18; + + buf[off++] = (byte) (values[i + 5] >>> 10); + + buf[off++] = (byte) (values[i + 5] >>> 2); + + buf[off] = (byte) (values[i + 5] << 6); + buf[off++] |= values[i + 6] >>> 17; + + buf[off++] = (byte) (values[i + 6] >>> 9); + + buf[off++] = (byte) (values[i + 6] >>> 1); + + buf[off] = (byte) (values[i + 6] << 7); + buf[off++] |= values[i + 7] >>> 16; + + buf[off++] = (byte) (values[i + 7] >>> 8); + + buf[off] = (byte) (values[i + 7]); + } + + static void packBits24(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 16); + buf[off++] = (byte) (values[i + 0] >>> 8); + buf[off++] = (byte) (values[i + 0]); + + buf[off++] = (byte) (values[i + 1] >>> 16); + buf[off++] = (byte) (values[i + 1] >>> 8); + buf[off++] = (byte) (values[i + 1]); + + buf[off++] = (byte) (values[i + 2] >>> 16); + buf[off++] = (byte) (values[i + 2] >>> 8); + buf[off++] = (byte) (values[i + 2]); + + buf[off++] = (byte) (values[i + 3] >>> 16); + buf[off++] = (byte) (values[i + 3] >>> 8); + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 16); + buf[off++] = (byte) (values[i + 4] >>> 8); + buf[off++] = (byte) (values[i + 4]); + + buf[off++] = (byte) (values[i + 5] >>> 16); + buf[off++] = (byte) (values[i + 5] >>> 8); + buf[off++] = (byte) (values[i + 5]); + + buf[off++] = (byte) (values[i + 6] >>> 16); + buf[off++] = (byte) (values[i + 6] >>> 8); + buf[off++] = (byte) (values[i + 6]); + + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits25(final long[] values, final int i, 
final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 17); + + buf[off++] = (byte) (values[i + 0] >>> 9); + + buf[off++] = (byte) (values[i + 0] >>> 1); + + buf[off] = (byte) (values[i + 0] << 7); + buf[off++] |= values[i + 1] >>> 18; + + buf[off++] = (byte) (values[i + 1] >>> 10); + + buf[off++] = (byte) (values[i + 1] >>> 2); + + buf[off] = (byte) (values[i + 1] << 6); + buf[off++] |= values[i + 2] >>> 19; + + buf[off++] = (byte) (values[i + 2] >>> 11); + + buf[off++] = (byte) (values[i + 2] >>> 3); + + buf[off] = (byte) (values[i + 2] << 5); + buf[off++] |= values[i + 3] >>> 20; + + buf[off++] = (byte) (values[i + 3] >>> 12); + + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 21; + + buf[off++] = (byte) (values[i + 4] >>> 13); + + buf[off++] = (byte) (values[i + 4] >>> 5); + + buf[off] = (byte) (values[i + 4] << 3); + buf[off++] |= values[i + 5] >>> 22; + + buf[off++] = (byte) (values[i + 5] >>> 14); + + buf[off++] = (byte) (values[i + 5] >>> 6); + + buf[off] = (byte) (values[i + 5] << 2); + buf[off++] |= values[i + 6] >>> 23; + + buf[off++] = (byte) (values[i + 6] >>> 15); + + buf[off++] = (byte) (values[i + 6] >>> 7); + + buf[off] = (byte) (values[i + 6] << 1); + buf[off++] |= values[i + 7] >>> 24; + + buf[off++] = (byte) (values[i + 7] >>> 16); + + buf[off++] = (byte) (values[i + 7] >>> 8); + + buf[off] = (byte) (values[i + 7]); + } + + static void packBits26(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 18); + + buf[off++] = (byte) (values[i + 0] >>> 10); + + buf[off++] = (byte) (values[i + 0] >>> 2); + + buf[off] = (byte) (values[i + 0] << 6); + buf[off++] |= values[i + 1] >>> 20; + + buf[off++] = (byte) (values[i + 1] >>> 12); + + buf[off++] = (byte) (values[i + 1] >>> 4); + + buf[off] = (byte) (values[i + 1] << 4); + buf[off++] |= values[i + 2] >>> 22; + + buf[off++] = (byte) (values[i + 2] >>> 14); + + 
buf[off++] = (byte) (values[i + 2] >>> 6); + + buf[off] = (byte) (values[i + 2] << 2); + buf[off++] |= values[i + 3] >>> 24; + + buf[off++] = (byte) (values[i + 3] >>> 16); + + buf[off++] = (byte) (values[i + 3] >>> 8); + + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 18); + + buf[off++] = (byte) (values[i + 4] >>> 10); + + buf[off++] = (byte) (values[i + 4] >>> 2); + + buf[off] = (byte) (values[i + 4] << 6); + buf[off++] |= values[i + 5] >>> 20; + + buf[off++] = (byte) (values[i + 5] >>> 12); + + buf[off++] = (byte) (values[i + 5] >>> 4); + + buf[off] = (byte) (values[i + 5] << 4); + buf[off++] |= values[i + 6] >>> 22; + + buf[off++] = (byte) (values[i + 6] >>> 14); + + buf[off++] = (byte) (values[i + 6] >>> 6); + + buf[off] = (byte) (values[i + 6] << 2); + buf[off++] |= values[i + 7] >>> 24; + + buf[off++] = (byte) (values[i + 7] >>> 16); + + buf[off++] = (byte) (values[i + 7] >>> 8); + + buf[off] = (byte) (values[i + 7]); + } + + static void packBits27(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 19); + + buf[off++] = (byte) (values[i + 0] >>> 11); + + buf[off++] = (byte) (values[i + 0] >>> 3); + + buf[off] = (byte) (values[i + 0] << 5); + buf[off++] |= values[i + 1] >>> 22; + + buf[off++] = (byte) (values[i + 1] >>> 14); + + buf[off++] = (byte) (values[i + 1] >>> 6); + + buf[off] = (byte) (values[i + 1] << 2); + buf[off++] |= values[i + 2] >>> 25; + + buf[off++] = (byte) (values[i + 2] >>> 17); + + buf[off++] = (byte) (values[i + 2] >>> 9); + + buf[off++] = (byte) (values[i + 2] >>> 1); + + buf[off] = (byte) (values[i + 2] << 7); + buf[off++] |= values[i + 3] >>> 20; + + buf[off++] = (byte) (values[i + 3] >>> 12); + + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 23; + + buf[off++] = (byte) (values[i + 4] >>> 15); + + buf[off++] = (byte) (values[i + 4] >>> 7); + + buf[off] = (byte) (values[i + 
4] << 1); + buf[off++] |= values[i + 5] >>> 26; + + buf[off++] = (byte) (values[i + 5] >>> 18); + + buf[off++] = (byte) (values[i + 5] >>> 10); + + buf[off++] = (byte) (values[i + 5] >>> 2); + + buf[off] = (byte) (values[i + 5] << 6); + buf[off++] |= values[i + 6] >>> 21; + + buf[off++] = (byte) (values[i + 6] >>> 13); + + buf[off++] = (byte) (values[i + 6] >>> 5); + + buf[off] = (byte) (values[i + 6] << 3); + buf[off++] |= values[i + 7] >>> 24; + + buf[off++] = (byte) (values[i + 7] >>> 16); + + buf[off++] = (byte) (values[i + 7] >>> 8); + + buf[off] = (byte) (values[i + 7]); + } + + static void packBits28(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 20); + buf[off++] = (byte) (values[i + 0] >>> 12); + buf[off++] = (byte) (values[i + 0] >>> 4); + buf[off] = (byte) (values[i + 0] << 4); + buf[off++] |= values[i + 1] >>> 24; + buf[off++] = (byte) (values[i + 1] >>> 16); + buf[off++] = (byte) (values[i + 1] >>> 8); + buf[off++] = (byte) (values[i + 1]); + buf[off++] = (byte) (values[i + 2] >>> 20); + buf[off++] = (byte) (values[i + 2] >>> 12); + buf[off++] = (byte) (values[i + 2] >>> 4); + buf[off] = (byte) (values[i + 2] << 4); + buf[off++] |= values[i + 3] >>> 24; + buf[off++] = (byte) (values[i + 3] >>> 16); + buf[off++] = (byte) (values[i + 3] >>> 8); + buf[off++] = (byte) (values[i + 3]); + buf[off++] = (byte) (values[i + 4] >>> 20); + buf[off++] = (byte) (values[i + 4] >>> 12); + buf[off++] = (byte) (values[i + 4] >>> 4); + buf[off] = (byte) (values[i + 4] << 4); + buf[off++] |= values[i + 5] >>> 24; + buf[off++] = (byte) (values[i + 5] >>> 16); + buf[off++] = (byte) (values[i + 5] >>> 8); + buf[off++] = (byte) (values[i + 5]); + buf[off++] = (byte) (values[i + 6] >>> 20); + buf[off++] = (byte) (values[i + 6] >>> 12); + buf[off++] = (byte) (values[i + 6] >>> 4); + buf[off] = (byte) (values[i + 6] << 4); + buf[off++] |= values[i + 7] >>> 24; + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = 
(byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits29(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 21); + + buf[off++] = (byte) (values[i + 0] >>> 13); + + buf[off++] = (byte) (values[i + 0] >>> 5); + + buf[off] = (byte) (values[i + 0] << 3); + buf[off++] |= values[i + 1] >>> 26; + + buf[off++] = (byte) (values[i + 1] >>> 18); + + buf[off++] = (byte) (values[i + 1] >>> 10); + + buf[off++] = (byte) (values[i + 1] >>> 2); + + buf[off] = (byte) (values[i + 1] << 6); + buf[off++] |= values[i + 2] >>> 23; + + buf[off++] = (byte) (values[i + 2] >>> 15); + + buf[off++] = (byte) (values[i + 2] >>> 7); + + buf[off] = (byte) (values[i + 2] << 1); + buf[off++] |= values[i + 3] >>> 28; + + buf[off++] = (byte) (values[i + 3] >>> 20); + + buf[off++] = (byte) (values[i + 3] >>> 12); + + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 25; + + buf[off++] = (byte) (values[i + 4] >>> 17); + + buf[off++] = (byte) (values[i + 4] >>> 9); + + buf[off++] = (byte) (values[i + 4] >>> 1); + + buf[off] = (byte) (values[i + 4] << 7); + buf[off++] |= values[i + 5] >>> 22; + + buf[off++] = (byte) (values[i + 5] >>> 14); + + buf[off++] = (byte) (values[i + 5] >>> 6); + + buf[off] = (byte) (values[i + 5] << 2); + buf[off++] |= values[i + 6] >>> 27; + + buf[off++] = (byte) (values[i + 6] >>> 19); + + buf[off++] = (byte) (values[i + 6] >>> 11); + + buf[off++] = (byte) (values[i + 6] >>> 3); + + buf[off] = (byte) (values[i + 6] << 5); + buf[off++] |= values[i + 7] >>> 24; + + buf[off++] = (byte) (values[i + 7] >>> 16); + + buf[off++] = (byte) (values[i + 7] >>> 8); + + buf[off] = (byte) (values[i + 7]); + } + + static void packBits30(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 22); + buf[off++] = (byte) (values[i + 0] >>> 14); + buf[off++] = (byte) (values[i + 0] >>> 
6); + + buf[off] = (byte) (values[i + 0] << 2); + buf[off++] |= values[i + 1] >>> 28; + buf[off++] = (byte) (values[i + 1] >>> 20); + buf[off++] = (byte) (values[i + 1] >>> 12); + buf[off++] = (byte) (values[i + 1] >>> 4); + + buf[off] = (byte) (values[i + 1] << 4); + buf[off++] |= values[i + 2] >>> 26; + buf[off++] = (byte) (values[i + 2] >>> 18); + buf[off++] = (byte) (values[i + 2] >>> 10); + buf[off++] = (byte) (values[i + 2] >>> 2); + + buf[off] = (byte) (values[i + 2] << 6); + buf[off++] |= values[i + 3] >>> 24; + buf[off++] = (byte) (values[i + 3] >>> 16); + buf[off++] = (byte) (values[i + 3] >>> 8); + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 22); + buf[off++] = (byte) (values[i + 4] >>> 14); + buf[off++] = (byte) (values[i + 4] >>> 6); + + buf[off] = (byte) (values[i + 4] << 2); + buf[off++] |= values[i + 5] >>> 28; + buf[off++] = (byte) (values[i + 5] >>> 20); + buf[off++] = (byte) (values[i + 5] >>> 12); + buf[off++] = (byte) (values[i + 5] >>> 4); + + buf[off] = (byte) (values[i + 5] << 4); + buf[off++] |= values[i + 6] >>> 26; + buf[off++] = (byte) (values[i + 6] >>> 18); + buf[off++] = (byte) (values[i + 6] >>> 10); + buf[off++] = (byte) (values[i + 6] >>> 2); + + buf[off] = (byte) (values[i + 6] << 6); + buf[off++] |= values[i + 7] >>> 24; + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits31(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 23); + buf[off++] = (byte) (values[i + 0] >>> 15); + buf[off++] = (byte) (values[i + 0] >>> 7); + + buf[off] = (byte) (values[i + 0] << 1); + buf[off++] |= values[i + 1] >>> 30; + buf[off++] = (byte) (values[i + 1] >>> 22); + buf[off++] = (byte) (values[i + 1] >>> 14); + buf[off++] = (byte) (values[i + 1] >>> 6); + + buf[off] = (byte) (values[i + 1] << 2); + buf[off++] |= values[i + 2] >>> 29; + buf[off++] = (byte) 
(values[i + 2] >>> 21); + buf[off++] = (byte) (values[i + 2] >>> 13); + buf[off++] = (byte) (values[i + 2] >>> 5); + + buf[off] = (byte) (values[i + 2] << 3); + buf[off++] |= values[i + 3] >>> 28; + buf[off++] = (byte) (values[i + 3] >>> 20); + buf[off++] = (byte) (values[i + 3] >>> 12); + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 27; + buf[off++] = (byte) (values[i + 4] >>> 19); + buf[off++] = (byte) (values[i + 4] >>> 11); + buf[off++] = (byte) (values[i + 4] >>> 3); + + buf[off] = (byte) (values[i + 4] << 5); + buf[off++] |= values[i + 5] >>> 26; + buf[off++] = (byte) (values[i + 5] >>> 18); + buf[off++] = (byte) (values[i + 5] >>> 10); + buf[off++] = (byte) (values[i + 5] >>> 2); + + buf[off] = (byte) (values[i + 5] << 6); + buf[off++] |= values[i + 6] >>> 25; + buf[off++] = (byte) (values[i + 6] >>> 17); + buf[off++] = (byte) (values[i + 6] >>> 9); + buf[off++] = (byte) (values[i + 6] >>> 1); + + buf[off] = (byte) (values[i + 6] << 7); + buf[off++] |= values[i + 7] >>> 24; + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits32(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 24); + buf[off++] = (byte) (values[i + 0] >>> 16); + buf[off++] = (byte) (values[i + 0] >>> 8); + buf[off++] = (byte) (values[i + 0]); + + buf[off++] = (byte) (values[i + 1] >>> 24); + buf[off++] = (byte) (values[i + 1] >>> 16); + buf[off++] = (byte) (values[i + 1] >>> 8); + buf[off++] = (byte) (values[i + 1]); + + buf[off++] = (byte) (values[i + 2] >>> 24); + buf[off++] = (byte) (values[i + 2] >>> 16); + buf[off++] = (byte) (values[i + 2] >>> 8); + buf[off++] = (byte) (values[i + 2]); + + buf[off++] = (byte) (values[i + 3] >>> 24); + buf[off++] = (byte) (values[i + 3] >>> 16); + buf[off++] = (byte) (values[i + 3] >>> 8); + buf[off++] = (byte) (values[i + 
3]); + + buf[off++] = (byte) (values[i + 4] >>> 24); + buf[off++] = (byte) (values[i + 4] >>> 16); + buf[off++] = (byte) (values[i + 4] >>> 8); + buf[off++] = (byte) (values[i + 4]); + + buf[off++] = (byte) (values[i + 5] >>> 24); + buf[off++] = (byte) (values[i + 5] >>> 16); + buf[off++] = (byte) (values[i + 5] >>> 8); + buf[off++] = (byte) (values[i + 5]); + + buf[off++] = (byte) (values[i + 6] >>> 24); + buf[off++] = (byte) (values[i + 6] >>> 16); + buf[off++] = (byte) (values[i + 6] >>> 8); + buf[off++] = (byte) (values[i + 6]); + + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits33(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 25); + buf[off++] = (byte) (values[i + 0] >>> 17); + buf[off++] = (byte) (values[i + 0] >>> 9); + buf[off++] = (byte) (values[i + 0] >>> 1); + + buf[off] = (byte) (values[i + 0] << 7); + buf[off++] |= values[i + 1] >>> 26; + buf[off++] = (byte) (values[i + 1] >>> 18); + buf[off++] = (byte) (values[i + 1] >>> 10); + buf[off++] = (byte) (values[i + 1] >>> 2); + + buf[off] = (byte) (values[i + 1] << 6); + buf[off++] |= values[i + 2] >>> 27; + buf[off++] = (byte) (values[i + 2] >>> 19); + buf[off++] = (byte) (values[i + 2] >>> 11); + buf[off++] = (byte) (values[i + 2] >>> 3); + + buf[off] = (byte) (values[i + 2] << 5); + buf[off++] |= values[i + 3] >>> 28; + buf[off++] = (byte) (values[i + 3] >>> 20); + buf[off++] = (byte) (values[i + 3] >>> 12); + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 29; + buf[off++] = (byte) (values[i + 4] >>> 21); + buf[off++] = (byte) (values[i + 4] >>> 13); + buf[off++] = (byte) (values[i + 4] >>> 5); + + buf[off] = (byte) (values[i + 4] << 3); + buf[off++] |= values[i + 5] >>> 30; + buf[off++] = (byte) (values[i + 5] >>> 22); + 
buf[off++] = (byte) (values[i + 5] >>> 14); + buf[off++] = (byte) (values[i + 5] >>> 6); + + buf[off] = (byte) (values[i + 5] << 2); + buf[off++] |= values[i + 6] >>> 31; + buf[off++] = (byte) (values[i + 6] >>> 23); + buf[off++] = (byte) (values[i + 6] >>> 15); + buf[off++] = (byte) (values[i + 6] >>> 7); + + buf[off] = (byte) (values[i + 6] << 1); + buf[off++] |= values[i + 7] >>> 32; + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits34(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 26); + buf[off++] = (byte) (values[i + 0] >>> 18); + buf[off++] = (byte) (values[i + 0] >>> 10); + buf[off++] = (byte) (values[i + 0] >>> 2); + + buf[off] = (byte) (values[i + 0] << 6); + buf[off++] |= values[i + 1] >>> 28; + buf[off++] = (byte) (values[i + 1] >>> 20); + buf[off++] = (byte) (values[i + 1] >>> 12); + buf[off++] = (byte) (values[i + 1] >>> 4); + + buf[off] = (byte) (values[i + 1] << 4); + buf[off++] |= values[i + 2] >>> 30; + buf[off++] = (byte) (values[i + 2] >>> 22); + buf[off++] = (byte) (values[i + 2] >>> 14); + buf[off++] = (byte) (values[i + 2] >>> 6); + + buf[off] = (byte) (values[i + 2] << 2); + buf[off++] |= values[i + 3] >>> 32; + buf[off++] = (byte) (values[i + 3] >>> 24); + buf[off++] = (byte) (values[i + 3] >>> 16); + buf[off++] = (byte) (values[i + 3] >>> 8); + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 26); + buf[off++] = (byte) (values[i + 4] >>> 18); + buf[off++] = (byte) (values[i + 4] >>> 10); + buf[off++] = (byte) (values[i + 4] >>> 2); + + buf[off] = (byte) (values[i + 4] << 6); + buf[off++] |= values[i + 5] >>> 28; + buf[off++] = (byte) (values[i + 5] >>> 20); + buf[off++] = (byte) (values[i + 5] >>> 12); + buf[off++] = (byte) (values[i + 5] >>> 4); + + buf[off] = (byte) (values[i + 5] << 4); + buf[off++] 
|= values[i + 6] >>> 30; + buf[off++] = (byte) (values[i + 6] >>> 22); + buf[off++] = (byte) (values[i + 6] >>> 14); + buf[off++] = (byte) (values[i + 6] >>> 6); + + buf[off] = (byte) (values[i + 6] << 2); + buf[off++] |= values[i + 7] >>> 32; + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits35(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 27); + buf[off++] = (byte) (values[i + 0] >>> 19); + buf[off++] = (byte) (values[i + 0] >>> 11); + buf[off++] = (byte) (values[i + 0] >>> 3); + + buf[off] = (byte) (values[i + 0] << 5); + buf[off++] |= values[i + 1] >>> 30; + buf[off++] = (byte) (values[i + 1] >>> 22); + buf[off++] = (byte) (values[i + 1] >>> 14); + buf[off++] = (byte) (values[i + 1] >>> 6); + + buf[off] = (byte) (values[i + 1] << 2); + buf[off++] |= values[i + 2] >>> 33; + buf[off++] = (byte) (values[i + 2] >>> 25); + buf[off++] = (byte) (values[i + 2] >>> 17); + buf[off++] = (byte) (values[i + 2] >>> 9); + buf[off++] = (byte) (values[i + 2] >>> 1); + + buf[off] = (byte) (values[i + 2] << 7); + buf[off++] |= values[i + 3] >>> 28; + buf[off++] = (byte) (values[i + 3] >>> 20); + buf[off++] = (byte) (values[i + 3] >>> 12); + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 31; + buf[off++] = (byte) (values[i + 4] >>> 23); + buf[off++] = (byte) (values[i + 4] >>> 15); + buf[off++] = (byte) (values[i + 4] >>> 7); + + buf[off] = (byte) (values[i + 4] << 1); + buf[off++] |= values[i + 5] >>> 34; + buf[off++] = (byte) (values[i + 5] >>> 26); + buf[off++] = (byte) (values[i + 5] >>> 18); + buf[off++] = (byte) (values[i + 5] >>> 10); + buf[off++] = (byte) (values[i + 5] >>> 2); + + buf[off] = (byte) (values[i + 5] << 6); + buf[off++] |= values[i + 6] >>> 29; + buf[off++] = (byte) (values[i + 6] 
>>> 21); + buf[off++] = (byte) (values[i + 6] >>> 13); + buf[off++] = (byte) (values[i + 6] >>> 5); + + buf[off] = (byte) (values[i + 6] << 3); + buf[off++] |= values[i + 7] >>> 32; + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits36(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 28); + buf[off++] = (byte) (values[i + 0] >>> 20); + buf[off++] = (byte) (values[i + 0] >>> 12); + buf[off++] = (byte) (values[i + 0] >>> 4); + + buf[off] = (byte) (values[i + 0] << 4); + buf[off++] |= values[i + 1] >>> 32; + buf[off++] = (byte) (values[i + 1] >>> 24); + buf[off++] = (byte) (values[i + 1] >>> 16); + buf[off++] = (byte) (values[i + 1] >>> 8); + buf[off++] = (byte) (values[i + 1]); + + buf[off++] = (byte) (values[i + 2] >>> 28); + buf[off++] = (byte) (values[i + 2] >>> 20); + buf[off++] = (byte) (values[i + 2] >>> 12); + buf[off++] = (byte) (values[i + 2] >>> 4); + + buf[off] = (byte) (values[i + 2] << 4); + buf[off++] |= values[i + 3] >>> 32; + buf[off++] = (byte) (values[i + 3] >>> 24); + buf[off++] = (byte) (values[i + 3] >>> 16); + buf[off++] = (byte) (values[i + 3] >>> 8); + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 28); + buf[off++] = (byte) (values[i + 4] >>> 20); + buf[off++] = (byte) (values[i + 4] >>> 12); + buf[off++] = (byte) (values[i + 4] >>> 4); + + buf[off] = (byte) (values[i + 4] << 4); + buf[off++] |= values[i + 5] >>> 32; + buf[off++] = (byte) (values[i + 5] >>> 24); + buf[off++] = (byte) (values[i + 5] >>> 16); + buf[off++] = (byte) (values[i + 5] >>> 8); + buf[off++] = (byte) (values[i + 5]); + + buf[off++] = (byte) (values[i + 6] >>> 28); + buf[off++] = (byte) (values[i + 6] >>> 20); + buf[off++] = (byte) (values[i + 6] >>> 12); + buf[off++] = (byte) (values[i + 6] >>> 4); + + buf[off] = (byte) (values[i + 6] 
<< 4); + buf[off++] |= values[i + 7] >>> 32; + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits37(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 29); + buf[off++] = (byte) (values[i + 0] >>> 21); + buf[off++] = (byte) (values[i + 0] >>> 13); + buf[off++] = (byte) (values[i + 0] >>> 5); + + buf[off] = (byte) (values[i + 0] << 3); + buf[off++] |= values[i + 1] >>> 34; + buf[off++] = (byte) (values[i + 1] >>> 26); + buf[off++] = (byte) (values[i + 1] >>> 18); + buf[off++] = (byte) (values[i + 1] >>> 10); + buf[off++] = (byte) (values[i + 1] >>> 2); + + buf[off] = (byte) (values[i + 1] << 6); + buf[off++] |= values[i + 2] >>> 31; + buf[off++] = (byte) (values[i + 2] >>> 23); + buf[off++] = (byte) (values[i + 2] >>> 15); + buf[off++] = (byte) (values[i + 2] >>> 7); + + buf[off] = (byte) (values[i + 2] << 1); + buf[off++] |= values[i + 3] >>> 36; + buf[off++] = (byte) (values[i + 3] >>> 28); + buf[off++] = (byte) (values[i + 3] >>> 20); + buf[off++] = (byte) (values[i + 3] >>> 12); + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 33; + buf[off++] = (byte) (values[i + 4] >>> 25); + buf[off++] = (byte) (values[i + 4] >>> 17); + buf[off++] = (byte) (values[i + 4] >>> 9); + buf[off++] = (byte) (values[i + 4] >>> 1); + + buf[off] = (byte) (values[i + 4] << 7); + buf[off++] |= values[i + 5] >>> 30; + buf[off++] = (byte) (values[i + 5] >>> 22); + buf[off++] = (byte) (values[i + 5] >>> 14); + buf[off++] = (byte) (values[i + 5] >>> 6); + + buf[off] = (byte) (values[i + 5] << 2); + buf[off++] |= values[i + 6] >>> 35; + buf[off++] = (byte) (values[i + 6] >>> 27); + buf[off++] = (byte) (values[i + 6] >>> 19); + buf[off++] = (byte) (values[i + 6] >>> 11); + buf[off++] = (byte) (values[i + 6] >>> 3); + + 
buf[off] = (byte) (values[i + 6] << 5); + buf[off++] |= values[i + 7] >>> 32; + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits38(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 30); + buf[off++] = (byte) (values[i + 0] >>> 22); + buf[off++] = (byte) (values[i + 0] >>> 14); + buf[off++] = (byte) (values[i + 0] >>> 6); + + buf[off] = (byte) (values[i + 0] << 2); + buf[off++] |= values[i + 1] >>> 36; + buf[off++] = (byte) (values[i + 1] >>> 28); + buf[off++] = (byte) (values[i + 1] >>> 20); + buf[off++] = (byte) (values[i + 1] >>> 12); + buf[off++] = (byte) (values[i + 1] >>> 4); + + buf[off] = (byte) (values[i + 1] << 4); + buf[off++] |= values[i + 2] >>> 34; + buf[off++] = (byte) (values[i + 2] >>> 26); + buf[off++] = (byte) (values[i + 2] >>> 18); + buf[off++] = (byte) (values[i + 2] >>> 10); + buf[off++] = (byte) (values[i + 2] >>> 2); + + buf[off] = (byte) (values[i + 2] << 6); + buf[off++] |= values[i + 3] >>> 32; + buf[off++] = (byte) (values[i + 3] >>> 24); + buf[off++] = (byte) (values[i + 3] >>> 16); + buf[off++] = (byte) (values[i + 3] >>> 8); + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 30); + buf[off++] = (byte) (values[i + 4] >>> 22); + buf[off++] = (byte) (values[i + 4] >>> 14); + buf[off++] = (byte) (values[i + 4] >>> 6); + + buf[off] = (byte) (values[i + 4] << 2); + buf[off++] |= values[i + 5] >>> 36; + buf[off++] = (byte) (values[i + 5] >>> 28); + buf[off++] = (byte) (values[i + 5] >>> 20); + buf[off++] = (byte) (values[i + 5] >>> 12); + buf[off++] = (byte) (values[i + 5] >>> 4); + + buf[off] = (byte) (values[i + 5] << 4); + buf[off++] |= values[i + 6] >>> 34; + buf[off++] = (byte) (values[i + 6] >>> 26); + buf[off++] = (byte) (values[i + 6] >>> 18); + buf[off++] = (byte) (values[i + 6] >>> 10); + buf[off++] 
= (byte) (values[i + 6] >>> 2); + + buf[off] = (byte) (values[i + 6] << 6); + buf[off++] |= values[i + 7] >>> 32; + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits39(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 31); + buf[off++] = (byte) (values[i + 0] >>> 23); + buf[off++] = (byte) (values[i + 0] >>> 15); + buf[off++] = (byte) (values[i + 0] >>> 7); + + buf[off] = (byte) (values[i + 0] << 1); + buf[off++] |= values[i + 1] >>> 38; + buf[off++] = (byte) (values[i + 1] >>> 30); + buf[off++] = (byte) (values[i + 1] >>> 22); + buf[off++] = (byte) (values[i + 1] >>> 14); + buf[off++] = (byte) (values[i + 1] >>> 6); + + buf[off] = (byte) (values[i + 1] << 2); + buf[off++] |= values[i + 2] >>> 37; + buf[off++] = (byte) (values[i + 2] >>> 29); + buf[off++] = (byte) (values[i + 2] >>> 21); + buf[off++] = (byte) (values[i + 2] >>> 13); + buf[off++] = (byte) (values[i + 2] >>> 5); + + buf[off] = (byte) (values[i + 2] << 3); + buf[off++] |= values[i + 3] >>> 36; + buf[off++] = (byte) (values[i + 3] >>> 28); + buf[off++] = (byte) (values[i + 3] >>> 20); + buf[off++] = (byte) (values[i + 3] >>> 12); + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 35; + buf[off++] = (byte) (values[i + 4] >>> 27); + buf[off++] = (byte) (values[i + 4] >>> 19); + buf[off++] = (byte) (values[i + 4] >>> 11); + buf[off++] = (byte) (values[i + 4] >>> 3); + + buf[off] = (byte) (values[i + 4] << 5); + buf[off++] |= values[i + 5] >>> 34; + buf[off++] = (byte) (values[i + 5] >>> 26); + buf[off++] = (byte) (values[i + 5] >>> 18); + buf[off++] = (byte) (values[i + 5] >>> 10); + buf[off++] = (byte) (values[i + 5] >>> 2); + + buf[off] = (byte) (values[i + 5] << 6); + buf[off++] |= values[i + 6] >>> 33; + buf[off++] = (byte) 
(values[i + 6] >>> 25); + buf[off++] = (byte) (values[i + 6] >>> 17); + buf[off++] = (byte) (values[i + 6] >>> 9); + buf[off++] = (byte) (values[i + 6] >>> 1); + + buf[off] = (byte) (values[i + 6] << 7); + buf[off++] |= values[i + 7] >>> 32; + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits40(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 32); + buf[off++] = (byte) (values[i + 0] >>> 24); + buf[off++] = (byte) (values[i + 0] >>> 16); + buf[off++] = (byte) (values[i + 0] >>> 8); + buf[off++] = (byte) (values[i + 0]); + + buf[off++] = (byte) (values[i + 1] >>> 32); + buf[off++] = (byte) (values[i + 1] >>> 24); + buf[off++] = (byte) (values[i + 1] >>> 16); + buf[off++] = (byte) (values[i + 1] >>> 8); + buf[off++] = (byte) (values[i + 1]); + + buf[off++] = (byte) (values[i + 2] >>> 32); + buf[off++] = (byte) (values[i + 2] >>> 24); + buf[off++] = (byte) (values[i + 2] >>> 16); + buf[off++] = (byte) (values[i + 2] >>> 8); + buf[off++] = (byte) (values[i + 2]); + + buf[off++] = (byte) (values[i + 3] >>> 32); + buf[off++] = (byte) (values[i + 3] >>> 24); + buf[off++] = (byte) (values[i + 3] >>> 16); + buf[off++] = (byte) (values[i + 3] >>> 8); + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 32); + buf[off++] = (byte) (values[i + 4] >>> 24); + buf[off++] = (byte) (values[i + 4] >>> 16); + buf[off++] = (byte) (values[i + 4] >>> 8); + buf[off++] = (byte) (values[i + 4]); + + buf[off++] = (byte) (values[i + 5] >>> 32); + buf[off++] = (byte) (values[i + 5] >>> 24); + buf[off++] = (byte) (values[i + 5] >>> 16); + buf[off++] = (byte) (values[i + 5] >>> 8); + buf[off++] = (byte) (values[i + 5]); + + buf[off++] = (byte) (values[i + 6] >>> 32); + buf[off++] = (byte) (values[i + 6] >>> 24); + buf[off++] = (byte) (values[i + 6] >>> 16); + 
buf[off++] = (byte) (values[i + 6] >>> 8); + buf[off++] = (byte) (values[i + 6]); + + buf[off++] = (byte) (values[i + 7] >>> 32); + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits41(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 33); + buf[off++] = (byte) (values[i + 0] >>> 25); + buf[off++] = (byte) (values[i + 0] >>> 17); + buf[off++] = (byte) (values[i + 0] >>> 9); + buf[off++] = (byte) (values[i + 0] >>> 1); + + buf[off] = (byte) (values[i + 0] << 7); + buf[off++] |= values[i + 1] >>> 34; + buf[off++] = (byte) (values[i + 1] >>> 26); + buf[off++] = (byte) (values[i + 1] >>> 18); + buf[off++] = (byte) (values[i + 1] >>> 10); + buf[off++] = (byte) (values[i + 1] >>> 2); + + buf[off] = (byte) (values[i + 1] << 6); + buf[off++] |= values[i + 2] >>> 35; + buf[off++] = (byte) (values[i + 2] >>> 27); + buf[off++] = (byte) (values[i + 2] >>> 19); + buf[off++] = (byte) (values[i + 2] >>> 11); + buf[off++] = (byte) (values[i + 2] >>> 3); + + buf[off] = (byte) (values[i + 2] << 5); + buf[off++] |= values[i + 3] >>> 36; + buf[off++] = (byte) (values[i + 3] >>> 28); + buf[off++] = (byte) (values[i + 3] >>> 20); + buf[off++] = (byte) (values[i + 3] >>> 12); + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 37; + buf[off++] = (byte) (values[i + 4] >>> 29); + buf[off++] = (byte) (values[i + 4] >>> 21); + buf[off++] = (byte) (values[i + 4] >>> 13); + buf[off++] = (byte) (values[i + 4] >>> 5); + + buf[off] = (byte) (values[i + 4] << 3); + buf[off++] |= values[i + 5] >>> 38; + buf[off++] = (byte) (values[i + 5] >>> 30); + buf[off++] = (byte) (values[i + 5] >>> 22); + buf[off++] = (byte) (values[i + 5] >>> 14); + buf[off++] = (byte) (values[i + 5] >>> 6); + + buf[off] = (byte) (values[i + 5] << 2); + 
buf[off++] |= values[i + 6] >>> 39; + buf[off++] = (byte) (values[i + 6] >>> 31); + buf[off++] = (byte) (values[i + 6] >>> 23); + buf[off++] = (byte) (values[i + 6] >>> 15); + buf[off++] = (byte) (values[i + 6] >>> 7); + + buf[off] = (byte) (values[i + 6] << 1); + buf[off++] |= values[i + 7] >>> 40; + buf[off++] = (byte) (values[i + 7] >>> 32); + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits42(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 34); + buf[off++] = (byte) (values[i + 0] >>> 26); + buf[off++] = (byte) (values[i + 0] >>> 18); + buf[off++] = (byte) (values[i + 0] >>> 10); + buf[off++] = (byte) (values[i + 0] >>> 2); + + buf[off] = (byte) (values[i + 0] << 6); + buf[off++] |= values[i + 1] >>> 36; + buf[off++] = (byte) (values[i + 1] >>> 28); + buf[off++] = (byte) (values[i + 1] >>> 20); + buf[off++] = (byte) (values[i + 1] >>> 12); + buf[off++] = (byte) (values[i + 1] >>> 4); + + buf[off] = (byte) (values[i + 1] << 4); + buf[off++] |= values[i + 2] >>> 38; + buf[off++] = (byte) (values[i + 2] >>> 30); + buf[off++] = (byte) (values[i + 2] >>> 22); + buf[off++] = (byte) (values[i + 2] >>> 14); + buf[off++] = (byte) (values[i + 2] >>> 6); + + buf[off] = (byte) (values[i + 2] << 2); + buf[off++] |= values[i + 3] >>> 40; + buf[off++] = (byte) (values[i + 3] >>> 32); + buf[off++] = (byte) (values[i + 3] >>> 24); + buf[off++] = (byte) (values[i + 3] >>> 16); + buf[off++] = (byte) (values[i + 3] >>> 8); + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 34); + buf[off++] = (byte) (values[i + 4] >>> 26); + buf[off++] = (byte) (values[i + 4] >>> 18); + buf[off++] = (byte) (values[i + 4] >>> 10); + buf[off++] = (byte) (values[i + 4] >>> 2); + + buf[off] = (byte) (values[i + 4] << 6); + buf[off++] |= values[i + 5] >>> 36; + 
buf[off++] = (byte) (values[i + 5] >>> 28); + buf[off++] = (byte) (values[i + 5] >>> 20); + buf[off++] = (byte) (values[i + 5] >>> 12); + buf[off++] = (byte) (values[i + 5] >>> 4); + + buf[off] = (byte) (values[i + 5] << 4); + buf[off++] |= values[i + 6] >>> 38; + buf[off++] = (byte) (values[i + 6] >>> 30); + buf[off++] = (byte) (values[i + 6] >>> 22); + buf[off++] = (byte) (values[i + 6] >>> 14); + buf[off++] = (byte) (values[i + 6] >>> 6); + + buf[off] = (byte) (values[i + 6] << 2); + buf[off++] |= values[i + 7] >>> 40; + buf[off++] = (byte) (values[i + 7] >>> 32); + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits43(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 35); + buf[off++] = (byte) (values[i + 0] >>> 27); + buf[off++] = (byte) (values[i + 0] >>> 19); + buf[off++] = (byte) (values[i + 0] >>> 11); + buf[off++] = (byte) (values[i + 0] >>> 3); + + buf[off] = (byte) (values[i + 0] << 5); + buf[off++] |= values[i + 1] >>> 38; + buf[off++] = (byte) (values[i + 1] >>> 30); + buf[off++] = (byte) (values[i + 1] >>> 22); + buf[off++] = (byte) (values[i + 1] >>> 14); + buf[off++] = (byte) (values[i + 1] >>> 6); + + buf[off] = (byte) (values[i + 1] << 2); + buf[off++] |= values[i + 2] >>> 41; + buf[off++] = (byte) (values[i + 2] >>> 33); + buf[off++] = (byte) (values[i + 2] >>> 25); + buf[off++] = (byte) (values[i + 2] >>> 17); + buf[off++] = (byte) (values[i + 2] >>> 9); + buf[off++] = (byte) (values[i + 2] >>> 1); + + buf[off] = (byte) (values[i + 2] << 7); + buf[off++] |= values[i + 3] >>> 36; + buf[off++] = (byte) (values[i + 3] >>> 28); + buf[off++] = (byte) (values[i + 3] >>> 20); + buf[off++] = (byte) (values[i + 3] >>> 12); + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 39; + 
buf[off++] = (byte) (values[i + 4] >>> 31); + buf[off++] = (byte) (values[i + 4] >>> 23); + buf[off++] = (byte) (values[i + 4] >>> 15); + buf[off++] = (byte) (values[i + 4] >>> 7); + + buf[off] = (byte) (values[i + 4] << 1); + buf[off++] |= values[i + 5] >>> 42; + buf[off++] = (byte) (values[i + 5] >>> 34); + buf[off++] = (byte) (values[i + 5] >>> 26); + buf[off++] = (byte) (values[i + 5] >>> 18); + buf[off++] = (byte) (values[i + 5] >>> 10); + buf[off++] = (byte) (values[i + 5] >>> 2); + + buf[off] = (byte) (values[i + 5] << 6); + buf[off++] |= values[i + 6] >>> 37; + buf[off++] = (byte) (values[i + 6] >>> 29); + buf[off++] = (byte) (values[i + 6] >>> 21); + buf[off++] = (byte) (values[i + 6] >>> 13); + buf[off++] = (byte) (values[i + 6] >>> 5); + + buf[off] = (byte) (values[i + 6] << 3); + buf[off++] |= values[i + 7] >>> 40; + buf[off++] = (byte) (values[i + 7] >>> 32); + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits44(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 36); + buf[off++] = (byte) (values[i + 0] >>> 28); + buf[off++] = (byte) (values[i + 0] >>> 20); + buf[off++] = (byte) (values[i + 0] >>> 12); + buf[off++] = (byte) (values[i + 0] >>> 4); + + buf[off] = (byte) (values[i + 0] << 4); + buf[off++] |= values[i + 1] >>> 40; + buf[off++] = (byte) (values[i + 1] >>> 32); + buf[off++] = (byte) (values[i + 1] >>> 24); + buf[off++] = (byte) (values[i + 1] >>> 16); + buf[off++] = (byte) (values[i + 1] >>> 8); + buf[off++] = (byte) (values[i + 1]); + + buf[off++] = (byte) (values[i + 2] >>> 36); + buf[off++] = (byte) (values[i + 2] >>> 28); + buf[off++] = (byte) (values[i + 2] >>> 20); + buf[off++] = (byte) (values[i + 2] >>> 12); + buf[off++] = (byte) (values[i + 2] >>> 4); + + buf[off] = (byte) (values[i + 2] << 4); + buf[off++] |= values[i + 3] >>> 40; 
+ buf[off++] = (byte) (values[i + 3] >>> 32); + buf[off++] = (byte) (values[i + 3] >>> 24); + buf[off++] = (byte) (values[i + 3] >>> 16); + buf[off++] = (byte) (values[i + 3] >>> 8); + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 36); + buf[off++] = (byte) (values[i + 4] >>> 28); + buf[off++] = (byte) (values[i + 4] >>> 20); + buf[off++] = (byte) (values[i + 4] >>> 12); + buf[off++] = (byte) (values[i + 4] >>> 4); + + buf[off] = (byte) (values[i + 4] << 4); + buf[off++] |= values[i + 5] >>> 40; + buf[off++] = (byte) (values[i + 5] >>> 32); + buf[off++] = (byte) (values[i + 5] >>> 24); + buf[off++] = (byte) (values[i + 5] >>> 16); + buf[off++] = (byte) (values[i + 5] >>> 8); + buf[off++] = (byte) (values[i + 5]); + + buf[off++] = (byte) (values[i + 6] >>> 36); + buf[off++] = (byte) (values[i + 6] >>> 28); + buf[off++] = (byte) (values[i + 6] >>> 20); + buf[off++] = (byte) (values[i + 6] >>> 12); + buf[off++] = (byte) (values[i + 6] >>> 4); + + buf[off] = (byte) (values[i + 6] << 4); + buf[off++] |= values[i + 7] >>> 40; + buf[off++] = (byte) (values[i + 7] >>> 32); + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits45(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 37); + buf[off++] = (byte) (values[i + 0] >>> 29); + buf[off++] = (byte) (values[i + 0] >>> 21); + buf[off++] = (byte) (values[i + 0] >>> 13); + buf[off++] = (byte) (values[i + 0] >>> 5); + + buf[off] = (byte) (values[i + 0] << 3); + buf[off++] |= values[i + 1] >>> 42; + buf[off++] = (byte) (values[i + 1] >>> 34); + buf[off++] = (byte) (values[i + 1] >>> 26); + buf[off++] = (byte) (values[i + 1] >>> 18); + buf[off++] = (byte) (values[i + 1] >>> 10); + buf[off++] = (byte) (values[i + 1] >>> 2); + + buf[off] = (byte) (values[i + 1] << 6); + buf[off++] |= values[i + 2] 
>>> 39; + buf[off++] = (byte) (values[i + 2] >>> 31); + buf[off++] = (byte) (values[i + 2] >>> 23); + buf[off++] = (byte) (values[i + 2] >>> 15); + buf[off++] = (byte) (values[i + 2] >>> 7); + + buf[off] = (byte) (values[i + 2] << 1); + buf[off++] |= values[i + 3] >>> 44; + buf[off++] = (byte) (values[i + 3] >>> 36); + buf[off++] = (byte) (values[i + 3] >>> 28); + buf[off++] = (byte) (values[i + 3] >>> 20); + buf[off++] = (byte) (values[i + 3] >>> 12); + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 41; + buf[off++] = (byte) (values[i + 4] >>> 33); + buf[off++] = (byte) (values[i + 4] >>> 25); + buf[off++] = (byte) (values[i + 4] >>> 17); + buf[off++] = (byte) (values[i + 4] >>> 9); + buf[off++] = (byte) (values[i + 4] >>> 1); + + buf[off] = (byte) (values[i + 4] << 7); + buf[off++] |= values[i + 5] >>> 38; + buf[off++] = (byte) (values[i + 5] >>> 30); + buf[off++] = (byte) (values[i + 5] >>> 22); + buf[off++] = (byte) (values[i + 5] >>> 14); + buf[off++] = (byte) (values[i + 5] >>> 6); + + buf[off] = (byte) (values[i + 5] << 2); + buf[off++] |= values[i + 6] >>> 43; + buf[off++] = (byte) (values[i + 6] >>> 35); + buf[off++] = (byte) (values[i + 6] >>> 27); + buf[off++] = (byte) (values[i + 6] >>> 19); + buf[off++] = (byte) (values[i + 6] >>> 11); + buf[off++] = (byte) (values[i + 6] >>> 3); + + buf[off] = (byte) (values[i + 6] << 5); + buf[off++] |= values[i + 7] >>> 40; + buf[off++] = (byte) (values[i + 7] >>> 32); + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits46(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 38); + buf[off++] = (byte) (values[i + 0] >>> 30); + buf[off++] = (byte) (values[i + 0] >>> 22); + buf[off++] = (byte) (values[i + 0] >>> 14); + buf[off++] = (byte) (values[i 
+ 0] >>> 6); + + buf[off] = (byte) (values[i + 0] << 2); + buf[off++] |= values[i + 1] >>> 44; + buf[off++] = (byte) (values[i + 1] >>> 36); + buf[off++] = (byte) (values[i + 1] >>> 28); + buf[off++] = (byte) (values[i + 1] >>> 20); + buf[off++] = (byte) (values[i + 1] >>> 12); + buf[off++] = (byte) (values[i + 1] >>> 4); + + buf[off] = (byte) (values[i + 1] << 4); + buf[off++] |= values[i + 2] >>> 42; + buf[off++] = (byte) (values[i + 2] >>> 34); + buf[off++] = (byte) (values[i + 2] >>> 26); + buf[off++] = (byte) (values[i + 2] >>> 18); + buf[off++] = (byte) (values[i + 2] >>> 10); + buf[off++] = (byte) (values[i + 2] >>> 2); + + buf[off] = (byte) (values[i + 2] << 6); + buf[off++] |= values[i + 3] >>> 40; + buf[off++] = (byte) (values[i + 3] >>> 32); + buf[off++] = (byte) (values[i + 3] >>> 24); + buf[off++] = (byte) (values[i + 3] >>> 16); + buf[off++] = (byte) (values[i + 3] >>> 8); + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 38); + buf[off++] = (byte) (values[i + 4] >>> 30); + buf[off++] = (byte) (values[i + 4] >>> 22); + buf[off++] = (byte) (values[i + 4] >>> 14); + buf[off++] = (byte) (values[i + 4] >>> 6); + + buf[off] = (byte) (values[i + 4] << 2); + buf[off++] |= values[i + 5] >>> 44; + buf[off++] = (byte) (values[i + 5] >>> 36); + buf[off++] = (byte) (values[i + 5] >>> 28); + buf[off++] = (byte) (values[i + 5] >>> 20); + buf[off++] = (byte) (values[i + 5] >>> 12); + buf[off++] = (byte) (values[i + 5] >>> 4); + + buf[off] = (byte) (values[i + 5] << 4); + buf[off++] |= values[i + 6] >>> 42; + buf[off++] = (byte) (values[i + 6] >>> 34); + buf[off++] = (byte) (values[i + 6] >>> 26); + buf[off++] = (byte) (values[i + 6] >>> 18); + buf[off++] = (byte) (values[i + 6] >>> 10); + buf[off++] = (byte) (values[i + 6] >>> 2); + + buf[off] = (byte) (values[i + 6] << 6); + buf[off++] |= values[i + 7] >>> 40; + buf[off++] = (byte) (values[i + 7] >>> 32); + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 
7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits47(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 39); + buf[off++] = (byte) (values[i + 0] >>> 31); + buf[off++] = (byte) (values[i + 0] >>> 23); + buf[off++] = (byte) (values[i + 0] >>> 15); + buf[off++] = (byte) (values[i + 0] >>> 7); + + buf[off] = (byte) (values[i + 0] << 1); + buf[off++] |= values[i + 1] >>> 46; + buf[off++] = (byte) (values[i + 1] >>> 38); + buf[off++] = (byte) (values[i + 1] >>> 30); + buf[off++] = (byte) (values[i + 1] >>> 22); + buf[off++] = (byte) (values[i + 1] >>> 14); + buf[off++] = (byte) (values[i + 1] >>> 6); + + buf[off] = (byte) (values[i + 1] << 2); + buf[off++] |= values[i + 2] >>> 45; + buf[off++] = (byte) (values[i + 2] >>> 37); + buf[off++] = (byte) (values[i + 2] >>> 29); + buf[off++] = (byte) (values[i + 2] >>> 21); + buf[off++] = (byte) (values[i + 2] >>> 13); + buf[off++] = (byte) (values[i + 2] >>> 5); + + buf[off] = (byte) (values[i + 2] << 3); + buf[off++] |= values[i + 3] >>> 44; + buf[off++] = (byte) (values[i + 3] >>> 36); + buf[off++] = (byte) (values[i + 3] >>> 28); + buf[off++] = (byte) (values[i + 3] >>> 20); + buf[off++] = (byte) (values[i + 3] >>> 12); + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 43; + buf[off++] = (byte) (values[i + 4] >>> 35); + buf[off++] = (byte) (values[i + 4] >>> 27); + buf[off++] = (byte) (values[i + 4] >>> 19); + buf[off++] = (byte) (values[i + 4] >>> 11); + buf[off++] = (byte) (values[i + 4] >>> 3); + + buf[off] = (byte) (values[i + 4] << 5); + buf[off++] |= values[i + 5] >>> 42; + buf[off++] = (byte) (values[i + 5] >>> 34); + buf[off++] = (byte) (values[i + 5] >>> 26); + buf[off++] = (byte) (values[i + 5] >>> 18); + buf[off++] = (byte) (values[i + 5] >>> 10); + buf[off++] = (byte) (values[i + 5] >>> 2); + + buf[off] = (byte) 
(values[i + 5] << 6); + buf[off++] |= values[i + 6] >>> 41; + buf[off++] = (byte) (values[i + 6] >>> 33); + buf[off++] = (byte) (values[i + 6] >>> 25); + buf[off++] = (byte) (values[i + 6] >>> 17); + buf[off++] = (byte) (values[i + 6] >>> 9); + buf[off++] = (byte) (values[i + 6] >>> 1); + + buf[off] = (byte) (values[i + 6] << 7); + buf[off++] |= values[i + 7] >>> 40; + buf[off++] = (byte) (values[i + 7] >>> 32); + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits48(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 40); + buf[off++] = (byte) (values[i + 0] >>> 32); + buf[off++] = (byte) (values[i + 0] >>> 24); + buf[off++] = (byte) (values[i + 0] >>> 16); + buf[off++] = (byte) (values[i + 0] >>> 8); + buf[off++] = (byte) (values[i + 0]); + + buf[off++] = (byte) (values[i + 1] >>> 40); + buf[off++] = (byte) (values[i + 1] >>> 32); + buf[off++] = (byte) (values[i + 1] >>> 24); + buf[off++] = (byte) (values[i + 1] >>> 16); + buf[off++] = (byte) (values[i + 1] >>> 8); + buf[off++] = (byte) (values[i + 1]); + + buf[off++] = (byte) (values[i + 2] >>> 40); + buf[off++] = (byte) (values[i + 2] >>> 32); + buf[off++] = (byte) (values[i + 2] >>> 24); + buf[off++] = (byte) (values[i + 2] >>> 16); + buf[off++] = (byte) (values[i + 2] >>> 8); + buf[off++] = (byte) (values[i + 2]); + + buf[off++] = (byte) (values[i + 3] >>> 40); + buf[off++] = (byte) (values[i + 3] >>> 32); + buf[off++] = (byte) (values[i + 3] >>> 24); + buf[off++] = (byte) (values[i + 3] >>> 16); + buf[off++] = (byte) (values[i + 3] >>> 8); + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 40); + buf[off++] = (byte) (values[i + 4] >>> 32); + buf[off++] = (byte) (values[i + 4] >>> 24); + buf[off++] = (byte) (values[i + 4] >>> 16); + buf[off++] = (byte) (values[i + 4] >>> 8); + 
buf[off++] = (byte) (values[i + 4]); + + buf[off++] = (byte) (values[i + 5] >>> 40); + buf[off++] = (byte) (values[i + 5] >>> 32); + buf[off++] = (byte) (values[i + 5] >>> 24); + buf[off++] = (byte) (values[i + 5] >>> 16); + buf[off++] = (byte) (values[i + 5] >>> 8); + buf[off++] = (byte) (values[i + 5]); + + buf[off++] = (byte) (values[i + 6] >>> 40); + buf[off++] = (byte) (values[i + 6] >>> 32); + buf[off++] = (byte) (values[i + 6] >>> 24); + buf[off++] = (byte) (values[i + 6] >>> 16); + buf[off++] = (byte) (values[i + 6] >>> 8); + buf[off++] = (byte) (values[i + 6]); + + buf[off++] = (byte) (values[i + 7] >>> 40); + buf[off++] = (byte) (values[i + 7] >>> 32); + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits49(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 41); + buf[off++] = (byte) (values[i + 0] >>> 33); + buf[off++] = (byte) (values[i + 0] >>> 25); + buf[off++] = (byte) (values[i + 0] >>> 17); + buf[off++] = (byte) (values[i + 0] >>> 9); + buf[off++] = (byte) (values[i + 0] >>> 1); + + buf[off] = (byte) (values[i + 0] << 7); + buf[off++] |= values[i + 1] >>> 42; + buf[off++] = (byte) (values[i + 1] >>> 34); + buf[off++] = (byte) (values[i + 1] >>> 26); + buf[off++] = (byte) (values[i + 1] >>> 18); + buf[off++] = (byte) (values[i + 1] >>> 10); + buf[off++] = (byte) (values[i + 1] >>> 2); + + buf[off] = (byte) (values[i + 1] << 6); + buf[off++] |= values[i + 2] >>> 43; + buf[off++] = (byte) (values[i + 2] >>> 35); + buf[off++] = (byte) (values[i + 2] >>> 27); + buf[off++] = (byte) (values[i + 2] >>> 19); + buf[off++] = (byte) (values[i + 2] >>> 11); + buf[off++] = (byte) (values[i + 2] >>> 3); + + buf[off] = (byte) (values[i + 2] << 5); + buf[off++] |= values[i + 3] >>> 44; + buf[off++] = (byte) (values[i + 3] >>> 36); + buf[off++] = (byte) (values[i + 3] 
>>> 28); + buf[off++] = (byte) (values[i + 3] >>> 20); + buf[off++] = (byte) (values[i + 3] >>> 12); + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 45; + buf[off++] = (byte) (values[i + 4] >>> 37); + buf[off++] = (byte) (values[i + 4] >>> 29); + buf[off++] = (byte) (values[i + 4] >>> 21); + buf[off++] = (byte) (values[i + 4] >>> 13); + buf[off++] = (byte) (values[i + 4] >>> 5); + + buf[off] = (byte) (values[i + 4] << 3); + buf[off++] |= values[i + 5] >>> 46; + buf[off++] = (byte) (values[i + 5] >>> 38); + buf[off++] = (byte) (values[i + 5] >>> 30); + buf[off++] = (byte) (values[i + 5] >>> 22); + buf[off++] = (byte) (values[i + 5] >>> 14); + buf[off++] = (byte) (values[i + 5] >>> 6); + + buf[off] = (byte) (values[i + 5] << 2); + buf[off++] |= values[i + 6] >>> 47; + buf[off++] = (byte) (values[i + 6] >>> 39); + buf[off++] = (byte) (values[i + 6] >>> 31); + buf[off++] = (byte) (values[i + 6] >>> 23); + buf[off++] = (byte) (values[i + 6] >>> 15); + buf[off++] = (byte) (values[i + 6] >>> 7); + + buf[off] = (byte) (values[i + 6] << 1); + buf[off++] |= values[i + 7] >>> 48; + buf[off++] = (byte) (values[i + 7] >>> 40); + buf[off++] = (byte) (values[i + 7] >>> 32); + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits50(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 42); + buf[off++] = (byte) (values[i + 0] >>> 34); + buf[off++] = (byte) (values[i + 0] >>> 26); + buf[off++] = (byte) (values[i + 0] >>> 18); + buf[off++] = (byte) (values[i + 0] >>> 10); + buf[off++] = (byte) (values[i + 0] >>> 2); + + buf[off] = (byte) (values[i + 0] << 6); + buf[off++] |= values[i + 1] >>> 44; + buf[off++] = (byte) (values[i + 1] >>> 36); + buf[off++] = (byte) (values[i + 1] >>> 28); + buf[off++] = (byte) 
(values[i + 1] >>> 20); + buf[off++] = (byte) (values[i + 1] >>> 12); + buf[off++] = (byte) (values[i + 1] >>> 4); + + buf[off] = (byte) (values[i + 1] << 4); + buf[off++] |= values[i + 2] >>> 46; + buf[off++] = (byte) (values[i + 2] >>> 38); + buf[off++] = (byte) (values[i + 2] >>> 30); + buf[off++] = (byte) (values[i + 2] >>> 22); + buf[off++] = (byte) (values[i + 2] >>> 14); + buf[off++] = (byte) (values[i + 2] >>> 6); + + buf[off] = (byte) (values[i + 2] << 2); + buf[off++] |= values[i + 3] >>> 48; + buf[off++] = (byte) (values[i + 3] >>> 40); + buf[off++] = (byte) (values[i + 3] >>> 32); + buf[off++] = (byte) (values[i + 3] >>> 24); + buf[off++] = (byte) (values[i + 3] >>> 16); + buf[off++] = (byte) (values[i + 3] >>> 8); + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 42); + buf[off++] = (byte) (values[i + 4] >>> 34); + buf[off++] = (byte) (values[i + 4] >>> 26); + buf[off++] = (byte) (values[i + 4] >>> 18); + buf[off++] = (byte) (values[i + 4] >>> 10); + buf[off++] = (byte) (values[i + 4] >>> 2); + + buf[off] = (byte) (values[i + 4] << 6); + buf[off++] |= values[i + 5] >>> 44; + buf[off++] = (byte) (values[i + 5] >>> 36); + buf[off++] = (byte) (values[i + 5] >>> 28); + buf[off++] = (byte) (values[i + 5] >>> 20); + buf[off++] = (byte) (values[i + 5] >>> 12); + buf[off++] = (byte) (values[i + 5] >>> 4); + + buf[off] = (byte) (values[i + 5] << 4); + buf[off++] |= values[i + 6] >>> 46; + buf[off++] = (byte) (values[i + 6] >>> 38); + buf[off++] = (byte) (values[i + 6] >>> 30); + buf[off++] = (byte) (values[i + 6] >>> 22); + buf[off++] = (byte) (values[i + 6] >>> 14); + buf[off++] = (byte) (values[i + 6] >>> 6); + + buf[off] = (byte) (values[i + 6] << 2); + buf[off++] |= values[i + 7] >>> 48; + buf[off++] = (byte) (values[i + 7] >>> 40); + buf[off++] = (byte) (values[i + 7] >>> 32); + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = 
(byte) (values[i + 7]); + } + + static void packBits51(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 43); + buf[off++] = (byte) (values[i + 0] >>> 35); + buf[off++] = (byte) (values[i + 0] >>> 27); + buf[off++] = (byte) (values[i + 0] >>> 19); + buf[off++] = (byte) (values[i + 0] >>> 11); + buf[off++] = (byte) (values[i + 0] >>> 3); + + buf[off] = (byte) (values[i + 0] << 5); + buf[off++] |= values[i + 1] >>> 46; + buf[off++] = (byte) (values[i + 1] >>> 38); + buf[off++] = (byte) (values[i + 1] >>> 30); + buf[off++] = (byte) (values[i + 1] >>> 22); + buf[off++] = (byte) (values[i + 1] >>> 14); + buf[off++] = (byte) (values[i + 1] >>> 6); + + buf[off] = (byte) (values[i + 1] << 2); + buf[off++] |= values[i + 2] >>> 49; + buf[off++] = (byte) (values[i + 2] >>> 41); + buf[off++] = (byte) (values[i + 2] >>> 33); + buf[off++] = (byte) (values[i + 2] >>> 25); + buf[off++] = (byte) (values[i + 2] >>> 17); + buf[off++] = (byte) (values[i + 2] >>> 9); + buf[off++] = (byte) (values[i + 2] >>> 1); + + buf[off] = (byte) (values[i + 2] << 7); + buf[off++] |= values[i + 3] >>> 44; + buf[off++] = (byte) (values[i + 3] >>> 36); + buf[off++] = (byte) (values[i + 3] >>> 28); + buf[off++] = (byte) (values[i + 3] >>> 20); + buf[off++] = (byte) (values[i + 3] >>> 12); + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 47; + buf[off++] = (byte) (values[i + 4] >>> 39); + buf[off++] = (byte) (values[i + 4] >>> 31); + buf[off++] = (byte) (values[i + 4] >>> 23); + buf[off++] = (byte) (values[i + 4] >>> 15); + buf[off++] = (byte) (values[i + 4] >>> 7); + + buf[off] = (byte) (values[i + 4] << 1); + buf[off++] |= values[i + 5] >>> 50; + buf[off++] = (byte) (values[i + 5] >>> 42); + buf[off++] = (byte) (values[i + 5] >>> 34); + buf[off++] = (byte) (values[i + 5] >>> 26); + buf[off++] = (byte) (values[i + 5] >>> 18); + buf[off++] = (byte) (values[i + 5] >>> 10); + 
buf[off++] = (byte) (values[i + 5] >>> 2); + + buf[off] = (byte) (values[i + 5] << 6); + buf[off++] |= values[i + 6] >>> 45; + buf[off++] = (byte) (values[i + 6] >>> 37); + buf[off++] = (byte) (values[i + 6] >>> 29); + buf[off++] = (byte) (values[i + 6] >>> 21); + buf[off++] = (byte) (values[i + 6] >>> 13); + buf[off++] = (byte) (values[i + 6] >>> 5); + + buf[off] = (byte) (values[i + 6] << 3); + buf[off++] |= values[i + 7] >>> 48; + buf[off++] = (byte) (values[i + 7] >>> 40); + buf[off++] = (byte) (values[i + 7] >>> 32); + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits52(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 44); + buf[off++] = (byte) (values[i + 0] >>> 36); + buf[off++] = (byte) (values[i + 0] >>> 28); + buf[off++] = (byte) (values[i + 0] >>> 20); + buf[off++] = (byte) (values[i + 0] >>> 12); + buf[off++] = (byte) (values[i + 0] >>> 4); + + buf[off] = (byte) (values[i + 0] << 4); + buf[off++] |= values[i + 1] >>> 48; + buf[off++] = (byte) (values[i + 1] >>> 40); + buf[off++] = (byte) (values[i + 1] >>> 32); + buf[off++] = (byte) (values[i + 1] >>> 24); + buf[off++] = (byte) (values[i + 1] >>> 16); + buf[off++] = (byte) (values[i + 1] >>> 8); + buf[off++] = (byte) (values[i + 1]); + + buf[off++] = (byte) (values[i + 2] >>> 44); + buf[off++] = (byte) (values[i + 2] >>> 36); + buf[off++] = (byte) (values[i + 2] >>> 28); + buf[off++] = (byte) (values[i + 2] >>> 20); + buf[off++] = (byte) (values[i + 2] >>> 12); + buf[off++] = (byte) (values[i + 2] >>> 4); + + buf[off] = (byte) (values[i + 2] << 4); + buf[off++] |= values[i + 3] >>> 48; + buf[off++] = (byte) (values[i + 3] >>> 40); + buf[off++] = (byte) (values[i + 3] >>> 32); + buf[off++] = (byte) (values[i + 3] >>> 24); + buf[off++] = (byte) (values[i + 3] >>> 16); + buf[off++] = (byte) (values[i + 
3] >>> 8); + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 44); + buf[off++] = (byte) (values[i + 4] >>> 36); + buf[off++] = (byte) (values[i + 4] >>> 28); + buf[off++] = (byte) (values[i + 4] >>> 20); + buf[off++] = (byte) (values[i + 4] >>> 12); + buf[off++] = (byte) (values[i + 4] >>> 4); + + buf[off] = (byte) (values[i + 4] << 4); + buf[off++] |= values[i + 5] >>> 48; + buf[off++] = (byte) (values[i + 5] >>> 40); + buf[off++] = (byte) (values[i + 5] >>> 32); + buf[off++] = (byte) (values[i + 5] >>> 24); + buf[off++] = (byte) (values[i + 5] >>> 16); + buf[off++] = (byte) (values[i + 5] >>> 8); + buf[off++] = (byte) (values[i + 5]); + + buf[off++] = (byte) (values[i + 6] >>> 44); + buf[off++] = (byte) (values[i + 6] >>> 36); + buf[off++] = (byte) (values[i + 6] >>> 28); + buf[off++] = (byte) (values[i + 6] >>> 20); + buf[off++] = (byte) (values[i + 6] >>> 12); + buf[off++] = (byte) (values[i + 6] >>> 4); + + buf[off] = (byte) (values[i + 6] << 4); + buf[off++] |= values[i + 7] >>> 48; + buf[off++] = (byte) (values[i + 7] >>> 40); + buf[off++] = (byte) (values[i + 7] >>> 32); + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits53(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 45); + buf[off++] = (byte) (values[i + 0] >>> 37); + buf[off++] = (byte) (values[i + 0] >>> 29); + buf[off++] = (byte) (values[i + 0] >>> 21); + buf[off++] = (byte) (values[i + 0] >>> 13); + buf[off++] = (byte) (values[i + 0] >>> 5); + + buf[off] = (byte) (values[i + 0] << 3); + buf[off++] |= values[i + 1] >>> 50; + buf[off++] = (byte) (values[i + 1] >>> 42); + buf[off++] = (byte) (values[i + 1] >>> 34); + buf[off++] = (byte) (values[i + 1] >>> 26); + buf[off++] = (byte) (values[i + 1] >>> 18); + buf[off++] = (byte) (values[i + 1] >>> 10); + buf[off++] = 
(byte) (values[i + 1] >>> 2); + + buf[off] = (byte) (values[i + 1] << 6); + buf[off++] |= values[i + 2] >>> 47; + buf[off++] = (byte) (values[i + 2] >>> 39); + buf[off++] = (byte) (values[i + 2] >>> 31); + buf[off++] = (byte) (values[i + 2] >>> 23); + buf[off++] = (byte) (values[i + 2] >>> 15); + buf[off++] = (byte) (values[i + 2] >>> 7); + + buf[off] = (byte) (values[i + 2] << 1); + buf[off++] |= values[i + 3] >>> 52; + buf[off++] = (byte) (values[i + 3] >>> 44); + buf[off++] = (byte) (values[i + 3] >>> 36); + buf[off++] = (byte) (values[i + 3] >>> 28); + buf[off++] = (byte) (values[i + 3] >>> 20); + buf[off++] = (byte) (values[i + 3] >>> 12); + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 49; + buf[off++] = (byte) (values[i + 4] >>> 41); + buf[off++] = (byte) (values[i + 4] >>> 33); + buf[off++] = (byte) (values[i + 4] >>> 25); + buf[off++] = (byte) (values[i + 4] >>> 17); + buf[off++] = (byte) (values[i + 4] >>> 9); + buf[off++] = (byte) (values[i + 4] >>> 1); + + buf[off] = (byte) (values[i + 4] << 7); + buf[off++] |= values[i + 5] >>> 46; + buf[off++] = (byte) (values[i + 5] >>> 38); + buf[off++] = (byte) (values[i + 5] >>> 30); + buf[off++] = (byte) (values[i + 5] >>> 22); + buf[off++] = (byte) (values[i + 5] >>> 14); + buf[off++] = (byte) (values[i + 5] >>> 6); + + buf[off] = (byte) (values[i + 5] << 2); + buf[off++] |= values[i + 6] >>> 51; + buf[off++] = (byte) (values[i + 6] >>> 43); + buf[off++] = (byte) (values[i + 6] >>> 35); + buf[off++] = (byte) (values[i + 6] >>> 27); + buf[off++] = (byte) (values[i + 6] >>> 19); + buf[off++] = (byte) (values[i + 6] >>> 11); + buf[off++] = (byte) (values[i + 6] >>> 3); + + buf[off] = (byte) (values[i + 6] << 5); + buf[off++] |= values[i + 7] >>> 48; + buf[off++] = (byte) (values[i + 7] >>> 40); + buf[off++] = (byte) (values[i + 7] >>> 32); + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + 
buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits54(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 46); + buf[off++] = (byte) (values[i + 0] >>> 38); + buf[off++] = (byte) (values[i + 0] >>> 30); + buf[off++] = (byte) (values[i + 0] >>> 22); + buf[off++] = (byte) (values[i + 0] >>> 14); + buf[off++] = (byte) (values[i + 0] >>> 6); + + buf[off] = (byte) (values[i + 0] << 2); + buf[off++] |= values[i + 1] >>> 52; + buf[off++] = (byte) (values[i + 1] >>> 44); + buf[off++] = (byte) (values[i + 1] >>> 36); + buf[off++] = (byte) (values[i + 1] >>> 28); + buf[off++] = (byte) (values[i + 1] >>> 20); + buf[off++] = (byte) (values[i + 1] >>> 12); + buf[off++] = (byte) (values[i + 1] >>> 4); + + buf[off] = (byte) (values[i + 1] << 4); + buf[off++] |= values[i + 2] >>> 50; + buf[off++] = (byte) (values[i + 2] >>> 42); + buf[off++] = (byte) (values[i + 2] >>> 34); + buf[off++] = (byte) (values[i + 2] >>> 26); + buf[off++] = (byte) (values[i + 2] >>> 18); + buf[off++] = (byte) (values[i + 2] >>> 10); + buf[off++] = (byte) (values[i + 2] >>> 2); + + buf[off] = (byte) (values[i + 2] << 6); + buf[off++] |= values[i + 3] >>> 48; + buf[off++] = (byte) (values[i + 3] >>> 40); + buf[off++] = (byte) (values[i + 3] >>> 32); + buf[off++] = (byte) (values[i + 3] >>> 24); + buf[off++] = (byte) (values[i + 3] >>> 16); + buf[off++] = (byte) (values[i + 3] >>> 8); + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 46); + buf[off++] = (byte) (values[i + 4] >>> 38); + buf[off++] = (byte) (values[i + 4] >>> 30); + buf[off++] = (byte) (values[i + 4] >>> 22); + buf[off++] = (byte) (values[i + 4] >>> 14); + buf[off++] = (byte) (values[i + 4] >>> 6); + + buf[off] = (byte) (values[i + 4] << 2); + buf[off++] |= values[i + 5] >>> 52; + buf[off++] = (byte) (values[i + 5] >>> 44); + buf[off++] = (byte) (values[i + 5] >>> 36); + buf[off++] = (byte) (values[i + 
5] >>> 28); + buf[off++] = (byte) (values[i + 5] >>> 20); + buf[off++] = (byte) (values[i + 5] >>> 12); + buf[off++] = (byte) (values[i + 5] >>> 4); + + buf[off] = (byte) (values[i + 5] << 4); + buf[off++] |= values[i + 6] >>> 50; + buf[off++] = (byte) (values[i + 6] >>> 42); + buf[off++] = (byte) (values[i + 6] >>> 34); + buf[off++] = (byte) (values[i + 6] >>> 26); + buf[off++] = (byte) (values[i + 6] >>> 18); + buf[off++] = (byte) (values[i + 6] >>> 10); + buf[off++] = (byte) (values[i + 6] >>> 2); + + buf[off] = (byte) (values[i + 6] << 6); + buf[off++] |= values[i + 7] >>> 48; + buf[off++] = (byte) (values[i + 7] >>> 40); + buf[off++] = (byte) (values[i + 7] >>> 32); + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits55(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 47); + buf[off++] = (byte) (values[i + 0] >>> 39); + buf[off++] = (byte) (values[i + 0] >>> 31); + buf[off++] = (byte) (values[i + 0] >>> 23); + buf[off++] = (byte) (values[i + 0] >>> 15); + buf[off++] = (byte) (values[i + 0] >>> 7); + + buf[off] = (byte) (values[i + 0] << 1); + buf[off++] |= values[i + 1] >>> 54; + buf[off++] = (byte) (values[i + 1] >>> 46); + buf[off++] = (byte) (values[i + 1] >>> 38); + buf[off++] = (byte) (values[i + 1] >>> 30); + buf[off++] = (byte) (values[i + 1] >>> 22); + buf[off++] = (byte) (values[i + 1] >>> 14); + buf[off++] = (byte) (values[i + 1] >>> 6); + + buf[off] = (byte) (values[i + 1] << 2); + buf[off++] |= values[i + 2] >>> 53; + buf[off++] = (byte) (values[i + 2] >>> 45); + buf[off++] = (byte) (values[i + 2] >>> 37); + buf[off++] = (byte) (values[i + 2] >>> 29); + buf[off++] = (byte) (values[i + 2] >>> 21); + buf[off++] = (byte) (values[i + 2] >>> 13); + buf[off++] = (byte) (values[i + 2] >>> 5); + + buf[off] = (byte) (values[i + 2] << 3); + buf[off++] |= 
values[i + 3] >>> 52; + buf[off++] = (byte) (values[i + 3] >>> 44); + buf[off++] = (byte) (values[i + 3] >>> 36); + buf[off++] = (byte) (values[i + 3] >>> 28); + buf[off++] = (byte) (values[i + 3] >>> 20); + buf[off++] = (byte) (values[i + 3] >>> 12); + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 51; + buf[off++] = (byte) (values[i + 4] >>> 43); + buf[off++] = (byte) (values[i + 4] >>> 35); + buf[off++] = (byte) (values[i + 4] >>> 27); + buf[off++] = (byte) (values[i + 4] >>> 19); + buf[off++] = (byte) (values[i + 4] >>> 11); + buf[off++] = (byte) (values[i + 4] >>> 3); + + buf[off] = (byte) (values[i + 4] << 5); + buf[off++] |= values[i + 5] >>> 50; + buf[off++] = (byte) (values[i + 5] >>> 42); + buf[off++] = (byte) (values[i + 5] >>> 34); + buf[off++] = (byte) (values[i + 5] >>> 26); + buf[off++] = (byte) (values[i + 5] >>> 18); + buf[off++] = (byte) (values[i + 5] >>> 10); + buf[off++] = (byte) (values[i + 5] >>> 2); + + buf[off] = (byte) (values[i + 5] << 6); + buf[off++] |= values[i + 6] >>> 49; + buf[off++] = (byte) (values[i + 6] >>> 41); + buf[off++] = (byte) (values[i + 6] >>> 33); + buf[off++] = (byte) (values[i + 6] >>> 25); + buf[off++] = (byte) (values[i + 6] >>> 17); + buf[off++] = (byte) (values[i + 6] >>> 9); + buf[off++] = (byte) (values[i + 6] >>> 1); + + buf[off] = (byte) (values[i + 6] << 7); + buf[off++] |= values[i + 7] >>> 48; + buf[off++] = (byte) (values[i + 7] >>> 40); + buf[off++] = (byte) (values[i + 7] >>> 32); + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits56(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 48); + buf[off++] = (byte) (values[i + 0] >>> 40); + buf[off++] = (byte) (values[i + 0] >>> 32); + buf[off++] = (byte) (values[i + 0] >>> 24); + 
buf[off++] = (byte) (values[i + 0] >>> 16); + buf[off++] = (byte) (values[i + 0] >>> 8); + buf[off++] = (byte) (values[i + 0]); + + buf[off++] = (byte) (values[i + 1] >>> 48); + buf[off++] = (byte) (values[i + 1] >>> 40); + buf[off++] = (byte) (values[i + 1] >>> 32); + buf[off++] = (byte) (values[i + 1] >>> 24); + buf[off++] = (byte) (values[i + 1] >>> 16); + buf[off++] = (byte) (values[i + 1] >>> 8); + buf[off++] = (byte) (values[i + 1]); + + buf[off++] = (byte) (values[i + 2] >>> 48); + buf[off++] = (byte) (values[i + 2] >>> 40); + buf[off++] = (byte) (values[i + 2] >>> 32); + buf[off++] = (byte) (values[i + 2] >>> 24); + buf[off++] = (byte) (values[i + 2] >>> 16); + buf[off++] = (byte) (values[i + 2] >>> 8); + buf[off++] = (byte) (values[i + 2]); + + buf[off++] = (byte) (values[i + 3] >>> 48); + buf[off++] = (byte) (values[i + 3] >>> 40); + buf[off++] = (byte) (values[i + 3] >>> 32); + buf[off++] = (byte) (values[i + 3] >>> 24); + buf[off++] = (byte) (values[i + 3] >>> 16); + buf[off++] = (byte) (values[i + 3] >>> 8); + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 48); + buf[off++] = (byte) (values[i + 4] >>> 40); + buf[off++] = (byte) (values[i + 4] >>> 32); + buf[off++] = (byte) (values[i + 4] >>> 24); + buf[off++] = (byte) (values[i + 4] >>> 16); + buf[off++] = (byte) (values[i + 4] >>> 8); + buf[off++] = (byte) (values[i + 4]); + + buf[off++] = (byte) (values[i + 5] >>> 48); + buf[off++] = (byte) (values[i + 5] >>> 40); + buf[off++] = (byte) (values[i + 5] >>> 32); + buf[off++] = (byte) (values[i + 5] >>> 24); + buf[off++] = (byte) (values[i + 5] >>> 16); + buf[off++] = (byte) (values[i + 5] >>> 8); + buf[off++] = (byte) (values[i + 5]); + + buf[off++] = (byte) (values[i + 6] >>> 48); + buf[off++] = (byte) (values[i + 6] >>> 40); + buf[off++] = (byte) (values[i + 6] >>> 32); + buf[off++] = (byte) (values[i + 6] >>> 24); + buf[off++] = (byte) (values[i + 6] >>> 16); + buf[off++] = (byte) (values[i + 6] >>> 8); + buf[off++] = 
(byte) (values[i + 6]); + + buf[off++] = (byte) (values[i + 7] >>> 48); + buf[off++] = (byte) (values[i + 7] >>> 40); + buf[off++] = (byte) (values[i + 7] >>> 32); + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits57(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 49); + buf[off++] = (byte) (values[i + 0] >>> 41); + buf[off++] = (byte) (values[i + 0] >>> 33); + buf[off++] = (byte) (values[i + 0] >>> 25); + buf[off++] = (byte) (values[i + 0] >>> 17); + buf[off++] = (byte) (values[i + 0] >>> 9); + buf[off++] = (byte) (values[i + 0] >>> 1); + + buf[off] = (byte) (values[i + 0] << 7); + buf[off++] |= values[i + 1] >>> 50; + buf[off++] = (byte) (values[i + 1] >>> 42); + buf[off++] = (byte) (values[i + 1] >>> 34); + buf[off++] = (byte) (values[i + 1] >>> 26); + buf[off++] = (byte) (values[i + 1] >>> 18); + buf[off++] = (byte) (values[i + 1] >>> 10); + buf[off++] = (byte) (values[i + 1] >>> 2); + + buf[off] = (byte) (values[i + 1] << 6); + buf[off++] |= values[i + 2] >>> 51; + buf[off++] = (byte) (values[i + 2] >>> 43); + buf[off++] = (byte) (values[i + 2] >>> 35); + buf[off++] = (byte) (values[i + 2] >>> 27); + buf[off++] = (byte) (values[i + 2] >>> 19); + buf[off++] = (byte) (values[i + 2] >>> 11); + buf[off++] = (byte) (values[i + 2] >>> 3); + + buf[off] = (byte) (values[i + 2] << 5); + buf[off++] |= values[i + 3] >>> 52; + buf[off++] = (byte) (values[i + 3] >>> 44); + buf[off++] = (byte) (values[i + 3] >>> 36); + buf[off++] = (byte) (values[i + 3] >>> 28); + buf[off++] = (byte) (values[i + 3] >>> 20); + buf[off++] = (byte) (values[i + 3] >>> 12); + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 53; + buf[off++] = (byte) (values[i + 4] >>> 45); + buf[off++] = (byte) (values[i + 4] >>> 37); + 
buf[off++] = (byte) (values[i + 4] >>> 29); + buf[off++] = (byte) (values[i + 4] >>> 21); + buf[off++] = (byte) (values[i + 4] >>> 13); + buf[off++] = (byte) (values[i + 4] >>> 5); + + buf[off] = (byte) (values[i + 4] << 3); + buf[off++] |= values[i + 5] >>> 54; + buf[off++] = (byte) (values[i + 5] >>> 46); + buf[off++] = (byte) (values[i + 5] >>> 38); + buf[off++] = (byte) (values[i + 5] >>> 30); + buf[off++] = (byte) (values[i + 5] >>> 22); + buf[off++] = (byte) (values[i + 5] >>> 14); + buf[off++] = (byte) (values[i + 5] >>> 6); + + buf[off] = (byte) (values[i + 5] << 2); + buf[off++] |= values[i + 6] >>> 55; + buf[off++] = (byte) (values[i + 6] >>> 47); + buf[off++] = (byte) (values[i + 6] >>> 39); + buf[off++] = (byte) (values[i + 6] >>> 31); + buf[off++] = (byte) (values[i + 6] >>> 23); + buf[off++] = (byte) (values[i + 6] >>> 15); + buf[off++] = (byte) (values[i + 6] >>> 7); + + buf[off] = (byte) (values[i + 6] << 1); + buf[off++] |= values[i + 7] >>> 56; + buf[off++] = (byte) (values[i + 7] >>> 48); + buf[off++] = (byte) (values[i + 7] >>> 40); + buf[off++] = (byte) (values[i + 7] >>> 32); + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits58(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 50); + buf[off++] = (byte) (values[i + 0] >>> 42); + buf[off++] = (byte) (values[i + 0] >>> 34); + buf[off++] = (byte) (values[i + 0] >>> 26); + buf[off++] = (byte) (values[i + 0] >>> 18); + buf[off++] = (byte) (values[i + 0] >>> 10); + buf[off++] = (byte) (values[i + 0] >>> 2); + + buf[off] = (byte) (values[i + 0] << 6); + buf[off++] |= values[i + 1] >>> 52; + buf[off++] = (byte) (values[i + 1] >>> 44); + buf[off++] = (byte) (values[i + 1] >>> 36); + buf[off++] = (byte) (values[i + 1] >>> 28); + buf[off++] = (byte) (values[i + 1] >>> 20); + buf[off++] = (byte) 
(values[i + 1] >>> 12); + buf[off++] = (byte) (values[i + 1] >>> 4); + + buf[off] = (byte) (values[i + 1] << 4); + buf[off++] |= values[i + 2] >>> 54; + buf[off++] = (byte) (values[i + 2] >>> 46); + buf[off++] = (byte) (values[i + 2] >>> 38); + buf[off++] = (byte) (values[i + 2] >>> 30); + buf[off++] = (byte) (values[i + 2] >>> 22); + buf[off++] = (byte) (values[i + 2] >>> 14); + buf[off++] = (byte) (values[i + 2] >>> 6); + + buf[off] = (byte) (values[i + 2] << 2); + buf[off++] |= values[i + 3] >>> 56; + buf[off++] = (byte) (values[i + 3] >>> 48); + buf[off++] = (byte) (values[i + 3] >>> 40); + buf[off++] = (byte) (values[i + 3] >>> 32); + buf[off++] = (byte) (values[i + 3] >>> 24); + buf[off++] = (byte) (values[i + 3] >>> 16); + buf[off++] = (byte) (values[i + 3] >>> 8); + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 50); + buf[off++] = (byte) (values[i + 4] >>> 42); + buf[off++] = (byte) (values[i + 4] >>> 34); + buf[off++] = (byte) (values[i + 4] >>> 26); + buf[off++] = (byte) (values[i + 4] >>> 18); + buf[off++] = (byte) (values[i + 4] >>> 10); + buf[off++] = (byte) (values[i + 4] >>> 2); + + buf[off] = (byte) (values[i + 4] << 6); + buf[off++] |= values[i + 5] >>> 52; + buf[off++] = (byte) (values[i + 5] >>> 44); + buf[off++] = (byte) (values[i + 5] >>> 36); + buf[off++] = (byte) (values[i + 5] >>> 28); + buf[off++] = (byte) (values[i + 5] >>> 20); + buf[off++] = (byte) (values[i + 5] >>> 12); + buf[off++] = (byte) (values[i + 5] >>> 4); + + buf[off] = (byte) (values[i + 5] << 4); + buf[off++] |= values[i + 6] >>> 54; + buf[off++] = (byte) (values[i + 6] >>> 46); + buf[off++] = (byte) (values[i + 6] >>> 38); + buf[off++] = (byte) (values[i + 6] >>> 30); + buf[off++] = (byte) (values[i + 6] >>> 22); + buf[off++] = (byte) (values[i + 6] >>> 14); + buf[off++] = (byte) (values[i + 6] >>> 6); + + buf[off] = (byte) (values[i + 6] << 2); + buf[off++] |= values[i + 7] >>> 56; + buf[off++] = (byte) (values[i + 7] >>> 48); + buf[off++] 
= (byte) (values[i + 7] >>> 40); + buf[off++] = (byte) (values[i + 7] >>> 32); + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits59(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 51); + buf[off++] = (byte) (values[i + 0] >>> 43); + buf[off++] = (byte) (values[i + 0] >>> 35); + buf[off++] = (byte) (values[i + 0] >>> 27); + buf[off++] = (byte) (values[i + 0] >>> 19); + buf[off++] = (byte) (values[i + 0] >>> 11); + buf[off++] = (byte) (values[i + 0] >>> 3); + + buf[off] = (byte) (values[i + 0] << 5); + buf[off++] |= values[i + 1] >>> 54; + buf[off++] = (byte) (values[i + 1] >>> 46); + buf[off++] = (byte) (values[i + 1] >>> 38); + buf[off++] = (byte) (values[i + 1] >>> 30); + buf[off++] = (byte) (values[i + 1] >>> 22); + buf[off++] = (byte) (values[i + 1] >>> 14); + buf[off++] = (byte) (values[i + 1] >>> 6); + + buf[off] = (byte) (values[i + 1] << 2); + buf[off++] |= values[i + 2] >>> 57; + buf[off++] = (byte) (values[i + 2] >>> 49); + buf[off++] = (byte) (values[i + 2] >>> 41); + buf[off++] = (byte) (values[i + 2] >>> 33); + buf[off++] = (byte) (values[i + 2] >>> 25); + buf[off++] = (byte) (values[i + 2] >>> 17); + buf[off++] = (byte) (values[i + 2] >>> 9); + buf[off++] = (byte) (values[i + 2] >>> 1); + + buf[off] = (byte) (values[i + 2] << 7); + buf[off++] |= values[i + 3] >>> 52; + buf[off++] = (byte) (values[i + 3] >>> 44); + buf[off++] = (byte) (values[i + 3] >>> 36); + buf[off++] = (byte) (values[i + 3] >>> 28); + buf[off++] = (byte) (values[i + 3] >>> 20); + buf[off++] = (byte) (values[i + 3] >>> 12); + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 55; + buf[off++] = (byte) (values[i + 4] >>> 47); + buf[off++] = (byte) (values[i + 4] >>> 39); + buf[off++] = (byte) (values[i + 4] >>> 
31); + buf[off++] = (byte) (values[i + 4] >>> 23); + buf[off++] = (byte) (values[i + 4] >>> 15); + buf[off++] = (byte) (values[i + 4] >>> 7); + + buf[off] = (byte) (values[i + 4] << 1); + buf[off++] |= values[i + 5] >>> 58; + buf[off++] = (byte) (values[i + 5] >>> 50); + buf[off++] = (byte) (values[i + 5] >>> 42); + buf[off++] = (byte) (values[i + 5] >>> 34); + buf[off++] = (byte) (values[i + 5] >>> 26); + buf[off++] = (byte) (values[i + 5] >>> 18); + buf[off++] = (byte) (values[i + 5] >>> 10); + buf[off++] = (byte) (values[i + 5] >>> 2); + + buf[off] = (byte) (values[i + 5] << 6); + buf[off++] |= values[i + 6] >>> 53; + buf[off++] = (byte) (values[i + 6] >>> 45); + buf[off++] = (byte) (values[i + 6] >>> 37); + buf[off++] = (byte) (values[i + 6] >>> 29); + buf[off++] = (byte) (values[i + 6] >>> 21); + buf[off++] = (byte) (values[i + 6] >>> 13); + buf[off++] = (byte) (values[i + 6] >>> 5); + + buf[off] = (byte) (values[i + 6] << 3); + buf[off++] |= values[i + 7] >>> 56; + buf[off++] = (byte) (values[i + 7] >>> 48); + buf[off++] = (byte) (values[i + 7] >>> 40); + buf[off++] = (byte) (values[i + 7] >>> 32); + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits60(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 52); + buf[off++] = (byte) (values[i + 0] >>> 44); + buf[off++] = (byte) (values[i + 0] >>> 36); + buf[off++] = (byte) (values[i + 0] >>> 28); + buf[off++] = (byte) (values[i + 0] >>> 20); + buf[off++] = (byte) (values[i + 0] >>> 12); + buf[off++] = (byte) (values[i + 0] >>> 4); + + buf[off] = (byte) (values[i + 0] << 4); + buf[off++] |= values[i + 1] >>> 56; + buf[off++] = (byte) (values[i + 1] >>> 48); + buf[off++] = (byte) (values[i + 1] >>> 40); + buf[off++] = (byte) (values[i + 1] >>> 32); + buf[off++] = (byte) (values[i + 1] >>> 24); + buf[off++] = (byte) 
(values[i + 1] >>> 16); + buf[off++] = (byte) (values[i + 1] >>> 8); + buf[off++] = (byte) (values[i + 1]); + + buf[off++] = (byte) (values[i + 2] >>> 52); + buf[off++] = (byte) (values[i + 2] >>> 44); + buf[off++] = (byte) (values[i + 2] >>> 36); + buf[off++] = (byte) (values[i + 2] >>> 28); + buf[off++] = (byte) (values[i + 2] >>> 20); + buf[off++] = (byte) (values[i + 2] >>> 12); + buf[off++] = (byte) (values[i + 2] >>> 4); + + buf[off] = (byte) (values[i + 2] << 4); + buf[off++] |= values[i + 3] >>> 56; + buf[off++] = (byte) (values[i + 3] >>> 48); + buf[off++] = (byte) (values[i + 3] >>> 40); + buf[off++] = (byte) (values[i + 3] >>> 32); + buf[off++] = (byte) (values[i + 3] >>> 24); + buf[off++] = (byte) (values[i + 3] >>> 16); + buf[off++] = (byte) (values[i + 3] >>> 8); + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 52); + buf[off++] = (byte) (values[i + 4] >>> 44); + buf[off++] = (byte) (values[i + 4] >>> 36); + buf[off++] = (byte) (values[i + 4] >>> 28); + buf[off++] = (byte) (values[i + 4] >>> 20); + buf[off++] = (byte) (values[i + 4] >>> 12); + buf[off++] = (byte) (values[i + 4] >>> 4); + + buf[off] = (byte) (values[i + 4] << 4); + buf[off++] |= values[i + 5] >>> 56; + buf[off++] = (byte) (values[i + 5] >>> 48); + buf[off++] = (byte) (values[i + 5] >>> 40); + buf[off++] = (byte) (values[i + 5] >>> 32); + buf[off++] = (byte) (values[i + 5] >>> 24); + buf[off++] = (byte) (values[i + 5] >>> 16); + buf[off++] = (byte) (values[i + 5] >>> 8); + buf[off++] = (byte) (values[i + 5]); + + buf[off++] = (byte) (values[i + 6] >>> 52); + buf[off++] = (byte) (values[i + 6] >>> 44); + buf[off++] = (byte) (values[i + 6] >>> 36); + buf[off++] = (byte) (values[i + 6] >>> 28); + buf[off++] = (byte) (values[i + 6] >>> 20); + buf[off++] = (byte) (values[i + 6] >>> 12); + buf[off++] = (byte) (values[i + 6] >>> 4); + + buf[off] = (byte) (values[i + 6] << 4); + buf[off++] |= values[i + 7] >>> 56; + buf[off++] = (byte) (values[i + 7] >>> 48); + 
buf[off++] = (byte) (values[i + 7] >>> 40); + buf[off++] = (byte) (values[i + 7] >>> 32); + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits61(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 53); + buf[off++] = (byte) (values[i + 0] >>> 45); + buf[off++] = (byte) (values[i + 0] >>> 37); + buf[off++] = (byte) (values[i + 0] >>> 29); + buf[off++] = (byte) (values[i + 0] >>> 21); + buf[off++] = (byte) (values[i + 0] >>> 13); + buf[off++] = (byte) (values[i + 0] >>> 5); + + buf[off] = (byte) (values[i + 0] << 3); + buf[off++] |= values[i + 1] >>> 58; + buf[off++] = (byte) (values[i + 1] >>> 50); + buf[off++] = (byte) (values[i + 1] >>> 42); + buf[off++] = (byte) (values[i + 1] >>> 34); + buf[off++] = (byte) (values[i + 1] >>> 26); + buf[off++] = (byte) (values[i + 1] >>> 18); + buf[off++] = (byte) (values[i + 1] >>> 10); + buf[off++] = (byte) (values[i + 1] >>> 2); + + buf[off] = (byte) (values[i + 1] << 6); + buf[off++] |= values[i + 2] >>> 55; + buf[off++] = (byte) (values[i + 2] >>> 47); + buf[off++] = (byte) (values[i + 2] >>> 39); + buf[off++] = (byte) (values[i + 2] >>> 31); + buf[off++] = (byte) (values[i + 2] >>> 23); + buf[off++] = (byte) (values[i + 2] >>> 15); + buf[off++] = (byte) (values[i + 2] >>> 7); + + buf[off] = (byte) (values[i + 2] << 1); + buf[off++] |= values[i + 3] >>> 60; + buf[off++] = (byte) (values[i + 3] >>> 52); + buf[off++] = (byte) (values[i + 3] >>> 44); + buf[off++] = (byte) (values[i + 3] >>> 36); + buf[off++] = (byte) (values[i + 3] >>> 28); + buf[off++] = (byte) (values[i + 3] >>> 20); + buf[off++] = (byte) (values[i + 3] >>> 12); + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 57; + buf[off++] = (byte) (values[i + 4] >>> 49); + buf[off++] = (byte) 
(values[i + 4] >>> 41); + buf[off++] = (byte) (values[i + 4] >>> 33); + buf[off++] = (byte) (values[i + 4] >>> 25); + buf[off++] = (byte) (values[i + 4] >>> 17); + buf[off++] = (byte) (values[i + 4] >>> 9); + buf[off++] = (byte) (values[i + 4] >>> 1); + + buf[off] = (byte) (values[i + 4] << 7); + buf[off++] |= values[i + 5] >>> 54; + buf[off++] = (byte) (values[i + 5] >>> 46); + buf[off++] = (byte) (values[i + 5] >>> 38); + buf[off++] = (byte) (values[i + 5] >>> 30); + buf[off++] = (byte) (values[i + 5] >>> 22); + buf[off++] = (byte) (values[i + 5] >>> 14); + buf[off++] = (byte) (values[i + 5] >>> 6); + + buf[off] = (byte) (values[i + 5] << 2); + buf[off++] |= values[i + 6] >>> 59; + buf[off++] = (byte) (values[i + 6] >>> 51); + buf[off++] = (byte) (values[i + 6] >>> 43); + buf[off++] = (byte) (values[i + 6] >>> 35); + buf[off++] = (byte) (values[i + 6] >>> 27); + buf[off++] = (byte) (values[i + 6] >>> 19); + buf[off++] = (byte) (values[i + 6] >>> 11); + buf[off++] = (byte) (values[i + 6] >>> 3); + + buf[off] = (byte) (values[i + 6] << 5); + buf[off++] |= values[i + 7] >>> 56; + buf[off++] = (byte) (values[i + 7] >>> 48); + buf[off++] = (byte) (values[i + 7] >>> 40); + buf[off++] = (byte) (values[i + 7] >>> 32); + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits62(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 54); + buf[off++] = (byte) (values[i + 0] >>> 46); + buf[off++] = (byte) (values[i + 0] >>> 38); + buf[off++] = (byte) (values[i + 0] >>> 30); + buf[off++] = (byte) (values[i + 0] >>> 22); + buf[off++] = (byte) (values[i + 0] >>> 14); + buf[off++] = (byte) (values[i + 0] >>> 6); + + buf[off] = (byte) (values[i + 0] << 2); + buf[off++] |= values[i + 1] >>> 60; + buf[off++] = (byte) (values[i + 1] >>> 52); + buf[off++] = (byte) (values[i + 1] >>> 44); + 
buf[off++] = (byte) (values[i + 1] >>> 36); + buf[off++] = (byte) (values[i + 1] >>> 28); + buf[off++] = (byte) (values[i + 1] >>> 20); + buf[off++] = (byte) (values[i + 1] >>> 12); + buf[off++] = (byte) (values[i + 1] >>> 4); + + buf[off] = (byte) (values[i + 1] << 4); + buf[off++] |= values[i + 2] >>> 58; + buf[off++] = (byte) (values[i + 2] >>> 50); + buf[off++] = (byte) (values[i + 2] >>> 42); + buf[off++] = (byte) (values[i + 2] >>> 34); + buf[off++] = (byte) (values[i + 2] >>> 26); + buf[off++] = (byte) (values[i + 2] >>> 18); + buf[off++] = (byte) (values[i + 2] >>> 10); + buf[off++] = (byte) (values[i + 2] >>> 2); + + buf[off] = (byte) (values[i + 2] << 6); + buf[off++] |= values[i + 3] >>> 56; + buf[off++] = (byte) (values[i + 3] >>> 48); + buf[off++] = (byte) (values[i + 3] >>> 40); + buf[off++] = (byte) (values[i + 3] >>> 32); + buf[off++] = (byte) (values[i + 3] >>> 24); + buf[off++] = (byte) (values[i + 3] >>> 16); + buf[off++] = (byte) (values[i + 3] >>> 8); + buf[off++] = (byte) (values[i + 3]); + + buf[off++] = (byte) (values[i + 4] >>> 54); + buf[off++] = (byte) (values[i + 4] >>> 46); + buf[off++] = (byte) (values[i + 4] >>> 38); + buf[off++] = (byte) (values[i + 4] >>> 30); + buf[off++] = (byte) (values[i + 4] >>> 22); + buf[off++] = (byte) (values[i + 4] >>> 14); + buf[off++] = (byte) (values[i + 4] >>> 6); + + buf[off] = (byte) (values[i + 4] << 2); + buf[off++] |= values[i + 5] >>> 60; + buf[off++] = (byte) (values[i + 5] >>> 52); + buf[off++] = (byte) (values[i + 5] >>> 44); + buf[off++] = (byte) (values[i + 5] >>> 36); + buf[off++] = (byte) (values[i + 5] >>> 28); + buf[off++] = (byte) (values[i + 5] >>> 20); + buf[off++] = (byte) (values[i + 5] >>> 12); + buf[off++] = (byte) (values[i + 5] >>> 4); + + buf[off] = (byte) (values[i + 5] << 4); + buf[off++] |= values[i + 6] >>> 58; + buf[off++] = (byte) (values[i + 6] >>> 50); + buf[off++] = (byte) (values[i + 6] >>> 42); + buf[off++] = (byte) (values[i + 6] >>> 34); + buf[off++] = (byte) 
(values[i + 6] >>> 26); + buf[off++] = (byte) (values[i + 6] >>> 18); + buf[off++] = (byte) (values[i + 6] >>> 10); + buf[off++] = (byte) (values[i + 6] >>> 2); + + buf[off] = (byte) (values[i + 6] << 6); + buf[off++] |= values[i + 7] >>> 56; + buf[off++] = (byte) (values[i + 7] >>> 48); + buf[off++] = (byte) (values[i + 7] >>> 40); + buf[off++] = (byte) (values[i + 7] >>> 32); + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) (values[i + 7]); + } + + static void packBits63(final long[] values, final int i, final byte[] buf, int off) { + buf[off++] = (byte) (values[i + 0] >>> 55); + buf[off++] = (byte) (values[i + 0] >>> 47); + buf[off++] = (byte) (values[i + 0] >>> 39); + buf[off++] = (byte) (values[i + 0] >>> 31); + buf[off++] = (byte) (values[i + 0] >>> 23); + buf[off++] = (byte) (values[i + 0] >>> 15); + buf[off++] = (byte) (values[i + 0] >>> 7); + + buf[off] = (byte) (values[i + 0] << 1); + buf[off++] |= values[i + 1] >>> 62; + buf[off++] = (byte) (values[i + 1] >>> 54); + buf[off++] = (byte) (values[i + 1] >>> 46); + buf[off++] = (byte) (values[i + 1] >>> 38); + buf[off++] = (byte) (values[i + 1] >>> 30); + buf[off++] = (byte) (values[i + 1] >>> 22); + buf[off++] = (byte) (values[i + 1] >>> 14); + buf[off++] = (byte) (values[i + 1] >>> 6); + + buf[off] = (byte) (values[i + 1] << 2); + buf[off++] |= values[i + 2] >>> 61; + buf[off++] = (byte) (values[i + 2] >>> 53); + buf[off++] = (byte) (values[i + 2] >>> 45); + buf[off++] = (byte) (values[i + 2] >>> 37); + buf[off++] = (byte) (values[i + 2] >>> 29); + buf[off++] = (byte) (values[i + 2] >>> 21); + buf[off++] = (byte) (values[i + 2] >>> 13); + buf[off++] = (byte) (values[i + 2] >>> 5); + + buf[off] = (byte) (values[i + 2] << 3); + buf[off++] |= values[i + 3] >>> 60; + buf[off++] = (byte) (values[i + 3] >>> 52); + buf[off++] = (byte) (values[i + 3] >>> 44); + buf[off++] = (byte) (values[i + 3] >>> 36); + 
buf[off++] = (byte) (values[i + 3] >>> 28); + buf[off++] = (byte) (values[i + 3] >>> 20); + buf[off++] = (byte) (values[i + 3] >>> 12); + buf[off++] = (byte) (values[i + 3] >>> 4); + + buf[off] = (byte) (values[i + 3] << 4); + buf[off++] |= values[i + 4] >>> 59; + buf[off++] = (byte) (values[i + 4] >>> 51); + buf[off++] = (byte) (values[i + 4] >>> 43); + buf[off++] = (byte) (values[i + 4] >>> 35); + buf[off++] = (byte) (values[i + 4] >>> 27); + buf[off++] = (byte) (values[i + 4] >>> 19); + buf[off++] = (byte) (values[i + 4] >>> 11); + buf[off++] = (byte) (values[i + 4] >>> 3); + + buf[off] = (byte) (values[i + 4] << 5); + buf[off++] |= values[i + 5] >>> 58; + buf[off++] = (byte) (values[i + 5] >>> 50); + buf[off++] = (byte) (values[i + 5] >>> 42); + buf[off++] = (byte) (values[i + 5] >>> 34); + buf[off++] = (byte) (values[i + 5] >>> 26); + buf[off++] = (byte) (values[i + 5] >>> 18); + buf[off++] = (byte) (values[i + 5] >>> 10); + buf[off++] = (byte) (values[i + 5] >>> 2); + + buf[off] = (byte) (values[i + 5] << 6); + buf[off++] |= values[i + 6] >>> 57; + buf[off++] = (byte) (values[i + 6] >>> 49); + buf[off++] = (byte) (values[i + 6] >>> 41); + buf[off++] = (byte) (values[i + 6] >>> 33); + buf[off++] = (byte) (values[i + 6] >>> 25); + buf[off++] = (byte) (values[i + 6] >>> 17); + buf[off++] = (byte) (values[i + 6] >>> 9); + buf[off++] = (byte) (values[i + 6] >>> 1); + + buf[off] = (byte) (values[i + 6] << 7); + buf[off++] |= values[i + 7] >>> 56; + buf[off++] = (byte) (values[i + 7] >>> 48); + buf[off++] = (byte) (values[i + 7] >>> 40); + buf[off++] = (byte) (values[i + 7] >>> 32); + buf[off++] = (byte) (values[i + 7] >>> 24); + buf[off++] = (byte) (values[i + 7] >>> 16); + buf[off++] = (byte) (values[i + 7] >>> 8); + buf[off] = (byte) values[i + 7]; + } + + static void unpackBits1(final long[] values, final int i, final byte[] buf, final int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off]) >>> 7) & 1; + values[i + 1] = (Byte.toUnsignedLong(buf[off]) >>> 6) 
& 1; + values[i + 2] = (Byte.toUnsignedLong(buf[off]) >>> 5) & 1; + values[i + 3] = (Byte.toUnsignedLong(buf[off]) >>> 4) & 1; + values[i + 4] = (Byte.toUnsignedLong(buf[off]) >>> 3) & 1; + values[i + 5] = (Byte.toUnsignedLong(buf[off]) >>> 2) & 1; + values[i + 6] = (Byte.toUnsignedLong(buf[off]) >>> 1) & 1; + values[i + 7] = Byte.toUnsignedLong(buf[off]) & 1; + } + + static void unpackBits2(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off]) >>> 6) & 3; + values[i + 1] = (Byte.toUnsignedLong(buf[off]) >>> 4) & 3; + values[i + 2] = (Byte.toUnsignedLong(buf[off]) >>> 2) & 3; + values[i + 3] = Byte.toUnsignedLong(buf[off++]) & 3; + values[i + 4] = (Byte.toUnsignedLong(buf[off]) >>> 6) & 3; + values[i + 5] = (Byte.toUnsignedLong(buf[off]) >>> 4) & 3; + values[i + 6] = (Byte.toUnsignedLong(buf[off]) >>> 2) & 3; + values[i + 7] = Byte.toUnsignedLong(buf[off]) & 3; + } + + static void unpackBits3(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off]) >>> 5; + values[i + 1] = (Byte.toUnsignedLong(buf[off]) >>> 2) & 7; + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 1; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 7; + values[i + 3] = (Byte.toUnsignedLong(buf[off]) >>> 4) & 7; + values[i + 4] = (Byte.toUnsignedLong(buf[off]) >>> 1) & 7; + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 1) << 2; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; + values[i + 6] = (Byte.toUnsignedLong(buf[off]) >>> 3) & 7; + values[i + 7] = Byte.toUnsignedLong(buf[off]) & 7; + } + + static void unpackBits4(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off]) >>> 4; + values[i + 1] = Byte.toUnsignedLong(buf[off++]) & 0xf; + values[i + 2] = Byte.toUnsignedLong(buf[off]) >>> 4; + values[i + 3] = Byte.toUnsignedLong(buf[off++]) & 0xf; + values[i + 4] = Byte.toUnsignedLong(buf[off]) >>> 4; + values[i 
+ 5] = Byte.toUnsignedLong(buf[off++]) & 0xf; + values[i + 6] = Byte.toUnsignedLong(buf[off]) >>> 4; + values[i + 7] = Byte.toUnsignedLong(buf[off]) & 0xf; + } + + static void unpackBits5(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 7) << 2; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 2] = (Byte.toUnsignedLong(buf[off]) >>> 1) & 0x1f; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 1) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 1; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 5] = (Byte.toUnsignedLong(buf[off]) >>> 2) & 0x1f; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 3; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 7] = Byte.toUnsignedLong(buf[off]) & 0x1f; + } + + static void unpackBits6(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 3) << 4; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 2; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 3] = Byte.toUnsignedLong(buf[off++]) & 0x3f; + + values[i + 4] = Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 3) << 4; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 2; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 7] = Byte.toUnsignedLong(buf[off]) & 0x3f; + } + + static void unpackBits7(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 1) << 
6; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 5; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 7) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 3; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 2; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 1; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 7] = Byte.toUnsignedLong(buf[off]) & 0x7f; + } + + static void unpackBits8(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off++]); + values[i + 1] = Byte.toUnsignedLong(buf[off++]); + values[i + 2] = Byte.toUnsignedLong(buf[off++]); + values[i + 3] = Byte.toUnsignedLong(buf[off++]); + values[i + 4] = Byte.toUnsignedLong(buf[off++]); + values[i + 5] = Byte.toUnsignedLong(buf[off++]); + values[i + 6] = Byte.toUnsignedLong(buf[off++]); + values[i + 7] = Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits9(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 2; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 3; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 5; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 7) << 6; + values[i 
+ 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 7; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 1) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits10(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 4; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 6; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 3) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 4; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 6; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 3) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits11(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 6; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 9; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = 
(Byte.toUnsignedLong(buf[off++]) & 0xf) << 7; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 1) << 10; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 5; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 7) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits12(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 8; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 2] = Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 8; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 6] = Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits13(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 7) << 10; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 7; + 
values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 1) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 9; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 6; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 11; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits14(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 3) << 12; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 10; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 3) << 12; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 10; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i 
+ 7] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits15(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 1) << 14; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 13; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 7) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 11; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 10; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 9; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits16(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]); + values[i + 1] = Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]); + values[i + 2] = Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]); + values[i + 3] = Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 3] |= 
Byte.toUnsignedLong(buf[off++]); + values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]); + values[i + 5] = Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]); + values[i + 6] = Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]); + values[i + 7] = Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits17(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 9; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 10; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 11; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 13; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 7) << 14; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 15; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 1) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void 
unpackBits18(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 12; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 14; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 3) << 16; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 12; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 14; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 3) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits19(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 11; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 14; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 17; + 
values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 9; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 15; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 1) << 18; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 13; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 7) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits20(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 16; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 2] = Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 16; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 4] |= 
Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 16; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 6] = Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits21(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 13; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 7) << 18; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 15; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 1) << 20; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 17; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 9; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 14; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 6] = 
(Byte.toUnsignedLong(buf[off++]) & 3) << 19; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 11; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits22(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 3) << 20; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 18; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 16; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 3) << 20; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 18; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 7] = 
(Byte.toUnsignedLong(buf[off++]) & 0x3f) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits23(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 15; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 1) << 22; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 21; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 13; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 7) << 20; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 19; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 11; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 18; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 17; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 9; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= 
Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits24(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]); + values[i + 1] = Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]); + values[i + 2] = Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]); + values[i + 3] = Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]); + values[i + 5] = Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]); + values[i + 6] = Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]); + values[i + 7] = Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits25(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 17; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 9; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 18; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 1] |= 
Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 19; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 11; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 20; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 21; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 13; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 7) << 22; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 23; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 15; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 1) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits26(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 20; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 1] |= 
Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 22; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 3) << 24; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 20; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 22; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 3) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits27(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 19; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 11; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 22; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) 
>>> 2; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 25; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 17; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 9; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 20; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 23; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 15; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 1) << 26; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 21; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 13; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 7) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits28(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 24; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i 
+ 1] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 2] = Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 24; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 24; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 6] = Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits29(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 21; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 13; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 7) << 26; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10; + 
values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 23; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 15; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 1) << 28; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 25; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 17; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 9; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 22; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 27; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 19; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 11; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits30(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 0] |= 
Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 3) << 28; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 26; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 24; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 3) << 28; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 26; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits31(final long[] values, final int i, final byte[] buf, 
int off) { + values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 23; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 15; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 1) << 30; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 29; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 21; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 13; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 7) << 28; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 27; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 19; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 11; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 26; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 25; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 17; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 9; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 6] |= 
Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits32(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]); + values[i + 1] = (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]); + values[i + 2] = (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]); + values[i + 3] = (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]); + values[i + 5] = (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]); + values[i + 6] = (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]); + values[i + 7] = (Byte.toUnsignedLong(buf[off++])) 
<< 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits33(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 25; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 17; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 9; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 26; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 27; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 19; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 11; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 28; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 29; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 21; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 13; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 7) << 30; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 5] |= 
Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 31; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 23; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 15; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 1) << 32; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits34(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 28; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 30; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 3) << 32; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) 
<< 18; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 28; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 30; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 3) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]); + } + + static void unpackBits35(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 27; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 19; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 11; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 30; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 33; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 25; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 17; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 9; + values[i + 2] 
|= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 28; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 31; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 23; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 15; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 1) << 34; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 29; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 21; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 13; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 7) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits36(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 0] |= 
Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 32; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 2] = (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 32; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 32; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 6] = (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= 
Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits37(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 29; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 21; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 13; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 7) << 34; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 31; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 23; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 15; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 1) << 36; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 33; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 25; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 17; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 9; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 30; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 22; + values[i + 5] |= 
Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 35; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 27; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 19; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 11; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits38(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 3) << 36; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 34; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 32; + values[i + 3] |= 
(Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 3) << 36; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 34; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits39(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 31; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 23; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 15; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 1) << 38; + values[i + 1] |= 
(Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 37; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 29; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 21; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 13; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 7) << 36; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 35; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 27; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 19; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 11; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 34; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 33; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 25; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 17; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 9; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 1; + 
values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits40(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]); + values[i + 1] = (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]); + values[i + 2] = (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]); + values[i + 3] = (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]); + values[i + 5] = (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 24; + 
values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]); + values[i + 6] = (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]); + values[i + 7] = (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits41(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 33; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 25; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 17; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 9; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 34; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 35; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 27; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 19; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 11; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 36; + values[i + 3] |= 
(Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 37; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 29; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 21; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 13; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 7) << 38; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 39; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 31; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 23; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 15; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 1) << 40; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits42(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 34; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 0] |= 
Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 36; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 38; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 3) << 40; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 34; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 36; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 38; + 
values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 3) << 40; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits43(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 35; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 27; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 19; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 11; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 38; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 41; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 33; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 25; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 17; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 9; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 36; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 3] |= 
Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 39; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 31; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 23; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 15; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 1) << 42; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 34; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 37; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 29; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 21; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 13; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 7) << 40; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits44(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 0] |= 
Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 40; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 2] = (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 40; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 40; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 6] = (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 6] |= 
(Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 40; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits45(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 37; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 29; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 21; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 13; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 7) << 42; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 34; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 39; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 31; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 23; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 15; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 1) << 44; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 3] |= 
(Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 41; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 33; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 25; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 17; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 9; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 38; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 22; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 43; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 35; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 27; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 19; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 11; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 40; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits46(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 38; + values[i + 0] |= 
(Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 3) << 44; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 42; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 34; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 40; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 38; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 3) << 44; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 28; + 
values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 42; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 34; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 40; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits47(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 39; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 31; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 23; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 15; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 1) << 46; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 38; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 45; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 37; + values[i + 2] |= 
(Byte.toUnsignedLong(buf[off++])) << 29; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 21; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 13; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 7) << 44; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 43; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 35; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 27; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 19; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 11; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 42; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 34; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 41; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 33; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 25; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 17; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 9; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 
40; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits48(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]); + values[i + 1] = (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]); + values[i + 2] = (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]); + values[i + 3] = (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) 
<< 16; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]); + values[i + 5] = (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]); + values[i + 6] = (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]); + values[i + 7] = (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits49(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 41; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 33; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 25; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 17; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 9; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 42; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 34; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 1] |= 
Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 43; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 35; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 27; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 19; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 11; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 44; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 45; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 37; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 29; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 21; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 13; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 7) << 46; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 38; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 47; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 39; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 31; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 
23; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 15; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 1) << 48; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits50(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 42; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 34; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 44; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 46; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 38; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 3) << 48; + values[i + 
3] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 42; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 34; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 44; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 46; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 38; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 3) << 48; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + 
static void unpackBits51(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 43; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 35; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 27; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 19; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 11; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 46; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 38; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 49; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 41; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 33; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 25; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 17; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 9; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 44; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 47; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 39; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 31; + 
values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 23; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 15; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 1) << 50; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 42; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 34; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 45; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 37; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 29; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 21; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 13; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 7) << 48; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits52(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 44; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 0] |= 
Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 48; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 2] = (Byte.toUnsignedLong(buf[off++])) << 44; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 48; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 44; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 48; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16; + 
values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 6] = (Byte.toUnsignedLong(buf[off++])) << 44; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 48; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits53(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 45; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 37; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 29; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 21; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 13; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 7) << 50; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 42; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 34; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 47; + values[i + 2] |= 
(Byte.toUnsignedLong(buf[off++])) << 39; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 31; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 23; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 15; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 1) << 52; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 44; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 49; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 41; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 33; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 25; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 17; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 9; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 46; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 38; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 51; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 43; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 35; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 27; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 
19; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 11; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 48; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits54(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 46; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 38; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 3) << 52; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 44; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 50; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 42; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 34; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 
2] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 48; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 46; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 38; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 3) << 52; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 44; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 50; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 42; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 34; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 48; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) 
<< 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]); + } + + static void unpackBits55(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 47; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 39; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 31; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 23; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 15; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 1) << 54; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 46; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 38; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 53; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 45; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 37; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 29; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 21; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 13; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 7) << 52; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 44; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i 
+ 3] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 51; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 43; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 35; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 27; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 19; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 11; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 50; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 42; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 34; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 49; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 41; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 33; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 25; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 17; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 9; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 48; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= 
Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits56(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 48; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]); + values[i + 1] = (Byte.toUnsignedLong(buf[off++])) << 48; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]); + values[i + 2] = (Byte.toUnsignedLong(buf[off++])) << 48; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]); + values[i + 3] = (Byte.toUnsignedLong(buf[off++])) << 48; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 48; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 24; + 
values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]); + values[i + 5] = (Byte.toUnsignedLong(buf[off++])) << 48; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]); + values[i + 6] = (Byte.toUnsignedLong(buf[off++])) << 48; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]); + values[i + 7] = (Byte.toUnsignedLong(buf[off++])) << 48; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits57(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 49; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 41; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 33; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 25; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 17; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 9; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 
0x7f) << 50; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 42; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 34; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 51; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 43; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 35; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 27; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 19; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 11; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 52; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 44; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 53; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 45; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 37; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 29; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 21; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 13; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 7) << 54; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 46; + values[i + 5] |= 
(Byte.toUnsignedLong(buf[off++])) << 38; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 55; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 47; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 39; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 31; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 23; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 15; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 1) << 56; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 48; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits58(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 50; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 42; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 34; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 52; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 44; + values[i + 1] |= 
(Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 54; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 46; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 38; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 3) << 56; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 48; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 50; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 42; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 34; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 52; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 44; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 28; + 
values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 54; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 46; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 38; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 3) << 56; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 48; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]); + } + + static void unpackBits59(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 51; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 43; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 35; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 27; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 19; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 11; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 54; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 46; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 38; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 
1] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 57; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 49; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 41; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 33; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 25; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 17; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 9; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 52; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 44; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 55; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 47; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 39; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 31; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 23; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 15; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 1) << 58; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 50; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 42; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 34; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) 
<< 26; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 53; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 45; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 37; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 29; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 21; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 13; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 7) << 56; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 48; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits60(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 52; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 44; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 56; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 48; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 32; + 
values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 2] = (Byte.toUnsignedLong(buf[off++])) << 52; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 44; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 56; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 48; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 52; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 44; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 56; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 48; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 
16; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 6] = (Byte.toUnsignedLong(buf[off++])) << 52; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 44; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 56; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 48; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits61(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 53; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 45; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 37; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 29; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 21; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 13; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 7) << 58; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 50; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 42; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 34; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 1] 
|= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 55; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 47; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 39; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 31; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 23; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 15; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 1) << 60; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 52; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 44; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 57; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 49; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 41; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 33; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 25; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 17; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 9; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 54; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 46; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 38; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) 
<< 22; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 59; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 51; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 43; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 35; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 27; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 19; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 11; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 3; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 56; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 48; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits62(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 54; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 46; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 38; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 3) << 60; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 52; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 44; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 36; + 
values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 58; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 50; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 42; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 34; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 56; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 48; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]); + + values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 54; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 46; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 38; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 3) << 60; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 52; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 44; + values[i + 5] |= 
(Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 58; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 50; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 42; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 34; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 56; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 48; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + + static void unpackBits63(final long[] values, final int i, final byte[] buf, int off) { + values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 55; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 47; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 39; + values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 31; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 23; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 15; + values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 7; + values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 1; + + values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 1) << 62; + values[i + 1] |= 
(Byte.toUnsignedLong(buf[off++])) << 54; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 46; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 38; + values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 30; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14; + values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; + values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; + + values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 61; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 53; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 45; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 37; + values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 29; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 21; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 13; + values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 5; + values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 3; + + values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 7) << 60; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 52; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 44; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36; + values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; + values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; + values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; + + values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 59; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 51; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 43; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 35; + values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 27; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 19; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 11; + values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 3; + 
values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 5; + + values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 58; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 50; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 42; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 34; + values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 26; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10; + values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; + values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; + + values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 57; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 49; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 41; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 33; + values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 25; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 17; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 9; + values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 1; + values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 7; + + values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 56; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 48; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; + values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; + values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; + values[i + 7] |= Byte.toUnsignedLong(buf[off]); + } + +} diff --git a/src/main/java/org/apache/datasketches/theta2/BytesCompactCompressedHashIterator.java b/src/main/java/org/apache/datasketches/theta2/BytesCompactCompressedHashIterator.java new file mode 100644 index 000000000..81a985922 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/BytesCompactCompressedHashIterator.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +/* + * This is to uncompress serial version 4 sketch incrementally + */ +class BytesCompactCompressedHashIterator implements HashIterator { + private byte[] bytes; + private int offset; + private int entryBits; + private int numEntries; + private int index; + private long previous; + private int offsetBits; + private long[] buffer; + private boolean isBlockMode; + + BytesCompactCompressedHashIterator( + final byte[] bytes, + final int offset, + final int entryBits, + final int numEntries + ) { + this.bytes = bytes; + this.offset = offset; + this.entryBits = entryBits; + this.numEntries = numEntries; + index = -1; + previous = 0; + offsetBits = 0; + buffer = new long[8]; + isBlockMode = numEntries >= 8; + } + + @Override + public long get() { + return buffer[index & 7]; + } + + @Override + public boolean next() { + if (++index == numEntries) { return false; } + if (isBlockMode) { + if ((index & 7) == 0) { + if (numEntries - index >= 8) { + unpack8(); + } else { + isBlockMode = false; + unpack1(); + } + } + } else { + unpack1(); + } + return true; + } + + private void unpack1() { + final int i = index & 7; + BitPacking.unpackBits(buffer, i, 
entryBits, bytes, offset, offsetBits); + offset += (offsetBits + entryBits) >>> 3; + offsetBits = (offsetBits + entryBits) & 7; + buffer[i] += previous; + previous = buffer[i]; + } + + private void unpack8() { + BitPacking.unpackBitsBlock8(buffer, 0, bytes, offset, entryBits); + offset += entryBits; + for (int i = 0; i < 8; i++) { + buffer[i] += previous; + previous = buffer[i]; + } + } +} diff --git a/src/main/java/org/apache/datasketches/theta2/BytesCompactHashIterator.java b/src/main/java/org/apache/datasketches/theta2/BytesCompactHashIterator.java new file mode 100644 index 000000000..9a4754574 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/BytesCompactHashIterator.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import org.apache.datasketches.common.ByteArrayUtil; + +/* + * This is to iterate over serial version 3 sketch representation + */ +class BytesCompactHashIterator implements HashIterator { + final private byte[] bytes; + final private int offset; + final private int numEntries; + private int index; + + BytesCompactHashIterator( + final byte[] bytes, + final int offset, + final int numEntries + ) { + this.bytes = bytes; + this.offset = offset; + this.numEntries = numEntries; + index = -1; + } + + @Override + public long get() { + return ByteArrayUtil.getLongLE(bytes, offset + index * Long.BYTES); + } + + @Override + public boolean next() { + return ++index < numEntries; + } +} diff --git a/src/main/java/org/apache/datasketches/theta2/CompactOperations.java b/src/main/java/org/apache/datasketches/theta2/CompactOperations.java new file mode 100644 index 000000000..ab342a1f9 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/CompactOperations.java @@ -0,0 +1,388 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; +import static org.apache.datasketches.theta2.PreambleUtil.COMPACT_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.LG_NOM_LONGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.READ_ONLY_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.SER_VER; +import static org.apache.datasketches.theta2.PreambleUtil.SINGLEITEM_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount; +import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID; +import static org.apache.datasketches.theta2.PreambleUtil.extractFlags; +import static org.apache.datasketches.theta2.PreambleUtil.extractLgArrLongs; +import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs; +import static org.apache.datasketches.theta2.PreambleUtil.extractSeedHash; +import static org.apache.datasketches.theta2.PreambleUtil.extractSerVer; +import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong; +import static org.apache.datasketches.theta2.PreambleUtil.insertCurCount; +import static org.apache.datasketches.theta2.PreambleUtil.insertFamilyID; +import static org.apache.datasketches.theta2.PreambleUtil.insertFlags; +import static org.apache.datasketches.theta2.PreambleUtil.insertP; +import static org.apache.datasketches.theta2.PreambleUtil.insertPreLongs; +import static org.apache.datasketches.theta2.PreambleUtil.insertSeedHash; +import static org.apache.datasketches.theta2.PreambleUtil.insertSerVer; +import static org.apache.datasketches.theta2.PreambleUtil.insertThetaLong; + +import 
java.lang.foreign.MemorySegment; +import java.util.Arrays; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.SketchesStateException; + +/** + * @author Lee Rhodes + */ +final class CompactOperations { + + private CompactOperations() {} + + static CompactSketch componentsToCompact( //No error checking + final long thetaLong, + final int curCount, + final short seedHash, + final boolean srcEmpty, + final boolean srcCompact, + final boolean srcOrdered, + final boolean dstOrdered, + final MemorySegment dstWSeg, + final long[] hashArr) //may not be compacted, ordered or unordered, may be null + { + final boolean direct = dstWSeg != null; + final boolean empty = srcEmpty || ((curCount == 0) && (thetaLong == Long.MAX_VALUE)); + final boolean single = (curCount == 1) && (thetaLong == Long.MAX_VALUE); + final long[] hashArrOut; + if (!srcCompact) { + hashArrOut = CompactOperations.compactCache(hashArr, curCount, thetaLong, dstOrdered); + } else { + hashArrOut = hashArr; + } + if (!srcOrdered && dstOrdered && !empty && !single) { + Arrays.sort(hashArrOut); + } + //Note: for empty or single we always output the ordered form. + final boolean dstOrderedOut = (empty || single) ? true : dstOrdered; + if (direct) { + final int preLongs = computeCompactPreLongs(empty, curCount, thetaLong); + int flags = READ_ONLY_FLAG_MASK | COMPACT_FLAG_MASK; //always LE + flags |= empty ? EMPTY_FLAG_MASK : 0; + flags |= dstOrderedOut ? ORDERED_FLAG_MASK : 0; + flags |= single ? 
SINGLEITEM_FLAG_MASK : 0; + + final MemorySegment seg = + loadCompactMemory(hashArrOut, seedHash, curCount, thetaLong, dstWSeg, (byte)flags, preLongs); + return new DirectCompactSketch(seg); + + } else { //Heap + if (empty) { + return EmptyCompactSketch.getInstance(); + } + if (single) { + return new SingleItemSketch(hashArrOut[0], seedHash); + } + return new HeapCompactSketch(hashArrOut, empty, seedHash, curCount, thetaLong, dstOrderedOut); + } + } + + /** + * Heapify or convert a source Theta Sketch MemorySegment image into a heap or target MemorySegment CompactSketch. + * This assumes hashSeed is OK; serVer = 3. + * @param srcSeg the given input source MemorySegment image. Can be Read Only. + * @param dstOrdered the desired ordering of the resulting CompactSketch + * @param dstWSeg Used for the target CompactSketch if it is MemorySegment-based. Must be Writable. + * @return a CompactSketch of the correct form. + */ + @SuppressWarnings("unused") + static CompactSketch memoryToCompact( + final MemorySegment srcSeg, + final boolean dstOrdered, + final MemorySegment dstWSeg) + { + //extract Pre0 fields and Flags from srcMem + final int srcPreLongs = extractPreLongs(srcSeg); + final int srcSerVer = extractSerVer(srcSeg); //not used + final int srcFamId = extractFamilyID(srcSeg); + final int srcLgArrLongs = extractLgArrLongs(srcSeg); + final int srcFlags = extractFlags(srcSeg); + final short srcSeedHash = (short) extractSeedHash(srcSeg); + + //srcFlags + final boolean srcReadOnlyFlag = (srcFlags & READ_ONLY_FLAG_MASK) > 0; + final boolean srcEmptyFlag = (srcFlags & EMPTY_FLAG_MASK) > 0; + final boolean srcCompactFlag = (srcFlags & COMPACT_FLAG_MASK) > 0; + final boolean srcOrderedFlag = (srcFlags & ORDERED_FLAG_MASK) > 0; + final boolean srcSingleFlag = (srcFlags & SINGLEITEM_FLAG_MASK) > 0; + + final boolean single = srcSingleFlag + || SingleItemSketch.otherCheckForSingleItem(srcPreLongs, srcSerVer, srcFamId, srcFlags); + + //extract pre1 and pre2 fields + final int 
curCount = single ? 1 : (srcPreLongs > 1) ? extractCurCount(srcSeg) : 0; + final long thetaLong = (srcPreLongs > 2) ? extractThetaLong(srcSeg) : Long.MAX_VALUE; + + //do some basic checks ... + if (srcEmptyFlag) { assert (curCount == 0) && (thetaLong == Long.MAX_VALUE); } + if (single) { assert (curCount == 1) && (thetaLong == Long.MAX_VALUE); } + checkFamilyAndFlags(srcFamId, srcCompactFlag, srcReadOnlyFlag); + + //dispatch empty and single cases + //Note: for empty and single we always output the ordered form. + final boolean dstOrderedOut = (srcEmptyFlag || single) ? true : dstOrdered; + if (srcEmptyFlag) { + if (dstWSeg != null) { + MemorySegment.copy(EmptyCompactSketch.EMPTY_COMPACT_SKETCH_ARR, 0, dstWSeg, JAVA_BYTE, 0, 8); + return new DirectCompactSketch(dstWSeg); + } else { + return EmptyCompactSketch.getInstance(); + } + } + if (single) { + final long hash = srcSeg.get(JAVA_LONG_UNALIGNED, srcPreLongs << 3); + final SingleItemSketch sis = new SingleItemSketch(hash, srcSeedHash); + if (dstWSeg != null) { + MemorySegment.copy(sis.toByteArray(), 0, dstWSeg, JAVA_BYTE, 0, 16); + return new DirectCompactSketch(dstWSeg); + } else { //heap + return sis; + } + } + + //extract hashArr > 1 + final long[] hashArr; + if (srcCompactFlag) { + hashArr = new long[curCount]; + MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, srcPreLongs << 3, hashArr, 0, curCount); + } else { //update sketch, thus hashTable form + final int srcCacheLen = 1 << srcLgArrLongs; + final long[] tempHashArr = new long[srcCacheLen]; + MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, srcPreLongs << 3, tempHashArr, 0, srcCacheLen); + hashArr = compactCache(tempHashArr, curCount, thetaLong, dstOrderedOut); + } + + final int flagsOut = READ_ONLY_FLAG_MASK | COMPACT_FLAG_MASK + | ((dstOrderedOut) ? ORDERED_FLAG_MASK : 0); + + //load the destination. 
+ if (dstWSeg != null) { + final MemorySegment tgtSeg = loadCompactMemory(hashArr, srcSeedHash, curCount, thetaLong, dstWSeg, + (byte)flagsOut, srcPreLongs); + return new DirectCompactSketch(tgtSeg); + } else { //heap + return new HeapCompactSketch(hashArr, srcEmptyFlag, srcSeedHash, curCount, thetaLong, + dstOrderedOut); + } + } + + private static final void checkFamilyAndFlags( + final int srcFamId, + final boolean srcCompactFlag, + final boolean srcReadOnlyFlag) { + final Family srcFamily = Family.idToFamily(srcFamId); + if (srcCompactFlag) { + if ((srcFamily == Family.COMPACT) && srcReadOnlyFlag) { return; } + } else { + if (srcFamily == Family.ALPHA) { return; } + if (srcFamily == Family.QUICKSELECT) { return; } + } + throw new SketchesArgumentException( + "Possible Corruption: Family does not match flags: Family: " + + srcFamily.toString() + + ", Compact Flag: " + srcCompactFlag + + ", ReadOnly Flag: " + srcReadOnlyFlag); + } + + //All arguments must be valid and correct including flags. + // Used as helper to create byte arrays as well as loading Memory for direct compact sketches + //Input must be writable, return can be Read Only + static final MemorySegment loadCompactMemory( + final long[] compactHashArr, + final short seedHash, + final int curCount, + final long thetaLong, + final MemorySegment dstWSeg, + final byte flags, + final int preLongs) + { + assert (dstWSeg != null) && (compactHashArr != null); + final int outLongs = preLongs + curCount; + final int outBytes = outLongs << 3; + final int dstBytes = (int) dstWSeg.byteSize(); + if (outBytes > dstBytes) { + throw new SketchesArgumentException("Insufficient Memory: " + dstBytes + + ", Need: " + outBytes); + } + final byte famID = (byte) Family.COMPACT.getID(); + + //Caution: The following loads directly into Memory without creating a heap byte[] first, + // which would act as a pre-clearing, initialization mechanism. 
So it is important to make sure + // that all fields are initialized, even those that are not used by the CompactSketch. + // Otherwise, uninitialized fields could be filled with off-heap garbage, which could cause + // other problems downstream if those fields are not filtered out first. + // As written below, all fields are initialized avoiding an extra copy. + + //The first 8 bytes (pre0) + insertPreLongs(dstWSeg, preLongs); //RF not used = 0 + insertSerVer(dstWSeg, SER_VER); + insertFamilyID(dstWSeg, famID); + //The following initializes the lgNomLongs and lgArrLongs to 0. + //They are not used in CompactSketches. + dstWSeg.set(JAVA_SHORT_UNALIGNED, LG_NOM_LONGS_BYTE, (short)0); + insertFlags(dstWSeg, flags); + insertSeedHash(dstWSeg, seedHash); + + if ((preLongs == 1) && (curCount == 1)) { //singleItem, theta = 1.0 + dstWSeg.set(JAVA_LONG_UNALIGNED, 8, compactHashArr[0]); + return dstWSeg; + } + if (preLongs > 1) { + insertCurCount(dstWSeg, curCount); + insertP(dstWSeg, (float) 1.0); + } + if (preLongs > 2) { + insertThetaLong(dstWSeg, thetaLong); + } + if (curCount > 0) { //theta could be < 1.0. + //dstWSeg.putLongArray(preLongs << 3, compactHashArr, 0, curCount); + MemorySegment.copy(compactHashArr, 0, dstWSeg, JAVA_LONG_UNALIGNED, preLongs << 3, curCount); + } + return dstWSeg; //if prelongs == 3 & curCount == 0, theta could be < 1.0. This can be RO + } + + /** + * Copies then compacts, cleans, and may sort the resulting array. + * The source cache can be a hash table with interstitial zeros or + * "dirty" values, which are hash values greater than theta. + * These can be generated by the Alpha sketch. + * @param srcCache anything + * @param curCount must be correct + * @param thetaLong The correct + * thetaLong. + * @param dstOrdered true if output array must be sorted + * @return the compacted array. 
+ */ + static final long[] compactCache(final long[] srcCache, final int curCount, + final long thetaLong, final boolean dstOrdered) { + if (curCount == 0) { + return new long[0]; + } + final long[] cacheOut = new long[curCount]; + final int len = srcCache.length; + int j = 0; + for (int i = 0; i < len; i++) { //scan the full srcCache + final long v = srcCache[i]; + if ((v <= 0L) || (v >= thetaLong) ) { continue; } //ignoring zeros or dirty values + cacheOut[j++] = v; + } + if (j < curCount) { + throw new SketchesStateException( + "Possible Corruption: curCount parameter is incorrect."); + } + if (dstOrdered && (curCount > 1)) { + Arrays.sort(cacheOut); + } + return cacheOut; + } + + /* + * The truth table for empty, curCount and theta when compacting is as follows: + *
+   * Num Theta CurCount Empty State    Name, Comments
+   *  0    1.0     0      T     OK     EMPTY: The Normal Empty State
+   *  1    1.0     0      F   Internal This can occur internally as the result of an intersection of two exact,
+   *                                   disjoint sets, or AnotB of two exact, identical sets. There is no probability
+   *                                   distribution, so this is converted internally to EMPTY {1.0, 0, T}.
+   *                                   This is handled in SetOperation.createCompactSketch().
+   *  2    1.0    !0      T   Error    Empty=T and curCount !0 should never coexist.
+   *                                   This is checked in all compacting operations.
+   *  3    1.0    !0      F     OK     EXACT: This corresponds to a sketch in exact mode
+   *  4   <1.0     0      T   Internal This can be an initial UpdateSketch state if p < 1.0,
+   *                                   so change theta to 1.0. Return {Th = 1.0, 0, T}.
+   *                                   This is handled in UpdateSketch.compact() and toByteArray().
+   *  5   <1.0     0      F     OK     This can result from set operations
+   *  6   <1.0    !0      T   Error    Empty=T and curCount !0 should never coexist.
+   *                                   This is checked in all compacting operations.
+   *  7   <1.0    !0      F     OK     This corresponds to a sketch in estimation mode
+   * 
+ * #4 is handled by correctThetaOnCompact(boolean, int, long) (below). + * #2 & #6 are handled by checkIllegalCurCountAndEmpty(boolean, int) (below). + */ + + /** + * This corrects a temporary anomalous condition where compact() is called on an UpdateSketch + * that was initialized with p < 1.0 and update() was never called. In this case Theta < 1.0, + * curCount = 0, and empty = true. The correction is to change Theta to 1.0, which makes the + * returning sketch empty. This should only be used in the compaction or serialization of an + * UpdateSketch. + * @param empty the given empty state + * @param curCount the given curCount + * @param thetaLong the given thetaLong + * @return Long.MAX_VALUE (theta = 1.0) if empty is true and curCount is zero, otherwise the given thetaLong + */ + static final long correctThetaOnCompact(final boolean empty, final int curCount, + final long thetaLong) { //handles #4 above + return (empty && (curCount == 0)) ? Long.MAX_VALUE : thetaLong; + } + + /** + * This checks for the illegal condition where curCount > 0 and the state of + * empty = true. This check can be used anywhere a sketch is returned or a sketch is created + * from complete arguments. + * @param empty the given empty state + * @param curCount the given current count + */ //This handles #2 and #6 above + static final void checkIllegalCurCountAndEmpty(final boolean empty, final int curCount) { + if (empty && (curCount != 0)) { //this handles #2 and #6 above + throw new SketchesStateException("Illegal State: Empty=true and Current Count != 0."); + } + } + + /** + * This computes the number of preamble longs for a compact sketch based on empty, + * curCount and thetaLong. + * This also accommodates EmptyCompactSketch and SingleItemSketch. + * @param empty The given empty state + * @param curCount The given current count (retained entries) + * @param thetaLong the current thetaLong + * @return the number of preamble longs + */ + static final int computeCompactPreLongs(final boolean empty, final int curCount, + final long thetaLong) { + return (thetaLong < Long.MAX_VALUE) ? 
3 : empty ? 1 : (curCount > 1) ? 2 : 1; + } + + /** + * This checks for the singleItem Compact Sketch. + * @param empty the given empty state + * @param curCount the given curCount + * @param thetaLong the given thetaLong + * @return true if notEmpty, curCount = 1 and theta = 1.0; + */ + static final boolean isSingleItem(final boolean empty, final int curCount, + final long thetaLong) { + return !empty && (curCount == 1) && (thetaLong == Long.MAX_VALUE); + } +} + diff --git a/src/main/java/org/apache/datasketches/theta2/CompactSketch.java b/src/main/java/org/apache/datasketches/theta2/CompactSketch.java new file mode 100644 index 000000000..0498eed34 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/CompactSketch.java @@ -0,0 +1,478 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; +import static org.apache.datasketches.common.ByteArrayUtil.getShortLE; +import static org.apache.datasketches.common.Family.idToFamily; +import static org.apache.datasketches.theta2.PreambleUtil.COMPACT_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.FLAGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.READ_ONLY_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.SEED_HASH_SHORT; +import static org.apache.datasketches.theta2.PreambleUtil.extractEntryBitsV4; +import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID; +import static org.apache.datasketches.theta2.PreambleUtil.extractFlags; +import static org.apache.datasketches.theta2.PreambleUtil.extractNumEntriesBytesV4; +import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs; +import static org.apache.datasketches.theta2.PreambleUtil.extractSeedHash; +import static org.apache.datasketches.theta2.PreambleUtil.extractSerVer; +import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLongV4; +import static org.apache.datasketches.theta2.PreambleUtil.wholeBytesToHoldBits; +import static org.apache.datasketches.theta2.SingleItemSketch.otherCheckForSingleItem; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * The parent class of all the CompactSketches. 
CompactSketches are never created directly. + * They are created as a result of the compact() method of an UpdateSketch, a result of a + * getResult() of a SetOperation, or from a heapify method. + * + *

A CompactSketch is the simplest form of a Theta Sketch. It consists of a compact list + * (i.e., no intervening spaces) of hash values, which may be ordered or not, a value for theta + * and a seed hash. A CompactSketch is immutable (read-only), + * and the space required when stored is only the space required for the hash values and 8 to 24 + * bytes of preamble. An empty CompactSketch consumes only 8 bytes.

+ * + * @author Lee Rhodes + */ +public abstract class CompactSketch extends Sketch { + + /** + * Heapify takes a CompactSketch image in a MemorySegment and instantiates an on-heap CompactSketch. + * + *

The resulting sketch will not retain any link to the source MemorySegment and all of its data will be + * copied to the heap CompactSketch.

+ * + *

This method assumes that the sketch image was created with the correct hash seed, so it is not checked. + * The resulting on-heap CompactSketch will be given the seedHash derived from the given sketch image. + * However, Serial Version 1 sketch images do not have a seedHash field, + * so the resulting heapified CompactSketch will be given the hash of the DEFAULT_UPDATE_SEED.

+ * + * @param srcSeg an image of a CompactSketch. + * @return a CompactSketch on the heap. + */ + public static CompactSketch heapify(final MemorySegment srcSeg) { + return heapify(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED, false); + } + + /** + * Heapify takes a CompactSketch image in a MemorySegment and instantiates an on-heap CompactSketch. + * + *

The resulting sketch will not retain any link to the source MemorySegment and all of its data will be + * copied to the heap CompactSketch.

+ * + *

This method checks if the given expectedSeed was used to create the source MemorySegment image. + * However, SerialVersion 1 sketch images cannot be checked as they don't have a seedHash field, + * so the resulting heapified CompactSketch will be given the hash of the expectedSeed.

+ * + * @param srcSeg an image of a CompactSketch that was created using the given expectedSeed. + * See Memory. + * @param expectedSeed the seed used to validate the given MemorySegment image. + * See Update Hash Seed. + * @return a CompactSketch on the heap. + */ + public static CompactSketch heapify(final MemorySegment srcSeg, final long expectedSeed) { + return heapify(srcSeg, expectedSeed, true); + } + + private static CompactSketch heapify(final MemorySegment srcSeg, final long seed, final boolean enforceSeed) { + final int serVer = extractSerVer(srcSeg); + final int familyID = extractFamilyID(srcSeg); + final Family family = idToFamily(familyID); + if (family != Family.COMPACT) { + throw new IllegalArgumentException("Corrupted: " + family + " is not Compact!"); + } + if (serVer == 4) { + return heapifyV4(srcSeg, seed, enforceSeed); + } + if (serVer == 3) { + final int flags = extractFlags(srcSeg); + final boolean srcOrdered = (flags & ORDERED_FLAG_MASK) != 0; + final boolean empty = (flags & EMPTY_FLAG_MASK) != 0; + if (enforceSeed && !empty) { PreambleUtil.checkMemorySeedHash(srcSeg, seed); } + return CompactOperations.memoryToCompact(srcSeg, srcOrdered, null); + } + //not SerVer 3 or 4, assume compact stored form + final short seedHash = ThetaUtil.computeSeedHash(seed); + if (serVer == 1) { + return ForwardCompatibility.heapify1to3(srcSeg, seedHash); + } + if (serVer == 2) { + return ForwardCompatibility.heapify2to3(srcSeg, + enforceSeed ? seedHash : (short) extractSeedHash(srcSeg)); + } + throw new SketchesArgumentException("Unknown Serialization Version: " + serVer); + } + + /** + * Wrap takes the CompactSketch image in the given MemorySegment and refers to it directly. + * There is no data copying onto the java heap. + * The wrap operation enables fast read-only merging and access to all the public read-only API. + * + *

Only "Direct" Serialization Version 3 (i.e., OpenSource) sketches that have + * been explicitly stored as direct sketches can be wrapped. + * Wrapping earlier serial version sketches will result in a heapify operation. + * These early versions were never designed to "wrap".

+ * + *

Wrapping any subclass of this class that is empty or contains only a single item will + * result in heapified forms of empty and single item sketch respectively. + * This is actually faster and consumes less overall memory.

+ * + *

This method assumes that the sketch image was created with the correct hash seed, so it is not checked. + * However, Serial Version 1 sketch images do not have a seedHash field, + * so the resulting on-heap CompactSketch will be given the hash of the DEFAULT_UPDATE_SEED.

+ * + * @param srcSeg an image of a Sketch. + * @return a CompactSketch backed by the given MemorySegment except as above. + */ + public static CompactSketch wrap(final MemorySegment srcSeg) { + return wrap(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED, false); + } + + /** + * Wrap takes the sketch image in the given MemorySegment and refers to it directly. + * There is no data copying onto the java heap. + * The wrap operation enables fast read-only merging and access to all the public read-only API. + * + *

Only "Direct" Serialization Version 3 (i.e., OpenSource) sketches that have + * been explicitly stored as direct sketches can be wrapped. + * Wrapping earlier serial version sketches will result in a heapify operation. + * These early versions were never designed to "wrap".

+ * + *

Wrapping any subclass of this class that is empty or contains only a single item will + * result in heapified forms of empty and single item sketch respectively. + * This is actually faster and consumes less overall memory.

+ * + *

This method checks if the given expectedSeed was used to create the source MemorySegment image. + * However, SerialVersion 1 sketches cannot be checked as they don't have a seedHash field, + * so the resulting heapified CompactSketch will be given the hash of the expectedSeed.

+ * + * @param srcSeg an image of a Sketch that was created using the given expectedSeed. + * @param expectedSeed the seed used to validate the given MemorySegment image. + * See Update Hash Seed. + * @return a CompactSketch backed by the given MemorySegment except as above. + */ + public static CompactSketch wrap(final MemorySegment srcSeg, final long expectedSeed) { + return wrap(srcSeg, expectedSeed, true); + } + + private static CompactSketch wrap(final MemorySegment srcSeg, final long seed, final boolean enforceSeed) { + final int serVer = extractSerVer(srcSeg); + final int familyID = extractFamilyID(srcSeg); + final Family family = Family.idToFamily(familyID); + if (family != Family.COMPACT) { + throw new IllegalArgumentException("Corrupted: " + family + " is not Compact!"); + } + final short seedHash = ThetaUtil.computeSeedHash(seed); + + if (serVer == 4) { + return DirectCompactCompressedSketch.wrapInstance(srcSeg, + enforceSeed ? seedHash : (short) extractSeedHash(srcSeg)); + } + else if (serVer == 3) { + if (PreambleUtil.isEmptyFlag(srcSeg)) { + return EmptyCompactSketch.getHeapInstance(srcSeg); + } + if (otherCheckForSingleItem(srcSeg)) { + return SingleItemSketch.heapify(srcSeg, enforceSeed ? seedHash : (short) extractSeedHash(srcSeg)); + } + //not empty & not singleItem + final int flags = extractFlags(srcSeg); + final boolean compactFlag = (flags & COMPACT_FLAG_MASK) > 0; + if (!compactFlag) { + throw new SketchesArgumentException( + "Corrupted: COMPACT family sketch image must have compact flag set"); + } + final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0; + if (!readOnly) { + throw new SketchesArgumentException( + "Corrupted: COMPACT family sketch image must have Read-Only flag set"); + } + return DirectCompactSketch.wrapInstance(srcSeg, + enforceSeed ? 
seedHash : (short) extractSeedHash(srcSeg)); + } //end of serVer 3 + else if (serVer == 1) { + return ForwardCompatibility.heapify1to3(srcSeg, seedHash); + } + else if (serVer == 2) { + return ForwardCompatibility.heapify2to3(srcSeg, + enforceSeed ? seedHash : (short) extractSeedHash(srcSeg)); + } + throw new SketchesArgumentException( + "Corrupted: Serialization Version " + serVer + " not recognized."); + } + + /** + * Wrap takes the sketch image in the given byte array and refers to it directly. + * There is no data copying onto the java heap. + * The wrap operation enables fast read-only merging and access to all the public read-only API. + * + *

Only "Direct" Serialization Version 3 (i.e., OpenSource) sketches that have + * been explicitly stored as direct sketches can be wrapped. + * Wrapping earlier serial version sketches will result in a heapify operation. + * These early versions were never designed to "wrap".

+ * + *

Wrapping any subclass of this class that is empty or contains only a single item will + * result in heapified forms of empty and single item sketch respectively. + * This is actually faster and consumes less overall memory.

+ * + *

This overload does not enforce a seed for SerialVersion 2 and 3 images: the seedHash stored in the source byte array image is used as is. + * Note that SerialVersion 1 sketches cannot be checked as they don't have a seedHash field, + * so the resulting heapified CompactSketch will be given the hash of DEFAULT_UPDATE_SEED.

+ * + * @param bytes a byte array image of a Sketch that was created using the DEFAULT_UPDATE_SEED. + * + * @return a CompactSketch backed by the given byte array except as above. + */ + public static CompactSketch wrap(final byte[] bytes) { + return wrap(bytes, ThetaUtil.DEFAULT_UPDATE_SEED, false); + } + + /** + * Wrap takes the sketch image in the given byte array and refers to it directly. + * There is no data copying onto the java heap. + * The wrap operation enables fast read-only merging and access to all the public read-only API. + * + *

Only "Direct" Serialization Version 3 (i.e., OpenSource) sketches that have + * been explicitly stored as direct sketches can be wrapped. + * Wrapping earlier serial version sketches will result in a heapify operation. + * These early versions were never designed to "wrap".

+ * + *

Wrapping any subclass of this class that is empty or contains only a single item will + * result in heapified forms of empty and single item sketch respectively. + * This is actually faster and consumes less overall memory.

+ * + *

This method checks if the given expectedSeed was used to create the source byte array image. + * Note that SerialVersion 1 sketches cannot be checked as they don't have a seedHash field, + * so the resulting heapified CompactSketch will be given the hash of the expectedSeed.

+ * + * @param bytes a byte array image of a Sketch that was created using the given expectedSeed. + * @param expectedSeed the seed used to validate the given byte array image. + * See Update Hash Seed. + * @return a CompactSketch backed by the given byte array except as above. + */ + public static CompactSketch wrap(final byte[] bytes, final long expectedSeed) { + return wrap(bytes, expectedSeed, true); + } + + private static CompactSketch wrap(final byte[] bytes, final long seed, final boolean enforceSeed) { + final int serVer = bytes[PreambleUtil.SER_VER_BYTE]; + final int familyId = bytes[PreambleUtil.FAMILY_BYTE]; + final Family family = Family.idToFamily(familyId); + if (family != Family.COMPACT) { + throw new IllegalArgumentException("Corrupted: " + family + " is not Compact!"); + } + final short seedHash = ThetaUtil.computeSeedHash(seed); + if (serVer == 4) { + /* NOTE(review): unlike the serVer 2 and 3 paths, this passes the computed seedHash even when enforceSeed is false - confirm this is intended */ + return WrappedCompactCompressedSketch.wrapInstance(bytes, seedHash); + } else if (serVer == 3) { + final int flags = bytes[FLAGS_BYTE]; + if ((flags & EMPTY_FLAG_MASK) > 0) { + return EmptyCompactSketch.getHeapInstance(MemorySegment.ofArray(bytes)); + } + final int preLongs = bytes[PREAMBLE_LONGS_BYTE]; + if (otherCheckForSingleItem(preLongs, serVer, familyId, flags)) { + return SingleItemSketch.heapify(MemorySegment.ofArray(bytes), enforceSeed ? seedHash : getShortLE(bytes, SEED_HASH_SHORT)); + } + //not empty & not singleItem + final boolean compactFlag = (flags & COMPACT_FLAG_MASK) > 0; + if (!compactFlag) { + throw new SketchesArgumentException( + "Corrupted: COMPACT family sketch image must have compact flag set"); + } + final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0; + if (!readOnly) { + throw new SketchesArgumentException( + "Corrupted: COMPACT family sketch image must have Read-Only flag set"); + } + return WrappedCompactSketch.wrapInstance(bytes, + enforceSeed ? 
seedHash : getShortLE(bytes, SEED_HASH_SHORT)); + } else if (serVer == 1) { + return ForwardCompatibility.heapify1to3(MemorySegment.ofArray(bytes), seedHash); + } else if (serVer == 2) { + return ForwardCompatibility.heapify2to3(MemorySegment.ofArray(bytes), + enforceSeed ? seedHash : getShortLE(bytes, SEED_HASH_SHORT)); + } + throw new SketchesArgumentException( + "Corrupted: Serialization Version " + serVer + " not recognized."); + } + + //Sketch Overrides + + @Override + public abstract CompactSketch compact(final boolean dstOrdered, final MemorySegment dstMem); + + @Override + public int getCompactBytes() { + return getCurrentBytes(); + } + + @Override + int getCurrentDataLongs() { + return getRetainedEntries(true); + } + + @Override + public Family getFamily() { + return Family.COMPACT; + } + + @Override + public boolean isCompact() { + return true; + } + + @Override + public double getEstimate() { + return Sketch.estimate(getThetaLong(), getRetainedEntries()); + } + + /** + * gets the sketch as a compressed byte array + * @return the sketch as a compressed byte array + */ + public byte[] toByteArrayCompressed() { + if (!isOrdered() || getRetainedEntries() == 0 || (getRetainedEntries() == 1 && !isEstimationMode())) { + return toByteArray(); + } + return toByteArrayV4(); + } + + private int computeMinLeadingZeros() { + // compression is based on leading zeros in deltas between ordered hash values + // assumes ordered sketch + long previous = 0; + long ored = 0; + final HashIterator it = iterator(); + while (it.next()) { + final long delta = it.get() - previous; + ored |= delta; + previous = it.get(); + } + return Long.numberOfLeadingZeros(ored); + } + + private byte[] toByteArrayV4() { + final int preambleLongs = isEstimationMode() ? 
2 : 1; + final int entryBits = 64 - computeMinLeadingZeros(); + final int compressedBits = entryBits * getRetainedEntries(); + + // store num_entries as whole bytes since whole-byte blocks will follow (most probably) + final int numEntriesBytes = wholeBytesToHoldBits(32 - Integer.numberOfLeadingZeros(getRetainedEntries())); + + final int sizeBytes = preambleLongs * Long.BYTES + numEntriesBytes + wholeBytesToHoldBits(compressedBits); + final byte[] bytes = new byte[sizeBytes]; + final MemorySegment wseg = MemorySegment.ofArray(bytes); + int offsetBytes = 0; + wseg.set(JAVA_BYTE, offsetBytes++, (byte) preambleLongs); + wseg.set(JAVA_BYTE, offsetBytes++, (byte) 4); // to do: add constant + wseg.set(JAVA_BYTE, offsetBytes++, (byte) Family.COMPACT.getID()); + wseg.set(JAVA_BYTE, offsetBytes++, (byte) entryBits); + wseg.set(JAVA_BYTE, offsetBytes++, (byte) numEntriesBytes); + wseg.set(JAVA_BYTE, offsetBytes++, (byte) (COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | ORDERED_FLAG_MASK)); + wseg.set(JAVA_SHORT_UNALIGNED, offsetBytes, getSeedHash()); + offsetBytes += Short.BYTES; + if (isEstimationMode()) { + wseg.set(JAVA_LONG_UNALIGNED, offsetBytes, getThetaLong()); + offsetBytes += Long.BYTES; + } + int numEntries = getRetainedEntries(); + for (int i = 0; i < numEntriesBytes; i++) { + wseg.set(JAVA_BYTE, offsetBytes++, (byte) (numEntries & 0xff)); + numEntries >>>= 8; + } + long previous = 0; + final long[] deltas = new long[8]; + final HashIterator it = iterator(); + int i; + for (i = 0; i + 7 < getRetainedEntries(); i += 8) { + for (int j = 0; j < 8; j++) { + it.next(); + deltas[j] = it.get() - previous; + previous = it.get(); + } + BitPacking.packBitsBlock8(deltas, 0, bytes, offsetBytes, entryBits); + offsetBytes += entryBits; + } + int offsetBits = 0; + for (; i < getRetainedEntries(); i++) { + it.next(); + final long delta = it.get() - previous; + previous = it.get(); + BitPacking.packBits(delta, entryBits, bytes, offsetBytes, offsetBits); + offsetBytes += (offsetBits + 
entryBits) >>> 3; + offsetBits = (offsetBits + entryBits) & 7; + } + return bytes; + } + + private static CompactSketch heapifyV4(final MemorySegment srcSeg, final long seed, final boolean enforceSeed) { + final int preLongs = extractPreLongs(srcSeg); + final int entryBits = extractEntryBitsV4(srcSeg); + final int numEntriesBytes = extractNumEntriesBytesV4(srcSeg); + final short seedHash = (short) extractSeedHash(srcSeg); + if (enforceSeed) { PreambleUtil.checkMemorySeedHash(srcSeg, seed); } + int offsetBytes = 8; + long theta = Long.MAX_VALUE; + if (preLongs > 1) { + theta = extractThetaLongV4(srcSeg); + offsetBytes += Long.BYTES; + } + int numEntries = 0; + for (int i = 0; i < numEntriesBytes; i++) { + numEntries |= Byte.toUnsignedInt(srcSeg.get(JAVA_BYTE, offsetBytes++)) << (i << 3); + } + final long[] entries = new long[numEntries]; + final byte[] bytes = new byte[entryBits]; // temporary buffer for unpacking + int i; + for (i = 0; i + 7 < numEntries; i += 8) { + MemorySegment.copy(srcSeg, JAVA_BYTE, offsetBytes, bytes, 0, entryBits); + BitPacking.unpackBitsBlock8(entries, i, bytes, 0, entryBits); + offsetBytes += entryBits; + } + if (i < numEntries) { + MemorySegment.copy(srcSeg, JAVA_BYTE, offsetBytes, bytes, 0, wholeBytesToHoldBits((numEntries - i) * entryBits)); + int offsetBits = 0; + offsetBytes = 0; + for (; i < numEntries; i++) { + BitPacking.unpackBits(entries, i, entryBits, bytes, offsetBytes, offsetBits); + offsetBytes += (offsetBits + entryBits) >>> 3; + offsetBits = (offsetBits + entryBits) & 7; + } + } + // undo deltas + long previous = 0; + for (i = 0; i < numEntries; i++) { + entries[i] += previous; + previous = entries[i]; + } + return new HeapCompactSketch(entries, false, seedHash, numEntries, theta, true); + } + +} diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentBackgroundThetaPropagation.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentBackgroundThetaPropagation.java new file mode 100644 index 
000000000..2d529c4ce --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/ConcurrentBackgroundThetaPropagation.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * Background propagation thread. Propagates a given sketch or a hash value from local threads + * buffers into the shared sketch which stores the most up-to-date estimation of number of unique + * items. This propagation is done at the background by dedicated threads, which allows + * application threads to continue updating their local buffer. + * + * @author eshcar + */ +class ConcurrentBackgroundThetaPropagation implements Runnable { + + // Shared sketch to absorb the data + private final ConcurrentSharedThetaSketch sharedThetaSketch; + + // Propagation flag of local buffer that is being processed. + // It is the synchronization primitive to coordinate the work of the propagation with the + // local buffer. Updated when the propagation completes. + private final AtomicBoolean localPropagationInProgress; + + // Sketch to be propagated to shared sketch. 
Can be null if only a single hash is propagated + private final Sketch sketchIn; + + // Hash of the datum to be propagated to shared sketch. Can be ConcurrentSharedThetaSketch.NOT_SINGLE_HASH + // if the data is propagated through a sketch. + private final long singleHash; + + // The propagation epoch. The data can be propagated only within the context of this epoch. + // The data should not be propagated if this epoch is not equal to the + // shared sketch epoch. + private final long epoch; + + ConcurrentBackgroundThetaPropagation(final ConcurrentSharedThetaSketch sharedThetaSketch, + final AtomicBoolean localPropagationInProgress, final Sketch sketchIn, final long singleHash, + final long epoch) { + this.sharedThetaSketch = sharedThetaSketch; + this.localPropagationInProgress = localPropagationInProgress; + this.sketchIn = sketchIn; + this.singleHash = singleHash; + this.epoch = epoch; + } + + /** + * Propagation protocol: + * 1) validate propagation is executed at the context of the right epoch, otherwise abort + * 2) handle propagation: either of a single hash or of a sketch + * 3) complete propagation: ping local buffer + */ + @Override + public void run() { + // 1) validate propagation is executed at the context of the right epoch, otherwise abort + if (!sharedThetaSketch.validateEpoch(epoch)) { + // invalid epoch - should not propagate + sharedThetaSketch.endPropagation(null, false); + return; + } + + // 2) handle propagation: either of a single hash or of a sketch + if (singleHash != ConcurrentSharedThetaSketch.NOT_SINGLE_HASH) { + sharedThetaSketch.propagate(singleHash); + } else if (sketchIn != null) { + final long volTheta = sharedThetaSketch.getVolatileTheta(); + assert volTheta <= sketchIn.getThetaLong() : + "volTheta = " + volTheta + ", bufTheta = " + sketchIn.getThetaLong(); + + // propagate values from input sketch one by one + final long[] cacheIn = sketchIn.getCache(); + + if (sketchIn.isOrdered()) { //Ordered compact, Use early stop + for (final 
long hashIn : cacheIn) { + if (hashIn >= volTheta) { + break; //early stop + } + sharedThetaSketch.propagate(hashIn); + } + } else { //not ordered, also may have zeros (gaps) in the array. + for (final long hashIn : cacheIn) { + if (hashIn > 0) { + sharedThetaSketch.propagate(hashIn); + } + } + } + } + + // 3) complete propagation: ping local buffer + sharedThetaSketch.endPropagation(localPropagationInProgress, false); + } + +} diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketch.java new file mode 100644 index 000000000..af5917123 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketch.java @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static org.apache.datasketches.theta2.PreambleUtil.THETA_LONG; + +import java.lang.foreign.MemorySegment; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.common.SuppressFBWarnings; + +/** + * A concurrent shared sketch that is based on DirectQuickSelectSketch. + * It reflects all data processed by a single or multiple update threads, and can serve queries at + * any time. + * Background propagation threads are used to propagate data from thread local buffers into this + * sketch which stores the most up-to-date estimation of number of unique items. + * + * @author eshcar + * @author Lee Rhodes + */ +final class ConcurrentDirectQuickSelectSketch extends DirectQuickSelectSketch + implements ConcurrentSharedThetaSketch { + + // The propagation thread + private ExecutorService executorService_; + + // A flag to coordinate between several eager propagation threads + private final AtomicBoolean sharedPropagationInProgress_; + + // Theta value of concurrent sketch + private volatile long volatileThetaLong_; + + // A snapshot of the estimated number of unique entries + private volatile double volatileEstimate_; + + // Num of retained entries in which the sketch toggles from sync (exact) mode to async + // propagation mode + private final long exactLimit_; + + // An epoch defines an interval between two resets. A propagation invoked at epoch i cannot + // affect the sketch at epoch j > i. + private volatile long epoch_; + + /** + * Construct a new sketch instance and initialize the given Memory as its backing store. + * + * @param lgNomLongs See lgNomLongs. + * @param seed See Update Hash Seed. 
+ * @param maxConcurrencyError the max error value including error induced by concurrency. + * @param dstSeg the given MemorySegment object destination. It cannot be null. + */ + ConcurrentDirectQuickSelectSketch(final int lgNomLongs, final long seed, + final double maxConcurrencyError, final MemorySegment dstSeg) { + super(lgNomLongs, seed, 1.0F, //p + ResizeFactor.X1, //rf, + dstSeg, false); //unionGadget + + volatileThetaLong_ = Long.MAX_VALUE; + volatileEstimate_ = 0; + exactLimit_ = ConcurrentSharedThetaSketch.computeExactLimit(1L << getLgNomLongs(), + maxConcurrencyError); + sharedPropagationInProgress_ = new AtomicBoolean(false); + epoch_ = 0; + initBgPropagationService(); + } + + ConcurrentDirectQuickSelectSketch(final UpdateSketch sketch, final long seed, + final double maxConcurrencyError, final MemorySegment dstSeg) { + super(sketch.getLgNomLongs(), seed, 1.0F, //p + ResizeFactor.X1, //rf, + dstSeg, + false); //unionGadget + + exactLimit_ = ConcurrentSharedThetaSketch.computeExactLimit(1L << getLgNomLongs(), + maxConcurrencyError); + sharedPropagationInProgress_ = new AtomicBoolean(false); + epoch_ = 0; + initBgPropagationService(); + for (final long hashIn : sketch.getCache()) { + propagate(hashIn); + } + wseg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, sketch.getThetaLong()); + updateVolatileTheta(); + updateEstimationSnapshot(); + } + + //Sketch overrides + + @Override + public double getEstimate() { + return volatileEstimate_; + } + + @Override + public boolean isEstimationMode() { + return (getRetainedEntries(false) > exactLimit_) || super.isEstimationMode(); + } + + @Override + public byte[] toByteArray() { + while (!sharedPropagationInProgress_.compareAndSet(false, true)) { } //busy wait till free + final byte[] res = super.toByteArray(); + sharedPropagationInProgress_.set(false); + return res; + } + + //UpdateSketch overrides + + @Override + public UpdateSketch rebuild() { + super.rebuild(); + updateEstimationSnapshot(); + return this; + } + + /** + * 
{@inheritDoc} + * Takes care of mutual exclusion with propagation thread. + */ + @Override + public void reset() { + advanceEpoch(); + super.reset(); + volatileThetaLong_ = Long.MAX_VALUE; + volatileEstimate_ = 0; + } + + @Override + UpdateReturnState hashUpdate(final long hash) { + final String msg = "No update method should be called directly to a shared theta sketch." + + " Updating the shared sketch is only permitted through propagation from local sketches."; + throw new UnsupportedOperationException(msg); + } + + //ConcurrentSharedThetaSketch declarations + + @Override + public long getExactLimit() { + return exactLimit_; + } + + @Override + public boolean startEagerPropagation() { + while (!sharedPropagationInProgress_.compareAndSet(false, true)) { } //busy wait till free + return (!isEstimationMode());// no eager propagation is allowed in estimation mode + } + + @Override + public void endPropagation(final AtomicBoolean localPropagationInProgress, final boolean isEager) { + //update volatile theta, uniques estimate and propagation flag + updateVolatileTheta(); + updateEstimationSnapshot(); + if (isEager) { + sharedPropagationInProgress_.set(false); + } + if (localPropagationInProgress != null) { + localPropagationInProgress.set(false); //clear local propagation flag + } + } + + @Override + public long getVolatileTheta() { + return volatileThetaLong_; + } + + @Override + public void awaitBgPropagationTermination() { + try { + executorService_.shutdown(); + while (!executorService_.awaitTermination(1, TimeUnit.MILLISECONDS)) { + Thread.sleep(1); + } + } catch (final InterruptedException e) { + e.printStackTrace(); + } + } + + @Override + public final void initBgPropagationService() { + executorService_ = ConcurrentPropagationService.getExecutorService(Thread.currentThread().threadId()); + } + + @Override + public boolean propagate(final AtomicBoolean localPropagationInProgress, + final Sketch sketchIn, final long singleHash) { + final long epoch = epoch_; + if 
((singleHash != NOT_SINGLE_HASH) // namely, is a single hash and + && (getRetainedEntries(false) < exactLimit_)) { // a small sketch then propagate myself (blocking) + if (!startEagerPropagation()) { + endPropagation(localPropagationInProgress, true); + return false; + } + if (!validateEpoch(epoch)) { + endPropagation(null, true); // do not change local flag + return true; + } + propagate(singleHash); + endPropagation(localPropagationInProgress, true); + return true; + } + // otherwise, be nonblocking, let background thread do the work + final ConcurrentBackgroundThetaPropagation job = new ConcurrentBackgroundThetaPropagation( + this, localPropagationInProgress, sketchIn, singleHash, epoch); + executorService_.execute(job); + return true; + } + + @Override + public void propagate(final long singleHash) { + super.hashUpdate(singleHash); + } + + @Override + public void updateEstimationSnapshot() { + volatileEstimate_ = super.getEstimate(); + } + + @Override + public void updateVolatileTheta() { + volatileThetaLong_ = getThetaLong(); + } + + @Override + public boolean validateEpoch(final long epoch) { + return epoch_ == epoch; + } + + //Restricted + + /** + * Advances the epoch while there is no background propagation + * This ensures a propagation invoked before the reset cannot affect the sketch after the reset + * is completed. Ignore VO_VOLATILE_INCREMENT findbugs warning, it is False Positive. + */ + @SuppressFBWarnings(value = "VO_VOLATILE_INCREMENT", justification = "Likely False Positive, Fix Later") + private void advanceEpoch() { + awaitBgPropagationTermination(); + startEagerPropagation(); + ConcurrentPropagationService.resetExecutorService(Thread.currentThread().threadId()); + //no inspection NonAtomicOperationOnVolatileField + // this increment of a volatile field is done within the scope of the propagation + // synchronization and hence is done by a single thread. 
+ epoch_++; + endPropagation(null, true); + initBgPropagationService(); + } + +} diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketch.java new file mode 100644 index 000000000..56e254b51 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketch.java @@ -0,0 +1,266 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.common.SuppressFBWarnings; + +/** + * A concurrent shared sketch that is based on HeapQuickSelectSketch. + * It reflects all data processed by a single or multiple update threads, and can serve queries at + * any time. + * Background propagation threads are used to propagate data from thread local buffers into this + * sketch which stores the most up-to-date estimation of number of unique items. 
+ * + * @author eshcar + * @author Lee Rhodes + */ +final class ConcurrentHeapQuickSelectSketch extends HeapQuickSelectSketch + implements ConcurrentSharedThetaSketch { + + // The propagation thread + private volatile ExecutorService executorService_; + + //A flag to coordinate between several eager propagation threads + private final AtomicBoolean sharedPropagationInProgress_; + + // Theta value of concurrent sketch + private volatile long volatileThetaLong_; + + // A snapshot of the estimated number of unique entries + private volatile double volatileEstimate_; + + // Num of retained entries in which the sketch toggles from sync (exact) mode to async + // propagation mode + private final long exactLimit_; + + // An epoch defines an interval between two resets. A propagation invoked at epoch i cannot + // affect the sketch at epoch j > i. + private volatile long epoch_; + + /** + * Construct a new sketch instance on the java heap. + * + * @param lgNomLongs See lgNomLongs. + * @param seed See seed + * @param maxConcurrencyError the max error value including error induced by concurrency + * + */ + ConcurrentHeapQuickSelectSketch(final int lgNomLongs, final long seed, + final double maxConcurrencyError) { + super(lgNomLongs, seed, 1.0F, //p + ResizeFactor.X1, //rf, + false); //unionGadget + + volatileThetaLong_ = Long.MAX_VALUE; + volatileEstimate_ = 0; + exactLimit_ = ConcurrentSharedThetaSketch.computeExactLimit(1L << getLgNomLongs(), + maxConcurrencyError); + sharedPropagationInProgress_ = new AtomicBoolean(false); + epoch_ = 0; + initBgPropagationService(); + } + + ConcurrentHeapQuickSelectSketch(final UpdateSketch sketch, final long seed, + final double maxConcurrencyError) { + super(sketch.getLgNomLongs(), seed, 1.0F, //p + ResizeFactor.X1, //rf, + false); //unionGadget + + exactLimit_ = ConcurrentSharedThetaSketch.computeExactLimit(1L << getLgNomLongs(), + maxConcurrencyError); + sharedPropagationInProgress_ = new AtomicBoolean(false); + epoch_ = 0; + 
initBgPropagationService(); + for (final long hashIn : sketch.getCache()) { + propagate(hashIn); + } + thetaLong_ = sketch.getThetaLong(); + updateVolatileTheta(); + updateEstimationSnapshot(); + } + + //Sketch overrides + + @Override + public double getEstimate() { + return volatileEstimate_; + } + + @Override + public boolean isEstimationMode() { + return (getRetainedEntries(false) > exactLimit_) || super.isEstimationMode(); + } + + @Override + public byte[] toByteArray() { + while (!sharedPropagationInProgress_.compareAndSet(false, true)) { } //busy wait till free + final byte[] res = super.toByteArray(); + sharedPropagationInProgress_.set(false); + return res; + } + + //UpdateSketch overrides + + @Override + public UpdateSketch rebuild() { + super.rebuild(); + updateEstimationSnapshot(); + return this; + } + + /** + * {@inheritDoc} + * Takes care of mutual exclusion with propagation thread. + */ + @Override + public void reset() { + advanceEpoch(); + super.reset(); + volatileThetaLong_ = Long.MAX_VALUE; + volatileEstimate_ = 0; + } + + @Override + UpdateReturnState hashUpdate(final long hash) { + final String msg = "No update method should be called directly to a shared theta sketch." 
+ + " Updating the shared sketch is only permitted through propagation from local sketches."; + throw new UnsupportedOperationException(msg); + } + + //ConcurrentSharedThetaSketch declarations + + @Override + public long getExactLimit() { + return exactLimit_; + } + + @Override + public boolean startEagerPropagation() { + while (!sharedPropagationInProgress_.compareAndSet(false, true)) { } //busy wait till free + return (!isEstimationMode());// no eager propagation is allowed in estimation mode + } + + @Override + public void endPropagation(final AtomicBoolean localPropagationInProgress, final boolean isEager) { + //update volatile theta, uniques estimate and propagation flag + updateVolatileTheta(); + updateEstimationSnapshot(); + if (isEager) { + sharedPropagationInProgress_.set(false); + } + if (localPropagationInProgress != null) { + localPropagationInProgress.set(false); //clear local propagation flag + } + } + + @Override + public long getVolatileTheta() { + return volatileThetaLong_; + } + + @Override + public void awaitBgPropagationTermination() { + try { + executorService_.shutdown(); + while (!executorService_.awaitTermination(1, TimeUnit.MILLISECONDS)) { + Thread.sleep(1); + } + } catch (final InterruptedException e) { + e.printStackTrace(); + } + } + + @Override + public void initBgPropagationService() { + executorService_ = ConcurrentPropagationService.getExecutorService(Thread.currentThread().threadId()); + } + + @Override + public boolean propagate(final AtomicBoolean localPropagationInProgress, + final Sketch sketchIn, final long singleHash) { + final long epoch = epoch_; + if ((singleHash != NOT_SINGLE_HASH) //namely, is a single hash and + && (getRetainedEntries(false) < exactLimit_)) { //a small sketch then propagate myself (blocking) + if (!startEagerPropagation()) { + endPropagation(localPropagationInProgress, true); + return false; + } + if (!validateEpoch(epoch)) { + endPropagation(null, true); // do not change local flag + return true; + } + 
propagate(singleHash); + endPropagation(localPropagationInProgress, true); + return true; + } + // otherwise, be nonblocking, let background thread do the work + final ConcurrentBackgroundThetaPropagation job = new ConcurrentBackgroundThetaPropagation( + this, localPropagationInProgress, sketchIn, singleHash, epoch); + executorService_.execute(job); + return true; + } + + @Override + public void propagate(final long singleHash) { + super.hashUpdate(singleHash); + } + + @Override + public void updateEstimationSnapshot() { + volatileEstimate_ = super.getEstimate(); + } + + @Override + public void updateVolatileTheta() { + volatileThetaLong_ = getThetaLong(); + } + + @Override + public boolean validateEpoch(final long epoch) { + return epoch_ == epoch; + } + + //Restricted + + /** + * Advances the epoch while there is no background propagation + * This ensures a propagation invoked before the reset cannot affect the sketch after the reset + * is completed. + */ + @SuppressFBWarnings(value = "VO_VOLATILE_INCREMENT", justification = "Likely False Positive, Fix Later") + private void advanceEpoch() { + awaitBgPropagationTermination(); + startEagerPropagation(); + ConcurrentPropagationService.resetExecutorService(Thread.currentThread().threadId()); + //no inspection NonAtomicOperationOnVolatileField + // this increment of a volatile field is done within the scope of the propagation + // synchronization and hence is done by a single thread + // Ignore a FindBugs warning + epoch_++; + endPropagation(null, true); + initBgPropagationService(); + } + +} diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java new file mode 100644 index 000000000..ab1d41a65 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java @@ -0,0 +1,223 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license 
agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import static org.apache.datasketches.theta2.UpdateReturnState.ConcurrentBufferInserted; +import static org.apache.datasketches.theta2.UpdateReturnState.ConcurrentPropagated; +import static org.apache.datasketches.theta2.UpdateReturnState.RejectedOverTheta; + +import java.util.concurrent.atomic.AtomicBoolean; + +import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.thetacommon.HashOperations; + +/** + * This is a theta filtering, bounded size buffer that operates in the context of a single writing + * thread. When the buffer becomes full its content is propagated into the shared sketch, which + * may be on a different thread. The limit on the buffer size is configurable. A bound of size 1 + * allows the combination of buffers and shared sketch to maintain an error bound in real-time + * that is close to the error bound of a sequential theta sketch. Allowing larger buffer sizes + * enables amortization of the cost propagations and substantially improves overall system throughput. + * The error caused by the buffering is essentially a perspective of time and synchronization + * and not really a true error. 
At the end of a stream, after all the buffers have synchronized with + * the shared sketch, there is no additional error. + * Propagation is done either synchronously by the updating thread, or asynchronously by a + * background propagation thread. + * + *

This is a buffer, not a sketch, and it extends the HeapQuickSelectSketch + * in order to leverage some of the sketch machinery to make its work simple. However, if this + * buffer receives a query, like getEstimate(), the correct answer does not come from the super + * HeapQuickSelectSketch, which knows nothing about the concurrency relationship to the + * shared concurrent sketch, it must come from the shared concurrent sketch. As a result nearly all + * of the inherited sketch methods are redirected to the shared concurrent sketch. + * + * @author eshcar + * @author Lee Rhodes + */ +final class ConcurrentHeapThetaBuffer extends HeapQuickSelectSketch { + + // Shared sketch consisting of the global sample set and theta value. + private final ConcurrentSharedThetaSketch shared; + + // A flag indicating whether the shared sketch is in shared mode and requires eager propagation + // Initially this is true. Once it is set to false (estimation mode) it never flips back. + private boolean isExactMode; + + // A flag to indicate if we expect the propagated data to be ordered + private final boolean propagateOrderedCompact; + + // Propagation flag is set to true while propagation is in progress (or pending). + // It is the synchronization primitive to coordinate the work with the propagation thread. 
+ private final AtomicBoolean localPropagationInProgress; + + ConcurrentHeapThetaBuffer(final int lgNomLongs, final long seed, + final ConcurrentSharedThetaSketch shared, final boolean propagateOrderedCompact, + final int maxNumLocalThreads) { + super(computeLogBufferSize(lgNomLongs, shared.getExactLimit(), maxNumLocalThreads), + seed, 1.0F, //p + ResizeFactor.X1, //rf + false); //not a union gadget + + this.shared = shared; + isExactMode = true; + this.propagateOrderedCompact = propagateOrderedCompact; + localPropagationInProgress = new AtomicBoolean(false); + } + + private static int computeLogBufferSize(final int lgNomLongs, final long exactSize, + final int maxNumLocalBuffers) { + return Math.min(lgNomLongs, (int)Math.log(Math.sqrt(exactSize) / (2 * maxNumLocalBuffers))); + } + + //concurrent restricted methods + + /** + * Propagates a single hash value to the shared sketch + * + * @param hash to be propagated + */ + private boolean propagateToSharedSketch(final long hash) { + //no inspection StatementWithEmptyBody + while (localPropagationInProgress.get()) { + } //busy wait until previous propagation completed + localPropagationInProgress.set(true); + final boolean res = shared.propagate(localPropagationInProgress, null, hash); + //in this case the parent empty_ and curCount_ were not touched + thetaLong_ = shared.getVolatileTheta(); + return res; + } + + /** + * Propagates the content of the buffer as a sketch to the shared sketch + */ + private void propagateToSharedSketch() { + //no inspection StatementWithEmptyBody + while (localPropagationInProgress.get()) { + } //busy wait until previous propagation completed + + final CompactSketch compactSketch = compact(propagateOrderedCompact, null); + localPropagationInProgress.set(true); + shared.propagate(localPropagationInProgress, compactSketch, + ConcurrentSharedThetaSketch.NOT_SINGLE_HASH); + super.reset(); + thetaLong_ = shared.getVolatileTheta(); + } + + //Public Sketch overrides proxies to shared concurrent 
sketch + + @Override + public int getCompactBytes() { + return shared.getCompactBytes(); + } + + @Override + public int getCurrentBytes() { + return shared.getCurrentBytes(); + } + + @Override + public double getEstimate() { + return shared.getEstimate(); + } + + @Override + public double getLowerBound(final int numStdDev) { + return shared.getLowerBound(numStdDev); + } + + @Override + public double getUpperBound(final int numStdDev) { + return shared.getUpperBound(numStdDev); + } + + @Override + public boolean hasMemorySegment() { + return false; + } + + @Override + public boolean isDirect() { + return false; + } + + @Override + public boolean isEmpty() { + return shared.isEmpty(); + } + + @Override + public boolean isEstimationMode() { + return shared.isEstimationMode(); + } + + //End of proxies + + @Override + public byte[] toByteArray() { + throw new UnsupportedOperationException("Local theta buffer need not be serialized"); + } + + //Public UpdateSketch overrides + + @Override + public void reset() { + super.reset(); + isExactMode = true; + localPropagationInProgress.set(false); + } + + //Restricted UpdateSketch overrides + + /** + * Updates buffer with given hash value. + * Triggers propagation to shared sketch if buffer is full. + * + * @param hash the given input hash value. A hash of zero or Long.MAX_VALUE is ignored. + * A negative hash value will throw an exception. + * @return + * See Update Return State + */ + @Override + UpdateReturnState hashUpdate(final long hash) { + if (isExactMode) { + isExactMode = !shared.isEstimationMode(); + } + HashOperations.checkHashCorruption(hash); + if ((getHashTableThreshold() == 0) || isExactMode ) { + //The over-theta and zero test + if (HashOperations.continueCondition(getThetaLong(), hash)) { + return RejectedOverTheta; //signal that hash was rejected due to theta or zero. 
+ } + if (propagateToSharedSketch(hash)) { + return ConcurrentPropagated; + } + } + final UpdateReturnState state = super.hashUpdate(hash); + if (isOutOfSpace(getRetainedEntries(true) + 1)) { + propagateToSharedSketch(); + return ConcurrentPropagated; + } + if (state == UpdateReturnState.InsertedCountIncremented) { + return ConcurrentBufferInserted; + } + return state; + } + +} diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentPropagationService.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentPropagationService.java new file mode 100644 index 000000000..92ca954fa --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/ConcurrentPropagationService.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +import org.apache.datasketches.common.SuppressFBWarnings; + +/** + * Pool of threads to serve all propagation tasks in the system. 
+ * + * @author Eshcar Hillel + */ +final class ConcurrentPropagationService { + + static int NUM_POOL_THREADS = 3; // Default: 3 threads + private static volatile ConcurrentPropagationService instance = null; // Singleton + private static ExecutorService[] propagationExecutorService = null; + + @SuppressFBWarnings(value = "ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD", justification = "Fix later") + private ConcurrentPropagationService() { + propagationExecutorService = new ExecutorService[NUM_POOL_THREADS]; + } + + //Factory: Get the singleton + @SuppressFBWarnings(value = "SSD_DO_NOT_USE_INSTANCE_LOCK_ON_SHARED_STATIC_DATA", justification = "Fix later") + private static ConcurrentPropagationService getInstance() { + if (instance == null) { + synchronized (ConcurrentPropagationService.class) { + if (instance == null) { + instance = new ConcurrentPropagationService(); //SpotBugs: SSD_DO_NOT_USE_INSTANCE_LOCK_ON_SHARED_STATIC_DATA + } + } + } + return instance; + } + + public static ExecutorService getExecutorService(final long id) { + return getInstance().initExecutorService((int) id % NUM_POOL_THREADS); + } + + @SuppressWarnings("static-access") + public static ExecutorService resetExecutorService(final long id) { + return getInstance().propagationExecutorService[(int) id % NUM_POOL_THREADS] = null; + } + + @SuppressWarnings("static-method") + private ExecutorService initExecutorService(final int i) { + if (propagationExecutorService[i] == null) { + propagationExecutorService[i] = Executors.newSingleThreadExecutor(); + } + return propagationExecutorService[i]; + } +} diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java new file mode 100644 index 000000000..5bf147049 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or 
more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import java.lang.foreign.MemorySegment; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.apache.datasketches.common.Util; + +/** + * An internal interface to define the API of a concurrent shared theta sketch. + * It reflects all data processed by a single or multiple update threads, and can serve queries at + * any time. + * + * @author eshcar + */ +interface ConcurrentSharedThetaSketch { + + long NOT_SINGLE_HASH = -1L; + double MIN_ERROR = 0.0000001; + + static long computeExactLimit(final long k, final double error) { + return 2 * Math.min(k, (long) Math.ceil(1.0 / Math.pow(Math.max(error,MIN_ERROR), 2.0))); + } + + /** + * Returns flip point (number of updates) from exact to estimate mode. + * @return flip point from exact to estimate mode + */ + long getExactLimit(); + + /** + * Ensures mutual exclusion. No other thread can update the shared sketch while propagation is + * in progress + * @return true if eager propagation was started + */ + boolean startEagerPropagation(); + + /** + * Completes the propagation: end mutual exclusion block. 
+ * Notifies the local thread the propagation is completed + * + * @param localPropagationInProgress the synchronization primitive through which propagator + * notifies local thread the propagation is completed + * @param isEager true if the propagation is in eager mode + */ + void endPropagation(AtomicBoolean localPropagationInProgress, boolean isEager); + + /** + * Returns the value of the volatile theta managed by the shared sketch + * @return the value of the volatile theta managed by the shared sketch + */ + long getVolatileTheta(); + + /** + * Awaits termination of background (lazy) propagation tasks + */ + void awaitBgPropagationTermination(); + + /** + * Init background (lazy) propagation service + */ + void initBgPropagationService(); + + /** + * (Eager) Propagates the given sketch or hash value into this sketch + * @param localPropagationInProgress the flag to be updated when propagation is done + * @param sketchIn any Theta sketch with the data + * @param singleHash a single hash value + * @return true if propagation successfully started + */ + boolean propagate(final AtomicBoolean localPropagationInProgress, final Sketch sketchIn, + final long singleHash); + + /** + * (Lazy/Eager) Propagates the given hash value into this sketch + * @param singleHash a single hash value + */ + void propagate(final long singleHash); + + /** + * Updates the estimation of the number of unique entries by capturing a snapshot of the sketch + * data, namely, volatile theta and the num of valid entries in the sketch + */ + void updateEstimationSnapshot(); + + /** + * Updates the value of the volatile theta by extracting it from the underlying sketch managed + * by the shared sketch + */ + void updateVolatileTheta(); + + /** + * Validates the shared sketch is in the context of the given epoch + * + * @param epoch the epoch number to be validated + * @return true iff the shared sketch is in the context of the given epoch + */ + boolean validateEpoch(long epoch); + + //The
following mirrors are public methods that already exist on the "extends" side of the dual + // inheritance. They are provided here to allow casts to this interface access + // to these methods without having to cast back to the extended parent class. + // + //This allows an internal class to cast either the Concurrent Direct or Concurrent Heap + //shared class to this interface and have access to the above special concurrent methods as + //well as the methods below. + // + //For the external user all of the below methods can be obtained by casting the shared + //sketch to UpdateSketch. However, these methods here also act as an alias so that an + //attempt to access these methods from the local buffer will be diverted to the shared + //sketch. + + //From Sketch and MemoryStatus + + int getCompactBytes(); + + int getCurrentBytes(); + + double getEstimate(); + + double getLowerBound(int numStdDev); + + double getUpperBound(int numStdDev); + + /** + * Returns true if this object's internal data is backed by a MemorySegment object, + * which may be on-heap or off-heap. + * @return true if this object's internal data is backed by a MemorySegment object. + */ + boolean hasMemorySegment(); + + /** + * Returns true if this object's internal data is backed by direct (off-heap) Memory. + * @return true if this object's internal data is backed by direct (off-heap) Memory. + */ + boolean isDirect(); + + /** + * Returns true if the two given MemorySegments refer to the same backing resource, + * which is either an off-heap memory location and size, or the same on-heap array object. + * + *

This is a convenient delegate of + * {@link org.apache.datasketches.common.Util#isSameResource(MemorySegment, MemorySegment) isSameResource()}

+ * + * @param seg1 The first given MemorySegment + * @param seg2 The second given MemorySegment + * @return true if both MemorySegments are determined to be the same backing memory. + */ + default boolean isSameResource(final MemorySegment seg1, final MemorySegment seg2) { + return Util.isSameResource(seg1, seg2); + } + + boolean isEmpty(); + + boolean isEstimationMode(); + + byte[] toByteArray(); + + int getRetainedEntries(boolean valid); + + CompactSketch compact(); + + CompactSketch compact(boolean ordered, MemorySegment wseg); + + UpdateSketch rebuild(); + + void reset(); +} + diff --git a/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java b/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java new file mode 100644 index 000000000..9be51c379 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.extractEntryBitsV4; +import static org.apache.datasketches.theta2.PreambleUtil.extractNumEntriesBytesV4; +import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs; +import static org.apache.datasketches.theta2.PreambleUtil.extractSeedHash; +import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLongV4; +import static org.apache.datasketches.theta2.PreambleUtil.wholeBytesToHoldBits; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * An off-heap (Direct), compact, compressed, read-only sketch. It is not empty, not a single item and ordered. + * + *

<p>This sketch can only be associated with a Serialization Version 4 format binary image.</p>

+ * + *

<p>This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for + * the JVM to perform garbage collection.</p>

+ */ +class DirectCompactCompressedSketch extends DirectCompactSketch { + /** + * Construct this sketch with the given MemorySegment. + * @param seg Read-only MemorySegment object. + */ + DirectCompactCompressedSketch(final MemorySegment seg) { + super(seg); + } + + /** + * Wraps the given MemorySegment, which must be a SerVer 4 compressed CompactSketch image. + * Must check the validity of the Memory before calling. + * @param srcSeg The source MemorySegment + * @param seedHash The update seedHash. + * See Seed Hash. + * @return this sketch + */ + static DirectCompactCompressedSketch wrapInstance(final MemorySegment srcSeg, final short seedHash) { + ThetaUtil.checkSeedHashes((short) extractSeedHash(srcSeg), seedHash); + return new DirectCompactCompressedSketch(srcSeg); + } + + //Sketch Overrides + + @Override + public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) { + if (dstSeg != null) { + MemorySegment.copy(seg_, 0, dstSeg, 0, getCurrentBytes()); + return new DirectCompactSketch(dstSeg); + } + return CompactSketch.heapify(seg_); + } + + @Override + public int getCurrentBytes() { + final int preLongs = extractPreLongs(seg_); + final int entryBits = extractEntryBitsV4(seg_); + final int numEntriesBytes = extractNumEntriesBytesV4(seg_); + return preLongs * Long.BYTES + numEntriesBytes + wholeBytesToHoldBits(getRetainedEntries() * entryBits); + } + + private static final int START_PACKED_DATA_EXACT_MODE = 8; + private static final int START_PACKED_DATA_ESTIMATION_MODE = 16; + + @Override + public int getRetainedEntries(final boolean valid) { //compact is always valid + // number of entries is stored using variable length encoding + // most significant bytes with all zeros are not stored + // one byte in the preamble has the number of non-zero bytes used + final int preLongs = extractPreLongs(seg_); // if > 1 then the second long has theta + final int numEntriesBytes = extractNumEntriesBytesV4(seg_); + int offsetBytes = preLongs > 1 ? 
START_PACKED_DATA_ESTIMATION_MODE : START_PACKED_DATA_EXACT_MODE; + int numEntries = 0; + for (int i = 0; i < numEntriesBytes; i++) { + numEntries |= Byte.toUnsignedInt(seg_.get(JAVA_BYTE, offsetBytes++)) << (i << 3); + } + return numEntries; + } + + @Override + public long getThetaLong() { + final int preLongs = extractPreLongs(seg_); + return (preLongs > 1) ? extractThetaLongV4(seg_) : Long.MAX_VALUE; + } + + @Override + public boolean isEmpty() { + return false; + } + + @Override + public boolean isOrdered() { + return true; + } + + @Override + public HashIterator iterator() { + return new MemoryCompactCompressedHashIterator( + seg_, + (extractPreLongs(seg_) > 1 ? START_PACKED_DATA_ESTIMATION_MODE : START_PACKED_DATA_EXACT_MODE) + + extractNumEntriesBytesV4(seg_), + extractEntryBitsV4(seg_), + getRetainedEntries() + ); + } + + //restricted methods + + @Override + long[] getCache() { + final int numEntries = getRetainedEntries(); + final long[] cache = new long[numEntries]; + int i = 0; + final HashIterator it = iterator(); + while (it.next()) { + cache[i++] = it.get(); + } + return cache; + } +} diff --git a/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java new file mode 100644 index 000000000..188f2cd73 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static org.apache.datasketches.theta2.CompactOperations.checkIllegalCurCountAndEmpty; +import static org.apache.datasketches.theta2.CompactOperations.memoryToCompact; +import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount; +import static org.apache.datasketches.theta2.PreambleUtil.extractFlags; +import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs; +import static org.apache.datasketches.theta2.PreambleUtil.extractSeedHash; +import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong; +import static org.apache.datasketches.theta2.SingleItemSketch.otherCheckForSingleItem; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * An off-heap (Direct), compact, read-only sketch. The internal hash array can be either ordered + * or unordered. It is not empty, not a single item. + * + *

<p>This sketch can only be associated with a Serialization Version 3 format binary image.</p>

+ * + *

<p>This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for + * the JVM to perform garbage collection.</p>

+ * + * @author Lee Rhodes + */ +class DirectCompactSketch extends CompactSketch { + final MemorySegment seg_; + + /** + * Construct this sketch with the given MemorySegment. + * @param seg Read-only MemorySegment object with the order bit properly set. + */ + DirectCompactSketch(final MemorySegment seg) { + seg_ = seg; + } + + /** + * Wraps the given MemorySegment, which must be a SerVer 3, CompactSketch image. + * Must check the validity of the MemorySegment before calling. The order bit must be set properly. + * @param srcSeg the given MemorySegment + * @param seedHash The update seedHash. + * See Seed Hash. + * @return this sketch + */ + static DirectCompactSketch wrapInstance(final MemorySegment srcSeg, final short seedHash) { + ThetaUtil.checkSeedHashes((short) extractSeedHash(srcSeg), seedHash); + return new DirectCompactSketch(srcSeg); + } + + //Sketch Overrides + + @Override + public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) { + return memoryToCompact(seg_, dstOrdered, dstSeg); + } + + @Override + public int getCurrentBytes() { + if (otherCheckForSingleItem(seg_)) { return 16; } + final int preLongs = extractPreLongs(seg_); + final int curCount = (preLongs == 1) ? 0 : extractCurCount(seg_); + return (preLongs + curCount) << 3; + } + + @Override + public int getRetainedEntries(final boolean valid) { //compact is always valid + if (otherCheckForSingleItem(seg_)) { return 1; } + final int preLongs = extractPreLongs(seg_); + final int curCount = (preLongs == 1) ? 0 : extractCurCount(seg_); + return curCount; + } + + @Override + public long getThetaLong() { + final int preLongs = extractPreLongs(seg_); + return (preLongs > 2) ? extractThetaLong(seg_) : Long.MAX_VALUE; + } + + @Override + public boolean hasMemorySegment() { + return seg_ != null; + } + + @Override + public boolean isDirect() { + return hasMemorySegment() ? 
seg_.isNative() : false; + } + + @Override + public boolean isEmpty() { + final boolean emptyFlag = PreambleUtil.isEmptyFlag(seg_); + final long thetaLong = getThetaLong(); + final int curCount = getRetainedEntries(true); + return emptyFlag || ((curCount == 0) && (thetaLong == Long.MAX_VALUE)); + } + + @Override + public boolean isOrdered() { + return (extractFlags(seg_) & ORDERED_FLAG_MASK) > 0; + } + + @Override + public HashIterator iterator() { + return new MemoryHashIterator(seg_, getRetainedEntries(true), getThetaLong()); + } + + @Override + public byte[] toByteArray() { + checkIllegalCurCountAndEmpty(isEmpty(), getRetainedEntries()); + final int outBytes = getCurrentBytes(); + final byte[] byteArrOut = new byte[outBytes]; + MemorySegment.copy(seg_, JAVA_BYTE, 0, byteArrOut, 0, outBytes); + return byteArrOut; + } + + //restricted methods + + @Override + long[] getCache() { + if (otherCheckForSingleItem(seg_)) { return new long[] { seg_.get(JAVA_LONG_UNALIGNED, 8) }; } + final int preLongs = extractPreLongs(seg_); + final int curCount = (preLongs == 1) ? 
0 : extractCurCount(seg_); + if (curCount > 0) { + final long[] cache = new long[curCount]; + MemorySegment.copy(seg_, JAVA_LONG_UNALIGNED, preLongs << 3, cache, 0, curCount); + return cache; + } + return new long[0]; + } + + @Override + int getCompactPreambleLongs() { + return extractPreLongs(seg_); + } + + @Override + int getCurrentPreambleLongs() { + return extractPreLongs(seg_); + } + + @Override + MemorySegment getMemorySegment() { + return seg_; + } + + @Override + short getSeedHash() { + return (short) extractSeedHash(seg_); + } +} diff --git a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java new file mode 100644 index 000000000..32ae0d14d --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java @@ -0,0 +1,339 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; +import static org.apache.datasketches.common.Util.newHeapSegment; +import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.FLAGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.P_FLOAT; +import static org.apache.datasketches.theta2.PreambleUtil.RETAINED_ENTRIES_INT; +import static org.apache.datasketches.theta2.PreambleUtil.SER_VER; +import static org.apache.datasketches.theta2.PreambleUtil.THETA_LONG; +import static org.apache.datasketches.theta2.PreambleUtil.extractLgArrLongs; +import static org.apache.datasketches.theta2.PreambleUtil.extractLgNomLongs; +import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs; +import static org.apache.datasketches.theta2.PreambleUtil.getMemBytes; +import static org.apache.datasketches.theta2.PreambleUtil.insertCurCount; +import static org.apache.datasketches.theta2.PreambleUtil.insertFamilyID; +import static org.apache.datasketches.theta2.PreambleUtil.insertFlags; +import static org.apache.datasketches.theta2.PreambleUtil.insertLgArrLongs; +import static org.apache.datasketches.theta2.PreambleUtil.insertLgNomLongs; +import static org.apache.datasketches.theta2.PreambleUtil.insertLgResizeFactor; +import static org.apache.datasketches.theta2.PreambleUtil.insertP; +import static org.apache.datasketches.theta2.PreambleUtil.insertPreLongs; +import static org.apache.datasketches.theta2.PreambleUtil.insertSeedHash; +import static 
org.apache.datasketches.theta2.PreambleUtil.insertSerVer; +import static org.apache.datasketches.theta2.PreambleUtil.insertThetaLong; +import static org.apache.datasketches.theta2.PreambleUtil.insertUnionThetaLong; +import static org.apache.datasketches.theta2.Rebuilder.actLgResizeFactor; +import static org.apache.datasketches.theta2.Rebuilder.moveAndResize; +import static org.apache.datasketches.theta2.Rebuilder.quickSelectAndRebuild; +import static org.apache.datasketches.theta2.Rebuilder.resize; +import static org.apache.datasketches.theta2.UpdateReturnState.InsertedCountIncremented; +import static org.apache.datasketches.theta2.UpdateReturnState.InsertedCountIncrementedRebuilt; +import static org.apache.datasketches.theta2.UpdateReturnState.InsertedCountIncrementedResized; +import static org.apache.datasketches.theta2.UpdateReturnState.RejectedDuplicate; +import static org.apache.datasketches.theta2.UpdateReturnState.RejectedOverTheta; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.thetacommon.HashOperations; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * The default Theta Sketch using the QuickSelect algorithm. + * This subclass implements methods, which affect the state (update, rebuild, reset) + * + *

<p>This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for + * the JVM to perform garbage collection.</p>

+ * + * @author Lee Rhodes + * @author Kevin Lang + */ +class DirectQuickSelectSketch extends DirectQuickSelectSketchR { + + private DirectQuickSelectSketch( + final long seed, + final MemorySegment wseg) { + super(seed, wseg); + } + + /** + * Construct a new sketch instance and initialize the given MemorySegment as its backing store. + * + * @param lgNomLongs See lgNomLongs. + * @param seed See Update Hash Seed. + * @param p + * See Sampling Probability, p + * @param rf The resize factor. When its lg value is 0 the hash table starts at lgNomLongs + 1 + * and is never resized, so dstSeg must be large enough for the full-size table; + * otherwise the table starts at ThetaUtil.MIN_LG_ARR_LONGS. + * See Resize Factor + * @param dstSeg the given MemorySegment destination. It cannot be null. + * Its preamble is written and its hash table area is cleared prior to use. + * @param unionGadget true if this sketch is implementing the Union gadget function. + * Otherwise, it is behaving as a normal QuickSelectSketch. + * @throws SketchesArgumentException if dstSeg is too small for the initial hash table and preamble. + */ + DirectQuickSelectSketch( + final int lgNomLongs, + final long seed, + final float p, + final ResizeFactor rf, + final MemorySegment dstSeg, + final boolean unionGadget) { + this( + checkMemSize(lgNomLongs, rf, dstSeg, unionGadget), + //SpotBugs CT_CONSTRUCTOR_THROW is false positive. 
+ //this construction scheme is compliant with SEI CERT Oracle Coding Standard for Java / OBJ11-J + lgNomLongs, + seed, + p, + rf, + dstSeg, + unionGadget); + } + + private DirectQuickSelectSketch( + final boolean secure, //required part of Finalizer Attack prevention + final int lgNomLongs, + final long seed, + final float p, + final ResizeFactor rf, + final MemorySegment dstSeg, + final boolean unionGadget) { + super(seed, dstSeg); + //Choose family, preambleLongs + final Family family; + final int preambleLongs; + if (unionGadget) { + preambleLongs = Family.UNION.getMinPreLongs(); + family = Family.UNION; + } + else { + preambleLongs = Family.QUICKSELECT.getMinPreLongs(); + family = Family.QUICKSELECT; + } + + //Choose RF, minReqBytes, lgArrLongs. + final int lgRF = rf.lg(); + final int lgArrLongs = (lgRF == 0) ? lgNomLongs + 1 : ThetaUtil.MIN_LG_ARR_LONGS; + + //@formatter:off + //Build preamble + insertPreLongs(dstSeg, preambleLongs); //byte 0 + insertLgResizeFactor(dstSeg, lgRF); //byte 0 + insertSerVer(dstSeg, SER_VER); //byte 1 + insertFamilyID(dstSeg, family.getID()); //byte 2 + insertLgNomLongs(dstSeg, lgNomLongs); //byte 3 + insertLgArrLongs(dstSeg, lgArrLongs); //byte 4 + //flags: bigEndian = readOnly = compact = ordered = false; empty = true : 00100 = 4 + insertFlags(dstSeg, EMPTY_FLAG_MASK); //byte 5 + insertSeedHash(dstSeg, ThetaUtil.computeSeedHash(seed)); //bytes 6,7 + insertCurCount(dstSeg, 0); //bytes 8-11 + insertP(dstSeg, p); //bytes 12-15 + final long thetaLong = (long)(p * LONG_MAX_VALUE_AS_DOUBLE); + insertThetaLong(dstSeg, thetaLong); //bytes 16-23 + if (unionGadget) { + insertUnionThetaLong(dstSeg, thetaLong); + } + //@formatter:on + + //clear hash table area + dstSeg.asSlice(preambleLongs << 3, Long.BYTES << lgArrLongs).fill((byte)0); + + hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); + } + + private static final boolean checkMemSize( + final int lgNomLongs, final ResizeFactor rf, final MemorySegment dstSeg, 
final boolean unionGadget) { + final int preambleLongs = (unionGadget) ? Family.UNION.getMinPreLongs() : Family.QUICKSELECT.getMinPreLongs(); + final int lgRF = rf.lg(); + final int lgArrLongs = (lgRF == 0) ? lgNomLongs + 1 : ThetaUtil.MIN_LG_ARR_LONGS; + final int minReqBytes = getMemBytes(lgArrLongs, preambleLongs); + final long curMemCapBytes = dstSeg.byteSize(); + if (curMemCapBytes < minReqBytes) { + throw new SketchesArgumentException( + "Memory capacity is too small: " + curMemCapBytes + " < " + minReqBytes); + } + return true; + } + + /** + * Wrap a sketch around the given source MemorySegment containing sketch data that originated from + * this sketch. + * @param srcSeg The given MemorySegment object must be in hash table form and not read only. + * @param seed See Update Hash Seed + * @return instance of this sketch + */ + static DirectQuickSelectSketch writableWrap(final MemorySegment srcSeg, final long seed) { + final int preambleLongs = extractPreLongs(srcSeg); //byte 0 + final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3 + final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4 + + UpdateSketch.checkUnionQuickSelectFamily(srcSeg, preambleLongs, lgNomLongs); + checkMemIntegrity(srcSeg, seed, preambleLongs, lgNomLongs, lgArrLongs); + + if (isResizeFactorIncorrect(srcSeg, lgNomLongs, lgArrLongs)) { + //If incorrect it sets it to X2 which always works. + insertLgResizeFactor(srcSeg, ResizeFactor.X2.lg()); + } + + final DirectQuickSelectSketch dqss = + new DirectQuickSelectSketch(seed, srcSeg); + dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); + return dqss; + } + + /** + * Fast-wrap a sketch around the given source MemorySegment containing sketch data that originated from + * this sketch. This does NO validity checking of the given Memory. + * @param srcSeg The given MemorySegment must be in hash table form and not read only. 
+ * @param seed See Update Hash Seed + * @return instance of this sketch + */ + static DirectQuickSelectSketch fastWritableWrap(final MemorySegment srcSeg, final long seed) { + final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3 + final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4 + + final DirectQuickSelectSketch dqss = + new DirectQuickSelectSketch(seed, srcSeg); + dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); + return dqss; + } + + //Sketch + + //UpdateSketch + + @Override + public UpdateSketch rebuild() { + final int lgNomLongs = getLgNomLongs(); + final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; + if (getRetainedEntries(true) > (1 << lgNomLongs)) { + quickSelectAndRebuild(wseg_, preambleLongs, lgNomLongs); + } + return this; + } + + @Override + public void reset() { + //clear hash table + //hash table size and hashTableThreshold stays the same + //lgArrLongs stays the same + //thetaLongs resets to p + final int arrLongs = 1 << getLgArrLongs(); + final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; + final int preBytes = preambleLongs << 3; + wseg_.asSlice(preBytes, arrLongs * 8L).fill((byte)0); + //flags: bigEndian = readOnly = compact = ordered = false; empty = true. 
+ wseg_.set(JAVA_BYTE, FLAGS_BYTE, (byte) EMPTY_FLAG_MASK); + wseg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, 0); + final float p = wseg_.get(JAVA_FLOAT_UNALIGNED, P_FLOAT); + final long thetaLong = (long) (p * LONG_MAX_VALUE_AS_DOUBLE); + wseg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong); + } + + //restricted methods + + @Override + UpdateReturnState hashUpdate(final long hash) { + HashOperations.checkHashCorruption(hash); + + wseg_.set(JAVA_BYTE, FLAGS_BYTE, (byte) (wseg_.get(JAVA_BYTE, FLAGS_BYTE) & ~EMPTY_FLAG_MASK)); + final long thetaLong = getThetaLong(); + final int lgNomLongs = getLgNomLongs(); + //The over-theta test + if (HashOperations.continueCondition(thetaLong, hash)) { + return RejectedOverTheta; //signal that hash was rejected due to theta or zero. + } + + final int lgArrLongs = getLgArrLongs(); + final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; + + //The duplicate test + final int index = + HashOperations.hashSearchOrInsertMemory(wseg_, lgArrLongs, hash, preambleLongs << 3); + if (index >= 0) { + return RejectedDuplicate; //Duplicate, not inserted + } + //insertion occurred, increment curCount + final int curCount = getRetainedEntries(true) + 1; + wseg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, curCount); //update curCount + + if (isOutOfSpace(curCount)) { //we need to do something, we are out of space + + if (lgArrLongs > lgNomLongs) { //at full size, rebuild + //Assumes no dirty values, changes thetaLong, curCount_ + assert (lgArrLongs == (lgNomLongs + 1)) : "lgArr: " + lgArrLongs + ", lgNom: " + lgNomLongs; + //rebuild, refresh curCount based on # values in the hashtable. + quickSelectAndRebuild(wseg_, preambleLongs, lgNomLongs); + return InsertedCountIncrementedRebuilt; + } //end of rebuild, exit + + else { //Not at full size, resize. Should not get here if lgRF = 0 and memCap is too small. 
+ final int lgRF = getLgRF(); + final int actLgRF = actLgResizeFactor(wseg_.byteSize(), lgArrLongs, preambleLongs, lgRF); + int tgtLgArrLongs = Math.min(lgArrLongs + actLgRF, lgNomLongs + 1); + + if (actLgRF > 0) { //Expand in current Memory + //lgArrLongs will change; thetaLong, curCount will not + resize(wseg_, preambleLongs, lgArrLongs, tgtLgArrLongs); + hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, tgtLgArrLongs); + return InsertedCountIncrementedResized; + } //end of Expand in current memory, exit. + + else { + //Request more memory, then resize. lgArrLongs will change; thetaLong, curCount will not + final int preBytes = preambleLongs << 3; + tgtLgArrLongs = Math.min(lgArrLongs + lgRF, lgNomLongs + 1); + final int tgtArrBytes = 8 << tgtLgArrLongs; + final int reqBytes = tgtArrBytes + preBytes; + + //memReqSvr_ = (memReqSvr_ == null) ? wseg_.getMemoryRequestServer() : memReqSvr_; + //if (memReqSvr_ == null) { //in case the MRS is not enabled or null. + // throw new SketchesArgumentException("Out of Memory, MemoryRequestServer is null, cannot expand."); + //} + //final MemorySegment newDstSeg = memReqSvr_.request(wseg_, reqBytes); + + final MemorySegment newDstSeg = newHeapSegment(reqBytes); + + moveAndResize(wseg_, preambleLongs, lgArrLongs, newDstSeg, tgtLgArrLongs, thetaLong); + wseg_ = newDstSeg; + + hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, tgtLgArrLongs); + return InsertedCountIncrementedResized; + } //end of Request more memory to resize + } //end of resize + } //end of isOutOfSpace + return InsertedCountIncremented; + } + +} diff --git a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java new file mode 100644 index 000000000..33d371554 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java @@ -0,0 +1,284 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or 
more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static org.apache.datasketches.theta2.CompactOperations.checkIllegalCurCountAndEmpty; +import static org.apache.datasketches.theta2.CompactOperations.computeCompactPreLongs; +import static org.apache.datasketches.theta2.CompactOperations.correctThetaOnCompact; +import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.LG_ARR_LONGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.LG_NOM_LONGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.LG_RESIZE_FACTOR_BIT; +import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.P_FLOAT; +import static org.apache.datasketches.theta2.PreambleUtil.RETAINED_ENTRIES_INT; +import static org.apache.datasketches.theta2.PreambleUtil.THETA_LONG; +import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount; 
+import static org.apache.datasketches.theta2.PreambleUtil.extractLgArrLongs; +import static org.apache.datasketches.theta2.PreambleUtil.extractLgNomLongs; +import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs; +import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong; +import static org.apache.datasketches.theta2.PreambleUtil.insertThetaLong; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.common.SketchesReadOnlyException; +import org.apache.datasketches.common.SuppressFBWarnings; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * The default Theta Sketch using the QuickSelect algorithm. + * This is the read-only implementation with non-functional methods, which affect the state. + * + *

<p>This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for + * the JVM to perform garbage collection.</p>

+ *
+ * @author Lee Rhodes
+ * @author Kevin Lang
+ */
+class DirectQuickSelectSketchR extends UpdateSketch {
+  static final double DQS_RESIZE_THRESHOLD = 15.0 / 16.0; //tuned for space
+  final long seed_; //provided, kept only on heap, never serialized.
+  int hashTableThreshold_; //computed, kept only on heap, never serialized.
+  MemorySegment wseg_; //May be writable for a child class, but this class never writes to it
+
+  //only called by DirectQuickSelectSketch and below
+  DirectQuickSelectSketchR(final long seed, final MemorySegment wseg) {
+    seed_ = seed;
+    wseg_ = wseg;
+  }
+
+  /**
+   * Wrap a sketch around the given source MemorySegment containing sketch data that originated from
+   * this sketch.
+   * @param srcSeg the source MemorySegment.
+   * The given MemorySegment object must be in hash table form and not read only.
+   * @param seed See Update Hash Seed
+   * @return instance of this sketch
+   */
+  static DirectQuickSelectSketchR readOnlyWrap(final MemorySegment srcSeg, final long seed) {
+    final int preambleLongs = extractPreLongs(srcSeg); //byte 0
+    final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3
+    final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4
+
+    UpdateSketch.checkUnionQuickSelectFamily(srcSeg, preambleLongs, lgNomLongs);
+    checkMemIntegrity(srcSeg, seed, preambleLongs, lgNomLongs, lgArrLongs);
+
+    final DirectQuickSelectSketchR dqssr =
+        new DirectQuickSelectSketchR(seed, srcSeg);
+    dqssr.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
+    return dqssr;
+  }
+
+  /**
+   * Fast-wrap a sketch around the given source MemorySegment containing sketch data that originated from
+   * this sketch. This does NO validity checking of the given MemorySegment.
+   * @param srcSeg The given MemorySegment object must be in hash table form and not read only.
+   * @param seed See Update Hash Seed
+   * @return instance of this sketch
+   */
+  static DirectQuickSelectSketchR fastReadOnlyWrap(final MemorySegment srcSeg, final long seed) {
+    final int lgNomLongs = srcSeg.get(JAVA_BYTE, LG_NOM_LONGS_BYTE) & 0XFF;
+    final int lgArrLongs = srcSeg.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF;
+
+    final DirectQuickSelectSketchR dqss =
+        new DirectQuickSelectSketchR(seed, srcSeg);
+    dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs);
+    return dqss;
+  }
+
+  //Sketch
+
+  @Override
+  public int getCurrentBytes() {
+    //not compact: (preamble longs + hash table longs) * 8 bytes
+    final byte lgArrLongs = wseg_.get(JAVA_BYTE, LG_ARR_LONGS_BYTE);
+    final int preLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F;
+    final int lengthBytes = (preLongs + (1 << lgArrLongs)) << 3;
+    return lengthBytes;
+  }
+
+  @Override
+  public double getEstimate() {
+    final int curCount = extractCurCount(wseg_);
+    final long thetaLong = extractThetaLong(wseg_);
+    return Sketch.estimate(thetaLong, curCount);
+  }
+
+  @Override
+  public Family getFamily() {
+    final int familyID = wseg_.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF;
+    return Family.idToFamily(familyID);
+  }
+
+  @Override
+  public int getRetainedEntries(final boolean valid) { //always valid
+    return wseg_.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT);
+  }
+
+  @Override
+  public long getThetaLong() {
+    return isEmpty() ? Long.MAX_VALUE : wseg_.get(JAVA_LONG_UNALIGNED, THETA_LONG);
+  }
+
+  @Override
+  public boolean hasMemorySegment() {
+    return wseg_ != null;
+  }
+
+  @Override
+  public boolean isDirect() {
+    return hasMemorySegment() && wseg_.isNative();
+  }
+
+  @Override
+  public boolean isEmpty() {
+    return PreambleUtil.isEmptyFlag(wseg_);
+  }
+
+  @Override
+  public HashIterator iterator() {
+    return new MemoryHashIterator(wseg_, 1 << getLgArrLongs(), getThetaLong());
+  }
+
+  @Override
+  public byte[] toByteArray() { //MY_FAMILY is stored in wseg_
+    checkIllegalCurCountAndEmpty(isEmpty(), extractCurCount(wseg_));
+    final int lengthBytes = getCurrentBytes();
+    final byte[] byteArray = new byte[lengthBytes];
+    final MemorySegment seg = MemorySegment.ofArray(byteArray);
+    MemorySegment.copy(wseg_, 0, seg, 0, lengthBytes);
+    final long thetaLong =
+        correctThetaOnCompact(isEmpty(), extractCurCount(wseg_), extractThetaLong(wseg_));
+    insertThetaLong(seg, thetaLong); //corrected theta goes into the copy; wseg_ is left untouched
+    return byteArray;
+  }
+
+  //UpdateSketch
+
+  @Override
+  public final int getLgNomLongs() {
+    return PreambleUtil.extractLgNomLongs(wseg_);
+  }
+
+  @Override
+  float getP() {
+    return wseg_.get(JAVA_FLOAT_UNALIGNED, P_FLOAT);
+  }
+
+  @Override
+  public ResizeFactor getResizeFactor() {
+    return ResizeFactor.getRF(getLgRF());
+  }
+
+  @Override
+  long getSeed() {
+    return seed_;
+  }
+
+  @Override
+  public UpdateSketch rebuild() {
+    throw new SketchesReadOnlyException();
+  }
+
+  @Override
+  public void reset() {
+    throw new SketchesReadOnlyException();
+  }
+
+  //restricted methods
+
+  @Override
+  long[] getCache() {
+    final long lgArrLongs = wseg_.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF;
+    final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F;
+    final long[] cacheArr = new long[1 << lgArrLongs];
+    final MemorySegment seg = MemorySegment.ofArray(cacheArr);
+    MemorySegment.copy(wseg_, preambleLongs << 3, seg, 0, 8 << lgArrLongs);
+    return cacheArr;
+  }
+
+  @Override
+  int getCompactPreambleLongs() {
+    return computeCompactPreLongs(isEmpty(), getRetainedEntries(true), getThetaLong());
+  }
+
+  @Override
+  int getCurrentPreambleLongs() {
+    return PreambleUtil.extractPreLongs(wseg_);
+  }
+
+  @Override
+  MemorySegment getMemorySegment() {
+    return wseg_;
+  }
+
+  @Override
+  short getSeedHash() {
+    return (short) PreambleUtil.extractSeedHash(wseg_);
+  }
+
+  @Override
+  boolean isDirty() {
+    return false; //Always false for QuickSelectSketch
+  }
+
+  @Override
+  boolean isOutOfSpace(final int numEntries) {
+    return numEntries > hashTableThreshold_;
+  }
+
+  @Override
+  int getLgArrLongs() {
+    return wseg_.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF;
+  }
+
+  int getLgRF() { //only Direct needs this
+    return (wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) >>> LG_RESIZE_FACTOR_BIT) & 0X3;
+  }
+
+  @Override
+  UpdateReturnState hashUpdate(final long hash) {
+    throw new SketchesReadOnlyException();
+  }
+
+  /**
+   * Returns the cardinality limit given the current size of the hash table array.
+   *
+   * @param lgNomLongs See lgNomLongs.
+   * @param lgArrLongs See lgArrLongs.
+   * @return the hash table threshold
+   */
+  @SuppressFBWarnings(value = "DB_DUPLICATE_BRANCHES", justification = "False Positive, see the code comments")
+  protected static final int getOffHeapHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
+    //SpotBugs may complain (DB_DUPLICATE_BRANCHES) if DQS_RESIZE_THRESHOLD == REBUILD_THRESHOLD,
+    //but this allows us to tune these constants for different sketches.
+    final double fraction = (lgArrLongs <= lgNomLongs) ? DQS_RESIZE_THRESHOLD : ThetaUtil.REBUILD_THRESHOLD;
+    return (int) (fraction * (1 << lgArrLongs));
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/EmptyCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/EmptyCompactSketch.java
new file mode 100644
index 000000000..c9c6dd609
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/EmptyCompactSketch.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+
+import java.lang.foreign.MemorySegment;
+import org.apache.datasketches.common.SketchesArgumentException;
+
+/**
+ * Singleton empty CompactSketch.
+ *
+ * @author Lee Rhodes
+ */
+final class EmptyCompactSketch extends CompactSketch {
+
+  //For backward compatibility, a candidate long must have Flags= compact, read-only,
+  // COMPACT-Family=3, SerVer=3, PreLongs=1, and be exactly 8 bytes long. The seedHash is ignored.
+  // NOTE: The empty and ordered flags may or may not be set
+  private static final long EMPTY_SKETCH_MASK = 0X00_00_EB_00_00_FF_FF_FFL;
+  private static final long EMPTY_SKETCH_TEST = 0X00_00_0A_00_00_03_03_01L;
+  //Shared image with the empty and ordered bits also set. Never hand this array out uncloned.
+  static final byte[] EMPTY_COMPACT_SKETCH_ARR = { 1, 3, 3, 0, 0, 0x1E, 0, 0 };
+  private static final EmptyCompactSketch EMPTY_COMPACT_SKETCH = new EmptyCompactSketch();
+
+  private EmptyCompactSketch() {}
+
+  static synchronized EmptyCompactSketch getInstance() {
+    return EMPTY_COMPACT_SKETCH;
+  }
+
+  //This should be a heapify
+  static synchronized EmptyCompactSketch getHeapInstance(final MemorySegment srcSeg) {
+    final long pre0 = srcSeg.get(JAVA_LONG_UNALIGNED, 0);
+    if (testCandidatePre0(pre0)) {
+      return EMPTY_COMPACT_SKETCH;
+    }
+    final long maskedPre0 = pre0 & EMPTY_SKETCH_MASK;
+    throw new SketchesArgumentException("Input Memory does not match required Preamble. "
+        + "Memory Pre0: " + Long.toHexString(maskedPre0)
+        + ", required Pre0: " + Long.toHexString(EMPTY_SKETCH_TEST));
+  }
+
+  @Override
+  // This returns with ordered flag = true independent of dstOrdered.
+  // This is required for fast detection.
+  // The hashSeed is ignored and set == 0.
+  public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstWSeg) {
+    if (dstWSeg == null) { return EmptyCompactSketch.getInstance(); }
+    //write the 8-byte empty-sketch image at the start of dstWSeg
+    MemorySegment.copy(EMPTY_COMPACT_SKETCH_ARR, 0, dstWSeg, JAVA_BYTE, 0, 8);
+    return new DirectCompactSketch(dstWSeg);
+  }
+
+  //static
+
+  static boolean testCandidatePre0(final long candidate) {
+    return (candidate & EMPTY_SKETCH_MASK) == EMPTY_SKETCH_TEST;
+  }
+
+  @Override
+  public int getCurrentBytes() {
+    return 8;
+  }
+
+  @Override
+  public double getEstimate() { return 0; }
+
+  @Override
+  public int getRetainedEntries(final boolean valid) {
+    return 0;
+  }
+
+  @Override
+  public long getThetaLong() {
+    return Long.MAX_VALUE;
+  }
+
+  @Override
+  public boolean isEmpty() {
+    return true;
+  }
+
+  @Override
+  public boolean isOrdered() {
+    return true;
+  }
+
+  @Override
+  public HashIterator iterator() {
+    return new HeapCompactHashIterator(new long[0]);
+  }
+
+  /**
+   * Returns a new 8-byte array representing a CompactSketch that has the following flags set:
+   * ordered, compact, empty, readOnly. The SerVer is 3, the Family is COMPACT(3),
+   * and the PreLongs = 1. The seedHash is zero.
+   */
+  @Override
+  public byte[] toByteArray() {
+    return EMPTY_COMPACT_SKETCH_ARR.clone(); //defensive copy: callers must not mutate the shared image
+  }
+
+  @Override
+  long[] getCache() {
+    return new long[0];
+  }
+
+  @Override
+  int getCompactPreambleLongs() {
+    return 1;
+  }
+
+  @Override
+  int getCurrentPreambleLongs() {
+    return 1;
+  }
+
+  @Override
+  MemorySegment getMemorySegment() {
+    return null;
+  }
+
+  @Override
+  short getSeedHash() {
+    return 0;
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java b/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java
new file mode 100644
index 000000000..9791a7902
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount;
+import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID;
+import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs;
+import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+
+/**
+ * Used to convert older serialization versions 1 and 2 to version 3. The Serialization
+ * Version is the version of the sketch binary image format and should not be confused with the
+ * version number of the Open Source DataSketches Library.
+ *
+ * @author Lee Rhodes
+ */
+final class ForwardCompatibility {
+
+  /**
+   * Convert a serialization version (SerVer) 1 sketch (~Feb 2014) to a SerVer 3 sketch.
+   * Note: SerVer 1 sketches always have (metadata) preamble-longs of 3 and are always stored
+   * in a compact ordered form, but with 3 different sketch types. All SerVer 1 sketches will
+   * be converted to SerVer 3 sketches. There is no concept of p-sampling, no empty bit.
+   *
+   * @param srcSeg the image of a SerVer 1 sketch
+   *
+   * @param seedHash See Seed Hash.
+   * The seedHash that matches the seedHash of the original seed used to construct the sketch.
+   * Note: SerVer 1 sketches do not have the concept of the SeedHash, so the seedHash provided here
+   * MUST be derived from the actual seed that was used when the SerVer 1 sketches were built.
+   * @return a SerVer 3 {@link CompactSketch}.
+   */
+  static final CompactSketch heapify1to3(final MemorySegment srcSeg, final short seedHash) {
+    final long segCap = srcSeg.byteSize(); //kept as long: an (int) cast truncates sizes of 2 GiB or more
+    final int preLongs = extractPreLongs(srcSeg); //always 3 for serVer 1
+    if (preLongs != 3) {
+      throw new SketchesArgumentException("PreLongs must be 3 for SerVer 1: " + preLongs);
+    }
+    final int familyId = extractFamilyID(srcSeg); //1,2,3
+    if ((familyId < 1) || (familyId > 3)) {
+      throw new SketchesArgumentException("Family ID (Sketch Type) must be 1 to 3: " + familyId);
+    }
+    final int curCount = extractCurCount(srcSeg);
+    final long thetaLong = extractThetaLong(srcSeg);
+    final boolean empty = (curCount == 0) && (thetaLong == Long.MAX_VALUE);
+
+    if (empty || (segCap <= 24)) { //return empty
+      return EmptyCompactSketch.getInstance();
+    }
+
+    final long reqCap = ((long) curCount + preLongs) << 3; //long math: no int overflow on a huge count
+    validateInputSize(reqCap, segCap);
+
+    if ((thetaLong == Long.MAX_VALUE) && (curCount == 1)) {
+      final long hash = srcSeg.get(JAVA_LONG_UNALIGNED, preLongs << 3);
+      return new SingleItemSketch(hash, seedHash);
+    }
+    //theta < 1.0 and/or curCount > 1
+
+    final long[] compactOrderedCache = new long[curCount];
+    MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preLongs << 3, compactOrderedCache, 0, curCount);
+    return new HeapCompactSketch(compactOrderedCache, false, seedHash, curCount, thetaLong, true);
+  }
+
+  /**
+   * Convert a serialization version (SerVer) 2 sketch to a SerVer 3 HeapCompactOrderedSketch.
+   * Note: SerVer 2 sketches can have metadata-longs of 1,2 or 3 and are always stored
+   * in a compact ordered form (not as a hash table), but with 4 different sketch types.
+   * @param srcSeg the image of a SerVer 2 sketch
+   * @param seedHash See Seed Hash.
+   * The seed hash of the seed used for building the sketch image in srcSeg
+   * @return a SerVer 3 HeapCompactOrderedSketch
+   */
+  static final CompactSketch heapify2to3(final MemorySegment srcSeg, final short seedHash) {
+    final long segCap = srcSeg.byteSize(); //kept as long: an (int) cast truncates sizes of 2 GiB or more
+    final int preLongs = extractPreLongs(srcSeg); //1,2 or 3
+    final int familyId = extractFamilyID(srcSeg); //1,2,3,4
+    if ((familyId < 1) || (familyId > 4)) {
+      throw new SketchesArgumentException("Family (Sketch Type) must be 1 to 4: " + familyId);
+    }
+    long reqBytesIn = 8;
+    int curCount = 0;
+    long thetaLong = Long.MAX_VALUE;
+    if (preLongs == 1) {
+      reqBytesIn = 8;
+      validateInputSize(reqBytesIn, segCap);
+      return EmptyCompactSketch.getInstance();
+    }
+    if (preLongs == 2) { //includes pre0 + count, no theta (== 1.0)
+      reqBytesIn = preLongs << 3;
+      validateInputSize(reqBytesIn, segCap);
+      curCount = extractCurCount(srcSeg);
+      if (curCount == 0) {
+        return EmptyCompactSketch.getInstance();
+      }
+      if (curCount == 1) {
+        reqBytesIn = (preLongs + 1) << 3;
+        validateInputSize(reqBytesIn, segCap);
+        final long hash = srcSeg.get(JAVA_LONG_UNALIGNED, preLongs << 3);
+        return new SingleItemSketch(hash, seedHash);
+      }
+      //curCount > 1
+      reqBytesIn = ((long) curCount + preLongs) << 3; //long math: no int overflow on a huge count
+      validateInputSize(reqBytesIn, segCap);
+      final long[] compactOrderedCache = new long[curCount];
+      MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preLongs << 3, compactOrderedCache, 0, curCount);
+      return new HeapCompactSketch(compactOrderedCache, false, seedHash, curCount, thetaLong, true);
+    }
+    if (preLongs == 3) { //pre0 + count + theta
+      reqBytesIn = (preLongs) << 3; //preamble only
+      validateInputSize(reqBytesIn, segCap);
+      curCount = extractCurCount(srcSeg);
+      thetaLong = extractThetaLong(srcSeg);
+      if ((curCount == 0) && (thetaLong == Long.MAX_VALUE)) {
+        return EmptyCompactSketch.getInstance();
+      }
+      if ((curCount == 1) && (thetaLong == Long.MAX_VALUE)) {
+        reqBytesIn = (preLongs + 1) << 3;
+        validateInputSize(reqBytesIn, segCap);
+        final long hash = srcSeg.get(JAVA_LONG_UNALIGNED, preLongs << 3);
+        return new SingleItemSketch(hash, seedHash);
+      }
+      //curCount > 1 and/or theta < 1.0
+      reqBytesIn = ((long) curCount + preLongs) << 3; //long math: no int overflow on a huge count
+      validateInputSize(reqBytesIn, segCap);
+      final long[] compactOrderedCache = new long[curCount];
+      //copy the retained hashes that follow the preamble
+      MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preLongs << 3, compactOrderedCache, 0, curCount);
+      return new HeapCompactSketch(compactOrderedCache, false, seedHash, curCount, thetaLong, true);
+    }
+    throw new SketchesArgumentException("PreLongs must be 1,2, or 3: " + preLongs);
+  }
+
+  private static final void validateInputSize(final long reqBytesIn, final long memCap) {
+    if (reqBytesIn > memCap) {
+      throw new SketchesArgumentException(
+        "Input Memory or byte[] size is too small: Required Bytes: " + reqBytesIn
+        + ", bytesIn: " + memCap);
+    }
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/HashIterator.java b/src/main/java/org/apache/datasketches/theta2/HashIterator.java
new file mode 100644
index 000000000..c8cf9aba9
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/HashIterator.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.theta2;
+
+/**
+ * Iterates over the retained hash values of a Theta sketch.
+ * @author Lee Rhodes
+ */
+public interface HashIterator {
+
+  /**
+   * Gets the hash value at the iterator's current position.
+   * @return the hash value
+   */
+  long get();
+
+  /**
+   * Returns true at the next hash value in sequence.
+   * If false, the iteration is done.
+   * @return true at the next hash value in sequence, false when the iteration is done.
+   */
+  boolean next();
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java
new file mode 100644
index 000000000..5bc11d712
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java
@@ -0,0 +1,601 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +package org.apache.datasketches.theta2; + +import static java.lang.Math.max; +import static java.lang.Math.min; +import static java.lang.Math.sqrt; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; +import static org.apache.datasketches.common.Util.checkBounds; +import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount; +import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID; +import static org.apache.datasketches.theta2.PreambleUtil.extractLgArrLongs; +import static org.apache.datasketches.theta2.PreambleUtil.extractLgNomLongs; +import static org.apache.datasketches.theta2.PreambleUtil.extractLgResizeFactor; +import static org.apache.datasketches.theta2.PreambleUtil.extractP; +import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs; +import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong; +import static org.apache.datasketches.theta2.UpdateReturnState.InsertedCountIncremented; +import static org.apache.datasketches.theta2.UpdateReturnState.InsertedCountNotIncremented; +import static org.apache.datasketches.theta2.UpdateReturnState.RejectedDuplicate; +import static org.apache.datasketches.theta2.UpdateReturnState.RejectedOverTheta; +import static org.apache.datasketches.thetacommon.HashOperations.STRIDE_MASK; + +import java.lang.foreign.MemorySegment; +import java.util.Objects; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.thetacommon.HashOperations; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * This sketch uses the + * Theta Sketch Framework + * and the + * Alpha TCF algorithm + * with a single cache. 
+ * + * @author Lee Rhodes + * @author Kevin Lang + */ +final class HeapAlphaSketch extends HeapUpdateSketch { + private static final int ALPHA_MIN_LG_NOM_LONGS = 9; //The smallest Log2 k allowed => 512. + private final double alpha_; // computed from lgNomLongs + private final long split1_; // computed from alpha and p + + private int lgArrLongs_; + private int hashTableThreshold_; //never serialized + private int curCount_ = 0; + private long thetaLong_; + private boolean empty_ = true; + + private long[] cache_; + private boolean dirty_ = false; + + private HeapAlphaSketch(final int lgNomLongs, final long seed, final float p, + final ResizeFactor rf, final double alpha, final long split1) { + super(lgNomLongs, seed, p, rf); + alpha_ = alpha; + split1_ = split1; + } + + /** + * Get a new sketch instance on the java heap. + * + * @param lgNomLongs See lgNomLongs + * @param seed See Update Hash Seed + * @param p See Sampling Probability, p + * @param rf See Resize Factor + * @return instance of this sketch + */ + static HeapAlphaSketch newHeapInstance(final int lgNomLongs, final long seed, final float p, + final ResizeFactor rf) { + + if (lgNomLongs < ALPHA_MIN_LG_NOM_LONGS) { + throw new SketchesArgumentException( + "This sketch requires a minimum nominal entries of " + (1 << ALPHA_MIN_LG_NOM_LONGS)); + } + + final double nomLongs = (1L << lgNomLongs); + final double alpha = nomLongs / (nomLongs + 1.0); + final long split1 = (long) (((p * (alpha + 1.0)) / 2.0) * LONG_MAX_VALUE_AS_DOUBLE); + + final HeapAlphaSketch has = new HeapAlphaSketch(lgNomLongs, seed, p, rf, alpha, split1); + + final int lgArrLongs = ThetaUtil.startingSubMultiple(lgNomLongs + 1, rf.lg(), ThetaUtil.MIN_LG_ARR_LONGS); + has.lgArrLongs_ = lgArrLongs; + has.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs); + has.curCount_ = 0; + has.thetaLong_ = (long)(p * LONG_MAX_VALUE_AS_DOUBLE); + has.empty_ = true; //other flags: bigEndian = readOnly = compact = ordered = false; + 
has.cache_ = new long[1 << lgArrLongs]; + return has; + } + + /** + * Heapify a sketch from a MemorySegment object containing sketch data. + * @param srcSeg The source MemorySegment object. + * It must have a size of at least 24 bytes. + * @param expectedSeed the seed used to validate the given MemorySegment image. + * See seed + * @return instance of this sketch + */ + static HeapAlphaSketch heapifyInstance(final MemorySegment srcSeg, final long expectedSeed) { + Objects.requireNonNull(srcSeg, "Source MemorySegment must not be null"); + checkBounds(0, 24, srcSeg.byteSize()); + final int preambleLongs = extractPreLongs(srcSeg); //byte 0 + final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3 + final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4 + + checkAlphaFamily(srcSeg, preambleLongs, lgNomLongs); + checkMemIntegrity(srcSeg, expectedSeed, preambleLongs, lgNomLongs, lgArrLongs); + + final float p = extractP(srcSeg); //bytes 12-15 + final int memlgRF = extractLgResizeFactor(srcSeg); //byte 0 + ResizeFactor memRF = ResizeFactor.getRF(memlgRF); + + final double nomLongs = (1L << lgNomLongs); + final double alpha = nomLongs / (nomLongs + 1.0); + final long split1 = (long) (((p * (alpha + 1.0)) / 2.0) * LONG_MAX_VALUE_AS_DOUBLE); + + if (isResizeFactorIncorrect(srcSeg, lgNomLongs, lgArrLongs)) { + memRF = ResizeFactor.X2; //X2 always works. 
+ } + + final HeapAlphaSketch has = new HeapAlphaSketch(lgNomLongs, expectedSeed, p, memRF, alpha, split1); + has.lgArrLongs_ = lgArrLongs; + has.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs); + has.curCount_ = extractCurCount(srcSeg); + has.thetaLong_ = extractThetaLong(srcSeg); + has.empty_ = PreambleUtil.isEmptyFlag(srcSeg); + has.cache_ = new long[1 << lgArrLongs]; + MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preambleLongs << 3, has.cache_, 0, 1 << lgArrLongs); //read in as hash table + return has; + } + + //Sketch + + @Override + public Family getFamily() { + return Family.ALPHA; + } + + @Override + public HashIterator iterator() { + return new HeapHashIterator(cache_, thetaLong_); + } + + @Override + public double getEstimate() { + return (thetaLong_ > split1_) + ? Sketch.estimate(thetaLong_, curCount_) + : (1 << lgNomLongs_) * (LONG_MAX_VALUE_AS_DOUBLE / thetaLong_); + } + + @Override + public double getLowerBound(final int numStdDev) { + if ((numStdDev < 1) || (numStdDev > 3)) { + throw new SketchesArgumentException("numStdDev can only be the values 1, 2 or 3."); + } + double lb; + if (isEstimationMode()) { + final int validCount = getRetainedEntries(true); + if (validCount > 0) { + final double est = getEstimate(); + final double var = getVariance(1 << lgNomLongs_, getP(), alpha_, getTheta(), validCount); + lb = est - (numStdDev * sqrt(var)); + lb = max(lb, 0.0); + } + else { + lb = 0.0; + } + } + else { + lb = curCount_; + } + return lb; + } + + @Override + public int getRetainedEntries(final boolean valid) { + if (curCount_ > 0) { + if (valid && isDirty()) { + final int curCount = HashOperations.countPart(getCache(), getLgArrLongs(), getThetaLong()); + return curCount; + } + } + return curCount_; + } + + @Override + public long getThetaLong() { + return thetaLong_; + } + + @Override + public double getUpperBound(final int numStdDev) { + if ((numStdDev < 1) || (numStdDev > 3)) { + throw new SketchesArgumentException("numStdDev 
can only be the values 1, 2 or 3."); + } + if (isEstimationMode()) { + final double var = + getVariance(1 << lgNomLongs_, getP(), alpha_, getTheta(), getRetainedEntries(true)); + return getEstimate() + (numStdDev * sqrt(var)); + } + return curCount_; + } + + @Override + public boolean isEmpty() { + return empty_; + } + + /* + * Alpha Sketch Preamble Layout ( same as Theta UpdateSketch ) + *
+   * Long || Start Byte Adr:
+   * Adr:
+   *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |        0           |
+   *  0   ||    Seed Hash    | Flags  |  LgArr | LgNom  | FamID  | SerVer | lgRF | PreLongs=3  |
+   *
+   *      ||   15   |   14   |   13   |   12   |   11   |   10   |    9   |     8              |
+   *  1   ||-----------------p-----------------|----------Retained Entries Count---------------|
+   *
+   *      ||   23   |   22   |   21    |  20   |   19   |   18   |   17   |    16              |
+   *  2   ||---------------------------------Theta---------------------------------------------|
+   * 
+ */ + + @Override + public byte[] toByteArray() { + return toByteArray(Family.ALPHA.getMinPreLongs(), (byte) Family.ALPHA.getID()); + } + + //UpdateSketch + + @Override + public UpdateSketch rebuild() { + if (isDirty()) { + rebuildDirty(); + } + return this; + } + + @Override + public final void reset() { + final int lgArrLongs = + ThetaUtil.startingSubMultiple(lgNomLongs_ + 1, getResizeFactor().lg(), ThetaUtil.MIN_LG_ARR_LONGS); + if (lgArrLongs == lgArrLongs_) { + final int arrLongs = cache_.length; + assert (1 << lgArrLongs_) == arrLongs; + java.util.Arrays.fill(cache_, 0L); + } + else { + cache_ = new long[1 << lgArrLongs]; + lgArrLongs_ = lgArrLongs; + } + hashTableThreshold_ = setHashTableThreshold(lgNomLongs_, lgArrLongs_); + empty_ = true; + curCount_ = 0; + thetaLong_ = (long)(getP() * LONG_MAX_VALUE_AS_DOUBLE); + dirty_ = false; + } + + //restricted methods + + @Override + int getCompactPreambleLongs() { + return CompactOperations.computeCompactPreLongs(empty_, curCount_, thetaLong_); + } + + @Override + int getCurrentPreambleLongs() { + return Family.ALPHA.getMinPreLongs(); + } + + @Override + MemorySegment getMemorySegment() { + return null; + } + + @Override + long[] getCache() { + return cache_; + } + + @Override + boolean isDirty() { + return dirty_; + } + + @Override + boolean isOutOfSpace(final int numEntries) { + return numEntries > hashTableThreshold_; + } + + @Override + int getLgArrLongs() { + return lgArrLongs_; + } + + @Override + UpdateReturnState hashUpdate(final long hash) { + HashOperations.checkHashCorruption(hash); + empty_ = false; + + //The over-theta test + if (HashOperations.continueCondition(thetaLong_, hash)) { + return RejectedOverTheta; //signal that hash was rejected due to theta. 
  /**
   * Enhanced Knuth-style Open Addressing, Double Hash insert.
   * The insertion process will overwrite an already existing, dirty (over-theta) value if one is
   * found in the search.
   * If an empty cell is found first, it will be inserted normally.
   *
   * <p>Every successful insert multiplies thetaLong_ by alpha_ and sets dirty_. Only an insert
   * into an empty cell increments curCount_, and only that path may call rebuildDirty().
   * Overwriting a dirty cell leaves curCount_ unchanged.</p>
   *
   * <p>NOTE(review): only the outer probe loop checks for wrapping back to the starting index.
   * The inner loop, entered after a dirty cell has been found, relies on the table containing
   * either the given hash or at least one zero cell in order to terminate - confirm that the
   * callers guarantee this.</p>
   *
   * @param hashTable the hash table to insert into. The probe mask is derived from lgArrLongs_,
   * so this presumably must have length 1 &lt;&lt; lgArrLongs_ - confirm against callers.
   * @param hash must not be 0. If not a duplicate, it will be inserted into the hash array
   * @return RejectedDuplicate if the hash is already present, InsertedCountNotIncremented if it
   * replaced a dirty value, or InsertedCountIncremented if it was placed into an empty cell.
   * @throws SketchesArgumentException if the outer search returns to its starting index without
   * finding the hash, an empty cell, or a dirty value.
   */
  final UpdateReturnState enhancedHashInsert(final long[] hashTable, final long hash) {
    final int arrayMask = (1 << lgArrLongs_) - 1; // arrayLongs -1
    // make odd and independent of curProbe:
    // the stride is built from the hash bits above the lgArrLongs_ bits used for the first probe
    final int stride = (2 * (int) ((hash >>> lgArrLongs_) & STRIDE_MASK)) + 1;
    int curProbe = (int) (hash & arrayMask);
    long curTableHash = hashTable[curProbe];
    final int loopIndex = curProbe; // starting index, used to detect a complete wrap-around

    // This is the enhanced part
    // Search for duplicate or zero, or opportunity to replace garbage.
    while ((curTableHash != hash) && (curTableHash != 0)) {
      // curTableHash is not a duplicate and not zero

      if (curTableHash >= thetaLong_) { // curTableHash is garbage, do enhanced insert
        final int rememberPos = curProbe; // remember its position.
        // Now we must make sure there are no duplicates in this search path,
        // so we keep searching
        curProbe = (curProbe + stride) & arrayMask; // move forward
        curTableHash = hashTable[curProbe];
        // NOTE(review): no wrap-around guard here, unlike the outer loop
        while ((curTableHash != hash) && (curTableHash != 0)) {
          curProbe = (curProbe + stride) & arrayMask;
          curTableHash = hashTable[curProbe];
        }
        // curTableHash is a duplicate or zero
        if (curTableHash == hash) {
          return RejectedDuplicate; // duplicate, just return
        }
        assert (curTableHash == 0); // must be zero
        // Now that we know there are no duplicates we can
        // go back and insert at first garbage value position
        hashTable[rememberPos] = hash;
        thetaLong_ = (long) (thetaLong_ * alpha_); //decrement theta
        dirty_ = true; //the decremented theta could have produced a new dirty value
        // a dirty value was replaced by a valid one, so curCount_ is deliberately left alone
        return InsertedCountNotIncremented;
      }

      // curTableHash was not a duplicate, not zero, and NOT garbage,
      // so we keep searching
      assert (curTableHash < thetaLong_);
      curProbe = (curProbe + stride) & arrayMask;
      curTableHash = hashTable[curProbe];

      // ensure no infinite loop
      if (curProbe == loopIndex) {
        throw new SketchesArgumentException("No empty slot in table!");
      }
      // end of Enhanced insert
    } // end while and search

    // curTableHash is a duplicate or zero and NOT garbage
    if (curTableHash == hash) {
      return RejectedDuplicate; // duplicate, just return
    }
    // must be zero, so insert and increment
    assert (curTableHash == 0);
    hashTable[curProbe] = hash;
    thetaLong_ = (long) (thetaLong_ * alpha_); //decrement theta
    dirty_ = true; //the decremented theta could have produced a new dirty value
    if (++curCount_ > hashTableThreshold_) {
      rebuildDirty(); //at tgt size and maybe dirty
    }
    return InsertedCountIncremented;
  }
+ lgArrLongs_ += lgResizeFactor; // new tgt size + final long[] tgtArr = new long[1 << lgArrLongs_]; + final int newCount = HashOperations.hashArrayInsert(cache_, tgtArr, lgArrLongs_, thetaLong_); + assert (curCount_ == newCount); + curCount_ = newCount; + cache_ = tgtArr; + hashTableThreshold_ = setHashTableThreshold(lgNomLongs_, lgArrLongs_); + } + + //Cache stays the same size. Must be dirty. Theta doesn't change, count will change. + // Used by rebuildDirtyAtTgtSize() + private final void forceRebuildDirtyCache() { + final long[] tgtArr = new long[1 << lgArrLongs_]; + curCount_ = HashOperations.hashArrayInsert(cache_, tgtArr, lgArrLongs_, thetaLong_); + cache_ = tgtArr; + dirty_ = false; + //hashTableThreshold stays the same + } + + // @formatter:off + /** + * Computes an estimate of the error variance based on Historic Inverse Probability (HIP) + * estimators. See Cohen: All-Distances Sketches, Revisited: HIP Estimators for Massive Graph + * Analysis, Nov 2014. + *
+   * Table of sketch states and how Upper and Lower Bounds are computed
+   *
+   * Theta P    Count  Empty  EstMode Est   UB  LB   Comments
+   * 1.0   1.0  0      T      F       0     0   0    Empty Sketch-mode only sketch
+   * 1.0   1.0  N      F      F       N     N   N    Degenerate Sketch-mode only sketch
+   * <1.0  1.0  -      F      T       est   HIP HIP  Normal Sketch-mode only sketch
+   *  P    <1.0 0      T      F       0     0   0    Virgin sampling sketch
+   *  P    <1.0 N      F      T       est   HIP HIP  Degenerate sampling sketch
+   *  <P   <1.0 N      F      T       est   HIP HIP  Sampling sketch also in sketch-mode
+   * 
+ * @param k alias for nominal entries. + * @param p See Sampling Probability, p. + * @param alpha the value of alpha for this sketch + * @param theta See theta. + * @param count the current valid count. + * @return the variance. + */ + // @formatter:on + private static final double getVariance(final double k, final double p, final double alpha, + final double theta, final int count) { + final double kPlus1 = k + 1.0; + final double y = 1.0 / p; + final double ySq = y * y; + final double ySqMinusY = ySq - y; + final int r = getR(theta, alpha, p); + final double result; + if (r == 0) { + result = count * ySqMinusY; + } + else if (r == 1) { + result = kPlus1 * ySqMinusY; //term1 + } + else { //r > 1 + final double b = 1.0 / alpha; + final double bSq = b * b; + final double x = p / theta; + final double xSq = x * x; + final double term1 = kPlus1 * ySqMinusY; + final double term2 = y / (1.0 - bSq); + final double term3 = (((y * bSq) - (y * xSq) - b - bSq) + x + (x * b)); + result = term1 + (term2 * term3); + } + final double term4 = (1 - theta) / (theta * theta); + return result + term4; + } + + /** + * Computes whether there have been 0, 1, or 2 or more actual insertions into the cache in a + * numerically safe way. + * @param theta See Theta. + * @param alpha internal computed value alpha. + * @param p See Sampling Probability, p. + * @return R. + */ + private static final int getR(final double theta, final double alpha, final double p) { + final double split1 = (p * (alpha + 1.0)) / 2.0; + if (theta > split1) { return 0; } + if (theta > (alpha * split1)) { return 1; } + return 2; + } + + /** + * Returns the cardinality limit given the current size of the hash table array. + * + * @param lgNomLongs See lgNomLongs. + * @param lgArrLongs See lgArrLongs. + * @return the hash table threshold + */ + private static final int setHashTableThreshold(final int lgNomLongs, final int lgArrLongs) { + final double fraction = (lgArrLongs <= lgNomLongs) ? 
ThetaUtil.RESIZE_THRESHOLD : ThetaUtil.REBUILD_THRESHOLD; + return (int) Math.floor(fraction * (1 << lgArrLongs)); + } + + static void checkAlphaFamily(final MemorySegment seg, final int preambleLongs, final int lgNomLongs) { + //Check Family + final int familyID = extractFamilyID(seg); //byte 2 + final Family family = Family.idToFamily(familyID); + if (family.equals(Family.ALPHA)) { + if (preambleLongs != Family.ALPHA.getMinPreLongs()) { + throw new SketchesArgumentException( + "Possible corruption: Invalid PreambleLongs value for ALPHA: " + preambleLongs); + } + } + else { + throw new SketchesArgumentException( + "Possible corruption: Invalid Family: " + family.toString()); + } + + //Check lgNomLongs + if (lgNomLongs < ALPHA_MIN_LG_NOM_LONGS) { + throw new SketchesArgumentException( + "Possible corruption: This sketch requires a minimum nominal entries of " + + (1 << ALPHA_MIN_LG_NOM_LONGS)); + } + } + +} diff --git a/src/main/java/org/apache/datasketches/theta2/HeapCompactHashIterator.java b/src/main/java/org/apache/datasketches/theta2/HeapCompactHashIterator.java new file mode 100644 index 000000000..b10ffcaaf --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/HeapCompactHashIterator.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +class HeapCompactHashIterator implements HashIterator { + private long[] cache; + private int index; + + HeapCompactHashIterator(final long[] cache) { + this.cache = cache; + index = -1; + } + + @Override + public long get() { + return cache[index]; + } + + @Override + public boolean next() { + return ++index < cache.length; + } + +} diff --git a/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java new file mode 100644 index 000000000..065213191 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import static org.apache.datasketches.theta2.CompactOperations.checkIllegalCurCountAndEmpty; +import static org.apache.datasketches.theta2.CompactOperations.componentsToCompact; +import static org.apache.datasketches.theta2.CompactOperations.computeCompactPreLongs; +import static org.apache.datasketches.theta2.CompactOperations.correctThetaOnCompact; +import static org.apache.datasketches.theta2.CompactOperations.isSingleItem; +import static org.apache.datasketches.theta2.CompactOperations.loadCompactMemory; +import static org.apache.datasketches.theta2.PreambleUtil.COMPACT_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.READ_ONLY_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.SINGLEITEM_FLAG_MASK; + +import java.lang.foreign.MemorySegment; + +/** + * Parent class of the Heap Compact Sketches. + * + * @author Lee Rhodes + */ +class HeapCompactSketch extends CompactSketch { + private final long thetaLong_; //computed + private final int curCount_; + private final int preLongs_; //computed + private final short seedHash_; + private final boolean empty_; + private final boolean ordered_; + private final boolean singleItem_; + private final long[] cache_; + + /** + * Constructs this sketch from correct, valid components. + * @param cache in compact form + * @param empty The correct Empty. + * @param seedHash The correct + * Seed Hash. + * @param curCount correct value + * @param thetaLong The correct + * thetaLong. 
+ */ + HeapCompactSketch(final long[] cache, final boolean empty, final short seedHash, + final int curCount, final long thetaLong, final boolean ordered) { + seedHash_ = seedHash; + curCount_ = curCount; + empty_ = empty; + ordered_ = ordered; + cache_ = cache; + //computed + thetaLong_ = correctThetaOnCompact(empty, curCount, thetaLong); + preLongs_ = computeCompactPreLongs(empty, curCount, thetaLong); //considers singleItem + singleItem_ = isSingleItem(empty, curCount, thetaLong); + checkIllegalCurCountAndEmpty(empty, curCount); + } + + //Sketch + + @Override + public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) { + if (dstSeg == null && (dstOrdered == false || this.ordered_ == dstOrdered)) { return this; } + return componentsToCompact(getThetaLong(), getRetainedEntries(true), getSeedHash(), isEmpty(), + true, ordered_, dstOrdered, dstSeg, getCache().clone()); + } + + @Override + public int getCurrentBytes() { + return (preLongs_ + curCount_) << 3; + } + + @Override + public int getRetainedEntries(final boolean valid) { + return curCount_; + } + + @Override + public long getThetaLong() { + return thetaLong_; + } + + @Override + public boolean isEmpty() { + return empty_; + } + + @Override + public boolean isOrdered() { + return ordered_; + } + + @Override + public HashIterator iterator() { + return new HeapCompactHashIterator(cache_); + } + + //restricted methods + + @Override + long[] getCache() { + return cache_; + } + + @Override + int getCompactPreambleLongs() { + return preLongs_; + } + + @Override + int getCurrentPreambleLongs() { //already compact; ignored + return preLongs_; + } + + @Override + MemorySegment getMemorySegment() { + return null; + } + + @Override + short getSeedHash() { + return seedHash_; + } + + //use of Memory is convenient. The byteArray and Memory are loaded simultaneously. 
+ @Override + public byte[] toByteArray() { + final int bytes = getCurrentBytes(); + final byte[] byteArray = new byte[bytes]; + final MemorySegment dstSeg = MemorySegment.ofArray(byteArray); + final int emptyBit = isEmpty() ? EMPTY_FLAG_MASK : 0; + final int orderedBit = ordered_ ? ORDERED_FLAG_MASK : 0; + final int singleItemBit = singleItem_ ? SINGLEITEM_FLAG_MASK : 0; + final byte flags = (byte) (emptyBit | READ_ONLY_FLAG_MASK | COMPACT_FLAG_MASK + | orderedBit | singleItemBit); + final int preLongs = getCompactPreambleLongs(); + loadCompactMemory(getCache(), getSeedHash(), getRetainedEntries(true), getThetaLong(), + dstSeg, flags, preLongs); + return byteArray; + } + +} diff --git a/src/main/java/org/apache/datasketches/theta2/HeapHashIterator.java b/src/main/java/org/apache/datasketches/theta2/HeapHashIterator.java new file mode 100644 index 000000000..c2b098c25 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/HeapHashIterator.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +/** + * @author Lee Rhodes + */ +class HeapHashIterator implements HashIterator { + private long[] cache; + private long thetaLong; + private int index; + private long hash; + + HeapHashIterator(final long[] cache, final long thetaLong) { + this.cache = cache; + this.thetaLong = thetaLong; + index = -1; + hash = 0; + } + + @Override + public long get() { + return hash; + } + + @Override + public boolean next() { + while (++index < cache.length) { + hash = cache[index]; + if ((hash != 0) && (hash < thetaLong)) { + return true; + } + } + return false; + } + +} diff --git a/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java new file mode 100644 index 000000000..3096e5e1a --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java @@ -0,0 +1,326 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
/**
 * A heap-resident update sketch that keeps its hashes in an open hash table. When the table is
 * out of space it is either enlarged (while smaller than 2^(lgNomLongs + 1) longs) or, once at
 * that size, trimmed by a quick-select that lowers theta and rebuilds the table.
 * The same class is used for the QUICKSELECT family and, as a "gadget", for the UNION family;
 * the two differ here only in the family and preamble longs that are reported and serialized.
 *
 * @author Lee Rhodes
 * @author Kevin Lang
 */
class HeapQuickSelectSketch extends HeapUpdateSketch {
  private final Family MY_FAMILY; //QUICKSELECT or UNION, or whatever heapifyInstance read

  private final int preambleLongs_;
  private int lgArrLongs_;         //log-base-2 of the current length of cache_
  private int hashTableThreshold_; //never serialized
  int curCount_;
  long thetaLong_;
  boolean empty_;

  private long[] cache_; //the hash table; zero marks an unused cell

  /**
   * Used only by heapifyInstance. Sets the family and preamble longs and leaves all of the
   * non-final state (lgArrLongs_, hashTableThreshold_, curCount_, thetaLong_, empty_, cache_)
   * at Java defaults for the caller to fill in.
   */
  private HeapQuickSelectSketch(final int lgNomLongs, final long seed, final float p,
      final ResizeFactor rf, final int preambleLongs, final Family family) {
    super(lgNomLongs, seed, p, rf);
    preambleLongs_ = preambleLongs;
    MY_FAMILY = family;
  }

  /**
   * Construct a new sketch instance on the java heap.
   *
   * @param lgNomLongs See lgNomLongs.
   * @param seed See seed
   * @param p See Sampling Probability, p
   * @param rf See Resize Factor
   * @param unionGadget true if this sketch is implementing the Union gadget function.
   * Otherwise, it is behaving as a normal QuickSelectSketch.
   */
  HeapQuickSelectSketch(final int lgNomLongs, final long seed, final float p,
      final ResizeFactor rf, final boolean unionGadget) {
    super(lgNomLongs, seed, p, rf);

    //Choose family, preambleLongs
    if (unionGadget) {
      preambleLongs_ = Family.UNION.getMinPreLongs();
      MY_FAMILY = Family.UNION;
    }
    else {
      preambleLongs_ = Family.QUICKSELECT.getMinPreLongs();
      MY_FAMILY = Family.QUICKSELECT;
    }

    //NOTE(review): the sizing below uses the lgNomLongs argument, whereas reset() uses the
    // lgNomLongs_ field that the superclass clamps to a minimum - confirm these always agree.
    lgArrLongs_ = ThetaUtil.startingSubMultiple(lgNomLongs + 1, rf.lg(), ThetaUtil.MIN_LG_ARR_LONGS);
    hashTableThreshold_ = getHashTableThreshold(lgNomLongs, lgArrLongs_);
    curCount_ = 0;
    thetaLong_ = (long)(p * LONG_MAX_VALUE_AS_DOUBLE); //initial theta is p scaled to the long range
    empty_ = true; //other flags: bigEndian = readOnly = compact = ordered = false;
    cache_ = new long[1 << lgArrLongs_];
  }

  /**
   * Heapify a sketch from a MemorySegment UpdateSketch or Union object
   * containing sketch data.
   * The hash table is copied out of the segment, so the returned sketch does not retain srcSeg.
   * @param srcSeg The source MemorySegment object.
   * @param seed See seed
   * @return instance of this sketch
   */
  static HeapQuickSelectSketch heapifyInstance(final MemorySegment srcSeg, final long seed) {
    final int preambleLongs = extractPreLongs(srcSeg); //byte 0
    final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3
    final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4

    //validate before anything is allocated or copied
    checkUnionQuickSelectFamily(srcSeg, preambleLongs, lgNomLongs);
    checkMemIntegrity(srcSeg, seed, preambleLongs, lgNomLongs, lgArrLongs);

    final float p = extractP(srcSeg); //bytes 12-15
    final int memlgRF = extractLgResizeFactor(srcSeg); //byte 0
    ResizeFactor memRF = ResizeFactor.getRF(memlgRF);
    final int familyID = extractFamilyID(srcSeg);
    final Family family = Family.idToFamily(familyID);

    if (isResizeFactorIncorrect(srcSeg, lgNomLongs, lgArrLongs)) {
      memRF = ResizeFactor.X2; //X2 always works.
    }

    final HeapQuickSelectSketch hqss = new HeapQuickSelectSketch(lgNomLongs, seed, p, memRF,
        preambleLongs, family);
    hqss.lgArrLongs_ = lgArrLongs;
    hqss.hashTableThreshold_ = getHashTableThreshold(lgNomLongs, lgArrLongs);
    hqss.curCount_ = extractCurCount(srcSeg);
    hqss.thetaLong_ = extractThetaLong(srcSeg);
    hqss.empty_ = PreambleUtil.isEmptyFlag(srcSeg);
    hqss.cache_ = new long[1 << lgArrLongs];
    //the table starts immediately after the preamble, at byte offset preambleLongs * 8
    MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preambleLongs << 3, hqss.cache_, 0, 1 << lgArrLongs); //read in as hash table
    return hqss;
  }

  //Sketch

  @Override
  public double getEstimate() {
    return Sketch.estimate(thetaLong_, curCount_);
  }

  @Override
  public Family getFamily() {
    return MY_FAMILY;
  }

  @Override
  public int getRetainedEntries(final boolean valid) { //the valid argument is not used here
    return curCount_;
  }

  @Override
  public long getThetaLong() {
    //an empty sketch always reports Long.MAX_VALUE, whatever the internal thetaLong_ is
    return empty_ ? Long.MAX_VALUE : thetaLong_;
  }

  @Override
  public boolean isEmpty() {
    return empty_;
  }

  @Override
  public HashIterator iterator() {
    //uses the internal thetaLong_, not getThetaLong()
    return new HeapHashIterator(cache_, thetaLong_);
  }

  @Override
  public byte[] toByteArray() {
    return toByteArray(preambleLongs_, (byte) MY_FAMILY.getID());
  }

  //UpdateSketch

  /**
   * Trims the sketch by quick-select if it currently retains more than 2^lgNomLongs entries;
   * otherwise does nothing.
   * @return this sketch
   */
  @Override
  public UpdateSketch rebuild() {
    if (getRetainedEntries(true) > (1 << getLgNomLongs())) {
      quickSelectAndRebuild();
    }
    return this;
  }

  /**
   * Returns the sketch to its empty state. The hash table is zeroed in place when it is already
   * at the starting size, otherwise it is replaced by a new table of the starting size.
   */
  @Override
  public void reset() {
    final ResizeFactor rf = getResizeFactor();
    final int lgArrLongsSM = ThetaUtil.startingSubMultiple(lgNomLongs_ + 1, rf.lg(), ThetaUtil.MIN_LG_ARR_LONGS);
    if (lgArrLongsSM == lgArrLongs_) {
      final int arrLongs = cache_.length;
      assert (1 << lgArrLongs_) == arrLongs;
      java.util.Arrays.fill(cache_, 0L);
    }
    else {
      cache_ = new long[1 << lgArrLongsSM];
      lgArrLongs_ = lgArrLongsSM;
    }
    hashTableThreshold_ = getHashTableThreshold(lgNomLongs_, lgArrLongs_);
    empty_ = true;
    curCount_ = 0;
    thetaLong_ = (long)(getP() * LONG_MAX_VALUE_AS_DOUBLE);
  }

  //restricted methods

  @Override
  long[] getCache() {
    return cache_; //the internal table itself, not a copy
  }

  @Override
  int getCompactPreambleLongs() {
    return CompactOperations.computeCompactPreLongs(empty_, curCount_, thetaLong_);
  }

  @Override
  int getCurrentPreambleLongs() {
    return preambleLongs_;
  }

  //only used by ConcurrentHeapThetaBuffer & Test
  int getHashTableThreshold() {
    return hashTableThreshold_;
  }

  @Override
  int getLgArrLongs() {
    return lgArrLongs_;
  }

  @Override
  MemorySegment getMemorySegment() {
    return null; //this sketch is backed by heap fields only
  }

  /**
   * Offers a hash to the sketch. The sketch is marked non-empty before any of the rejection
   * tests, so a hash that is rejected for being over theta still clears the empty state.
   * @param hash the hash to offer. It is first passed to HashOperations.checkHashCorruption.
   * @return the UpdateReturnState describing what was done with the hash.
   */
  @Override
  UpdateReturnState hashUpdate(final long hash) {
    HashOperations.checkHashCorruption(hash);
    empty_ = false;

    //The over-theta test
    if (HashOperations.continueCondition(thetaLong_, hash)) {
      return RejectedOverTheta; //signal that hash was rejected due to theta.
    }

    //The duplicate test
    if (HashOperations.hashSearchOrInsert(cache_, lgArrLongs_, hash) >= 0) {
      return RejectedDuplicate; //Duplicate, not inserted
    }
    //insertion occurred, must increment curCount
    curCount_++;

    if (isOutOfSpace(curCount_)) { //we need to do something, we are out of space
      //must rebuild or resize
      if (lgArrLongs_ <= lgNomLongs_) { //resize
        resizeCache();
        return InsertedCountIncrementedResized;
      }
      //Already at tgt size, must rebuild
      assert (lgArrLongs_ == (lgNomLongs_ + 1)) : "lgArr: " + lgArrLongs_ + ", lgNom: " + lgNomLongs_;
      quickSelectAndRebuild(); //Changes thetaLong_, curCount_, reassigns cache
      return InsertedCountIncrementedRebuilt;
    }
    return InsertedCountIncremented;
  }

  @Override
  boolean isDirty() {
    return false; //this class never reports itself as dirty
  }

  @Override
  boolean isOutOfSpace(final int numEntries) {
    return numEntries > hashTableThreshold_;
  }

  //Must resize. Changes lgArrLongs_, cache_, hashTableThreshold;
  // theta and count don't change.
  // Used by hashUpdate()
  private final void resizeCache() {
    final ResizeFactor rf = getResizeFactor();
    final int lgMaxArrLongs = lgNomLongs_ + 1;
    final int lgDeltaLongs = lgMaxArrLongs - lgArrLongs_;
    //grow by the resize factor, but by no more than the distance to the maximum size and by at least one doubling
    final int lgResizeFactor = max(min(rf.lg(), lgDeltaLongs), 1); //rf_.lg() could be 0
    lgArrLongs_ += lgResizeFactor; // new arr size

    final long[] tgtArr = new long[1 << lgArrLongs_];
    final int newCount = HashOperations.hashArrayInsert(cache_, tgtArr, lgArrLongs_, thetaLong_);

    assert newCount == curCount_; //Assumes no dirty values.
    curCount_ = newCount;

    cache_ = tgtArr;
    hashTableThreshold_ = getHashTableThreshold(lgNomLongs_, lgArrLongs_);
  }

  //array stays the same size. Changes theta and thus count
  // Used by hashUpdate() and rebuild()
  private final void quickSelectAndRebuild() {
    final int arrLongs = 1 << lgArrLongs_; // generally 2 * k,

    final int pivot = (1 << lgNomLongs_) + 1; // pivot for QS = k + 1

    //the selected value becomes the new theta; the old table is left reordered by the selection
    thetaLong_ = selectExcludingZeros(cache_, curCount_, pivot); //messes up the cache_

    // now we rebuild to clean up dirty data, update count, reconfigure as a hash table
    final long[] tgtArr = new long[arrLongs];
    curCount_ = HashOperations.hashArrayInsert(cache_, tgtArr, lgArrLongs_, thetaLong_);
    cache_ = tgtArr;
    //hashTableThreshold stays the same
  }

  /**
   * Returns the cardinality limit given the current size of the hash table array.
   * The limit is the array length multiplied by ThetaUtil.RESIZE_THRESHOLD while the array can
   * still grow (lgArrLongs &lt;= lgNomLongs), otherwise by ThetaUtil.REBUILD_THRESHOLD, truncated
   * to an int.
   *
   * @param lgNomLongs See lgNomLongs.
   * @param lgArrLongs See lgArrLongs.
   * @return the hash table threshold
   */
  private static final int getHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
    final double fraction = (lgArrLongs <= lgNomLongs) ? ThetaUtil.RESIZE_THRESHOLD : ThetaUtil.REBUILD_THRESHOLD;
    return (int) (fraction * (1 << lgArrLongs));
  }

}
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static org.apache.datasketches.theta2.CompactOperations.checkIllegalCurCountAndEmpty; +import static org.apache.datasketches.theta2.CompactOperations.correctThetaOnCompact; +import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.SER_VER; +import static org.apache.datasketches.theta2.PreambleUtil.insertCurCount; +import static org.apache.datasketches.theta2.PreambleUtil.insertFamilyID; +import static org.apache.datasketches.theta2.PreambleUtil.insertFlags; +import static org.apache.datasketches.theta2.PreambleUtil.insertLgArrLongs; +import static org.apache.datasketches.theta2.PreambleUtil.insertLgNomLongs; +import static org.apache.datasketches.theta2.PreambleUtil.insertLgResizeFactor; +import static org.apache.datasketches.theta2.PreambleUtil.insertP; +import static org.apache.datasketches.theta2.PreambleUtil.insertPreLongs; +import static org.apache.datasketches.theta2.PreambleUtil.insertSeedHash; +import static org.apache.datasketches.theta2.PreambleUtil.insertSerVer; +import static org.apache.datasketches.theta2.PreambleUtil.insertThetaLong; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * The parent class for Heap Updatable Theta Sketches. 
+ * + * @author Lee Rhodes + */ +abstract class HeapUpdateSketch extends UpdateSketch { + final int lgNomLongs_; + private final long seed_; + private final float p_; + private final ResizeFactor rf_; + + HeapUpdateSketch(final int lgNomLongs, final long seed, final float p, final ResizeFactor rf) { + lgNomLongs_ = Math.max(lgNomLongs, ThetaUtil.MIN_LG_NOM_LONGS); + seed_ = seed; + p_ = p; + rf_ = rf; + } + + //Sketch + + @Override + public int getCurrentBytes() { + final int preLongs = getCurrentPreambleLongs(); + final int dataLongs = getCurrentDataLongs(); + return (preLongs + dataLongs) << 3; + } + + //UpdateSketch + + @Override + public final int getLgNomLongs() { + return lgNomLongs_; + } + + @Override + float getP() { + return p_; + } + + @Override + public ResizeFactor getResizeFactor() { + return rf_; + } + + @Override + long getSeed() { + return seed_; + } + + //restricted methods + + @Override + short getSeedHash() { + return ThetaUtil.computeSeedHash(getSeed()); + } + + //Used by HeapAlphaSketch and HeapQuickSelectSketch / Theta UpdateSketch + byte[] toByteArray(final int preLongs, final byte familyID) { + if (isDirty()) { rebuild(); } + checkIllegalCurCountAndEmpty(isEmpty(), getRetainedEntries(true)); + final int preBytes = (preLongs << 3) & 0X3F; //24 bytes + final int dataBytes = getCurrentDataLongs() << 3; + final byte[] byteArrOut = new byte[preBytes + dataBytes]; + + final MemorySegment segOut = MemorySegment.ofArray(byteArrOut); + + //preamble first 8 bytes. Note: only compact can be reduced to 8 bytes. 
+ final int lgRf = getResizeFactor().lg() & 0x3; + insertPreLongs(segOut, preLongs); //byte 0 low 6 bits + insertLgResizeFactor(segOut, lgRf); //byte 0 high 2 bits + insertSerVer(segOut, SER_VER); //byte 1 + insertFamilyID(segOut, familyID); //byte 2 + insertLgNomLongs(segOut, getLgNomLongs()); //byte 3 + insertLgArrLongs(segOut, getLgArrLongs()); //byte 4 + insertSeedHash(segOut, getSeedHash()); //bytes 6 & 7 + + insertCurCount(segOut, this.getRetainedEntries(true)); + insertP(segOut, getP()); + final long thetaLong = + correctThetaOnCompact(isEmpty(), getRetainedEntries(true), getThetaLong()); + insertThetaLong(segOut, thetaLong); + + //Flags: BigEnd=0, ReadOnly=0, Empty=X, compact=0, ordered=0 + final byte flags = isEmpty() ? (byte) EMPTY_FLAG_MASK : 0; + insertFlags(segOut, flags); + + //Data + final int arrLongs = 1 << getLgArrLongs(); + final long[] cache = getCache(); + //segOut.putLongArray(preBytes, cache, 0, arrLongs); //load byteArrOut + + MemorySegment.copy(cache, 0, segOut, JAVA_LONG_UNALIGNED, preBytes, arrLongs); + return byteArrOut; + } + +} diff --git a/src/main/java/org/apache/datasketches/theta2/Intersection.java b/src/main/java/org/apache/datasketches/theta2/Intersection.java new file mode 100644 index 000000000..685dd3eac --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/Intersection.java @@ -0,0 +1,218 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import static org.apache.datasketches.common.Util.floorPowerOf2; +import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.SER_VER; +import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount; +import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID; +import static org.apache.datasketches.theta2.PreambleUtil.extractFlags; +import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs; +import static org.apache.datasketches.theta2.PreambleUtil.extractSerVer; + +import java.lang.foreign.MemorySegment; +import java.util.Arrays; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * The API for intersection operations + * + * @author Lee Rhodes + */ +public abstract class Intersection extends SetOperation { + + @Override + public Family getFamily() { + return Family.INTERSECTION; + } + + /** + * Gets the result of this operation as an ordered CompactSketch on the Java heap. + * This does not disturb the underlying data structure of this intersection. + * The {@link #intersect(Sketch)} method must have been called at least once, otherwise an + * exception will be thrown. This is because a virgin Intersection object represents the + * Universal Set, which has an infinite number of values. 
+ * @return the result of this operation as an ordered CompactSketch on the Java heap + */ + public CompactSketch getResult() { + return getResult(true, null); + } + + /** + * Gets the result of this operation as a CompactSketch in the given dstMem. + * This does not disturb the underlying data structure of this intersection. + * The {@link #intersect(Sketch)} method must have been called at least once, otherwise an + * exception will be thrown. This is because a virgin Intersection object represents the + * Universal Set, which has an infinite number of values. + * + *

Note that presenting an intersection with an empty sketch sets the internal + * state of the intersection to empty = true, and current count = 0. This is consistent with + * the mathematical rule that the intersection of any set with the empty set is + * always empty.

+ * + *

Presenting an intersection with a null argument will throw an exception.

+ * + * @param dstOrdered + * See Destination Ordered + * + * @param dstSeg the destination MemorySegment. + * + * @return the result of this operation as a CompactSketch stored in the given dstSeg, + * which can be either on or off-heap. + */ + public abstract CompactSketch getResult(boolean dstOrdered, MemorySegment dstSeg); + + /** + * Returns true if there is a valid intersection result available + * @return true if there is a valid intersection result available + */ + public abstract boolean hasResult(); + + /** + * Resets this Intersection for stateful operations only. + * The seed remains intact, otherwise reverts to + * the Universal Set: theta = 1.0, no retained data and empty = false. + */ + public abstract void reset(); + + /** + * Serialize this intersection to a byte array form. + * @return byte array of this intersection + */ + public abstract byte[] toByteArray(); + + /** + * Intersect the given sketch with the internal state. + * This method can be repeatedly called. + * If the given sketch is empty the internal state becomes the empty sketch; a null sketch is rejected with an exception. + * Theta will become the minimum of thetas seen so far. + * @param sketchIn the given sketch + */ + public abstract void intersect(Sketch sketchIn); + + /** + * Perform intersect set operation on the two given sketch arguments and return the result as an + * ordered CompactSketch on the heap. + * @param a The first sketch argument + * @param b The second sketch argument + * @return an ordered CompactSketch on the heap + */ + public CompactSketch intersect(final Sketch a, final Sketch b) { + return intersect(a, b, true, null); + } + + /** + * Perform intersect set operation on the two given sketches and return the result as a + * CompactSketch. + * @param a The first sketch argument + * @param b The second sketch argument + * @param dstOrdered + * See Destination Ordered. + * @param dstSeg the destination MemorySegment. + * @return the result as a CompactSketch. 
+ */ + public abstract CompactSketch intersect(Sketch a, Sketch b, boolean dstOrdered, + MemorySegment dstSeg); + + // Restricted + + /** + * Returns the maximum lgArrLongs given the capacity of the MemorySegment. + * @param dstSeg the given MemorySegment + * @return the maximum lgArrLongs given the capacity of the MemorySegment + */ + protected static int getMaxLgArrLongs(final MemorySegment dstSeg) { + final int preBytes = CONST_PREAMBLE_LONGS << 3; + final long cap = dstSeg.byteSize(); + return Integer.numberOfTrailingZeros(floorPowerOf2((int)(cap - preBytes)) >>> 3); + } + + protected static void checkMinSizeMemory(final MemorySegment seg) { + final int minBytes = (CONST_PREAMBLE_LONGS << 3) + (8 << ThetaUtil.MIN_LG_ARR_LONGS);//280 + final long cap = seg.byteSize(); + if (cap < minBytes) { + throw new SketchesArgumentException( + "MemorySegment must be at least " + minBytes + " bytes. Actual capacity: " + cap); + } + } + + /** + * Compact first 2^lgArrLongs of given array + * @param srcCache anything + * @param lgArrLongs The correct + * lgArrLongs. + * @param curCount must be correct + * @param thetaLong The correct + * thetaLong. + * @param dstOrdered true if output array must be sorted + * @return the compacted array + */ //Only used in IntersectionImpl & Test + static final long[] compactCachePart(final long[] srcCache, final int lgArrLongs, + final int curCount, final long thetaLong, final boolean dstOrdered) { + if (curCount == 0) { + return new long[0]; + } + final long[] cacheOut = new long[curCount]; + final int len = 1 << lgArrLongs; + int j = 0; + for (int i = 0; i < len; i++) { + final long v = srcCache[i]; + if (v <= 0L || v >= thetaLong ) { continue; } + cacheOut[j++] = v; + } + assert curCount == j; + if (dstOrdered) { + Arrays.sort(cacheOut); + } + return cacheOut; + } + + protected static void memChecks(final MemorySegment srcSeg) { + //Get Preamble + //Note: Intersection does not use lgNomLongs (or k), per se. 
+ //seedHash loaded and checked in private constructor + final int preLongs = extractPreLongs(srcSeg); + final int serVer = extractSerVer(srcSeg); + final int famID = extractFamilyID(srcSeg); + final boolean empty = (extractFlags(srcSeg) & EMPTY_FLAG_MASK) > 0; + final int curCount = extractCurCount(srcSeg); + //Checks + if (preLongs != CONST_PREAMBLE_LONGS) { + throw new SketchesArgumentException( + "MemorySegment PreambleLongs must equal " + CONST_PREAMBLE_LONGS + ": " + preLongs); + } + if (serVer != SER_VER) { + throw new SketchesArgumentException("Serialization Version must equal " + SER_VER); + } + Family.INTERSECTION.checkFamilyID(famID); + if (empty) { + if (curCount != 0) { + throw new SketchesArgumentException( + "srcMem empty state inconsistent with curCount: " + empty + "," + curCount); + } + //empty = true AND curCount_ = 0: OK + } //else empty = false, curCount could be anything + } + +} diff --git a/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java b/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java new file mode 100644 index 000000000..ba7bcd8ac --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java @@ -0,0 +1,561 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import static java.lang.Math.min; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; +import static org.apache.datasketches.common.Util.clearBits; +import static org.apache.datasketches.common.Util.setBits; +import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.FLAGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.LG_ARR_LONGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.LG_NOM_LONGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.P_FLOAT; +import static org.apache.datasketches.theta2.PreambleUtil.RETAINED_ENTRIES_INT; +import static org.apache.datasketches.theta2.PreambleUtil.SEED_HASH_SHORT; +import static org.apache.datasketches.theta2.PreambleUtil.SER_VER; +import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.THETA_LONG; +import static org.apache.datasketches.theta2.PreambleUtil.clearEmpty; +import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount; +import static org.apache.datasketches.theta2.PreambleUtil.extractFlags; +import static org.apache.datasketches.theta2.PreambleUtil.extractLgArrLongs; +import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong; +import static org.apache.datasketches.theta2.PreambleUtil.insertCurCount; +import static 
org.apache.datasketches.theta2.PreambleUtil.insertFamilyID; +import static org.apache.datasketches.theta2.PreambleUtil.insertLgArrLongs; +import static org.apache.datasketches.theta2.PreambleUtil.insertP; +import static org.apache.datasketches.theta2.PreambleUtil.insertPreLongs; +import static org.apache.datasketches.theta2.PreambleUtil.insertSerVer; +import static org.apache.datasketches.theta2.PreambleUtil.insertThetaLong; +import static org.apache.datasketches.theta2.PreambleUtil.setEmpty; +import static org.apache.datasketches.thetacommon.HashOperations.continueCondition; +import static org.apache.datasketches.thetacommon.HashOperations.hashInsertOnly; +import static org.apache.datasketches.thetacommon.HashOperations.hashInsertOnlyMemory; +import static org.apache.datasketches.thetacommon.HashOperations.hashSearch; +import static org.apache.datasketches.thetacommon.HashOperations.minLgHashTableSize; + +import java.lang.foreign.MemorySegment; +import java.util.Arrays; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.SketchesReadOnlyException; +import org.apache.datasketches.common.SketchesStateException; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * Intersection operation for Theta Sketches. + * + *

This implementation uses data either on-heap or off-heap in a given MemorySegment + * that is owned and managed by the caller. + * Using an off-heap MemorySegment, if managed properly, will greatly reduce the need for + * the JVM to perform garbage collection.

+ * + * @author Lee Rhodes + * @author Kevin Lang + */ +class IntersectionImpl extends Intersection { + protected final short seedHash_; + protected final boolean readOnly_; //True if this sketch is to be treated as read only + protected final MemorySegment wseg_; + protected final int maxLgArrLongs_; //only used with MemorySegment, not serialized + + //Note: Intersection does not use lgNomLongs or k, per se. + protected int lgArrLongs_; //current size of hash table + protected int curCount_; //curCount of HT, if < 0 means Universal Set (US) is true + protected long thetaLong_; + protected boolean empty_; //A virgin intersection represents the Universal Set, so empty is FALSE! + protected long[] hashTable_; //retained entries of the intersection, on-heap only. + + /** + * Constructor: Sets the class finals and computes, sets and checks the seedHash. + * @param wseg Can be either a Source(e.g. wrap) or Destination (new Direct) MemorySegment. + * @param seed Used to validate incoming sketch arguments. + * @param dstMemFlag The given MemorySegment is a Destination (new Direct) MemorySegment. + * @param readOnly True if MemorySegment is to be treated as read only. + */ + protected IntersectionImpl(final MemorySegment wseg, final long seed, final boolean dstMemFlag, + final boolean readOnly) { + readOnly_ = readOnly; + if (wseg != null) { + wseg_ = wseg; + if (dstMemFlag) { //DstMem: compute & store seedHash, no seedhash checking + checkMinSizeMemory(wseg); + maxLgArrLongs_ = !readOnly ? 
getMaxLgArrLongs(wseg) : 0; //Only Off Heap + seedHash_ = ThetaUtil.computeSeedHash(seed); + wseg_.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, seedHash_); + } else { //SrcMem:gets and stores the seedHash, checks mem_seedHash against the seed + seedHash_ = wseg_.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT); + ThetaUtil.checkSeedHashes(seedHash_, ThetaUtil.computeSeedHash(seed)); //check for seed hash conflict + maxLgArrLongs_ = 0; + } + } else { //compute & store seedHash + wseg_ = null; + maxLgArrLongs_ = 0; + seedHash_ = ThetaUtil.computeSeedHash(seed); + } + } + + /** + * Factory: Construct a new Intersection target on the java heap. + * Called by SetOperationBuilder, test. + * + * @param seed See Seed + * @return a new IntersectionImpl on the Java heap + */ + static IntersectionImpl initNewHeapInstance(final long seed) { + final boolean dstMemFlag = false; + final boolean readOnly = false; + final IntersectionImpl impl = new IntersectionImpl(null, seed, dstMemFlag, readOnly); + impl.hardReset(); + return impl; + } + + /** + * Factory: Construct a new Intersection target direct to the given destination MemorySegment. + * Called by SetOperationBuilder, test. 
+ * + * @param seed See Seed + * @param dstSeg destination MemorySegment + * @return a new IntersectionImpl that may be off-heap + */ + static IntersectionImpl initNewDirectInstance(final long seed, final MemorySegment dstSeg) { + //Load Preamble + //Pre0 + dstSeg.asSlice(0, CONST_PREAMBLE_LONGS << 3).fill((byte)0); + insertPreLongs(dstSeg, CONST_PREAMBLE_LONGS); //RF not used = 0 + insertSerVer(dstSeg, SER_VER); + insertFamilyID(dstSeg, Family.INTERSECTION.getID()); + //lgNomLongs not used by Intersection + //lgArrLongs set by hardReset + //flags are already 0: bigEndian = readOnly = compact = ordered = empty = false; + //seedHash loaded and checked in IntersectionImpl constructor + //Pre1 + //CurCount set by hardReset + insertP(dstSeg, (float) 1.0); //not used by intersection + //Pre2 + //thetaLong set by hardReset + + //Initialize + final boolean dstMemFlag = true; + final boolean readOnly = false; + final IntersectionImpl impl = new IntersectionImpl(dstSeg, seed, dstMemFlag, readOnly); + impl.hardReset(); + return impl; + } + + /** + * Factory: Heapify an intersection target from a MemorySegment image containing data. + * @param srcSeg The source MemorySegment object. 
+ * @param seed See seed + * @return a IntersectionImpl instance on the Java heap + */ + static IntersectionImpl heapifyInstance(final MemorySegment srcSeg, final long seed) { + final boolean dstMemFlag = false; + final boolean readOnly = false; + final IntersectionImpl impl = new IntersectionImpl(null, seed, dstMemFlag, readOnly); + memChecks(srcSeg); + + //Initialize + impl.lgArrLongs_ = extractLgArrLongs(srcSeg); + impl.curCount_ = extractCurCount(srcSeg); + impl.thetaLong_ = extractThetaLong(srcSeg); + impl.empty_ = (extractFlags(srcSeg) & EMPTY_FLAG_MASK) > 0; + if (!impl.empty_) { + if (impl.curCount_ > 0) { + impl.hashTable_ = new long[1 << impl.lgArrLongs_]; + MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, impl.hashTable_, 0, 1 << impl.lgArrLongs_); + } + } + return impl; + } + + /** + * Factory: Wrap an Intersection target around the given source MemorySegment containing + * intersection data. + * @param srcSeg The source MemorySegment image. + * @param seed See seed + * @param readOnly True if MemorySegment is to be treated as read only + * @return a IntersectionImpl that wraps a source MemorySegment that contains an Intersection image + */ + static IntersectionImpl wrapInstance( + final MemorySegment srcSeg, + final long seed, + final boolean readOnly) { + final boolean dstMemFlag = false; + final IntersectionImpl impl = new IntersectionImpl(srcSeg, seed, dstMemFlag, readOnly); + memChecks(srcSeg); + impl.lgArrLongs_ = extractLgArrLongs(srcSeg); + impl.curCount_ = extractCurCount(srcSeg); + impl.thetaLong_ = extractThetaLong(srcSeg); + impl.empty_ = (extractFlags(srcSeg) & EMPTY_FLAG_MASK) > 0; + return impl; + } + + @Override + public CompactSketch intersect(final Sketch a, final Sketch b, final boolean dstOrdered, + final MemorySegment dstSeg) { + if (wseg_ != null && readOnly_) { throw new SketchesReadOnlyException(); } + hardReset(); + intersect(a); + intersect(b); + final CompactSketch csk = getResult(dstOrdered, dstSeg); 
+ hardReset(); + return csk; + } + + @Override + public void intersect(final Sketch sketchIn) { + if (sketchIn == null) { + throw new SketchesArgumentException("Intersection argument must not be null."); + } + if (wseg_ != null && readOnly_) { throw new SketchesReadOnlyException(); } + if (empty_ || sketchIn.isEmpty()) { //empty rule + //Because of the def of null above and the Empty Rule (which is OR), empty_ must be true. + //Whatever the current internal state, we make our local empty. + resetToEmpty(); + return; + } + ThetaUtil.checkSeedHashes(seedHash_, sketchIn.getSeedHash()); + //Set minTheta + thetaLong_ = min(thetaLong_, sketchIn.getThetaLong()); //Theta rule + empty_ = false; + if (wseg_ != null) { + insertThetaLong(wseg_, thetaLong_); + clearEmpty(wseg_); //false + } + + // The truth table for the following state machine. MinTheta is set above. + // Incoming sketch is not null and not empty, but could have 0 count and Theta < 1.0 + // Case curCount sketchInEntries | Actions + // 1 <0 0 | First intersect, set curCount = 0; HT = null; minTh; exit + // 2 0 0 | set curCount = 0; HT = null; minTh; exit + // 3 >0 0 | set curCount = 0; HT = null; minTh; exit + // 4 | Not used + // 5 <0 >0 | First intersect, clone SketchIn; exit + // 6 0 >0 | set curCount = 0; HT = null; minTh; exit + // 7 >0 >0 | Perform full intersect + final int sketchInEntries = sketchIn.getRetainedEntries(true); + + //states 1,2,3,6 + if (curCount_ == 0 || sketchInEntries == 0) { + curCount_ = 0; + if (wseg_ != null) { insertCurCount(wseg_, 0); } + hashTable_ = null; //No need for a HT. 
Don't bother clearing mem if valid + } //end of states 1,2,3,6 + + // state 5 + else if (curCount_ < 0 && sketchInEntries > 0) { + curCount_ = sketchIn.getRetainedEntries(true); + final int requiredLgArrLongs = minLgHashTableSize(curCount_, ThetaUtil.REBUILD_THRESHOLD); + final int priorLgArrLongs = lgArrLongs_; //prior only used in error message + lgArrLongs_ = requiredLgArrLongs; + + if (wseg_ != null) { //Off heap, check if current dstMem is large enough + insertCurCount(wseg_, curCount_); + insertLgArrLongs(wseg_, lgArrLongs_); + if (requiredLgArrLongs <= maxLgArrLongs_) { + wseg_.asSlice(CONST_PREAMBLE_LONGS << 3, 8 << lgArrLongs_).fill((byte)0); + } + else { //not enough space in dstMem + final int requiredBytes = (8 << requiredLgArrLongs) + 24; + final int givenBytes = (8 << priorLgArrLongs) + 24; + throw new SketchesArgumentException( + "Insufficient internal Memory space: " + requiredBytes + " > " + givenBytes); + } + } + else { //On the heap, allocate a HT + hashTable_ = new long[1 << lgArrLongs_]; + } + moveDataToTgt(sketchIn); + } //end of state 5 + + //state 7 + else if (curCount_ > 0 && sketchInEntries > 0) { + //Sets resulting hashTable, curCount and adjusts lgArrLongs + performIntersect(sketchIn); + } //end of state 7 + + else { + assert false : "Should not happen"; + } + } + + @Override + public CompactSketch getResult(final boolean dstOrdered, final MemorySegment dstSeg) { + if (curCount_ < 0) { + throw new SketchesStateException( + "Calling getResult() with no intervening intersections would represent the infinite set, " + + "which is not a legal result."); + } + long[] compactCache; + final boolean srcOrdered, srcCompact; + if (curCount_ == 0) { + compactCache = new long[0]; + srcCompact = true; + srcOrdered = false; //hashTable, even though empty + return CompactOperations.componentsToCompact( + thetaLong_, curCount_, seedHash_, empty_, srcCompact, srcOrdered, dstOrdered, + dstSeg, compactCache); + } + //else curCount > 0 + final long[] 
hashTable; + if (wseg_ != null) { + final int htLen = 1 << lgArrLongs_; + hashTable = new long[htLen]; + MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, hashTable, 0, htLen); //HT lives in wseg_; dstSeg is only the output target and may be null + } else { + hashTable = hashTable_; + } + compactCache = compactCachePart(hashTable, lgArrLongs_, curCount_, thetaLong_, dstOrdered); + srcCompact = true; + srcOrdered = dstOrdered; + return CompactOperations.componentsToCompact( + thetaLong_, curCount_, seedHash_, empty_, srcCompact, srcOrdered, dstOrdered, + dstSeg, compactCache); + } + + @Override + public boolean hasMemorySegment() { + return wseg_ != null; + } + + @Override + public boolean hasResult() { + return hasMemorySegment() ? wseg_.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT) >= 0 : curCount_ >= 0; + } + + @Override + public boolean isDirect() { + return hasMemorySegment() ? wseg_.isNative() : false; + } + + @Override + public void reset() { + hardReset(); + } + + @Override + public byte[] toByteArray() { + final int preBytes = CONST_PREAMBLE_LONGS << 3; + final int dataBytes = curCount_ > 0 ? 
8 << lgArrLongs_ : 0; + final byte[] byteArrOut = new byte[preBytes + dataBytes]; + if (wseg_ != null) { + MemorySegment.copy(wseg_, JAVA_BYTE, 0, byteArrOut, 0, preBytes + dataBytes); + } + else { + final MemorySegment segOut = MemorySegment.ofArray(byteArrOut); + + //preamble + segOut.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) CONST_PREAMBLE_LONGS); //RF not used = 0 + segOut.set(JAVA_BYTE, SER_VER_BYTE, (byte) SER_VER); + segOut.set(JAVA_BYTE, FAMILY_BYTE, (byte) Family.INTERSECTION.getID()); + segOut.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte) 0); //not used + segOut.set(JAVA_BYTE, LG_ARR_LONGS_BYTE, (byte) lgArrLongs_); + if (empty_) { setBits(segOut, FLAGS_BYTE, (byte) EMPTY_FLAG_MASK); } + else { clearBits(segOut, FLAGS_BYTE, (byte) EMPTY_FLAG_MASK); } + segOut.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, seedHash_); + segOut.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, curCount_); + segOut.set(JAVA_FLOAT_UNALIGNED, P_FLOAT, (float) 1.0); + segOut.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); + + //data + if (curCount_ > 0) { + MemorySegment.copy(hashTable_, 0, segOut, JAVA_LONG_UNALIGNED, preBytes, 1 << lgArrLongs_); + } + } + return byteArrOut; + } + + //restricted + + /** + * Gets the number of retained entries from this operation. If negative, it is interpreted + * as the infinite Universal Set. + */ + @Override + int getRetainedEntries() { + return curCount_; + } + + @Override + boolean isEmpty() { + return empty_; + } + + @Override + long[] getCache() { + if (wseg_ == null) { + return hashTable_ != null ? 
hashTable_ : new long[0]; + } + //Direct + final int arrLongs = 1 << lgArrLongs_; + final long[] outArr = new long[arrLongs]; + MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, outArr, 0, arrLongs); + return outArr; + } + + @Override + short getSeedHash() { + return seedHash_; + } + + @Override + long getThetaLong() { + return thetaLong_; + } + + private void performIntersect(final Sketch sketchIn) { + // curCount and input data are nonzero, match against HT + assert curCount_ > 0 && !empty_; + final long[] hashTable; + if (wseg_ != null) { + final int htLen = 1 << lgArrLongs_; + hashTable = new long[htLen]; + MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, hashTable, 0, htLen); + } else { + hashTable = hashTable_; + } + //allocate space for matching + final long[] matchSet = new long[ min(curCount_, sketchIn.getRetainedEntries(true)) ]; + + int matchSetCount = 0; + final boolean isOrdered = sketchIn.isOrdered(); + final HashIterator it = sketchIn.iterator(); + while (it.next()) { + final long hashIn = it.get(); + if (hashIn < thetaLong_) { + final int foundIdx = hashSearch(hashTable, lgArrLongs_, hashIn); + if (foundIdx != -1) { + matchSet[matchSetCount++] = hashIn; + } + } else { + if (isOrdered) { break; } // early stop + } + } + //reduce effective array size to minimum + curCount_ = matchSetCount; + lgArrLongs_ = minLgHashTableSize(matchSetCount, ThetaUtil.REBUILD_THRESHOLD); + if (wseg_ != null) { + insertCurCount(wseg_, matchSetCount); + insertLgArrLongs(wseg_, lgArrLongs_); + wseg_.asSlice(CONST_PREAMBLE_LONGS << 3, 8 << lgArrLongs_).fill((byte)0); //clear for rebuild + } else { + Arrays.fill(hashTable_, 0, 1 << lgArrLongs_, 0L); //clear for rebuild + } + + if (curCount_ > 0) { + moveDataToTgt(matchSet, matchSetCount); //move matchSet to target + } else { + if (thetaLong_ == Long.MAX_VALUE) { + empty_ = true; + } + } + } + + private void moveDataToTgt(final long[] arr, final int count) { + final int 
arrLongsIn = arr.length; + int tmpCnt = 0; + if (wseg_ != null) { //Off Heap puts directly into mem + final int preBytes = CONST_PREAMBLE_LONGS << 3; + final int lgArrLongs = lgArrLongs_; + final long thetaLong = thetaLong_; + for (int i = 0; i < arrLongsIn; i++ ) { + final long hashIn = arr[i]; + if (continueCondition(thetaLong, hashIn)) { continue; } + hashInsertOnlyMemory(wseg_, lgArrLongs, hashIn, preBytes); + tmpCnt++; + } + } else { //On Heap. Assumes HT exists and is large enough + for (int i = 0; i < arrLongsIn; i++ ) { + final long hashIn = arr[i]; + if (continueCondition(thetaLong_, hashIn)) { continue; } + hashInsertOnly(hashTable_, lgArrLongs_, hashIn); + tmpCnt++; + } + } + assert tmpCnt == count : "Intersection Count Check: got: " + tmpCnt + ", expected: " + count; + } + + private void moveDataToTgt(final Sketch sketch) { + final int count = sketch.getRetainedEntries(); + int tmpCnt = 0; + if (wseg_ != null) { //Off Heap puts directly into mem + final int preBytes = CONST_PREAMBLE_LONGS << 3; + final int lgArrLongs = lgArrLongs_; + final long thetaLong = thetaLong_; + final HashIterator it = sketch.iterator(); + while (it.next()) { + final long hash = it.get(); + if (continueCondition(thetaLong, hash)) { continue; } + hashInsertOnlyMemory(wseg_, lgArrLongs, hash, preBytes); + tmpCnt++; + } + } else { //On Heap. 
Assumes HT exists and is large enough + final HashIterator it = sketch.iterator(); + while (it.next()) { + final long hash = it.get(); + if (continueCondition(thetaLong_, hash)) { continue; } + hashInsertOnly(hashTable_, lgArrLongs_, hash); + tmpCnt++; + } + } + assert tmpCnt == count : "Intersection Count Check: got: " + tmpCnt + ", expected: " + count; + } + + private void hardReset() { + resetCommon(); + if (wseg_ != null) { + insertCurCount(wseg_, -1); //Universal Set + clearEmpty(wseg_); //false + } + curCount_ = -1; //Universal Set + empty_ = false; + } + + private void resetToEmpty() { + resetCommon(); + if (wseg_ != null) { + insertCurCount(wseg_, 0); + setEmpty(wseg_); //true + } + curCount_ = 0; + empty_ = true; + } + + private void resetCommon() { + if (wseg_ != null) { + if (readOnly_) { throw new SketchesReadOnlyException(); } + wseg_.asSlice(CONST_PREAMBLE_LONGS << 3, 8 << ThetaUtil.MIN_LG_ARR_LONGS).fill((byte)0); + insertLgArrLongs(wseg_, ThetaUtil.MIN_LG_ARR_LONGS); + insertThetaLong(wseg_, Long.MAX_VALUE); + } + lgArrLongs_ = ThetaUtil.MIN_LG_ARR_LONGS; + thetaLong_ = Long.MAX_VALUE; + hashTable_ = null; + } +} diff --git a/src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java b/src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java new file mode 100644 index 000000000..96ccb41e8 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.wholeBytesToHoldBits; + +import java.lang.foreign.MemorySegment; + +/* + * This is to uncompress serial version 4 sketch incrementally + */ +class MemoryCompactCompressedHashIterator implements HashIterator { + private MemorySegment seg; + private int offset; + private int entryBits; + private int numEntries; + private int index; + private long previous; + private int offsetBits; + private long[] buffer; + private byte[] bytes; + private boolean isBlockMode; + private boolean isFirstUnpack1; + + MemoryCompactCompressedHashIterator( + final MemorySegment srcSeg, + final int offset, + final int entryBits, + final int numEntries + ) { + this.seg = srcSeg; + this.offset = offset; + this.entryBits = entryBits; + this.numEntries = numEntries; + index = -1; + previous = 0; + offsetBits = 0; + buffer = new long[8]; + bytes = new byte[entryBits]; + isBlockMode = numEntries >= 8; + isFirstUnpack1 = true; + } + + @Override + public long get() { + return buffer[index & 7]; + } + + @Override + public boolean next() { + if (++index == numEntries) { return false; } + if (isBlockMode) { + if ((index & 7) == 0) { + if (numEntries - index >= 8) { + unpack8(); + } else { + isBlockMode = false; + unpack1(); + } + } + } else { + unpack1(); + } + return true; + } + + private void unpack1() { + if (isFirstUnpack1) { + MemorySegment.copy(seg, JAVA_BYTE, offset, bytes, 0, wholeBytesToHoldBits((numEntries - 
index) * entryBits)); + offset = 0; + isFirstUnpack1 = false; + } + final int i = index & 7; + BitPacking.unpackBits(buffer, i, entryBits, bytes, offset, offsetBits); + offset += (offsetBits + entryBits) >>> 3; + offsetBits = (offsetBits + entryBits) & 7; + buffer[i] += previous; + previous = buffer[i]; + } + + private void unpack8() { + MemorySegment.copy(seg, JAVA_BYTE, offset, bytes, 0, entryBits); + BitPacking.unpackBitsBlock8(buffer, 0, bytes, 0, entryBits); + offset += entryBits; + for (int i = 0; i < 8; i++) { + buffer[i] += previous; + previous = buffer[i]; + } + } +} diff --git a/src/main/java/org/apache/datasketches/theta2/MemoryHashIterator.java b/src/main/java/org/apache/datasketches/theta2/MemoryHashIterator.java new file mode 100644 index 000000000..eb2137afd --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/MemoryHashIterator.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.datasketches.theta2;
+
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+
+import java.lang.foreign.MemorySegment;
+
+/**
+ * Iterates over the array of longs that follows the preamble of a sketch image in a MemorySegment,
+ * returning only the values that are non-zero and less than the given thetaLong.
+ *
+ * @author Lee Rhodes
+ */
+class MemoryHashIterator implements HashIterator {
+  private final MemorySegment seg;
+  private final int arrLongs;
+  private final long thetaLong;
+  private final long offsetBytes; //byte offset of the first long after the preamble
+  private int index;
+  private long hash;
+
+  /**
+   * Constructs the iterator. The start of the array is derived from the preLongs field of srcSeg.
+   * @param srcSeg the MemorySegment holding the sketch image
+   * @param arrLongs the number of longs to scan
+   * @param thetaLong only values strictly less than this are returned
+   */
+  MemoryHashIterator(final MemorySegment srcSeg, final int arrLongs, final long thetaLong) {
+    this.seg = srcSeg;
+    this.arrLongs = arrLongs;
+    this.thetaLong = thetaLong;
+    offsetBytes = PreambleUtil.extractPreLongs(srcSeg) << 3;
+    index = -1;
+    hash = 0;
+  }
+
+  @Override
+  public long get() {
+    return hash;
+  }
+
+  @Override
+  public boolean next() {
+    while (++index < arrLongs) {
+      //widen before shifting so that the byte offset is computed in long arithmetic
+      hash = seg.get(JAVA_LONG_UNALIGNED, offsetBytes + ((long) index << 3));
+      if ((hash != 0) && (hash < thetaLong)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java b/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java
new file mode 100644
index 000000000..55035a456
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java
@@ -0,0 +1,533 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_FLOAT; +import static java.lang.foreign.ValueLayout.JAVA_INT; +import static java.lang.foreign.ValueLayout.JAVA_LONG; +import static java.lang.foreign.ValueLayout.JAVA_SHORT; +import static org.apache.datasketches.common.Util.LS; +import static org.apache.datasketches.common.Util.zeroPad; + +import java.lang.foreign.MemorySegment; +import java.nio.ByteOrder; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; +import org.apache.datasketches.thetacommon.ThetaUtil; + +//@formatter:off + +/** + * This class defines the preamble data structure and provides basic utilities for some of the key + * fields. + * + *

The intent of the design of this class was to isolate the detailed knowledge of the bit and + * byte layout of the serialized form of the sketches derived from the Sketch class into one place. + * This allows the possibility of the introduction of different serialization + * schemes with minimal impact on the rest of the library.

+ * + *

+ * MAP: Low significance bytes of this long data structure are on the right. However, the + * multi-byte integers (int and long) are stored in native byte order. The + * byte values are treated as unsigned.

+ * + *

An empty CompactSketch only requires 8 bytes. + * Flags: notSI, Ordered*, Compact, Empty*, ReadOnly, LE. + * (*) Earlier versions did not set these.

+ * + *
+ * Long || Start Byte Adr:
+ * Adr:
+ *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
+ *  0   ||    Seed Hash    | Flags  |        |        | FamID  | SerVer |     PreLongs = 1   |
+ * 
+ * + *

A SingleItemSketch (extends CompactSketch) requires an 8 byte preamble plus a single + * hash item of 8 bytes. Flags: SingleItem*, Ordered, Compact, notEmpty, ReadOnly, LE. + * (*) Earlier versions did not set these.

+ * + *
+ * Long || Start Byte Adr:
+ * Adr:
+ *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
+ *  0   ||    Seed Hash    | Flags  |        |        | FamID  | SerVer |     PreLongs = 1   |
+ *
+ *      ||   15   |   14   |   13   |   12   |   11   |   10   |    9   |     8              |
+ *  1   ||---------------------------Single long hash----------------------------------------|
+ * 
+ * + *

An exact (non-estimating) CompactSketch requires 16 bytes of preamble plus a compact array of + * longs.

+ * + *
+ * Long || Start Byte Adr:
+ * Adr:
+ *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
+ *  0   ||    Seed Hash    | Flags  |        |        | FamID  | SerVer |     PreLongs = 2   |
+ *
+ *      ||   15   |   14   |   13   |   12   |   11   |   10   |    9   |     8              |
+ *  1   ||-----------------p-----------------|----------Retained Entries Count---------------|
+ *
+ *      ||   23   |   22   |   21   |   20   |   19   |   18   |   17   |    16              |
+ *  2   ||----------------------Start of Compact Long Array----------------------------------|
+ * 
+ * + *

An estimating CompactSketch requires 24 bytes of preamble plus a compact array of longs.

+ * + *
+ * Long || Start Byte Adr:
+ * Adr:
+ *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
+ *  0   ||    Seed Hash    | Flags  |        |        | FamID  | SerVer |     PreLongs = 3   |
+ *
+ *      ||   15   |   14   |   13   |   12   |   11   |   10   |    9   |     8              |
+ *  1   ||-----------------p-----------------|----------Retained Entries Count---------------|
+ *
+ *      ||   23   |   22   |   21   |   20   |   19   |   18   |   17   |    16              |
+ *  2   ||------------------------------THETA_LONG-------------------------------------------|
+ *
+ *      ||   31   |   30   |   29   |   28   |   27   |   26   |   25   |    24              |
+ *  3   ||----------------------Start of Compact Long Array----------------------------------|
+ *  
+ * + *

The UpdateSketch and AlphaSketch require 24 bytes of preamble followed by a non-compact + * array of longs representing a hash table.

+ * + *

The following table applies to both the Theta UpdateSketch and the Alpha Sketch

+ *
+ * Long || Start Byte Adr:
+ * Adr:
+ *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
+ *  0   ||    Seed Hash    | Flags  |  LgArr |  lgNom | FamID  | SerVer | RF, PreLongs = 3   |
+ *
+ *      ||   15   |   14   |   13   |   12   |   11   |   10   |    9   |     8              |
+ *  1   ||-----------------p-----------------|----------Retained Entries Count---------------|
+ *
+ *      ||   23   |   22   |   21   |   20   |   19   |   18   |   17   |    16              |
+ *  2   ||------------------------------THETA_LONG-------------------------------------------|
+ *
+ *      ||   31   |   30   |   29   |   28   |   27   |   26   |   25   |    24              |
+ *  3   ||----------------------Start of Hash Table of longs---------------------------------|
+ *  
+ * + *

Union objects require 32 bytes of preamble plus a non-compact array of longs representing a + * hash table.

+ * + *
+ * Long || Start Byte Adr:
+ * Adr:
+ *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
+ *  0   ||    Seed Hash    | Flags  |  LgArr |  lgNom | FamID  | SerVer | RF, PreLongs = 4   |
+ *
+ *      ||   15   |   14   |   13   |   12   |   11   |   10   |    9   |     8              |
+ *  1   ||-----------------p-----------------|----------Retained Entries Count---------------|
+ *
+ *      ||   23   |   22   |   21   |   20   |   19   |   18   |   17   |    16              |
+ *  2   ||------------------------------THETA_LONG-------------------------------------------|
+ *
+ *      ||   31   |   30   |   29   |   28   |   27   |   26   |   25   |    24              |
+ *  3   ||---------------------------UNION THETA LONG----------------------------------------|
+ *
+ *      ||   39   |   38   |   37   |   36   |   35   |   34   |   33   |    32              |
+ *  4   ||----------------------Start of Hash Table of longs---------------------------------|
+ *
+ *  
+ *
+ * @author Lee Rhodes
+ */
+final class PreambleUtil {
+
+  private PreambleUtil() {}
+
+  // ###### DO NOT MESS WITH THIS FROM HERE ...
+  // Preamble byte Addresses
+  static final int PREAMBLE_LONGS_BYTE        = 0; //lower 6 bits in byte.
+  static final int LG_RESIZE_FACTOR_BIT       = 6; //upper 2 bits in byte. Not used by compact, direct
+  static final int SER_VER_BYTE               = 1;
+  static final int FAMILY_BYTE                = 2; //SerVer1,2 was SKETCH_TYPE_BYTE
+  static final int LG_NOM_LONGS_BYTE          = 3; //not used by compact
+  static final int LG_ARR_LONGS_BYTE          = 4; //not used by compact
+  static final int FLAGS_BYTE                 = 5;
+  static final int SEED_HASH_SHORT            = 6; //byte 6,7
+  static final int RETAINED_ENTRIES_INT       = 8; //8 byte aligned
+  static final int P_FLOAT                    = 12; //4 byte aligned, not used by compact
+  static final int THETA_LONG                 = 16; //8-byte aligned
+  static final int UNION_THETA_LONG           = 24; //8-byte aligned, only used by Union
+
+  // flag bit masks
+  static final int BIG_ENDIAN_FLAG_MASK = 1; //SerVer 1, 2, 3
+  static final int READ_ONLY_FLAG_MASK  = 2; //Set but not read. Reserved. SerVer 1, 2, 3
+  static final int EMPTY_FLAG_MASK      = 4; //SerVer 2, 3
+  static final int COMPACT_FLAG_MASK    = 8; //SerVer 2 was NO_REBUILD_FLAG_MASK, 3
+  static final int ORDERED_FLAG_MASK    = 16;//SerVer 2 was UNORDERED_FLAG_MASK, 3
+  static final int SINGLEITEM_FLAG_MASK = 32;//SerVer 3
+  //The last 2 bits of the flags byte are reserved and assumed to be zero, for now.
+
+  //Backward compatibility: SerVer1 preamble always 3 longs, SerVer2 preamble: 1, 2, 3 longs
+  //               SKETCH_TYPE_BYTE             2  //SerVer1, SerVer2
+  //  V1, V2 types:  Alpha = 1, QuickSelect = 2, SetSketch = 3; V3 only: Buffered QS = 4
+  static final int LG_RESIZE_RATIO_BYTE_V1    = 5; //used by SerVer 1
+  static final int FLAGS_BYTE_V1              = 6; //used by SerVer 1
+
+  //Other constants
+  static final int SER_VER                    = 3;
+
+  // serial version 4 compressed ordered sketch, not empty, not single item
+  static final int ENTRY_BITS_BYTE_V4         = 3; // number of bits packed in deltas between hashes
+  static final int NUM_ENTRIES_BYTES_BYTE_V4  = 4; // number of bytes used for the number of entries
+  static final int THETA_LONG_V4              = 8; //8-byte aligned
+
+  static final boolean NATIVE_ORDER_IS_BIG_ENDIAN =
+      (ByteOrder.nativeOrder() == ByteOrder.BIG_ENDIAN);
+
+  // Unaligned variants of the multi-byte layouts, used for every multi-byte preamble access.
+  // A heap MemorySegment that wraps a byte[] (as created by preambleToString(byte[])) has a maximum
+  // alignment of 1, so accessing it with the aligned JAVA_SHORT, JAVA_INT, JAVA_FLOAT or JAVA_LONG
+  // layouts throws an IllegalArgumentException. The unaligned layouts work for all segments.
+  private static final java.lang.foreign.ValueLayout.OfShort SHORT_UNALIGNED =
+      JAVA_SHORT.withByteAlignment(1);
+  private static final java.lang.foreign.ValueLayout.OfInt INT_UNALIGNED =
+      JAVA_INT.withByteAlignment(1);
+  private static final java.lang.foreign.ValueLayout.OfFloat FLOAT_UNALIGNED =
+      JAVA_FLOAT.withByteAlignment(1);
+  private static final java.lang.foreign.ValueLayout.OfLong LONG_UNALIGNED =
+      JAVA_LONG.withByteAlignment(1);
+
+  /**
+   * Computes the number of bytes required for an updatable sketch using a hash-table cache.
+   * This does not apply for compact sketches.
+   * @param lgArrLongs log2(current hash-table size)
+   * @param preambleLongs current preamble size
+   * @return the size in bytes
+   */
+  static final int getMemBytes(final int lgArrLongs, final int preambleLongs) {
+    return (8 << lgArrLongs) + (preambleLongs << 3);
+  }
+
+  // STRINGS
+
+  /**
+   * Returns a human readable string summary of the preamble state of the given byte array.
+   * Used primarily in testing.
+   *
+   * @param byteArr the given byte array.
+   * @return the summary preamble string.
+   */
+  static String preambleToString(final byte[] byteArr) {
+    final MemorySegment seg = MemorySegment.ofArray(byteArr);
+    return preambleToString(seg);
+  }
+
+  /**
+   * Returns a human readable string summary of the preamble state of the given MemorySegment.
+   * Note: other than making sure that the given MemorySegment size is large
+   * enough for just the preamble, this does not do much value checking of the contents of the
+   * preamble as this is primarily a tool for debugging the preamble visually.
+   *
+   * @param seg the given MemorySegment.
+   * @return the summary preamble string.
+   * @throws SketchesArgumentException if the segment is too small to hold the preamble.
+   */
+  static String preambleToString(final MemorySegment seg) {
+    final int preLongs = getAndCheckPreLongs(seg);
+    final int rfId = extractLgResizeFactor(seg);
+    final ResizeFactor rf = ResizeFactor.getRF(rfId);
+    final int serVer = extractSerVer(seg);
+    final int familyId = extractFamilyID(seg);
+    final Family family = Family.idToFamily(familyId);
+    final int lgNomLongs = extractLgNomLongs(seg);
+    final int lgArrLongs = extractLgArrLongs(seg);
+
+    //Flags
+    final int flags = extractFlags(seg);
+    final String flagsStr = (flags) + ", 0x" + (Integer.toHexString(flags)) + ", "
+        + zeroPad(Integer.toBinaryString(flags), 8);
+    final String nativeOrder = ByteOrder.nativeOrder().toString();
+    final boolean bigEndian = (flags & BIG_ENDIAN_FLAG_MASK) > 0;
+    final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0;
+    final boolean empty = (flags & EMPTY_FLAG_MASK) > 0;
+    final boolean compact = (flags & COMPACT_FLAG_MASK) > 0;
+    final boolean ordered = (flags & ORDERED_FLAG_MASK) > 0;
+    final boolean singleItem = (flags & SINGLEITEM_FLAG_MASK) > 0; //!empty && (preLongs == 1);
+
+    final int seedHash = extractSeedHash(seg);
+
+    //assumes preLongs == 1; empty or singleItem
+    int curCount = singleItem ? 1 : 0;
+    float p = (float) 1.0;            //preLongs 1 or 2
+    long thetaLong = Long.MAX_VALUE;  //preLongs 1 or 2
+    long thetaULong = thetaLong;      //preLongs 1, 2 or 3
+
+    if (preLongs == 2) { //exact (non-estimating) CompactSketch
+      curCount = extractCurCount(seg);
+      p = extractP(seg);
+    }
+    else if (preLongs == 3) { //Update Sketch
+      curCount = extractCurCount(seg);
+      p = extractP(seg);
+      thetaLong = extractThetaLong(seg);
+      thetaULong = thetaLong;
+    }
+    else if (preLongs == 4) { //Union
+      curCount = extractCurCount(seg);
+      p = extractP(seg);
+      thetaLong = extractThetaLong(seg);
+      thetaULong = extractUnionThetaLong(seg);
+    }
+    //else the same as an empty sketch or singleItem
+
+    final double thetaDbl = thetaLong / Util.LONG_MAX_VALUE_AS_DOUBLE;
+    final String thetaHex = zeroPad(Long.toHexString(thetaLong), 16);
+    final double thetaUDbl = thetaULong / Util.LONG_MAX_VALUE_AS_DOUBLE;
+    final String thetaUHex = zeroPad(Long.toHexString(thetaULong), 16);
+
+    final StringBuilder sb = new StringBuilder();
+    sb.append(LS);
+    sb.append("### SKETCH PREAMBLE SUMMARY:").append(LS);
+    sb.append("Native Byte Order : ").append(nativeOrder).append(LS);
+    sb.append("Byte 0: Preamble Longs : ").append(preLongs).append(LS);
+    sb.append("Byte 0: ResizeFactor : ").append(rfId + ", " + rf.toString()).append(LS);
+    sb.append("Byte 1: Serialization Version: ").append(serVer).append(LS);
+    sb.append("Byte 2: Family : ").append(familyId + ", " + family.toString()).append(LS);
+    sb.append("Byte 3: LgNomLongs : ").append(lgNomLongs).append(LS);
+    sb.append("Byte 4: LgArrLongs : ").append(lgArrLongs).append(LS);
+    sb.append("Byte 5: Flags Field : ").append(flagsStr).append(LS);
+    sb.append(" Bit Flag Name : State:").append(LS);
+    sb.append(" 0 BIG_ENDIAN_STORAGE : ").append(bigEndian).append(LS);
+    sb.append(" 1 READ_ONLY : ").append(readOnly).append(LS);
+    sb.append(" 2 EMPTY : ").append(empty).append(LS);
+    sb.append(" 3 COMPACT : ").append(compact).append(LS);
+    sb.append(" 4 ORDERED : ").append(ordered).append(LS);
+    sb.append(" 5 SINGLE_ITEM : ").append(singleItem).append(LS);
+    sb.append("Bytes 6-7 : Seed Hash Hex : ").append(Integer.toHexString(seedHash)).append(LS);
+    if (preLongs == 1) {
+      sb.append(" --ABSENT FIELDS, ASSUMED:").append(LS);
+      sb.append("Bytes 8-11 : CurrentCount : ").append(curCount).append(LS);
+      sb.append("Bytes 12-15: P : ").append(p).append(LS);
+      sb.append("Bytes 16-23: Theta (double) : ").append(thetaDbl).append(LS);
+      sb.append(" Theta (long) : ").append(thetaLong).append(LS);
+      sb.append(" Theta (long,hex) : ").append(thetaHex).append(LS);
+    }
+    else if (preLongs == 2) {
+      sb.append("Bytes 8-11 : CurrentCount : ").append(curCount).append(LS);
+      sb.append("Bytes 12-15: P : ").append(p).append(LS);
+      sb.append(" --ABSENT, ASSUMED:").append(LS);
+      sb.append("Bytes 16-23: Theta (double) : ").append(thetaDbl).append(LS);
+      sb.append(" Theta (long) : ").append(thetaLong).append(LS);
+      sb.append(" Theta (long,hex) : ").append(thetaHex).append(LS);
+    }
+    else if (preLongs == 3) {
+      sb.append("Bytes 8-11 : CurrentCount : ").append(curCount).append(LS);
+      sb.append("Bytes 12-15: P : ").append(p).append(LS);
+      sb.append("Bytes 16-23: Theta (double) : ").append(thetaDbl).append(LS);
+      sb.append(" Theta (long) : ").append(thetaLong).append(LS);
+      sb.append(" Theta (long,hex) : ").append(thetaHex).append(LS);
+    }
+    else { //preLongs == 4
+      sb.append("Bytes 8-11 : CurrentCount : ").append(curCount).append(LS);
+      sb.append("Bytes 12-15: P : ").append(p).append(LS);
+      sb.append("Bytes 16-23: Theta (double) : ").append(thetaDbl).append(LS);
+      sb.append(" Theta (long) : ").append(thetaLong).append(LS);
+      sb.append(" Theta (long,hex) : ").append(thetaHex).append(LS);
+      //UNION_THETA_LONG starts at byte 24 and is 8 bytes long
+      sb.append("Bytes 24-31: ThetaU (double) : ").append(thetaUDbl).append(LS);
+      sb.append(" ThetaU (long) : ").append(thetaULong).append(LS);
+      sb.append(" ThetaU (long,hex): ").append(thetaUHex).append(LS);
+    }
+    sb.append( "Preamble Bytes : ").append(preLongs * 8).append(LS);
+    sb.append( "Data Bytes : ").append(curCount * 8).append(LS);
+    sb.append( "TOTAL Sketch Bytes : ").append((preLongs + curCount) * 8).append(LS);
+    sb.append( "TOTAL Capacity Bytes : ").append(seg.byteSize()).append(LS);
+    sb.append("### END SKETCH PREAMBLE SUMMARY").append(LS);
+    return sb.toString();
+  }
+
+  //@formatter:on
+
+  // EXTRACTORS: each reads one preamble field at its fixed byte offset.
+  // Single-byte and short fields are masked so that they are returned as unsigned values.
+
+  static int extractPreLongs(final MemorySegment seg) {
+    return seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F;
+  }
+
+  static int extractLgResizeFactor(final MemorySegment seg) {
+    return (seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) >>> LG_RESIZE_FACTOR_BIT) & 0X3;
+  }
+
+  static int extractLgResizeRatioV1(final MemorySegment seg) {
+    return seg.get(JAVA_BYTE, LG_RESIZE_RATIO_BYTE_V1) & 0X3;
+  }
+
+  static int extractSerVer(final MemorySegment seg) {
+    return seg.get(JAVA_BYTE, SER_VER_BYTE) & 0XFF;
+  }
+
+  static int extractFamilyID(final MemorySegment seg) {
+    return seg.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF;
+  }
+
+  static int extractLgNomLongs(final MemorySegment seg) {
+    return seg.get(JAVA_BYTE, LG_NOM_LONGS_BYTE) & 0XFF;
+  }
+
+  static int extractLgArrLongs(final MemorySegment seg) {
+    return seg.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF;
+  }
+
+  static int extractFlags(final MemorySegment seg) {
+    return seg.get(JAVA_BYTE, FLAGS_BYTE) & 0XFF;
+  }
+
+  static int extractFlagsV1(final MemorySegment seg) {
+    return seg.get(JAVA_BYTE, FLAGS_BYTE_V1) & 0XFF;
+  }
+
+  static int extractSeedHash(final MemorySegment seg) {
+    return seg.get(SHORT_UNALIGNED, SEED_HASH_SHORT) & 0XFFFF;
+  }
+
+  static int extractCurCount(final MemorySegment seg) {
+    return seg.get(INT_UNALIGNED, RETAINED_ENTRIES_INT);
+  }
+
+  static float extractP(final MemorySegment seg) {
+    return seg.get(FLOAT_UNALIGNED, P_FLOAT);
+  }
+
+  static long extractThetaLong(final MemorySegment seg) {
+    return seg.get(LONG_UNALIGNED, THETA_LONG);
+  }
+
+  static long extractUnionThetaLong(final MemorySegment seg) {
+    return seg.get(LONG_UNALIGNED, UNION_THETA_LONG);
+  }
+
+  static int extractEntryBitsV4(final MemorySegment seg) {
+    return seg.get(JAVA_BYTE, ENTRY_BITS_BYTE_V4) & 0XFF;
+  }
+
+  static int extractNumEntriesBytesV4(final MemorySegment seg) {
+    return seg.get(JAVA_BYTE, NUM_ENTRIES_BYTES_BYTE_V4) & 0XFF;
+  }
+
+  static long extractThetaLongV4(final MemorySegment seg) {
+    return seg.get(LONG_UNALIGNED, THETA_LONG_V4);
+  }
+
+  // INSERTERS: each writes one preamble field at its fixed byte offset.
+
+  /**
+   * Sets PreLongs in the low 6 bits and sets LgRF in the upper 2 bits = 0.
+   * @param seg the target MemorySegment
+   * @param preLongs the given number of preamble longs
+   */
+  static void insertPreLongs(final MemorySegment seg, final int preLongs) {
+    seg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) (preLongs & 0X3F));
+  }
+
+  /**
+   * Sets the top 2 lgRF bits and does not affect the lower 6 bits (PreLongs).
+   * To work properly, this should be called after insertPreLongs().
+   * @param seg the target MemorySegment
+   * @param rf the given lgRF bits
+   */
+  static void insertLgResizeFactor(final MemorySegment seg, final int rf) {
+    final int curByte = seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0xFF;
+    final int shift = LG_RESIZE_FACTOR_BIT; // shift in bits
+    final int mask = 3;
+    final byte newByte = (byte) (((rf & mask) << shift) | (~(mask << shift) & curByte));
+    seg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, newByte);
+  }
+
+  static void insertSerVer(final MemorySegment seg, final int serVer) {
+    seg.set(JAVA_BYTE, SER_VER_BYTE, (byte) serVer);
+  }
+
+  static void insertFamilyID(final MemorySegment seg, final int famId) {
+    seg.set(JAVA_BYTE, FAMILY_BYTE, (byte) famId);
+  }
+
+  static void insertLgNomLongs(final MemorySegment seg, final int lgNomLongs) {
+    seg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte) lgNomLongs);
+  }
+
+  static void insertLgArrLongs(final MemorySegment seg, final int lgArrLongs) {
+    seg.set(JAVA_BYTE, LG_ARR_LONGS_BYTE, (byte) lgArrLongs);
+  }
+
+  static void insertFlags(final MemorySegment seg, final int flags) {
+    seg.set(JAVA_BYTE, FLAGS_BYTE, (byte) flags);
+  }
+
+  static void insertSeedHash(final MemorySegment seg, final int seedHash) {
+    seg.set(SHORT_UNALIGNED, SEED_HASH_SHORT, (short) seedHash);
+  }
+
+  static void insertCurCount(final MemorySegment seg, final int curCount) {
+    seg.set(INT_UNALIGNED, RETAINED_ENTRIES_INT, curCount);
+  }
+
+  static void insertP(final MemorySegment seg, final float p) {
+    seg.set(FLOAT_UNALIGNED, P_FLOAT, p);
+  }
+
+  static void insertThetaLong(final MemorySegment seg, final long thetaLong) {
+    seg.set(LONG_UNALIGNED, THETA_LONG, thetaLong);
+  }
+
+  static void insertUnionThetaLong(final MemorySegment seg, final long unionThetaLong) {
+    seg.set(LONG_UNALIGNED, UNION_THETA_LONG, unionThetaLong);
+  }
+
+  /**
+   * Sets the empty flag bit and leaves the other flag bits unchanged.
+   * @param seg the target MemorySegment
+   */
+  static void setEmpty(final MemorySegment seg) {
+    int flags = seg.get(JAVA_BYTE, FLAGS_BYTE) & 0XFF;
+    flags |= EMPTY_FLAG_MASK;
+    seg.set(JAVA_BYTE, FLAGS_BYTE, (byte) flags);
+  }
+
+  /**
+   * Clears the empty flag bit and leaves the other flag bits unchanged.
+   * @param seg the target MemorySegment
+   */
+  static void clearEmpty(final MemorySegment seg) {
+    int flags = seg.get(JAVA_BYTE, FLAGS_BYTE) & 0XFF;
+    flags &= ~EMPTY_FLAG_MASK;
+    seg.set(JAVA_BYTE, FLAGS_BYTE, (byte) flags);
+  }
+
+  static boolean isEmptyFlag(final MemorySegment seg) {
+    return ((extractFlags(seg) & EMPTY_FLAG_MASK) > 0);
+  }
+
+  /**
+   * Checks the MemorySegment for capacity to hold the preamble and returns the extracted preLongs.
+   * The capacity must be at least 8 bytes and at least 8 * preLongs bytes.
+   * @param seg the given MemorySegment
+   * @return the extracted prelongs value.
+   * @throws SketchesArgumentException if the capacity of the segment is too small.
+   */
+  static int getAndCheckPreLongs(final MemorySegment seg) {
+    final long cap = seg.byteSize();
+    if (cap < 8) {
+      throwNotBigEnough(cap, 8);
+    }
+    final int preLongs = extractPreLongs(seg);
+    final int required = Math.max(preLongs << 3, 8);
+    if (cap < required) {
+      throwNotBigEnough(cap, required);
+    }
+    return preLongs;
+  }
+
+  /**
+   * Extracts the seed hash from the given MemorySegment and checks it against the hash computed
+   * from the given seed.
+   * @param seg the given MemorySegment
+   * @param seed the seed whose computed hash is compared with the stored seed hash
+   * @return the seed hash extracted from the segment
+   */
+  static final short checkMemorySeedHash(final MemorySegment seg, final long seed) {
+    final short seedHashMem = (short) extractSeedHash(seg);
+    ThetaUtil.checkSeedHashes(seedHashMem, ThetaUtil.computeSeedHash(seed)); //throws if bad seedHash
+    return seedHashMem;
+  }
+
+  private static void throwNotBigEnough(final long cap, final int required) {
+    throw new SketchesArgumentException(
+        "Possible Corruption: Size of byte array or Memory not large enough: Size: " + cap
+        + ", Required: " + required);
+  }
+
+  /**
+   * Returns the smallest number of whole bytes that can hold the given number of bits.
+   * @param bits the given number of bits
+   * @return the number of bytes, i.e., bits / 8 rounded up
+   */
+  static int wholeBytesToHoldBits(final int bits) {
+    return (bits >>> 3) + ((bits & 7) > 0 ? 1 : 0);
+  }
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/Rebuilder.java b/src/main/java/org/apache/datasketches/theta2/Rebuilder.java
new file mode 100644
index 000000000..5e92447f9
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/Rebuilder.java
@@ -0,0 +1,175 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static org.apache.datasketches.theta2.PreambleUtil.LG_ARR_LONGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount; +import static org.apache.datasketches.theta2.PreambleUtil.extractLgArrLongs; +import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong; +import static org.apache.datasketches.theta2.PreambleUtil.insertCurCount; +import static org.apache.datasketches.theta2.PreambleUtil.insertLgArrLongs; +import static org.apache.datasketches.theta2.PreambleUtil.insertThetaLong; +import static org.apache.datasketches.thetacommon.QuickSelect.selectExcludingZeros; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.Util; +import org.apache.datasketches.thetacommon.HashOperations; + +/** + * This class performs resize, rebuild and move operations where the input and output are Theta sketch images in MemorySegments. + * + *

NOTE: These operations copy data from the input MemorySegment into local arrays, perform the required operations on the + * arrays, and then copy the result to the destination MemorySegment. Attempting to perform these operations directly on the + * MemorySegments would be slower due to MemorySegment internal checks. Meanwhile, the bulk copies performed by the MemorySegments are + * vectorized at the machine level and are quite fast. Measurements reveal that this is a good tradeoff.

+ *
+ * @author Lee Rhodes
+ */
+final class Rebuilder {
+
+  private Rebuilder() {}
+
+  /**
+   * Rebuild the hashTable in the given MemorySegment at its current size. Changes theta and thus count.
+   * This assumes a MemorySegment preamble of standard form with correct values of curCount and thetaLong.
+   * ThetaLong and curCount will change.
+   * Afterwards, caller must update local class members curCount and thetaLong from MemorySegment.
+   *
+   * @param seg the given MemorySegment
+   * @param preambleLongs size of preamble in longs
+   * @param lgNomLongs the log_base2 of k, the configuration parameter of the sketch
+   */
+  static final void quickSelectAndRebuild(final MemorySegment seg, final int preambleLongs, final int lgNomLongs) {
+
+    //Copy data from input segment into local buffer array for QS algorithm
+    final int lgArrLongs = extractLgArrLongs(seg);
+    final int arrLongs = 1 << lgArrLongs;
+    final long[] tmpArr = new long[arrLongs];
+    final int preBytes = preambleLongs << 3;
+    MemorySegment.copy(seg, JAVA_LONG_UNALIGNED, preBytes, tmpArr, 0, arrLongs);
+
+    //Do the QuickSelect on a tmp arr to create new thetaLong
+    final int pivot = (1 << lgNomLongs) + 1; // (K+1) pivot for QS
+    final long newThetaLong = selectExcludingZeros(tmpArr, extractCurCount(seg), pivot);
+    insertThetaLong(seg, newThetaLong); //UPDATE thetaLong
+
+    //Rebuild to clean up dirty data, update count
+    final long[] tgtArr = new long[arrLongs];
+    final int newCurCount =
+        HashOperations.hashArrayInsert(tmpArr, tgtArr, lgArrLongs, newThetaLong);
+    insertCurCount(seg, newCurCount); //UPDATE curCount
+
+    //put the rebuilt array back into MemorySegment
+    MemorySegment.copy(tgtArr, 0, seg, JAVA_LONG_UNALIGNED, preBytes, arrLongs);
+  }
+
+  /**
+   * Moves me (the entire updatable sketch) to a new larger MemorySegment location and rebuilds the hash table.
+   * This assumes a MemorySegment preamble of standard form with the correct value of thetaLong.
+   * Afterwards, the caller must update the local MemorySegment reference, lgArrLongs
+   * and hashTableThreshold from dstSeg and free the source MemorySegment.
+   *
+   * @param srcSeg the source MemorySegment
+   * @param preambleLongs size of preamble in longs
+   * @param srcLgArrLongs size (log_base2) of source hash table
+   * @param dstSeg the destination MemorySegment, which may be garbage
+   * @param dstLgArrLongs the destination hash table target size
+   * @param thetaLong theta as a long
+   */
+  static final void moveAndResize(final MemorySegment srcSeg, final int preambleLongs,
+      final int srcLgArrLongs, final MemorySegment dstSeg, final int dstLgArrLongs, final long thetaLong) {
+
+    //Move Preamble to destination MemorySegment
+    final int preBytes = preambleLongs << 3;
+    MemorySegment.copy(srcSeg, 0, dstSeg, 0, preBytes);
+
+    //Bulk copy source Hash Table to local buffer array
+    final int srcHTLen = 1 << srcLgArrLongs;
+    final long[] srcHTArr = new long[srcHTLen];
+    MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preBytes, srcHTArr, 0, srcHTLen);
+
+    //Create destination buffer
+    final int dstHTLen = 1 << dstLgArrLongs;
+    final long[] dstHTArr = new long[dstHTLen];
+
+    //Rebuild hash table in destination buffer
+    HashOperations.hashArrayInsert(srcHTArr, dstHTArr, dstLgArrLongs, thetaLong);
+
+    //Bulk copy to destination MemorySegment
+    MemorySegment.copy(dstHTArr, 0, dstSeg, JAVA_LONG_UNALIGNED, preBytes, dstHTLen);
+    dstSeg.set(JAVA_BYTE, LG_ARR_LONGS_BYTE, (byte)dstLgArrLongs); //update lgArrLongs in dstSeg
+  }
+
+  /**
+   * Resizes existing hash array into a larger one within a single MemorySegment, assuming enough space.
+   * This assumes a preamble of standard form with the correct value of thetaLong.
+   * The lgArrLongs will change.
+   * Afterwards, the caller must update the caller's local copies of lgArrLongs and hashTableThreshold
+   * from the given MemorySegment. This method does not return a value.
+   *
+   * @param seg the source and destination MemorySegment
+   * @param preambleLongs the size of the preamble in longs
+   * @param srcLgArrLongs the size of the source hash table
+   * @param tgtLgArrLongs the LgArrLongs value for the new hash table
+   */
+  static final void resize(final MemorySegment seg, final int preambleLongs,
+      final int srcLgArrLongs, final int tgtLgArrLongs) {
+
+    //Preamble stays in place
+    final int preBytes = preambleLongs << 3;
+
+    //Bulk copy source to on-heap buffer
+    final int srcHTLen = 1 << srcLgArrLongs; //current value
+    final long[] srcHTArr = new long[srcHTLen]; //on-heap src buffer
+    MemorySegment.copy(seg, JAVA_LONG_UNALIGNED, preBytes, srcHTArr, 0, srcHTLen);
+
+    //Create destination on-heap buffer
+    final int dstHTLen = 1 << tgtLgArrLongs;
+    final long[] dstHTArr = new long[dstHTLen]; //on-heap dst buffer
+
+    //Rebuild hash table in destination buffer
+    HashOperations.hashArrayInsert(srcHTArr, dstHTArr, tgtLgArrLongs, extractThetaLong(seg));
+
+    //Bulk copy to destination memory
+    MemorySegment.copy(dstHTArr, 0, seg, JAVA_LONG_UNALIGNED, preBytes, dstHTLen);
+    insertLgArrLongs(seg, tgtLgArrLongs); //update in mem
+  }
+
+  /**
+   * Returns the actual log2 Resize Factor that can be used to grow the hash table. This will be
+   * an integer value between zero and the given lgRF, inclusive;
+   * @param capBytes the current memory capacity in bytes
+   * @param lgArrLongs the current lg hash table size in longs
+   * @param preLongs the current preamble size in longs
+   * @param lgRF the configured lg Resize Factor
+   * @return the actual log2 Resize Factor that can be used to grow the hash table
+   */
+  static final int actLgResizeFactor(final long capBytes, final int lgArrLongs, final int preLongs,
+      final int lgRF) {
+    //Compute the available longs in long arithmetic and clamp to the int range. A direct (int) cast
+    // of (capBytes >>> 3) silently truncates for capacities of 16 GiB or more.
+    final long availLongs = (capBytes >>> 3) - preLongs;
+    final int maxHTLongs = Util.floorPowerOf2((int) Math.min(availLongs, Integer.MAX_VALUE));
+    final int lgFactor = Math.max(Integer.numberOfTrailingZeros(maxHTLongs) - lgArrLongs, 0);
+    return (lgFactor >= lgRF) ? lgRF : lgFactor;
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/theta2/SetOperation.java b/src/main/java/org/apache/datasketches/theta2/SetOperation.java
new file mode 100644
index 000000000..5c959cde6
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/theta2/SetOperation.java
@@ -0,0 +1,259 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +package org.apache.datasketches.theta2; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static org.apache.datasketches.common.Family.idToFamily; +import static org.apache.datasketches.common.Util.ceilingPowerOf2; +import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * The parent API for all Set Operations + * + * @author Lee Rhodes + */ +public abstract class SetOperation { + static final int CONST_PREAMBLE_LONGS = 3; + + /** + * Constructor + */ + SetOperation() {} + + /** + * Makes a new builder + * + * @return a new builder + */ + public static final SetOperationBuilder builder() { + return new SetOperationBuilder(); + } + + /** + * Heapify takes the SetOperations image in MemorySegment and instantiates an on-heap + * SetOperation using the + * Default Update Seed. + * The resulting SetOperation will not retain any link to the source MemorySegment. + * + *

Note: Only certain set operators during stateful operations can be serialized and thus + * heapified.

+ * + * @param srcSeg an image of a SetOperation where the image seed hash matches the default seed hash. + * @return a Heap-based SetOperation from the given MemorySegment + */ + public static SetOperation heapify(final MemorySegment srcSeg) { + return heapify(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + } + + /** + * Heapify takes the SetOperation image in MemorySegment and instantiates an on-heap + * SetOperation using the given expectedSeed. + * The resulting SetOperation will not retain any link to the source MemorySegment. + * + *

Note: Only certain set operators during stateful operations can be serialized and thus + * heapified.

+ * + * @param srcSeg an image of a SetOperation where the hash of the given expectedSeed matches the image seed hash. + * @param expectedSeed the seed used to validate the given MemorySegment image. + * See Update Hash Seed. + * @return a Heap-based SetOperation from the given MemorySegment + */ + public static SetOperation heapify(final MemorySegment srcSeg, final long expectedSeed) { + final byte famID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE); + final Family family = idToFamily(famID); + switch (family) { + case UNION : { + return UnionImpl.heapifyInstance(srcSeg, expectedSeed); + } + case INTERSECTION : { + return IntersectionImpl.heapifyInstance(srcSeg, expectedSeed); + } + default: { + throw new SketchesArgumentException("SetOperation cannot heapify family: " + + family.toString()); + } + } + } + + /** + * Wrap takes the SetOperation image in MemorySegment and refers to it directly. + * There is no data copying onto the java heap. + * This method assumes the + * Default Update Seed. + * + *

Note: Only certain set operators during stateful operations can be serialized and thus + * wrapped.

+ * + * @param srcSeg an image of a SetOperation where the image seed hash matches the default seed hash. + * @return a SetOperation backed by the given MemorySegment + */ + public static SetOperation wrap(final MemorySegment srcSeg) { + return wrap(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + } + + /** + * Wrap takes the SetOperation image in MemorySegment and refers to it directly. + * There is no data copying onto the java heap. + * + *

Note: Only certain set operators during stateful operations can be serialized and thus + * wrapped.

+ * + * @param srcSeg an image of a SetOperation where the hash of the given expectedSeed matches the image seed hash. + * @param expectedSeed the seed used to validate the given MemorySegment image. + * See Update Hash Seed. + * @return a SetOperation backed by the given MemorySegment + */ + public static SetOperation wrap(final MemorySegment srcSeg, final long expectedSeed) { + final byte famID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE); + final Family family = idToFamily(famID); + final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE); + if (serVer != 3) { + throw new SketchesArgumentException("SerVer must be 3: " + serVer); + } + switch (family) { + case UNION : { + return UnionImpl.wrapInstance(srcSeg, expectedSeed); + } + case INTERSECTION : { + return IntersectionImpl.wrapInstance(srcSeg, expectedSeed, true); + } + default: + throw new SketchesArgumentException("SetOperation cannot wrap family: " + family.toString()); + } + } + + /** + * Returns the maximum required storage bytes given a nomEntries parameter for Union operations + * @param nomEntries Nominal Entries + * This will become the ceiling power of 2 if it is not. + * @return the maximum required storage bytes given a nomEntries parameter + */ + public static int getMaxUnionBytes(final int nomEntries) { + final int nomEnt = ceilingPowerOf2(nomEntries); + return (nomEnt << 4) + (Family.UNION.getMaxPreLongs() << 3); + } + + /** + * Returns the maximum required storage bytes given a nomEntries parameter for Intersection + * operations + * @param nomEntries Nominal Entries + * This will become the ceiling power of 2 if it is not. 
+   * @return the maximum required storage bytes given a nomEntries parameter
+   */
+  public static int getMaxIntersectionBytes(final int nomEntries) {
+    final int nomEnt = ceilingPowerOf2(nomEntries);
+    //16 bytes per nominal entry (nomEnt << 4) plus the Intersection preamble (8 bytes per preamble long)
+    final int bytes = (nomEnt << 4) + (Family.INTERSECTION.getMaxPreLongs() << 3);
+    return bytes;
+  }
+
+  /**
+   * Returns the maximum number of bytes for the returned CompactSketch, given the
+   * value of nomEntries of the first sketch A of AnotB.
+   * @param nomEntries the nominal entries of sketch A.
+   * This will become the ceiling power of 2 if it is not a power of 2.
+   * @return the maximum number of bytes.
+   */
+  public static int getMaxAnotBResultBytes(final int nomEntries) {
+    final int ceil = ceilingPowerOf2(nomEntries);
+    //NOTE(review): 24 looks like a 3-long preamble and 15 the bytes allowed per nominal entry; confirm.
+    return 24 + (15 * ceil);
+  }
+
+  /**
+   * Gets the Family of this SetOperation
+   * @return the Family of this SetOperation
+   */
+  public abstract Family getFamily();
+
+  //restricted
+
+  /**
+   * Gets the hash array in compact form.
+   * This is only useful during stateful operations.
+   * This should never be made public.
+   * @return the hash array
+   */
+  abstract long[] getCache();
+
+  /**
+   * Gets the current count of retained entries.
+   * This is only useful during stateful operations.
+   * Intentionally not made public because behavior will be confusing to end user.
+   *
+   * @return the current count of retained entries.
+   */
+  abstract int getRetainedEntries();
+
+  /**
+   * Returns the seedHash established during class construction.
+   * @return the seedHash.
+   */
+  abstract short getSeedHash();
+
+  /**
+   * Gets the current value of ThetaLong.
+   * Only useful during stateful operations.
+   * Intentionally not made public because behavior will be confusing to end user.
+   * @return the current value of ThetaLong.
+   */
+  abstract long getThetaLong();
+
+  /**
+   * Returns true if this object's internal data is backed by a MemorySegment,
+   * which may be on-heap or off-heap.
+   * @return true if this object's internal data is backed by a MemorySegment.
+ */ + public boolean hasMemorySegment() { return false; } + + /** + * Returns true if this object's internal data is backed by an off-heap MemorySegment. + * @return true if this object's internal data is backed by an off-heap MemorySegment. + */ + public boolean isDirect() { return false; } + + /** + * Returns true if this set operator is empty. + * Only useful during stateful operations. + * Intentionally not made public because behavior will be confusing to end user. + * @return true if this set operator is empty. + */ + abstract boolean isEmpty(); + + /** + * Returns true if the two given MemorySegments refer to the same backing resource, + * which is either an off-heap memory location and size, or the same on-heap array object. + * + *

This is a convenient delegate of + * {@link org.apache.datasketches.common.Util#isSameResource(MemorySegment, MemorySegment) isSameResource()}

+ * + * @param seg1 The first given MemorySegment + * @param seg2 The second given MemorySegment + * @return true if both MemorySegments are determined to be the same backing memory. + */ + public boolean isSameResource(final MemorySegment seg1, final MemorySegment seg2) { + return Util.isSameResource(seg1, seg2); + } +} diff --git a/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java b/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java new file mode 100644 index 000000000..cf64326b8 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java @@ -0,0 +1,275 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import static org.apache.datasketches.common.Util.LS; +import static org.apache.datasketches.common.Util.TAB; +import static org.apache.datasketches.common.Util.ceilingPowerOf2; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * For building a new SetOperation. 
+ * + * @author Lee Rhodes + */ +public class SetOperationBuilder { + private int bLgNomLongs; + private long bSeed; + private ResizeFactor bRF; + private float bP; + + /** + * Constructor for building a new SetOperation. The default configuration is + *
    + *
  • Max Nominal Entries (max K): + * {@value org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_NOMINAL_ENTRIES}
  • + *
  • Seed: {@value org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_UPDATE_SEED}
  • + *
  • {@link ResizeFactor#X8}
  • + *
  • Input Sampling Probability: 1.0
  • + *
  • Destination MemorySegment: none (optionally supplied to build(Family, MemorySegment))
  • + *
+   */
+  public SetOperationBuilder() {
+    bLgNomLongs = Integer.numberOfTrailingZeros(ThetaUtil.DEFAULT_NOMINAL_ENTRIES);
+    bSeed = ThetaUtil.DEFAULT_UPDATE_SEED;
+    bP = (float) 1.0;
+    bRF = ResizeFactor.X8;
+  }
+
+  /**
+   * Sets the Maximum Nominal Entries (max K) for this set operation. The effective value of K of the result of a
+   * Set Operation can be less than max K, but never greater.
+   * The minimum value is 16 and the maximum value is 67,108,864, which is 2^26.
+   * If the given value is rejected, the previously configured value is left unchanged.
+   * @param nomEntries Nominal Entries
+   * This will become the ceiling power of 2 if it is not a power of 2.
+   * @return this SetOperationBuilder
+   * @throws SketchesArgumentException if the resulting value is out of range.
+   */
+  public SetOperationBuilder setNominalEntries(final int nomEntries) {
+    //Validate a local first so that a rejected value cannot corrupt the builder's state.
+    final int lgNomLongs = Integer.numberOfTrailingZeros(ceilingPowerOf2(nomEntries));
+    if ((lgNomLongs > ThetaUtil.MAX_LG_NOM_LONGS) || (lgNomLongs < ThetaUtil.MIN_LG_NOM_LONGS)) {
+      throw new SketchesArgumentException("Nominal Entries must be >= 16 and <= 67108864: "
+        + nomEntries);
+    }
+    bLgNomLongs = lgNomLongs;
+    return this;
+  }
+
+  /**
+   * Alternative method of setting the Nominal Entries for this set operation from the log_base2 value.
+   * The minimum value is 4 and the maximum value is 26.
+   * Be aware that set operations as large as this maximum value may not have been
+   * thoroughly characterized for performance.
+   *
+   * @param lgNomEntries the log_base2 Nominal Entries.
+   * @return this SetOperationBuilder
+   * @throws SketchesArgumentException if lgNomEntries is out of range.
+   */
+  public SetOperationBuilder setLogNominalEntries(final int lgNomEntries) {
+    //Range-check before shifting: Java uses only the low 5 bits of an int shift distance,
+    //so without this guard (1 << 36) == 16 would be silently accepted as lgNomEntries = 4.
+    if ((lgNomEntries > ThetaUtil.MAX_LG_NOM_LONGS) || (lgNomEntries < ThetaUtil.MIN_LG_NOM_LONGS)) {
+      throw new SketchesArgumentException("Log Nominal Entries must be >= "
+        + ThetaUtil.MIN_LG_NOM_LONGS + " and <= " + ThetaUtil.MAX_LG_NOM_LONGS + ": " + lgNomEntries);
+    }
+    bLgNomLongs = ThetaUtil.checkNomLongs(1 << lgNomEntries);
+    return this;
+  }
+
+  /**
+   * Returns Log-base 2 Nominal Entries
+   * @return Log-base 2 Nominal Entries
+   */
+  public int getLgNominalEntries() {
+    return bLgNomLongs;
+  }
+
+  /**
+   * Sets the long seed value that is required by the hashing function.
+   * @param seed See seed
+   * @return this SetOperationBuilder
+   */
+  public SetOperationBuilder setSeed(final long seed) {
+    bSeed = seed;
+    return this;
+  }
+
+  /**
+   * Returns the seed
+   * @return the seed
+   */
+  public long getSeed() {
+    return bSeed;
+  }
+
+  /**
+   * Sets the upfront uniform sampling probability, p. Although this functionality is
+   * implemented for Unions only, it rarely makes sense to use it. The proper use of upfront
+   * sampling is when building the sketches.
+   * @param p See Sampling Probability, p
+   * @return this SetOperationBuilder
+   * @throws SketchesArgumentException if p is NaN or not in the interval (0, 1.0].
+   */
+  public SetOperationBuilder setP(final float p) {
+    //Written as a negated range test so that NaN, which fails every comparison, is rejected too.
+    if (!((p > 0.0) && (p <= 1.0))) {
+      throw new SketchesArgumentException("p must be > 0 and <= 1.0: " + p);
+    }
+    bP = p;
+    return this;
+  }
+
+  /**
+   * Returns the pre-sampling probability p
+   * @return the pre-sampling probability p
+   */
+  public float getP() {
+    return bP;
+  }
+
+  /**
+   * Sets the cache Resize Factor
+   * @param rf See Resize Factor
+   * @return this SetOperationBuilder
+   */
+  public SetOperationBuilder setResizeFactor(final ResizeFactor rf) {
+    bRF = rf;
+    return this;
+  }
+
+  /**
+   * Returns the Resize Factor
+   * @return the Resize Factor
+   */
+  public ResizeFactor getResizeFactor() {
+    return bRF;
+  }
+
+  /**
+   * Returns a SetOperation with the current configuration of this Builder and the given Family.
+   * The result is built without a destination MemorySegment.
+   * @param family the chosen SetOperation family
+   * @return a SetOperation
+   */
+  public SetOperation build(final Family family) {
+    return build(family, null);
+  }
+
+  /**
+   * Returns a SetOperation with the current configuration of this Builder, the given Family
+   * and the given destination MemorySegment. Note that the destination MemorySegment cannot be used with AnotB.
+   * @param family the chosen SetOperation family
+   * @param dstSeg The destination MemorySegment.
+   * @return a SetOperation
+   */
+  public SetOperation build(final Family family, final MemorySegment dstSeg) {
+    //A null destination selects the on-heap variant of the requested operation.
+    final boolean onHeap = (dstSeg == null);
+    switch (family) {
+      case UNION:
+        return onHeap
+            ? UnionImpl.initNewHeapInstance(bLgNomLongs, bSeed, bP, bRF)
+            : UnionImpl.initNewDirectInstance(bLgNomLongs, bSeed, bP, bRF, dstSeg);
+      case INTERSECTION:
+        return onHeap
+            ? IntersectionImpl.initNewHeapInstance(bSeed)
+            : IntersectionImpl.initNewDirectInstance(bSeed, dstSeg);
+      case A_NOT_B:
+        //AnotB is heap-only; a destination segment is rejected.
+        if (!onHeap) {
+          throw new SketchesArgumentException(
+            "AnotB can not be persisted.");
+        }
+        return new AnotBimpl(bSeed);
+      default:
+        throw new SketchesArgumentException(
+            "Given Family cannot be built as a SetOperation: " + family.toString());
+    }
+  }
+
+  /**
+   * Convenience method that builds a Union from the current configuration of this Builder.
+   * @return a Union object
+   */
+  public Union buildUnion() {
+    final SetOperation op = build(Family.UNION);
+    return (Union) op;
+  }
+
+  /**
+   * Convenience method that builds a Union from the current configuration of this Builder
+   * and the given destination MemorySegment.
+   * @param dstSeg The destination MemorySegment.
+   * @return a Union object
+   */
+  public Union buildUnion(final MemorySegment dstSeg) {
+    final SetOperation op = build(Family.UNION, dstSeg);
+    return (Union) op;
+  }
+
+  /**
+   * Convenience method that builds an Intersection from the current configuration of this Builder.
+   * @return an Intersection object
+   */
+  public Intersection buildIntersection() {
+    final SetOperation op = build(Family.INTERSECTION);
+    return (Intersection) op;
+  }
+
+  /**
+   * Convenience method that builds an Intersection from the current configuration of this Builder
+   * and the given destination MemorySegment.
+   * @param dstSeg The destination MemorySegment.
+ * @return an Intersection object + */ + public Intersection buildIntersection(final MemorySegment dstSeg) { + return (Intersection) build(Family.INTERSECTION, dstSeg); + } + + /** + * Convenience method, returns a configured SetOperation ANotB with + * Default Update Seed + * @return an ANotB object + */ + public AnotB buildANotB() { + return (AnotB) build(Family.A_NOT_B); + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append("SetOperationBuilder configuration:").append(LS); + sb.append("LgK:").append(TAB).append(bLgNomLongs).append(LS); + sb.append("K:").append(TAB).append(1 << bLgNomLongs).append(LS); + sb.append("Seed:").append(TAB).append(bSeed).append(LS); + sb.append("p:").append(TAB).append(bP).append(LS); + sb.append("ResizeFactor:").append(TAB).append(bRF).append(LS); + return sb.toString(); + } + +} diff --git a/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java b/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java new file mode 100644 index 000000000..2659df84b --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java @@ -0,0 +1,413 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.datasketches.common.ByteArrayUtil.putLongLE; +import static org.apache.datasketches.hash.MurmurHash3.hash; +import static org.apache.datasketches.theta2.PreambleUtil.SINGLEITEM_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID; +import static org.apache.datasketches.theta2.PreambleUtil.extractFlags; +import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs; +import static org.apache.datasketches.theta2.PreambleUtil.extractSeedHash; +import static org.apache.datasketches.theta2.PreambleUtil.extractSerVer; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * A CompactSketch that holds only one item hash. + * + * @author Lee Rhodes + */ +final class SingleItemSketch extends CompactSketch { + private static final long DEFAULT_SEED_HASH = ThetaUtil.computeSeedHash(ThetaUtil.DEFAULT_UPDATE_SEED) & 0xFFFFL; + + // For backward compatibility, a candidate pre0_ long must have: + // Flags (byte 5): Ordered, Compact, NOT Empty, Read Only, LittleEndian = 11010 = 0x1A. + // Flags mask will be 0x1F. + // SingleItem flag may not be set due to a historical bug, so we can't depend on it for now. + // However, if the above flags are correct, preLongs == 1, SerVer >= 3, FamilyID == 3, + // and the hash seed matches, it is virtually guaranteed that we have a SingleItem Sketch. + + private static final long PRE0_LO6_SI = 0X00_00_3A_00_00_03_03_01L; //with SI flag + private long pre0_ = 0; + private long hash_ = 0; + + //Internal Constructor. 
All checking & hashing has been done, assumes default seed + private SingleItemSketch(final long hash) { + pre0_ = (DEFAULT_SEED_HASH << 48) | PRE0_LO6_SI; + hash_ = hash; + } + + //All checking & hashing has been done, given the relevant seed + SingleItemSketch(final long hash, final long seed) { + final long seedHash = ThetaUtil.computeSeedHash(seed) & 0xFFFFL; + pre0_ = (seedHash << 48) | PRE0_LO6_SI; + hash_ = hash; + } + + //All checking & hashing has been done, given the relevant seedHash + SingleItemSketch(final long hash, final short seedHash) { + final long seedH = seedHash & 0xFFFFL; + pre0_ = (seedH << 48) | PRE0_LO6_SI; + hash_ = hash; + } + + /** + * Creates a SingleItemSketch on the heap given a SingleItemSketch MemorySegment image and a seedHash. + * Checks the seed hash of the given MemorySegment against the given seedHash. + * @param srcSeg the MemorySegment to be heapified. + * @param expectedSeedHash the given seedHash to be checked against the srcMem seedHash + * @return a SingleItemSketch + */ //does not override Sketch + static SingleItemSketch heapify(final MemorySegment srcSeg, final short expectedSeedHash) { + ThetaUtil.checkSeedHashes((short) extractSeedHash(srcSeg), expectedSeedHash); + final boolean singleItem = otherCheckForSingleItem(srcSeg); + if (singleItem) { return new SingleItemSketch(srcSeg.get(JAVA_LONG_UNALIGNED, 8), expectedSeedHash); } + throw new SketchesArgumentException("Input MemorySegment is not a SingleItemSketch."); + } + + @Override + public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) { + if (dstSeg == null) { return this; } + else { + dstSeg.set(JAVA_LONG_UNALIGNED, 0, pre0_); + dstSeg.set(JAVA_LONG_UNALIGNED, 8, hash_); + return new DirectCompactSketch(dstSeg); + } + } + + //Create methods using the default seed + + /** + * Create this sketch with a long. + * + * @param datum The given long datum. 
+ * @return a SingleItemSketch + */ + static SingleItemSketch create(final long datum) { + final long[] data = { datum }; + return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1); + } + + /** + * Create this sketch with the given double (or float) datum. + * The double will be converted to a long using Double.doubleToLongBits(datum), + * which normalizes all NaN values to a single NaN representation. + * Plus and minus zero will be normalized to plus zero. + * The special floating-point values NaN and +/- Infinity are treated as distinct. + * + * @param datum The given double datum. + * @return a SingleItemSketch + */ + static SingleItemSketch create(final double datum) { + final double d = (datum == 0.0) ? 0.0 : datum; // canonicalize -0.0, 0.0 + final long[] data = { Double.doubleToLongBits(d) };// canonicalize all NaN forms + return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1); + } + + /** + * Create this sketch with the given String. + * The string is converted to a byte array using UTF8 encoding. + * If the string is null or empty no create attempt is made and the method returns null. + * + *

Note: this will not produce the same hash values as the {@link #create(char[])} + * method and will generally be a little slower depending on the complexity of the UTF8 encoding. + *

+ * + * @param datum The given String. + * @return a SingleItemSketch or null + */ + static SingleItemSketch create(final String datum) { + if ((datum == null) || datum.isEmpty()) { return null; } + final byte[] data = datum.getBytes(UTF_8); + return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1); + } + + /** + * Create this sketch with the given byte array. + * If the byte array is null or empty no create attempt is made and the method returns null. + * + * @param data The given byte array. + * @return a SingleItemSketch or null + */ + static SingleItemSketch create(final byte[] data) { + if ((data == null) || (data.length == 0)) { return null; } + return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1); + } + + /** + * Create this sketch with the given char array. + * If the char array is null or empty no create attempt is made and the method returns null. + * + *

Note: this will not produce the same output hash values as the {@link #create(String)} + * method but will be a little faster as it avoids the complexity of the UTF8 encoding.

+   *
+   * @param data The given char array.
+   * @return a SingleItemSketch or null
+   */
+  static SingleItemSketch create(final char[] data) {
+    if ((data == null) || (data.length == 0)) { return null; }
+    return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1);
+  }
+
+  /**
+   * Create this sketch with the given integer array.
+   * If the integer array is null or empty no create attempt is made and the method returns null.
+   *
+   * @param data The given int array.
+   * @return a SingleItemSketch or null
+   */
+  static SingleItemSketch create(final int[] data) {
+    if ((data == null) || (data.length == 0)) { return null; }
+    return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1);
+  }
+
+  /**
+   * Create this sketch with the given long array.
+   * If the long array is null or empty no create attempt is made and the method returns null.
+   *
+   * @param data The given long array.
+   * @return a SingleItemSketch or null
+   */
+  static SingleItemSketch create(final long[] data) {
+    if ((data == null) || (data.length == 0)) { return null; }
+    return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1);
+  }
+
+  //Updates with a user specified seed
+
+  /**
+   * Create this sketch with a long and a seed.
+   * The seed hash stored in the sketch is derived from the given seed.
+   *
+   * @param datum The given long datum.
+   * @param seed used to hash the given value.
+   * @return a SingleItemSketch
+   */
+  static SingleItemSketch create(final long datum, final long seed) {
+    final long[] data = { datum };
+    //Pass the seed on: the one-argument constructor would stamp the default seed hash,
+    //which does not match a datum hashed with a user-specified seed.
+    return new SingleItemSketch(hash(data, seed)[0] >>> 1, seed);
+  }
+
+  /**
+   * Create this sketch with the given double (or float) datum and a seed.
+   * The double will be converted to a long using Double.doubleToLongBits(datum),
+   * which normalizes all NaN values to a single NaN representation.
+   * Plus and minus zero will be normalized to plus zero.
+   * The special floating-point values NaN and +/- Infinity are treated as distinct.
+   *
+   * @param datum The given double datum.
+ * @param seed used to hash the given value. + * @return a SingleItemSketch + */ + static SingleItemSketch create(final double datum, final long seed) { + final double d = (datum == 0.0) ? 0.0 : datum; // canonicalize -0.0, 0.0 + final long[] data = { Double.doubleToLongBits(d) };// canonicalize all NaN forms + return new SingleItemSketch(hash(data, seed)[0] >>> 1, seed); + } + + /** + * Create this sketch with the given String and a seed. + * The string is converted to a byte array using UTF8 encoding. + * If the string is null or empty no create attempt is made and the method returns null. + * + *

Note: this will not produce the same output hash values as the {@link #create(char[])} + * method and will generally be a little slower depending on the complexity of the UTF8 encoding. + *

+ * + * @param datum The given String. + * @param seed used to hash the given value. + * @return a SingleItemSketch or null + */ + static SingleItemSketch create(final String datum, final long seed) { + if ((datum == null) || datum.isEmpty()) { return null; } + final byte[] data = datum.getBytes(UTF_8); + return new SingleItemSketch(hash(data, seed)[0] >>> 1, seed); + } + + /** + * Create this sketch with the given byte array and a seed. + * If the byte array is null or empty no create attempt is made and the method returns null. + * + * @param data The given byte array. + * @param seed used to hash the given value. + * @return a SingleItemSketch or null + */ + static SingleItemSketch create(final byte[] data, final long seed) { + if ((data == null) || (data.length == 0)) { return null; } + return new SingleItemSketch(hash(data, seed)[0] >>> 1, seed); + } + + /** + * Create this sketch with the given char array and a seed. + * If the char array is null or empty no create attempt is made and the method returns null. + * + *

Note: this will not produce the same output hash values as the {@link #create(String)} + * method but will be a little faster as it avoids the complexity of the UTF8 encoding.

+ * + * @param data The given char array. + * @param seed used to hash the given value. + * @return a SingleItemSketch or null + */ + static SingleItemSketch create(final char[] data, final long seed) { + if ((data == null) || (data.length == 0)) { return null; } + return new SingleItemSketch(hash(data, seed)[0] >>> 1, seed); + } + + /** + * Create this sketch with the given integer array and a seed. + * If the integer array is null or empty no create attempt is made and the method returns null. + * + * @param data The given int array. + * @param seed used to hash the given value. + * @return a SingleItemSketch or null + */ + static SingleItemSketch create(final int[] data, final long seed) { + if ((data == null) || (data.length == 0)) { return null; } + return new SingleItemSketch(hash(data, seed)[0] >>> 1, seed); + } + + /** + * Create this sketch with the given long array (as an item) and a seed. + * If the long array is null or empty no create attempt is made and the method returns null. + * + * @param data The given long array. + * @param seed used to hash the given value. + * @return a SingleItemSketch or null + */ + static SingleItemSketch create(final long[] data, final long seed) { + if ((data == null) || (data.length == 0)) { return null; } + return new SingleItemSketch(hash(data, seed)[0] >>> 1, seed); + } + + //Sketch + + @Override //much faster + public int getCountLessThanThetaLong(final long thetaLong) { + return (hash_ < thetaLong) ? 
1 : 0; + } + + @Override + public int getCurrentBytes() { + return 16; + } + + @Override + public double getEstimate() { + return 1.0; + } + + @Override + public HashIterator iterator() { + return new HeapCompactHashIterator(new long[] { hash_ }); + } + + @Override + public double getLowerBound(final int numStdDev) { + return 1.0; + } + + @Override + public int getRetainedEntries(final boolean valid) { + return 1; + } + + @Override + public long getThetaLong() { + return Long.MAX_VALUE; + } + + @Override + public double getUpperBound(final int numStdDev) { + return 1.0; + } + + @Override + public boolean isEmpty() { + return false; + } + + @Override + public boolean isOrdered() { + return true; + } + + @Override + public byte[] toByteArray() { + final byte[] out = new byte[16]; + putLongLE(out, 0, pre0_); + putLongLE(out, 8, hash_); + return out; + } + + //restricted methods + + @Override + long[] getCache() { + return new long[] { hash_ }; + } + + @Override + int getCompactPreambleLongs() { + return 1; + } + + @Override + int getCurrentPreambleLongs() { + return 1; + } + + @Override + MemorySegment getMemorySegment() { + return null; + } + + @Override + short getSeedHash() { + return (short) (pre0_ >>> 48); + } + + static final boolean otherCheckForSingleItem(final MemorySegment seg) { + return otherCheckForSingleItem(extractPreLongs(seg), extractSerVer(seg), + extractFamilyID(seg), extractFlags(seg) ); + } + + static final boolean otherCheckForSingleItem(final int preLongs, final int serVer, + final int famId, final int flags) { + // Flags byte: SI=X, Ordered=T, Compact=T, Empty=F, ReadOnly=T, BigEndian=F = X11010 = 0x1A. + // Flags mask will be 0x1F. + // SingleItem flag may not be set due to a historical bug, so we can't depend on it for now. + // However, if the above flags are correct, preLongs == 1, SerVer >= 3, FamilyID == 3, + // and the hash seed matches (not done here), it is virtually guaranteed that we have a + // SingleItem Sketch. 
+ final boolean numPreLongs = preLongs == 1; + final boolean numSerVer = serVer >= 3; + final boolean numFamId = famId == Family.COMPACT.getID(); + final boolean numFlags = (flags & 0x1F) == 0x1A; //no SI, yet + final boolean singleFlag = (flags & SINGLEITEM_FLAG_MASK) > 0; + return (numPreLongs && numSerVer && numFamId && numFlags) || singleFlag; + } + +} diff --git a/src/main/java/org/apache/datasketches/theta2/Sketch.java b/src/main/java/org/apache/datasketches/theta2/Sketch.java new file mode 100644 index 000000000..3c5650a91 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/Sketch.java @@ -0,0 +1,695 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static org.apache.datasketches.common.Family.idToFamily; +import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; +import static org.apache.datasketches.common.Util.LS; +import static org.apache.datasketches.common.Util.ceilingPowerOf2; +import static org.apache.datasketches.common.Util.zeroPad; +import static org.apache.datasketches.theta2.PreambleUtil.COMPACT_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE; +import static org.apache.datasketches.thetacommon.HashOperations.count; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; +import org.apache.datasketches.thetacommon.BinomialBoundsN; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * The top-level class for all theta sketches. This class is never constructed directly. + * Use the UpdateSketch.builder() methods to create UpdateSketches. + * + * @author Lee Rhodes + */ +public abstract class Sketch { + + Sketch() {} + + //public static factory constructor-type methods + + /** + * Heapify takes the sketch image in MemorySegment and instantiates an on-heap Sketch. + * + *

The resulting sketch will not retain any link to the source MemorySegment.

+ * + *

For Update Sketches this method checks if the + * Default Update Seed

+ * was used to create the source MemorySegment image. + * + *

For Compact Sketches this method assumes that the sketch image was created with the + * correct hash seed, so it is not checked.

+ * + * @param srcSeg an image of a Sketch. + * + * @return a Sketch on the heap. + */ + public static Sketch heapify(final MemorySegment srcSeg) { + final byte familyID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE); + final Family family = idToFamily(familyID); + if (family == Family.COMPACT) { + return CompactSketch.heapify(srcSeg); + } + return heapifyUpdateFromMemory(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + } + + /** + * Heapify takes the sketch image in MemorySegment and instantiates an on-heap Sketch. + * + *

The resulting sketch will not retain any link to the source MemorySegment.

+ * + *

For Update and Compact Sketches this method checks if the given expectedSeed was used to + * create the source MemorySegment image. However, SerialVersion 1 sketches cannot be checked.

+ * + * @param srcSeg an image of a Sketch that was created using the given expectedSeed. + * @param expectedSeed the seed used to validate the given MemorySegment image. + * See Update Hash Seed. + * Compact sketches store a 16-bit hash of the seed, but not the seed itself. + * @return a Sketch on the heap. + */ + public static Sketch heapify(final MemorySegment srcSeg, final long expectedSeed) { + final byte familyID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE); + final Family family = idToFamily(familyID); + if (family == Family.COMPACT) { + return CompactSketch.heapify(srcSeg, expectedSeed); + } + return heapifyUpdateFromMemory(srcSeg, expectedSeed); + } + + /** + * Wrap takes the sketch image in the given MemorySegment and refers to it directly. + * There is no data copying onto the java heap. + * The wrap operation enables fast read-only merging and access to all the public read-only API. + * + *

Only "Direct" Serialization Version 3 (i.e., OpenSource) sketches that have + * been explicitly stored as direct sketches can be wrapped. + * Wrapping earlier serial version sketches will result in an on-heap CompactSketch + * where all data will be copied to the heap. These early versions were never designed to + * "wrap".

+ * + *

Wrapping any subclass of this class that is empty or contains only a single item will + * result in on-heap equivalent forms of empty and single item sketch respectively. + * This is actually faster and consumes less overall memory.

+ * + *

For Update Sketches this method checks if the + * Default Update Seed

+ * was used to create the source MemorySegment image. + * + *

For Compact Sketches this method assumes that the sketch image was created with the + * correct hash seed, so it is not checked.

+ * + * @param srcSeg an image of a Sketch. + * See Memory. + * @return a Sketch backed by the given MemorySegment + */ + public static Sketch wrap(final MemorySegment srcSeg) { + final int preLongs = srcSeg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; + final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE) & 0XFF; + final int familyID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF; + final Family family = Family.idToFamily(familyID); + if (family == Family.QUICKSELECT) { + if (serVer == 3 && preLongs == 3) { + return DirectQuickSelectSketchR.readOnlyWrap(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + } else { + throw new SketchesArgumentException( + "Corrupted: " + family + " family image: must have SerVer = 3 and preLongs = 3"); + } + } + if (family == Family.COMPACT) { + return CompactSketch.wrap(srcSeg); + } + throw new SketchesArgumentException( + "Cannot wrap family: " + family + " as a Sketch"); + } + + /** + * Wrap takes the sketch image in the given MemorySegment and refers to it directly. + * There is no data copying onto the java heap. + * The wrap operation enables fast read-only merging and access to all the public read-only API. + * + *

Only "Direct" Serialization Version 3 (i.e., OpenSource) sketches that have + * been explicitly stored as direct sketches can be wrapped. + * Wrapping earlier serial version sketches will result in an on-heap CompactSketch + * where all data will be copied to the heap. These early versions were never designed to + * "wrap".

+ * + *

Wrapping any subclass of this class that is empty or contains only a single item will + * result in on-heap equivalent forms of empty and single item sketch respectively. + * This is actually faster and consumes less overall memory.

+ * + *

For Update and Compact Sketches this method checks if the given expectedSeed was used to + * create the source MemorySegment image. However, SerialVersion 1 sketches cannot be checked.

+ * + * @param srcSeg a MemorySegment with an image of a Sketch. + * @param expectedSeed the seed used to validate the given MemorySegment image. + * See Update Hash Seed. + * @return a UpdateSketch backed by the given Memory except as above. + */ + public static Sketch wrap(final MemorySegment srcSeg, final long expectedSeed) { + final int preLongs = srcSeg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; + final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE) & 0XFF; + final int familyID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF; + final Family family = Family.idToFamily(familyID); + if (family == Family.QUICKSELECT) { + if (serVer == 3 && preLongs == 3) { + return DirectQuickSelectSketchR.readOnlyWrap(srcSeg, expectedSeed); + } else { + throw new SketchesArgumentException( + "Corrupted: " + family + " family image: must have SerVer = 3 and preLongs = 3"); + } + } + if (family == Family.COMPACT) { + return CompactSketch.wrap(srcSeg, expectedSeed); + } + throw new SketchesArgumentException( + "Cannot wrap family: " + family + " as a Sketch"); + } + + //Sketch interface + + /** + * Converts this sketch to a ordered CompactSketch. + * + *

If this.isCompact() == true this method returns this, + * otherwise, this method is equivalent to + * {@link #compact(boolean, MemorySegment) compact(true, null)}. + * + *

A CompactSketch is always immutable.

+ * + * @return this sketch as an ordered CompactSketch. + */ + public CompactSketch compact() { + return (this.isCompact()) ? (CompactSketch)this : compact(true, null); + } + + /** + * Convert this sketch to a CompactSketch. + * + *

If this sketch is a type of UpdateSketch, the compacting process converts the hash table + * of the UpdateSketch to a simple list of the valid hash values. + * Any hash values of zero or equal-to or greater than theta will be discarded. + * The number of valid values remaining in the CompactSketch depends on a number of factors, + * but may be larger or smaller than Nominal Entries (or k). + * It will never exceed 2k. + * If it is critical to always limit the size to no more than k, + * then rebuild() should be called on the UpdateSketch prior to calling this method.

+ * + *

A CompactSketch is always immutable.

+ * + *

A new CompactSketch object is created:

+ *
  • if dstSeg != null
  • + *
  • if dstSeg == null and this.hasMemorySegment() == true
  • + *
  • if dstSeg == null and this has more than 1 item and this.isOrdered() == false + * and dstOrdered == true.
  • + *
+ * + *

Otherwise, this operation returns this.

+ * + * @param dstOrdered assumed true if this sketch is empty or has only one value + * See Destination Ordered + * + * @param dstSeg + * See Destination MemorySegment. + * + * @return this sketch as a CompactSketch. + */ + public abstract CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg); + + /** + * Returns the number of storage bytes required for this Sketch if its current state were + * compacted. It this sketch is already in the compact form this is equivalent to + * calling {@link #getCurrentBytes()}. + * @return number of compact bytes + */ + public abstract int getCompactBytes(); + + /** + * Gets the number of hash values less than the given theta expressed as a long. + * @param thetaLong the given theta as a long between zero and Long.MAX_VALUE. + * @return the number of hash values less than the given thetaLong. + */ + public int getCountLessThanThetaLong(final long thetaLong) { + return count(getCache(), thetaLong); + } + + /** + * Returns the number of storage bytes required for this sketch in its current state. + * + * @return the number of storage bytes required for this sketch + */ + public abstract int getCurrentBytes(); + + /** + * Gets the unique count estimate. + * @return the sketch's best estimate of the cardinality of the input stream. + */ + public abstract double getEstimate(); + + /** + * Returns the Family that this sketch belongs to + * @return the Family that this sketch belongs to + */ + public abstract Family getFamily(); + + /** + * Gets the approximate lower error bound given the specified number of Standard Deviations. + * This will return getEstimate() if isEmpty() is true. + * + * @param numStdDev + * See Number of Standard Deviations + * @return the lower bound. + */ + public double getLowerBound(final int numStdDev) { + return isEstimationMode() + ? 
lowerBound(getRetainedEntries(true), getThetaLong(), numStdDev, isEmpty()) + : getRetainedEntries(true); + } + + /** + * Returns the maximum number of storage bytes required for a CompactSketch with the given + * number of actual entries. + * @param numberOfEntries the actual number of retained entries stored in the sketch. + * @return the maximum number of storage bytes required for a CompactSketch with the given number + * of retained entries. + */ + public static int getMaxCompactSketchBytes(final int numberOfEntries) { + if (numberOfEntries == 0) { return 8; } + if (numberOfEntries == 1) { return 16; } + return (numberOfEntries << 3) + 24; + } + + /** + * Returns the maximum number of storage bytes required for a CompactSketch given the configured + * log_base2 of the number of nominal entries, which is a power of 2. + * @param lgNomEntries Nominal Entries + * @return the maximum number of storage bytes required for a CompactSketch with the given + * lgNomEntries. + */ + public static int getCompactSketchMaxBytes(final int lgNomEntries) { + return (int)((2 << lgNomEntries) * ThetaUtil.REBUILD_THRESHOLD + + Family.QUICKSELECT.getMaxPreLongs()) * Long.BYTES; + } + + /** + * Returns the maximum number of storage bytes required for an UpdateSketch with the given + * number of nominal entries (power of 2). + * @param nomEntries Nominal Entries + * This will become the ceiling power of 2 if it is not. + * @return the maximum number of storage bytes required for a UpdateSketch with the given + * nomEntries + */ + public static int getMaxUpdateSketchBytes(final int nomEntries) { + final int nomEnt = ceilingPowerOf2(nomEntries); + return (nomEnt << 4) + (Family.QUICKSELECT.getMaxPreLongs() << 3); + } + + /** + * Returns the number of valid entries that have been retained by the sketch. 
+ * @return the number of valid retained entries + */ + public int getRetainedEntries() { + return getRetainedEntries(true); + } + + /** + * Returns the number of entries that have been retained by the sketch. + * @param valid if true, returns the number of valid entries, which are less than theta and used + * for estimation. + * Otherwise, return the number of all entries, valid or not, that are currently in the internal + * sketch cache. + * @return the number of retained entries + */ + public abstract int getRetainedEntries(boolean valid); + + /** + * Returns the serialization version from the given MemorySegment + * @param seg the sketch MemorySegment + * @return the serialization version from the MemorySegment + */ + public static int getSerializationVersion(final MemorySegment seg) { + return seg.get(JAVA_BYTE, SER_VER_BYTE); + } + + /** + * Gets the value of theta as a double with a value between zero and one + * @return the value of theta as a double + */ + public double getTheta() { + return getThetaLong() / LONG_MAX_VALUE_AS_DOUBLE; + } + + /** + * Gets the value of theta as a long + * @return the value of theta as a long + */ + public abstract long getThetaLong(); + + /** + * Gets the approximate upper error bound given the specified number of Standard Deviations. + * This will return getEstimate() if isEmpty() is true. + * + * @param numStdDev + * See Number of Standard Deviations + * @return the upper bound. + */ + public double getUpperBound(final int numStdDev) { + return isEstimationMode() + ? upperBound(getRetainedEntries(true), getThetaLong(), numStdDev, isEmpty()) + : getRetainedEntries(true); + } + + /** + * Returns true if this object's internal data is backed by a MemorySegment object, + * which may be on-heap or off-heap. + * @return true if this object's internal data is backed by a MemorySegment object. + */ + public boolean hasMemorySegment() { return false; } + + /** + * Returns true if this sketch is in compact form. 
+ * @return true if this sketch is in compact form. + */ + public abstract boolean isCompact(); + + /** + * Returns true if this object's internal data is backed by an off-heap MemorySegment. + * @return true if this object's internal data is backed by an off-heap MemorySegment. + */ + public boolean isDirect() { return false; } + + /** + * See Empty + * @return true if empty. + */ + public abstract boolean isEmpty(); + + /** + * Returns true if the sketch is Estimation Mode (as opposed to Exact Mode). + * This is true if theta < 1.0 AND isEmpty() is false. + * @return true if the sketch is in estimation mode. + */ + public boolean isEstimationMode() { + return estMode(getThetaLong(), isEmpty()); + } + + /** + * Returns true if internal cache is ordered + * @return true if internal cache is ordered + */ + public abstract boolean isOrdered(); + + /** + * Returns true if the two given MemorySegments refer to the same backing resource, + * which is either an off-heap memory location and size, or the same on-heap array object. + * + *

This is a convenient delegate of + * {@link org.apache.datasketches.common.Util#isSameResource(MemorySegment, MemorySegment) isSameResource()}

+ * + * @param seg1 The first given MemorySegment + * @param seg2 The second given MemorySegment + * @return true if both MemorySegments are determined to be the same backing memory. + */ + public boolean isSameResource(final MemorySegment seg1, final MemorySegment seg2) { + return Util.isSameResource(seg1, seg2); + } + + /** + * Returns a HashIterator that can be used to iterate over the retained hash values of the + * Theta sketch. + * @return a HashIterator that can be used to iterate over the retained hash values of the + * Theta sketch. + */ + public abstract HashIterator iterator(); + + /** + * Serialize this sketch to a byte array form. + * @return byte array of this sketch + */ + public abstract byte[] toByteArray(); + + /** + * Returns a human readable summary of the sketch. This method is equivalent to the parameterized + * call:
+ * Sketch.toString(sketch, true, false, 8, true); + * @return summary + */ + @Override + public String toString() { + return toString(true, false, 8, true); + } + + /** + * Gets a human readable listing of contents and summary of the given sketch. + * This can be a very long string. If this sketch is in a "dirty" state there + * may be values in the dataDetail view that are ≥ theta. + * + * @param sketchSummary If true the sketch summary will be output at the end. + * @param dataDetail If true, includes all valid hash values in the sketch. + * @param width The number of columns of hash values. Default is 8. + * @param hexMode If true, hashes will be output in hex. + * @return The result string, which can be very long. + */ + public String toString(final boolean sketchSummary, final boolean dataDetail, final int width, + final boolean hexMode) { + final StringBuilder sb = new StringBuilder(); + + int nomLongs = 0; + int arrLongs = 0; + float p = 0; + int rf = 0; + final boolean updateSketch = this instanceof UpdateSketch; + + final long thetaLong = getThetaLong(); + final int curCount = this.getRetainedEntries(true); + + if (updateSketch) { + final UpdateSketch uis = (UpdateSketch)this; + nomLongs = 1 << uis.getLgNomLongs(); + arrLongs = 1 << uis.getLgArrLongs(); + p = uis.getP(); + rf = uis.getResizeFactor().getValue(); + } + + if (dataDetail) { + final int w = width > 0 ? 
width : 8; // default is 8 wide + if (curCount > 0) { + sb.append("### SKETCH DATA DETAIL"); + final HashIterator it = iterator(); + int j = 0; + while (it.next()) { + final long h = it.get(); + if (j % w == 0) { + sb.append(LS).append(String.format(" %6d", j + 1)); + } + if (hexMode) { + sb.append(" " + zeroPad(Long.toHexString(h), 16) + ","); + } + else { + sb.append(String.format(" %20d,", h)); + } + j++ ; + } + sb.append(LS).append("### END DATA DETAIL").append(LS + LS); + } + } + + if (sketchSummary) { + final double thetaDbl = thetaLong / LONG_MAX_VALUE_AS_DOUBLE; + final String thetaHex = zeroPad(Long.toHexString(thetaLong), 16); + final String thisSimpleName = this.getClass().getSimpleName(); + final int seedHash = Short.toUnsignedInt(getSeedHash()); + + sb.append(LS); + sb.append("### ").append(thisSimpleName).append(" SUMMARY: ").append(LS); + if (updateSketch) { + sb.append(" Nominal Entries (k) : ").append(nomLongs).append(LS); + } + sb.append(" Estimate : ").append(getEstimate()).append(LS); + sb.append(" Upper Bound, 95% conf : ").append(getUpperBound(2)).append(LS); + sb.append(" Lower Bound, 95% conf : ").append(getLowerBound(2)).append(LS); + if (updateSketch) { + sb.append(" p : ").append(p).append(LS); + } + sb.append(" Theta (double) : ").append(thetaDbl).append(LS); + sb.append(" Theta (long) : ").append(thetaLong).append(LS); + sb.append(" Theta (long) hex : ").append(thetaHex).append(LS); + sb.append(" EstMode? : ").append(isEstimationMode()).append(LS); + sb.append(" Empty? : ").append(isEmpty()).append(LS); + sb.append(" Ordered? 
: ").append(isOrdered()).append(LS); + if (updateSketch) { + sb.append(" Resize Factor : ").append(rf).append(LS); + sb.append(" Array Size Entries : ").append(arrLongs).append(LS); + } + sb.append(" Retained Entries : ").append(curCount).append(LS); + sb.append(" Seed Hash : ").append(Integer.toHexString(seedHash)) + .append(" | ").append(seedHash).append(LS); + sb.append("### END SKETCH SUMMARY").append(LS); + + } + return sb.toString(); + } + + /** + * Returns a human readable string of the preamble of a byte array image of a Theta Sketch. + * @param byteArr the given byte array + * @return a human readable string of the preamble of a byte array image of a Theta Sketch. + */ + public static String toString(final byte[] byteArr) { + return PreambleUtil.preambleToString(byteArr); + } + + /** + * Returns a human readable string of the preamble of a MemorySegment image of a Theta Sketch. + * @param mem the given Memory object + * @return a human readable string of the preamble of a MemorySegment image of a Theta Sketch. + */ + public static String toString(final MemorySegment mem) { + return PreambleUtil.preambleToString(mem); + } + + //Restricted methods + + /** + * Gets the internal cache array. For on-heap sketches this will return a reference to the actual + * cache array. For Memory-based sketches this returns a copy. + * @return the internal cache array. + */ + abstract long[] getCache(); + + /** + * Gets preamble longs if stored in compact form. If this sketch is already in compact form, + * this is identical to the call {@link #getCurrentPreambleLongs()}. + * @return preamble longs if stored in compact form. + */ + abstract int getCompactPreambleLongs(); + + /** + * Gets the number of data longs if stored in current state. + * @return the number of data longs if stored in current state. + */ + abstract int getCurrentDataLongs(); + + /** + * Returns preamble longs if stored in current state. + * @return number of preamble longs if stored. 
+ */ + abstract int getCurrentPreambleLongs(); + + /** + * Returns the backing MemorySegment object if it exists, otherwise null. + * @return the backing MemorySegment object if it exists, otherwise null. + */ + abstract MemorySegment getMemorySegment(); + + /** + * Gets the 16-bit seed hash + * @return the seed hash + */ + abstract short getSeedHash(); + + /** + * Returns true if given Family id is one of the theta sketches + * @param id the given Family id + * @return true if given Family id is one of the theta sketches + */ + static final boolean isValidSketchID(final int id) { + return id == Family.ALPHA.getID() + || id == Family.QUICKSELECT.getID() + || id == Family.COMPACT.getID(); + } + + /** + * Checks Ordered and Compact flags for integrity between sketch and Memory + * @param sketch the given sketch + */ + static final void checkSketchAndMemoryFlags(final Sketch sketch) { + final MemorySegment seg = sketch.getMemorySegment(); + if (seg == null) { return; } + final int flags = PreambleUtil.extractFlags(seg); + if ((flags & COMPACT_FLAG_MASK) > 0 ^ sketch.isCompact()) { + throw new SketchesArgumentException("Possible corruption: " + + "MemorySegment Compact Flag inconsistent with Sketch"); + } + if ((flags & ORDERED_FLAG_MASK) > 0 ^ sketch.isOrdered()) { + throw new SketchesArgumentException("Possible corruption: " + + "MemorySegment Ordered Flag inconsistent with Sketch"); + } + } + + static final double estimate(final long thetaLong, final int curCount) { + return curCount * (LONG_MAX_VALUE_AS_DOUBLE / thetaLong); + } + + static final double lowerBound(final int curCount, final long thetaLong, final int numStdDev, + final boolean empty) { + final double theta = thetaLong / LONG_MAX_VALUE_AS_DOUBLE; + return BinomialBoundsN.getLowerBound(curCount, theta, numStdDev, empty); + } + + static final double upperBound(final int curCount, final long thetaLong, final int numStdDev, + final boolean empty) { + final double theta = thetaLong / 
LONG_MAX_VALUE_AS_DOUBLE; + return BinomialBoundsN.getUpperBound(curCount, theta, numStdDev, empty); + } + + private static final boolean estMode(final long thetaLong, final boolean empty) { + return thetaLong < Long.MAX_VALUE && !empty; + } + + /** + * Instantiates a Heap Update Sketch from MemorySegment. Only SerVer3. SerVer 1 & 2 already handled. + * @param srcSeg the source MemorySegment + * @param expectedSeed the seed used to validate the given MemorySegment image. + * See Update Hash Seed. + * @return a Sketch + */ + private static final Sketch heapifyUpdateFromMemory(final MemorySegment srcSeg, final long expectedSeed) { + final long cap = srcSeg.byteSize(); + if (cap < 8) { + throw new SketchesArgumentException( + "Corrupted: valid sketch must be at least 8 bytes."); + } + final byte familyID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE); + final Family family = idToFamily(familyID); + + if (family == Family.ALPHA) { + final int flags = PreambleUtil.extractFlags(srcSeg); + final boolean compactFlag = (flags & COMPACT_FLAG_MASK) != 0; + if (compactFlag) { + throw new SketchesArgumentException( + "Corrupted: ALPHA family image: cannot be compact"); + } + return HeapAlphaSketch.heapifyInstance(srcSeg, expectedSeed); + } + if (family == Family.QUICKSELECT) { + return HeapQuickSelectSketch.heapifyInstance(srcSeg, expectedSeed); + } + throw new SketchesArgumentException( + "Sketch cannot heapify family: " + family + " as a Sketch"); + } + +} diff --git a/src/main/java/org/apache/datasketches/theta2/Union.java b/src/main/java/org/apache/datasketches/theta2/Union.java new file mode 100644 index 000000000..861857366 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/Union.java @@ -0,0 +1,231 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import java.lang.foreign.MemorySegment; +import java.nio.ByteBuffer; + +import org.apache.datasketches.common.Family; + +/** + * Compute the union of two or more theta sketches. + * A new instance represents an empty set. + * + * @author Lee Rhodes + */ +public abstract class Union extends SetOperation { + + /** + * Returns the number of storage bytes required for this union in its current state. + * + * @return the number of storage bytes required for this union in its current state. + */ + public abstract int getCurrentBytes(); + + @Override + public Family getFamily() { + return Family.UNION; + } + + /** + * Returns the maximum required storage bytes for this union. + * @return the maximum required storage bytes for this union. + */ + public abstract int getMaxUnionBytes(); + + /** + * Gets the result of this operation as an ordered CompactSketch on the Java heap. + * This does not disturb the underlying data structure of the union. + * Therefore, it is OK to continue updating the union after this operation. + * @return the result of this operation as an ordered CompactSketch on the Java heap + */ + public abstract CompactSketch getResult(); + + /** + * Gets the result of this operation as a CompactSketch of the chosen form. + * This does not disturb the underlying data structure of the union. 
+ * Therefore, it is OK to continue updating the union after this operation. + * + * @param dstOrdered + * See Destination Ordered + * + * @param dstSeg destination MemorySegment + * + * @return the result of this operation as a CompactSketch of the chosen form + */ + public abstract CompactSketch getResult(boolean dstOrdered, MemorySegment dstSeg); + + /** + * Resets this Union. The seed remains intact, everything else reverts back to its virgin state. + */ + public abstract void reset(); + + /** + * Returns a byte array image of this Union object + * @return a byte array image of this Union object + */ + public abstract byte[] toByteArray(); + + /** + * This implements a stateless, pair-wise union operation. The returned sketch will be cut back to + * the smaller of the two k values if required. + * + *

Nulls and empty sketches are ignored.

+ * + * @param sketchA The first argument + * @param sketchB The second argument + * @return the result ordered CompactSketch on the heap. + */ + public CompactSketch union(final Sketch sketchA, final Sketch sketchB) { + return union(sketchA, sketchB, true, null); + } + + /** + * This implements a stateless, pair-wise union operation. The returned sketch will be cut back to + * k if required, similar to the regular Union operation. + * + *

Nulls and empty sketches are ignored.

+ * + * @param sketchA The first argument + * @param sketchB The second argument + * @param dstOrdered If true, the returned CompactSketch will be ordered. + * @param dstSeg If not null, the returned CompactSketch will be placed in this MemorySegment. + * @return the result CompactSketch. + */ + public abstract CompactSketch union(Sketch sketchA, Sketch sketchB, boolean dstOrdered, + MemorySegment dstSeg); + + /** + * Perform a Union operation with this union and the given on-heap sketch of the Theta Family. + * This method is not valid for the older SetSketch, which was prior to Open Source (August, 2015). + * + *

This method can be repeatedly called. + * + *

Nulls and empty sketches are ignored.

+ * + * @param sketchIn The incoming sketch. + */ + public abstract void union(Sketch sketchIn); + + /** + * Perform a Union operation with this union and the given MemorySegment image of any sketch of the + * Theta Family. The input image may be from earlier versions of the Theta Compact Sketch, + * called the SetSketch (circa 2014), which was prior to Open Source and are compact and ordered. + * + *

This method can be repeatedly called. + * + *

Nulls and empty sketches are ignored.

+ * + * @param seg MemorySegment image of sketch to be merged + */ + public abstract void union(MemorySegment seg); + + /** + * Update this union with the given long data item. + * + * @param datum The given long datum. + */ + public abstract void update(long datum); + + /** + * Update this union with the given double (or float) data item. + * The double will be converted to a long using Double.doubleToLongBits(datum), + * which normalizes all NaN values to a single NaN representation. + * Plus and minus zero will be normalized to plus zero. + * Each of the special floating-point values NaN and +/- Infinity are treated as distinct. + * + * @param datum The given double datum. + */ + public abstract void update(double datum); + + /** + * Update this union with the with the given String data item. + * The string is converted to a byte array using UTF8 encoding. + * If the string is null or empty no update attempt is made and the method returns. + * + *

Note: this will not produce the same output hash values as the {@link #update(char[])} + * method and will generally be a little slower depending on the complexity of the UTF8 encoding. + *

+ * + *

Note: this is not a Sketch Union operation. This treats the given string as a data item.

+ * + * @param datum The given String. + */ + public abstract void update(String datum); + + /** + * Update this union with the given byte array item. + * If the byte array is null or empty no update attempt is made and the method returns. + * + *

Note: this is not a Sketch Union operation. This treats the given byte array as a data + * item.

+ * + * @param data The given byte array. + */ + public abstract void update(byte[] data); + + /** + * Update this union with the given ByteBuffer item. + * If the ByteBuffer is null or empty no update attempt is made and the method returns. + * + *

Note: this is not a Sketch Union operation. This treats the given ByteBuffer as a data + * item.

+ * + * @param data The given ByteBuffer. + */ + public abstract void update(ByteBuffer data); + + /** + * Update this union with the given integer array item. + * If the integer array is null or empty no update attempt is made and the method returns. + * + *

Note: this is not a Sketch Union operation. This treats the given integer array as a data + * item.

+ * + * @param data The given int array. + */ + public abstract void update(int[] data); + + /** + * Update this union with the given char array item. + * If the char array is null or empty no update attempt is made and the method returns. + * + *

Note: this will not produce the same output hash values as the {@link #update(String)} + * method but will be a little faster as it avoids the complexity of the UTF8 encoding.

+ * + *

Note: this is not a Sketch Union operation. This treats the given char array as a data + * item.

+ * + * @param data The given char array. + */ + public abstract void update(char[] data); + + /** + * Update this union with the given long array item. + * If the long array is null or empty no update attempt is made and the method returns. + * + *

Note: this is not a Sketch Union operation. This treats the given long array as a data + * item.

+ * + * @param data The given long array. + */ + public abstract void update(long[] data); + +} diff --git a/src/main/java/org/apache/datasketches/theta2/UnionImpl.java b/src/main/java/org/apache/datasketches/theta2/UnionImpl.java new file mode 100644 index 000000000..a86365c7d --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/UnionImpl.java @@ -0,0 +1,365 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import static java.lang.Math.min; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static org.apache.datasketches.theta2.PreambleUtil.UNION_THETA_LONG; +import static org.apache.datasketches.theta2.PreambleUtil.clearEmpty; +import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID; +import static org.apache.datasketches.theta2.PreambleUtil.extractUnionThetaLong; +import static org.apache.datasketches.theta2.PreambleUtil.insertUnionThetaLong; +import static org.apache.datasketches.thetacommon.QuickSelect.selectExcludingZeros; + +import java.lang.foreign.MemorySegment; +import java.nio.ByteBuffer; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.thetacommon.HashOperations; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * Shared code for the HeapUnion and DirectUnion implementations. + * + * @author Lee Rhodes + * @author Kevin Lang + */ +final class UnionImpl extends Union { + + /** + * Although the gadget object is initially an UpdateSketch, in the context of a Union it is used + * as a specialized buffer that happens to leverage much of the machinery of an UpdateSketch. + * However, in this context some of the key invariants of the sketch algorithm are intentionally + * violated as an optimization. As a result this object can not be considered as an UpdateSketch + * and should never be exported as an UpdateSketch. Its internal state is not necessarily + * finalized and may contain garbage. Also its internal concept of "nominal entries" or "k" can + * be meaningless. It is private for very good reasons. + */ + private final UpdateSketch gadget_; + private final short expectedSeedHash_; //eliminates having to compute the seedHash on every union. 
+ private long unionThetaLong_; //when on-heap, this is the only copy + private boolean unionEmpty_; //when on-heap, this is the only copy + + private UnionImpl(final UpdateSketch gadget, final long seed) { + gadget_ = gadget; + expectedSeedHash_ = ThetaUtil.computeSeedHash(seed); + } + + /** + * Construct a new Union SetOperation on the java heap. + * Called by SetOperationBuilder. + * + * @param lgNomLongs See lgNomLongs + * @param seed See seed + * @param p See Sampling Probability, p + * @param rf See Resize Factor + * @return instance of this sketch + */ + static UnionImpl initNewHeapInstance( + final int lgNomLongs, + final long seed, + final float p, + final ResizeFactor rf) { + final UpdateSketch gadget = //create with UNION family + new HeapQuickSelectSketch(lgNomLongs, seed, p, rf, true); + final UnionImpl unionImpl = new UnionImpl(gadget, seed); + unionImpl.unionThetaLong_ = gadget.getThetaLong(); + unionImpl.unionEmpty_ = gadget.isEmpty(); + return unionImpl; + } + + /** + * Construct a new Direct Union in the off-heap destination MemorySegment. + * Called by SetOperationBuilder. + * + * @param lgNomLongs See lgNomLongs. + * @param seed See seed + * @param p See Sampling Probability, p + * @param rf See Resize Factor + * @param dstSeg the given MemorySegment object destination. + * It will be cleared prior to use. + * @return this class + */ + static UnionImpl initNewDirectInstance( + final int lgNomLongs, + final long seed, + final float p, + final ResizeFactor rf, + final MemorySegment dstSeg) { + final UpdateSketch gadget = //create with UNION family + new DirectQuickSelectSketch(lgNomLongs, seed, p, rf, dstSeg, true); + final UnionImpl unionImpl = new UnionImpl(gadget, seed); + unionImpl.unionThetaLong_ = gadget.getThetaLong(); + unionImpl.unionEmpty_ = gadget.isEmpty(); + return unionImpl; + } + + /** + * Heapify a Union from a MemorySegment Union object containing data. 
+ * Called by SetOperation. + * @param srcSeg The source MemorySegment Union object. + * @param expectedSeed the seed used to validate the given MemorySegment image. + * See seed + * @return this class + */ + static UnionImpl heapifyInstance(final MemorySegment srcSeg, final long expectedSeed) { + Family.UNION.checkFamilyID(extractFamilyID(srcSeg)); + final UpdateSketch gadget = HeapQuickSelectSketch.heapifyInstance(srcSeg, expectedSeed); + final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed); + unionImpl.unionThetaLong_ = extractUnionThetaLong(srcSeg); + unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcSeg); + return unionImpl; + } + + /** + * Fast-wrap a Union object around a Union MemorySegment object containing data. + * This does NO validity checking of the given MemorySegment. + * @param srcSeg The source MemorySegment object. + * @param expectedSeed the seed used to validate the given MemorySegment image. + * See seed + * @return this class + */ + static UnionImpl fastWrap(final MemorySegment srcSeg, final long expectedSeed) { + Family.UNION.checkFamilyID(extractFamilyID(srcSeg)); + final UpdateSketch gadget = DirectQuickSelectSketch.fastWritableWrap(srcSeg, expectedSeed); + final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed); + unionImpl.unionThetaLong_ = extractUnionThetaLong(srcSeg); + unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcSeg); + return unionImpl; + } + + /** + * Wrap a Union object around a Union MemorySegment object containing data. + * Called by SetOperation. + * @param srcSeg The source MemorySegment object. + * @param expectedSeed the seed used to validate the given MemorySegment image. 
+ * See seed + * @return this class + */ + static UnionImpl wrapInstance(final MemorySegment srcSeg, final long expectedSeed) { + Family.UNION.checkFamilyID(extractFamilyID(srcSeg)); + final UpdateSketch gadget = DirectQuickSelectSketchR.readOnlyWrap(srcSeg, expectedSeed); + final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed); + unionImpl.unionThetaLong_ = extractUnionThetaLong(srcSeg); + unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcSeg); + return unionImpl; + } + + @Override + public int getCurrentBytes() { + return gadget_.getCurrentBytes(); + } + + @Override + public int getMaxUnionBytes() { + final int lgK = gadget_.getLgNomLongs(); + return (16 << lgK) + (Family.UNION.getMaxPreLongs() << 3); + } + + @Override + public CompactSketch getResult() { + return getResult(true, null); + } + + @Override + public CompactSketch getResult(final boolean dstOrdered, final MemorySegment dstSeg) { + final int gadgetCurCount = gadget_.getRetainedEntries(true); + final int k = 1 << gadget_.getLgNomLongs(); + final long[] gadgetCacheCopy = + gadget_.hasMemorySegment() ? gadget_.getCache() : gadget_.getCache().clone(); + + //Pull back to k + final long curGadgetThetaLong = gadget_.getThetaLong(); + final long adjGadgetThetaLong = gadgetCurCount > k + ? selectExcludingZeros(gadgetCacheCopy, gadgetCurCount, k + 1) : curGadgetThetaLong; + + //Finalize Theta and curCount + final long unionThetaLong = gadget_.hasMemorySegment() + ? gadget_.getMemorySegment().get(JAVA_LONG_UNALIGNED, UNION_THETA_LONG) + : unionThetaLong_; + + final long minThetaLong = min(min(curGadgetThetaLong, adjGadgetThetaLong), unionThetaLong); + final int curCountOut = minThetaLong < curGadgetThetaLong + ? 
HashOperations.count(gadgetCacheCopy, minThetaLong) + : gadgetCurCount; + + //Compact the cache + final long[] compactCacheOut = + CompactOperations.compactCache(gadgetCacheCopy, curCountOut, minThetaLong, dstOrdered); + final boolean empty = gadget_.isEmpty() && unionEmpty_; + final short seedHash = gadget_.getSeedHash(); + return CompactOperations.componentsToCompact( + minThetaLong, curCountOut, seedHash, empty, true, dstOrdered, dstOrdered, dstSeg, compactCacheOut); + } + + @Override + public boolean hasMemorySegment() { + return gadget_ instanceof DirectQuickSelectSketchR + ? gadget_.hasMemorySegment() : false; + } + + @Override + public boolean isDirect() { + return gadget_ instanceof DirectQuickSelectSketchR + ? gadget_.isDirect() : false; + } + + @Override + public void reset() { + gadget_.reset(); + unionThetaLong_ = gadget_.getThetaLong(); + unionEmpty_ = gadget_.isEmpty(); + } + + @Override + public byte[] toByteArray() { + final byte[] gadgetByteArr = gadget_.toByteArray(); + final MemorySegment seg = MemorySegment.ofArray(gadgetByteArr); + insertUnionThetaLong(seg, unionThetaLong_); + if (gadget_.isEmpty() != unionEmpty_) { + clearEmpty(seg); + unionEmpty_ = false; + } + return gadgetByteArr; + } + + @Override //Stateless Union + public CompactSketch union(final Sketch sketchA, final Sketch sketchB, final boolean dstOrdered, + final MemorySegment dstSeg) { + reset(); + union(sketchA); + union(sketchB); + final CompactSketch csk = getResult(dstOrdered, dstSeg); + reset(); + return csk; + } + + @Override + public void union(final Sketch sketchIn) { + //UNION Empty Rule: AND the empty states. + + if (sketchIn == null || sketchIn.isEmpty()) { + //null and empty is interpreted as (Theta = 1.0, count = 0, empty = T). 
Nothing changes + return; + } + //sketchIn is valid and not empty + ThetaUtil.checkSeedHashes(expectedSeedHash_, sketchIn.getSeedHash()); + if (sketchIn instanceof SingleItemSketch) { + gadget_.hashUpdate(sketchIn.getCache()[0]); + return; + } + Sketch.checkSketchAndMemoryFlags(sketchIn); + + unionThetaLong_ = min(min(unionThetaLong_, sketchIn.getThetaLong()), gadget_.getThetaLong()); //Theta rule + unionEmpty_ = false; + final boolean isOrdered = sketchIn.isOrdered(); + final HashIterator it = sketchIn.iterator(); + while (it.next()) { + final long hash = it.get(); + if (hash < unionThetaLong_ && hash < gadget_.getThetaLong()) { + gadget_.hashUpdate(hash); // backdoor update, hash function is bypassed + } else { + if (isOrdered) { break; } + } + } + unionThetaLong_ = min(unionThetaLong_, gadget_.getThetaLong()); //Theta rule with gadget + if (gadget_.hasMemorySegment()) { + final MemorySegment wseg = gadget_.getMemorySegment(); + PreambleUtil.insertUnionThetaLong(wseg, unionThetaLong_); + PreambleUtil.clearEmpty(wseg); + } + } + + @Override + public void union(final MemorySegment seg) { + if (seg != null) { + union(Sketch.wrap(seg)); + } + } + + @Override + public void update(final long datum) { + gadget_.update(datum); + } + + @Override + public void update(final double datum) { + gadget_.update(datum); + } + + @Override + public void update(final String datum) { + gadget_.update(datum); + } + + @Override + public void update(final byte[] data) { + gadget_.update(data); + } + + @Override + public void update(final ByteBuffer data) { + gadget_.update(data); + } + + @Override + public void update(final char[] data) { + gadget_.update(data); + } + + @Override + public void update(final int[] data) { + gadget_.update(data); + } + + @Override + public void update(final long[] data) { + gadget_.update(data); + } + + //Restricted + + @Override + long[] getCache() { + return gadget_.getCache(); + } + + @Override + int getRetainedEntries() { + return 
gadget_.getRetainedEntries(true); + } + + @Override + short getSeedHash() { + return gadget_.getSeedHash(); + } + + @Override + long getThetaLong() { + return min(unionThetaLong_, gadget_.getThetaLong()); + } + + @Override + boolean isEmpty() { + return gadget_.isEmpty() && unionEmpty_; + } + +} diff --git a/src/main/java/org/apache/datasketches/theta2/UpdateReturnState.java b/src/main/java/org/apache/datasketches/theta2/UpdateReturnState.java new file mode 100644 index 000000000..6c12ca7c2 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/UpdateReturnState.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +/** + * See Update Return State + * + * @author Lee Rhodes + */ +public enum UpdateReturnState { + + /** + * The hash was accepted into the sketch and the retained count was incremented. + */ + InsertedCountIncremented, //all UpdateSketches + + /** + * The hash was accepted into the sketch, the retained count was incremented. + * The current cache was out of room and resized larger based on the Resize Factor. 
+ */ + InsertedCountIncrementedResized, //used by HeapQuickSelectSketch + + /** + * The hash was accepted into the sketch, the retained count was incremented. + * The current cache was out of room and at maximum size, so the cache was rebuilt. + */ + InsertedCountIncrementedRebuilt, //used by HeapQuickSelectSketch + + /** + * The hash was accepted into the sketch and the retained count was not incremented. + */ + InsertedCountNotIncremented, //used by enhancedHashInsert for Alpha + + /** + * The hash was inserted into the local concurrent buffer, + * but has not yet been propagated to the concurrent shared sketch. + */ + ConcurrentBufferInserted, //used by ConcurrentHeapThetaBuffer + + /** + * The hash has been propagated to the concurrent shared sketch. + * This does not reflect the action taken by the shared sketch. + */ + ConcurrentPropagated, //used by ConcurrentHeapThetaBuffer + + /** + * The hash was rejected as a duplicate. + */ + RejectedDuplicate, //all UpdateSketches hashUpdate(), enhancedHashInsert + + /** + * The hash was rejected because it was null or empty. + */ + RejectedNullOrEmpty, //UpdateSketch.update(arr[]) + + /** + * The hash was rejected because the value was negative, zero or + * greater than theta. + */ + RejectedOverTheta; //all UpdateSketches.hashUpdate() + +} diff --git a/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java b/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java new file mode 100644 index 000000000..ee5f93ea2 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java @@ -0,0 +1,469 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; +import static org.apache.datasketches.common.Util.checkBounds; +import static org.apache.datasketches.hash.MurmurHash3.hash; +import static org.apache.datasketches.theta2.CompactOperations.componentsToCompact; +import static org.apache.datasketches.theta2.PreambleUtil.BIG_ENDIAN_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.COMPACT_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.READ_ONLY_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.SER_VER; +import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.checkMemorySeedHash; +import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID; +import static org.apache.datasketches.theta2.PreambleUtil.extractFlags; +import static org.apache.datasketches.theta2.PreambleUtil.extractLgResizeFactor; +import static org.apache.datasketches.theta2.PreambleUtil.extractP; +import static org.apache.datasketches.theta2.PreambleUtil.extractSerVer; +import static 
org.apache.datasketches.theta2.PreambleUtil.extractThetaLong; +import static org.apache.datasketches.theta2.PreambleUtil.getMemBytes; +import static org.apache.datasketches.theta2.UpdateReturnState.RejectedNullOrEmpty; + +import java.lang.foreign.MemorySegment; +import java.nio.ByteBuffer; +import java.util.Objects; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * The parent class for the Update Sketch families, such as QuickSelect and Alpha. + * The primary task of an Update Sketch is to consider datums presented via the update() methods + * for inclusion in its internal cache. This is the sketch building process. + * + * @author Lee Rhodes + */ +public abstract class UpdateSketch extends Sketch { + + UpdateSketch() {} + + /** + * Wrap takes the writable sketch image in MemorySegment and refers to it directly. There is no data copying onto + * the java heap. Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have + * been explicitly stored as writable, direct objects can be wrapped. This method assumes the + * {@link org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_UPDATE_SEED}. + * Default Update Seed. + * @param srcWSeg an image of a writable sketch where the image seed hash matches the default seed hash. + * It must have a size of at least 24 bytes. + * @return an UpdateSketch backed by the given MemorySegment + */ + public static UpdateSketch wrap(final MemorySegment srcWSeg) { + return wrap(srcWSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + } + + /** + * Wrap takes the sketch image in MemorySegment and refers to it directly. There is no data copying onto + * the java heap. Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have + * been explicitly stored as writable direct objects can be wrapped. 
+ * An attempt to "wrap" earlier version sketches will result in a "heapified", normal + * Java Heap version of the sketch where all data will be copied to the heap. + * @param srcWSeg an image of a writable sketch where the image seed hash matches the given seed hash. + * It must have a size of at least 24 bytes. + * @param expectedSeed the seed used to validate the given MemorySegment image. + * See Update Hash Seed. + * Compact sketches store a 16-bit hash of the seed, but not the seed itself. + * @return an UpdateSketch backed by the given MemorySegment + */ + public static UpdateSketch wrap(final MemorySegment srcWSeg, final long expectedSeed) { + Objects.requireNonNull(srcWSeg, "Source MemorySegment must not be null"); + checkBounds(0, 24, srcWSeg.byteSize()); //need min 24 bytes + final int preLongs = srcWSeg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; + final int serVer = srcWSeg.get(JAVA_BYTE, SER_VER_BYTE) & 0XFF; + final int familyID = srcWSeg.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF; + final Family family = Family.idToFamily(familyID); + if (family != Family.QUICKSELECT) { + throw new SketchesArgumentException( + "A " + family + " sketch cannot be wrapped as an UpdateSketch."); + } + if ((serVer == 3) && (preLongs == 3)) { + return DirectQuickSelectSketch.writableWrap(srcWSeg, expectedSeed); + } else { + throw new SketchesArgumentException( + "Corrupted: An UpdateSketch image must have SerVer = 3 and preLongs = 3"); + } + } + + /** + * Instantiates an on-heap UpdateSketch from a MemorySegment. This method assumes the + * {@link org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_UPDATE_SEED}. + * @param srcSeg the given MemorySegment with a sketch image. + * It must have a size of at least 24 bytes. + * @return an UpdateSketch + */ + public static UpdateSketch heapify(final MemorySegment srcSeg) { + return heapify(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + } + + /** + * Instantiates an on-heap UpdateSketch from a MemorySegment. 
+ * @param srcSeg the given MemorySegment. + * It must have a size of at least 24 bytes. + * @param expectedSeed the seed used to validate the given MemorySegment image. + * See Update Hash Seed. + * @return an UpdateSketch + */ + public static UpdateSketch heapify(final MemorySegment srcSeg, final long expectedSeed) { + Objects.requireNonNull(srcSeg, "Source Memory must not be null"); + checkBounds(0, 24, srcSeg.byteSize()); //need min 24 bytes + final Family family = Family.idToFamily(srcSeg.get(JAVA_BYTE, FAMILY_BYTE)); + if (family.equals(Family.ALPHA)) { + return HeapAlphaSketch.heapifyInstance(srcSeg, expectedSeed); + } + return HeapQuickSelectSketch.heapifyInstance(srcSeg, expectedSeed); + } + + //Sketch interface + + @Override + public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstWSeg) { + return componentsToCompact(getThetaLong(), getRetainedEntries(true), getSeedHash(), isEmpty(), + false, false, dstOrdered, dstWSeg, getCache()); + } + + @Override + public int getCompactBytes() { + final int preLongs = getCompactPreambleLongs(); + final int dataLongs = getRetainedEntries(true); + return (preLongs + dataLongs) << 3; + } + + @Override + int getCurrentDataLongs() { + return 1 << getLgArrLongs(); + } + + @Override + public boolean isCompact() { + return false; + } + + @Override + public boolean isOrdered() { + return false; + } + + //UpdateSketch interface + + /** + * Returns a new builder + * @return a new builder + */ + public static final UpdateSketchBuilder builder() { + return new UpdateSketchBuilder(); + } + + /** + * Returns the configured ResizeFactor + * @return the configured ResizeFactor + */ + public abstract ResizeFactor getResizeFactor(); + + /** + * Gets the configured sampling probability, p. 
+ * See Sampling Probability, p + * @return the sampling probability, p + */ + abstract float getP(); + + /** + * Gets the configured seed + * @return the configured seed + */ + abstract long getSeed(); + + /** + * Resets this sketch back to a virgin empty state. + */ + public abstract void reset(); + + /** + * Rebuilds the hash table to remove dirty values or to reduce the size + * to nominal entries. + * @return this sketch + */ + public abstract UpdateSketch rebuild(); + + /** + * Present this sketch with a long. + * + * @param datum The given long datum. + * @return + * See Update Return State + */ + public UpdateReturnState update(final long datum) { + final long[] data = { datum }; + return hashUpdate(hash(data, getSeed())[0] >>> 1); + } + + /** + * Present this sketch with the given double (or float) datum. + * The double will be converted to a long using Double.doubleToLongBits(datum), + * which normalizes all NaN values to a single NaN representation. + * Plus and minus zero will be normalized to plus zero. + * The special floating-point values NaN and +/- Infinity are treated as distinct. + * + * @param datum The given double datum. + * @return + * See Update Return State + */ + public UpdateReturnState update(final double datum) { + final double d = (datum == 0.0) ? 0.0 : datum; // canonicalize -0.0, 0.0 + final long[] data = { Double.doubleToLongBits(d) };// canonicalize all NaN & +/- infinity forms + return hashUpdate(hash(data, getSeed())[0] >>> 1); + } + + /** + * Present this sketch with the given String. + * The string is converted to a byte array using UTF8 encoding. + * If the string is null or empty no update attempt is made and the method returns. + * + *

Note: this will not produce the same output hash values as the {@link #update(char[])} + * method and will generally be a little slower depending on the complexity of the UTF8 encoding. + *

+ * + * @param datum The given String. + * @return + * See Update Return State + */ + public UpdateReturnState update(final String datum) { + if ((datum == null) || datum.isEmpty()) { + return RejectedNullOrEmpty; + } + final byte[] data = datum.getBytes(UTF_8); + return hashUpdate(hash(data, getSeed())[0] >>> 1); + } + + /** + * Present this sketch with the given byte array. + * If the byte array is null or empty no update attempt is made and the method returns. + * + * @param data The given byte array. + * @return + * See Update Return State + */ + public UpdateReturnState update(final byte[] data) { + if ((data == null) || (data.length == 0)) { + return RejectedNullOrEmpty; + } + return hashUpdate(hash(data, getSeed())[0] >>> 1); + } + + /** + * Present this sketch with the given ByteBuffer + * If the ByteBuffer is null or empty, no update attempt is made and the method returns. + * + * @param buffer the input ByteBuffer + * @return + * See Update Return State + */ + public UpdateReturnState update(final ByteBuffer buffer) { + if (buffer == null || buffer.hasRemaining() == false) { + return RejectedNullOrEmpty; + } + return hashUpdate(hash(buffer, getSeed())[0] >>> 1); + } + + /** + * Present this sketch with the given char array. + * If the char array is null or empty no update attempt is made and the method returns. + * + *

Note: this will not produce the same output hash values as the {@link #update(String)} + * method but will be a little faster as it avoids the complexity of the UTF8 encoding.

+ * + * @param data The given char array. + * @return + * See Update Return State + */ + public UpdateReturnState update(final char[] data) { + if ((data == null) || (data.length == 0)) { + return RejectedNullOrEmpty; + } + return hashUpdate(hash(data, getSeed())[0] >>> 1); + } + + /** + * Present this sketch with the given integer array. + * If the integer array is null or empty no update attempt is made and the method returns. + * + * @param data The given int array. + * @return + * See Update Return State + */ + public UpdateReturnState update(final int[] data) { + if ((data == null) || (data.length == 0)) { + return RejectedNullOrEmpty; + } + return hashUpdate(hash(data, getSeed())[0] >>> 1); + } + + /** + * Present this sketch with the given long array. + * If the long array is null or empty no update attempt is made and the method returns. + * + * @param data The given long array. + * @return + * See Update Return State + */ + public UpdateReturnState update(final long[] data) { + if ((data == null) || (data.length == 0)) { + return RejectedNullOrEmpty; + } + return hashUpdate(hash(data, getSeed())[0] >>> 1); + } + + //restricted methods + + /** + * All potential updates converge here. + * + *

Don't ever call this unless you really know what you are doing!

+ * + * @param hash the given input hash value. A hash of zero or Long.MAX_VALUE is ignored. + * A negative hash value will throw an exception. + * @return See Update Return State + */ + abstract UpdateReturnState hashUpdate(long hash); + + /** + * Gets the Log base 2 of the current size of the internal cache + * @return the Log base 2 of the current size of the internal cache + */ + abstract int getLgArrLongs(); + + /** + * Gets the Log base 2 of the configured nominal entries + * @return the Log base 2 of the configured nominal entries + */ + public abstract int getLgNomLongs(); + + /** + * Returns true if the internal cache contains "dirty" values that are greater than or equal + * to thetaLong. + * @return true if the internal cache is dirty. + */ + abstract boolean isDirty(); + + /** + * Returns true if numEntries (curCount) is greater than the hashTableThreshold. + * @param numEntries the given number of entries (or current count). + * @return true if numEntries (curCount) is greater than the hashTableThreshold. 
+ */ + abstract boolean isOutOfSpace(int numEntries); + + static void checkUnionQuickSelectFamily(final MemorySegment seg, final int preambleLongs, + final int lgNomLongs) { + //Check Family + final int familyID = extractFamilyID(seg); //byte 2 + final Family family = Family.idToFamily(familyID); + if (family.equals(Family.UNION)) { + if (preambleLongs != Family.UNION.getMinPreLongs()) { + throw new SketchesArgumentException( + "Possible corruption: Invalid PreambleLongs value for UNION: " + preambleLongs); + } + } + else if (family.equals(Family.QUICKSELECT)) { + if (preambleLongs != Family.QUICKSELECT.getMinPreLongs()) { + throw new SketchesArgumentException( + "Possible corruption: Invalid PreambleLongs value for QUICKSELECT: " + preambleLongs); + } + } else { + throw new SketchesArgumentException( + "Possible corruption: Invalid Family: " + family.toString()); + } + + //Check lgNomLongs + if (lgNomLongs < ThetaUtil.MIN_LG_NOM_LONGS) { + throw new SketchesArgumentException( + "Possible corruption: Current Memory lgNomLongs < min required size: " + + lgNomLongs + " < " + ThetaUtil.MIN_LG_NOM_LONGS); + } + } + + static void checkMemIntegrity(final MemorySegment srcSeg, final long expectedSeed, final int preambleLongs, + final int lgNomLongs, final int lgArrLongs) { + + //Check SerVer + final int serVer = extractSerVer(srcSeg); //byte 1 + if (serVer != SER_VER) { + throw new SketchesArgumentException( + "Possible corruption: Invalid Serialization Version: " + serVer); + } + + //Check flags + final int flags = extractFlags(srcSeg); //byte 5 + final int flagsMask = + ORDERED_FLAG_MASK | COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | BIG_ENDIAN_FLAG_MASK; + if ((flags & flagsMask) > 0) { + throw new SketchesArgumentException( + "Possible corruption: Input srcMem cannot be: big-endian, compact, ordered, or read-only"); + } + + //Check seed hashes + final short seedHash = checkMemorySeedHash(srcSeg, expectedSeed); //byte 6,7 + ThetaUtil.checkSeedHashes(seedHash, 
ThetaUtil.computeSeedHash(expectedSeed)); + + //Check mem capacity, lgArrLongs + final long curCapBytes = srcSeg.byteSize(); + final int minReqBytes = getMemBytes(lgArrLongs, preambleLongs); + if (curCapBytes < minReqBytes) { + throw new SketchesArgumentException( + "Possible corruption: Current Memory size < min required size: " + + curCapBytes + " < " + minReqBytes); + } + //check Theta, p + final float p = extractP(srcSeg); //bytes 12-15 + final long thetaLong = extractThetaLong(srcSeg); //bytes 16-23 + final double theta = thetaLong / LONG_MAX_VALUE_AS_DOUBLE; + //if (lgArrLongs <= lgNomLongs) the sketch is still resizing, thus theta cannot be < p. + if ((lgArrLongs <= lgNomLongs) && (theta < p) ) { + throw new SketchesArgumentException( + "Possible corruption: Theta cannot be < p and lgArrLongs <= lgNomLongs. " + + lgArrLongs + " <= " + lgNomLongs + ", Theta: " + theta + ", p: " + p); + } + } + + /** + * This checks to see if the memory RF factor was set correctly as early versions may not + * have set it. + * @param srcSeg the source MemorySegment + * @param lgNomLongs the current lgNomLongs + * @param lgArrLongs the current lgArrLongs + * @return true if the the memory RF factor is incorrect and the caller can either + * correct it or throw an error. 
+ */ + static boolean isResizeFactorIncorrect(final MemorySegment srcSeg, final int lgNomLongs, + final int lgArrLongs) { + final int lgT = lgNomLongs + 1; + final int lgA = lgArrLongs; + final int lgR = extractLgResizeFactor(srcSeg); + if (lgR == 0) { return lgA != lgT; } + return !(((lgT - lgA) % lgR) == 0); + } + +} diff --git a/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java b/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java new file mode 100644 index 000000000..e8353888f --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java @@ -0,0 +1,493 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import static org.apache.datasketches.common.Util.LS; +import static org.apache.datasketches.common.Util.TAB; +import static org.apache.datasketches.common.Util.ceilingPowerOf2; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.SketchesStateException; +import org.apache.datasketches.common.SuppressFBWarnings; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * For building a new UpdateSketch. + * + * @author Lee Rhodes + */ +public class UpdateSketchBuilder { + private int bLgNomLongs; + private long bSeed; + private ResizeFactor bRF; + private Family bFam; + private float bP; + + //Fields for concurrent theta sketch + private int bNumPoolThreads; + private int bLocalLgNomLongs; + private boolean bPropagateOrderedCompact; + private double bMaxConcurrencyError; + private int bMaxNumLocalThreads; + + /** + * Constructor for building a new UpdateSketch. The default configuration is + *
    + *
  • Nominal Entries: {@value org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_NOMINAL_ENTRIES}
  • + *
  • Seed: {@value org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_UPDATE_SEED}
  • + *
  • Input Sampling Probability: 1.0
  • + *
  • Family: {@link org.apache.datasketches.common.Family#QUICKSELECT}
  • + *
  • Resize Factor: The default for sketches on the Java heap is {@link ResizeFactor#X8}. + * For direct sketches, which are targeted for native memory off the Java heap, this value will + * be fixed at either {@link ResizeFactor#X1} or {@link ResizeFactor#X2}.
  • + *
  • MemoryRequestServer (Direct only): + * {@link org.apache.datasketches.memory.DefaultMemoryRequestServer}.
  • + *
+ * Parameters unique to the concurrent sketches only: + *
    + *
  • Number of local Nominal Entries: 16 (Log Nominal Entries: 4)
  • + *
  • Concurrent NumPoolThreads: 3
  • + *
  • Concurrent PropagateOrderedCompact: true
  • + *
  • Concurrent MaxConcurrencyError: 0
  • + *
+ */ + public UpdateSketchBuilder() { + bLgNomLongs = Integer.numberOfTrailingZeros(ThetaUtil.DEFAULT_NOMINAL_ENTRIES); + bSeed = ThetaUtil.DEFAULT_UPDATE_SEED; + bP = (float) 1.0; + bRF = ResizeFactor.X8; + bFam = Family.QUICKSELECT; + // Default values for concurrent sketch + bNumPoolThreads = ConcurrentPropagationService.NUM_POOL_THREADS; + bLocalLgNomLongs = 4; //default is smallest legal QS sketch + bPropagateOrderedCompact = true; + bMaxConcurrencyError = 0; + bMaxNumLocalThreads = 1; + } + + /** + * Sets the Nominal Entries for this sketch. + * This value is also used for building a shared concurrent sketch. + * The minimum value is 16 (2^4) and the maximum value is 67,108,864 (2^26). + * Be aware that sketches as large as this maximum value may not have been + * thoroughly tested or characterized for performance. + * + * @param nomEntries Nominal Entries + * This will become the ceiling power of 2 if the given value is not. + * @return this UpdateSketchBuilder + */ + public UpdateSketchBuilder setNominalEntries(final int nomEntries) { + bLgNomLongs = ThetaUtil.checkNomLongs(nomEntries); + return this; + } + + /** + * Alternative method of setting the Nominal Entries for this sketch from the log_base2 value. + * This value is also used for building a shared concurrent sketch. + * The minimum value is 4 and the maximum value is 26. + * Be aware that sketches as large as this maximum value may not have been + * thoroughly characterized for performance. + * + * @param lgNomEntries the Log Nominal Entries. Also for the concurrent shared sketch + * @return this UpdateSketchBuilder + */ + public UpdateSketchBuilder setLogNominalEntries(final int lgNomEntries) { + bLgNomLongs = ThetaUtil.checkNomLongs(1 << lgNomEntries); + return this; + } + + /** + * Returns Log-base 2 Nominal Entries + * @return Log-base 2 Nominal Entries + */ + public int getLgNominalEntries() { + return bLgNomLongs; + } + + /** + * Sets the Nominal Entries for the concurrent local sketch. 
The minimum value is 16 and the + * maximum value is 67,108,864, which is 2^26. + * Be aware that sketches as large as this maximum + * value have not been thoroughly tested or characterized for performance. + * + * @param nomEntries Nominal Entries + * This will become the ceiling power of 2 if it is not. + * @return this UpdateSketchBuilder + */ + public UpdateSketchBuilder setLocalNominalEntries(final int nomEntries) { + bLocalLgNomLongs = Integer.numberOfTrailingZeros(ceilingPowerOf2(nomEntries)); + if ((bLocalLgNomLongs > ThetaUtil.MAX_LG_NOM_LONGS) || (bLocalLgNomLongs < ThetaUtil.MIN_LG_NOM_LONGS)) { + throw new SketchesArgumentException( + "Nominal Entries must be >= 16 and <= 67108864: " + nomEntries); + } + return this; + } + + /** + * Alternative method of setting the Nominal Entries for a local concurrent sketch from the + * log_base2 value. + * The minimum value is 4 and the maximum value is 26. + * Be aware that sketches as large as this maximum + * value have not been thoroughly tested or characterized for performance. + * + * @param lgNomEntries the Log Nominal Entries for a concurrent local sketch + * @return this UpdateSketchBuilder + */ + public UpdateSketchBuilder setLocalLogNominalEntries(final int lgNomEntries) { + bLocalLgNomLongs = lgNomEntries; + if ((bLocalLgNomLongs > ThetaUtil.MAX_LG_NOM_LONGS) || (bLocalLgNomLongs < ThetaUtil.MIN_LG_NOM_LONGS)) { + throw new SketchesArgumentException( + "Log Nominal Entries must be >= 4 and <= 26: " + lgNomEntries); + } + return this; + } + + /** + * Returns Log-base 2 Nominal Entries for the concurrent local sketch + * @return Log-base 2 Nominal Entries for the concurrent local sketch + */ + public int getLocalLgNominalEntries() { + return bLocalLgNomLongs; + } + + /** + * Sets the long seed value that is required by the hashing function. 
+ * @param seed See seed + * @return this UpdateSketchBuilder + */ + public UpdateSketchBuilder setSeed(final long seed) { + bSeed = seed; + return this; + } + + /** + * Returns the seed + * @return the seed + */ + public long getSeed() { + return bSeed; + } + + /** + * Sets the upfront uniform sampling probability, p + * @param p See Sampling Probability, p + * @return this UpdateSketchBuilder + */ + public UpdateSketchBuilder setP(final float p) { + if ((p <= 0.0) || (p > 1.0)) { + throw new SketchesArgumentException("p must be > 0 and <= 1.0: " + p); + } + bP = p; + return this; + } + + /** + * Returns the pre-sampling probability p + * @return the pre-sampling probability p + */ + public float getP() { + return bP; + } + + /** + * Sets the cache Resize Factor. + * @param rf See Resize Factor + * @return this UpdateSketchBuilder + */ + public UpdateSketchBuilder setResizeFactor(final ResizeFactor rf) { + bRF = rf; + return this; + } + + /** + * Returns the Resize Factor + * @return the Resize Factor + */ + public ResizeFactor getResizeFactor() { + return bRF; + } + + /** + * Set the Family. + * @param family the family for this builder + * @return this UpdateSketchBuilder + */ + public UpdateSketchBuilder setFamily(final Family family) { + bFam = family; + return this; + } + + /** + * Returns the Family + * @return the Family + */ + public Family getFamily() { + return bFam; + } + + /** + * Sets the number of pool threads used for background propagation in the concurrent sketches. + * @param numPoolThreads the given number of pool threads + */ + public void setNumPoolThreads(final int numPoolThreads) { + bNumPoolThreads = numPoolThreads; + } + + /** + * Gets the number of background pool threads used for propagation in the concurrent sketches. + * @return the number of background pool threads + */ + public int getNumPoolThreads() { + return bNumPoolThreads; + } + + /** + * Sets the Propagate Ordered Compact flag to the given value. Used with concurrent sketches. 
+ * + * @param prop the given value + * @return this UpdateSketchBuilder + */ + public UpdateSketchBuilder setPropagateOrderedCompact(final boolean prop) { + bPropagateOrderedCompact = prop; + return this; + } + + /** + * Gets the Propagate Ordered Compact flag used with concurrent sketches. + * @return the Propagate Ordered Compact flag + */ + public boolean getPropagateOrderedCompact() { + return bPropagateOrderedCompact; + } + + /** + * Sets the Maximum Concurrency Error. + * @param maxConcurrencyError the given Maximum Concurrency Error. + */ + public void setMaxConcurrencyError(final double maxConcurrencyError) { + bMaxConcurrencyError = maxConcurrencyError; + } + + /** + * Gets the Maximum Concurrency Error + * @return the Maximum Concurrency Error + */ + public double getMaxConcurrencyError() { + return bMaxConcurrencyError; + } + + /** + * Sets the Maximum Number of Local Threads. + * This is used to set the size of the local concurrent buffers. + * @param maxNumLocalThreads the given Maximum Number of Local Threads + */ + public void setMaxNumLocalThreads(final int maxNumLocalThreads) { + bMaxNumLocalThreads = maxNumLocalThreads; + } + + /** + * Gets the Maximum Number of Local Threads. + * @return the Maximum Number of Local Threads. + */ + public int getMaxNumLocalThreads() { + return bMaxNumLocalThreads; + } + + // BUILD FUNCTIONS + + /** + * Returns an UpdateSketch with the current configuration of this Builder. + * @return an UpdateSketch + */ + public UpdateSketch build() { + return build(null); + } + + /** + * Returns an UpdateSketch with the current configuration of this Builder + * with the specified backing destination MemorySegment store. + * Note: this cannot be used with the Alpha Family of sketches. + * @param dstSeg The destination MemorySegment. 
+ * @return an UpdateSketch + */ + public UpdateSketch build(final MemorySegment dstSeg) { + UpdateSketch sketch = null; + switch (bFam) { + case ALPHA: { + if (dstSeg == null) { + sketch = HeapAlphaSketch.newHeapInstance(bLgNomLongs, bSeed, bP, bRF); + } + else { + throw new SketchesArgumentException("AlphaSketch cannot be made Direct to Memory."); + } + break; + } + case QUICKSELECT: { + if (dstSeg == null) { + sketch = new HeapQuickSelectSketch(bLgNomLongs, bSeed, bP, bRF, false); + } + else { + sketch = new DirectQuickSelectSketch( + bLgNomLongs, bSeed, bP, bRF, dstSeg, false); + } + break; + } + default: { + throw new SketchesArgumentException( + "Given Family cannot be built as a Theta Sketch: " + bFam.toString()); + } + } + return sketch; + } + + /** + * Returns an on-heap concurrent shared UpdateSketch with the current configuration of the + * Builder. + * + *

The parameters unique to the shared concurrent sketch are: + *

    + *
  • Number of Pool Threads (default is 3)
  • + *
  • Maximum Concurrency Error
  • + *
+ * + *

Key parameters that are in common with other Theta sketches: + *

    + *
  • Nominal Entries or Log Nominal Entries (for the shared concurrent sketch)
  • + *
+ * + * @return an on-heap concurrent UpdateSketch with the current configuration of the Builder. + */ + public UpdateSketch buildShared() { + return buildShared(null); + } + + /** + * Returns a direct (potentially off-heap) concurrent shared UpdateSketch with the current + * configuration of the Builder and the given destination MemorySegment. If the destination + * MemorySegment is null, this defaults to an on-heap concurrent shared UpdateSketch. + * + *

The parameters unique to the shared concurrent sketch are: + *

    + *
  • Number of Pool Threads (default is 3)
  • + *
  • Maximum Concurrency Error
  • + *
+ * + *

Key parameters that are in common with other Theta sketches: + *

    + *
  • Nominal Entries or Log Nominal Entries (for the shared concurrent sketch)
  • + *
  • Destination MemorySegment (if not null, returned sketch is Direct. Default is null.)
  • + *
+ * + * @param dstSeg the given MemorySegment for Direct, otherwise null. + * @return a concurrent UpdateSketch with the current configuration of the Builder + * and the given destination MemorySegment. + */ + @SuppressFBWarnings(value = "ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD", + justification = "Harmless in Builder, fix later") + public UpdateSketch buildShared(final MemorySegment dstSeg) { + ConcurrentPropagationService.NUM_POOL_THREADS = bNumPoolThreads; + if (dstSeg == null) { + return new ConcurrentHeapQuickSelectSketch(bLgNomLongs, bSeed, bMaxConcurrencyError); + } else { + return new ConcurrentDirectQuickSelectSketch(bLgNomLongs, bSeed, bMaxConcurrencyError, dstSeg); + } + } + + /** + * Returns a direct (potentially off-heap) concurrent shared UpdateSketch with the current + * configuration of the Builder, the data from the given sketch, and the given destination + * MemorySegment. If the destination MemorySegment is null, this defaults to an on-heap + * concurrent shared UpdateSketch. + * + *

The parameters unique to the shared concurrent sketch are: + *

    + *
  • Number of Pool Threads (default is 3)
  • + *
  • Maximum Concurrency Error
  • + *
+ * + *

Key parameters that are in common with other Theta sketches: + *

    + *
  • Nominal Entries or Log Nominal Entries (for the shared concurrent sketch)
  • + *
  • Destination MemorySegment (if not null, returned sketch is Direct. Default is null.)
  • + *
+ * + * @param sketch a given UpdateSketch from which the data is used to initialize the returned + * shared sketch. + * @param dstSeg the given MemorySegment for Direct, otherwise null. + * @return a concurrent UpdateSketch with the current configuration of the Builder + * and the given destination MemorySegment. + */ + @SuppressFBWarnings(value = "ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD", + justification = "Harmless in Builder, fix later") + public UpdateSketch buildSharedFromSketch(final UpdateSketch sketch, final MemorySegment dstSeg) { + ConcurrentPropagationService.NUM_POOL_THREADS = bNumPoolThreads; + if (dstSeg == null) { + return new ConcurrentHeapQuickSelectSketch(sketch, bSeed, bMaxConcurrencyError); + } else { + return new ConcurrentDirectQuickSelectSketch(sketch, bSeed, bMaxConcurrencyError, dstSeg); + } + } + + /** + * Returns a local, on-heap, concurrent UpdateSketch to be used as a per-thread local buffer + * along with the given concurrent shared UpdateSketch and the current configuration of this + * Builder. + * + *

The parameters unique to the local concurrent sketch are: + *

    + *
  • Local Nominal Entries or Local Log Nominal Entries
  • + *
  • Propagate Ordered Compact flag
  • + *
+ * + * @param shared the concurrent shared sketch to be accessed via the concurrent local sketch. + * @return an UpdateSketch to be used as a per-thread local buffer. + */ + public UpdateSketch buildLocal(final UpdateSketch shared) { + if ((shared == null) || !(shared instanceof ConcurrentSharedThetaSketch)) { + throw new SketchesStateException("The concurrent shared sketch must be built first."); + } + return new ConcurrentHeapThetaBuffer(bLocalLgNomLongs, bSeed, + (ConcurrentSharedThetaSketch) shared, bPropagateOrderedCompact, bMaxNumLocalThreads); + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append("UpdateSketchBuilder configuration:").append(LS); + sb.append("LgK:").append(TAB).append(bLgNomLongs).append(LS); + sb.append("K:").append(TAB).append(1 << bLgNomLongs).append(LS); + sb.append("LgLocalK:").append(TAB).append(bLocalLgNomLongs).append(LS); + sb.append("LocalK:").append(TAB).append(1 << bLocalLgNomLongs).append(LS); + sb.append("Seed:").append(TAB).append(bSeed).append(LS); + sb.append("p:").append(TAB).append(bP).append(LS); + sb.append("ResizeFactor:").append(TAB).append(bRF).append(LS); + sb.append("Family:").append(TAB).append(bFam).append(LS); + sb.append("Propagate Ordered Compact").append(TAB).append(bPropagateOrderedCompact).append(LS); + sb.append("NumPoolThreads").append(TAB).append(bNumPoolThreads).append(LS); + sb.append("MaxConcurrencyError").append(TAB).append(bMaxConcurrencyError).append(LS); + sb.append("MaxNumLocalThreads").append(TAB).append(bMaxNumLocalThreads).append(LS); + return sb.toString(); + } + +} diff --git a/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java b/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java new file mode 100644 index 000000000..1558c49e7 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import static org.apache.datasketches.theta2.PreambleUtil.wholeBytesToHoldBits; +import static org.apache.datasketches.theta2.PreambleUtil.ENTRY_BITS_BYTE_V4; +import static org.apache.datasketches.theta2.PreambleUtil.NUM_ENTRIES_BYTES_BYTE_V4; +import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; + +import org.apache.datasketches.common.ByteArrayUtil; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * Wrapper around a serialized compact compressed read-only sketch. It is not empty, not a single item. + * + *

This sketch can only be associated with a Serialization Version 4 format binary image.

+ */ +class WrappedCompactCompressedSketch extends WrappedCompactSketch { + + /** + * Construct this sketch with the given bytes. + * @param bytes containing serialized compact compressed sketch. + */ + WrappedCompactCompressedSketch(final byte[] bytes) { + super(bytes); + } + + /** + * Wraps the given bytes, which must be a SerVer 4 compressed CompactSketch image. + * @param bytes representation of serialized compressed compact sketch. + * @param seedHash The update seedHash. + * See Seed Hash. + * @return this sketch + */ + static WrappedCompactCompressedSketch wrapInstance(final byte[] bytes, final short seedHash) { + ThetaUtil.checkSeedHashes(ByteArrayUtil.getShortLE(bytes, PreambleUtil.SEED_HASH_SHORT), seedHash); + return new WrappedCompactCompressedSketch(bytes); + } + + //Sketch Overrides + + @Override + public int getCurrentBytes() { + final int preLongs = bytes_[PREAMBLE_LONGS_BYTE]; + final int entryBits = bytes_[ENTRY_BITS_BYTE_V4]; + final int numEntriesBytes = bytes_[NUM_ENTRIES_BYTES_BYTE_V4]; + return preLongs * Long.BYTES + numEntriesBytes + wholeBytesToHoldBits(getRetainedEntries() * entryBits); + } + + private static final int START_PACKED_DATA_EXACT_MODE = 8; + private static final int START_PACKED_DATA_ESTIMATION_MODE = 16; + + @Override + public int getRetainedEntries(final boolean valid) { //compact is always valid + // number of entries is stored using variable length encoding + // most significant bytes with all zeros are not stored + // one byte in the preamble has the number of non-zero bytes used + final int preLongs = bytes_[PREAMBLE_LONGS_BYTE]; // if > 1 then the second long has theta + final int numEntriesBytes = bytes_[NUM_ENTRIES_BYTES_BYTE_V4]; + int offsetBytes = preLongs > 1 ? 
START_PACKED_DATA_ESTIMATION_MODE : START_PACKED_DATA_EXACT_MODE; + int numEntries = 0; + for (int i = 0; i < numEntriesBytes; i++) { + numEntries |= Byte.toUnsignedInt(bytes_[offsetBytes++]) << (i << 3); + } + return numEntries; + } + + @Override + public long getThetaLong() { + final int preLongs = bytes_[PREAMBLE_LONGS_BYTE]; + return (preLongs > 1) ? ByteArrayUtil.getLongLE(bytes_, 8) : Long.MAX_VALUE; + } + + @Override + public boolean isEmpty() { + return false; + } + + @Override + public boolean isOrdered() { + return true; + } + + @Override + public HashIterator iterator() { + return new BytesCompactCompressedHashIterator( + bytes_, + (bytes_[PREAMBLE_LONGS_BYTE] > 1 ? START_PACKED_DATA_ESTIMATION_MODE : START_PACKED_DATA_EXACT_MODE) + + bytes_[NUM_ENTRIES_BYTES_BYTE_V4], + bytes_[ENTRY_BITS_BYTE_V4], + getRetainedEntries() + ); + } +} diff --git a/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java new file mode 100644 index 000000000..08939ee41 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import static org.apache.datasketches.common.ByteArrayUtil.getIntLE; +import static org.apache.datasketches.common.ByteArrayUtil.getLongLE; +import static org.apache.datasketches.common.ByteArrayUtil.getShortLE; +import static org.apache.datasketches.theta2.CompactOperations.memoryToCompact; +import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.FLAGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.RETAINED_ENTRIES_INT; +import static org.apache.datasketches.theta2.PreambleUtil.SEED_HASH_SHORT; +import static org.apache.datasketches.theta2.PreambleUtil.THETA_LONG; + +import java.lang.foreign.MemorySegment; +import java.util.Arrays; + +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * Wrapper around a serialized compact read-only sketch. It is not empty, not a single item. + * + *

This sketch can only be associated with a Serialization Version 3 format binary image.

+ */ +class WrappedCompactSketch extends CompactSketch { + final byte[] bytes_; + + /** + * Construct this sketch with the given bytes. + * @param bytes containing serialized compact sketch. + */ + WrappedCompactSketch(final byte[] bytes) { + bytes_ = bytes; + } + + /** + * Wraps the given Memory, which must be a SerVer 3 CompactSketch image. + * @param bytes representation of serialized compressed compact sketch. + * @param seedHash The update seedHash. + * See Seed Hash. + * @return this sketch + */ + static WrappedCompactSketch wrapInstance(final byte[] bytes, final short seedHash) { + ThetaUtil.checkSeedHashes(getShortLE(bytes, SEED_HASH_SHORT), seedHash); + return new WrappedCompactSketch(bytes); + } + + //Sketch Overrides + + @Override + public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) { + return memoryToCompact(MemorySegment.ofArray(bytes_), dstOrdered, dstSeg); + } + + @Override + public int getCurrentBytes() { + final int preLongs = bytes_[PreambleUtil.PREAMBLE_LONGS_BYTE]; + final int numEntries = (preLongs == 1) ? 0 : getIntLE(bytes_, RETAINED_ENTRIES_INT); + return (preLongs + numEntries) << 3; + } + + @Override + public int getRetainedEntries(final boolean valid) { //compact is always valid + final int preLongs = bytes_[PREAMBLE_LONGS_BYTE]; + return (preLongs == 1) ? 0 : getIntLE(bytes_, RETAINED_ENTRIES_INT); + } + + @Override + public long getThetaLong() { + final int preLongs = bytes_[PREAMBLE_LONGS_BYTE]; + return (preLongs > 2) ? 
getLongLE(bytes_, THETA_LONG) : Long.MAX_VALUE; + } + + @Override + public boolean hasMemorySegment() { + return false; + } + + @Override + public boolean isDirect() { + return false; + } + + @Override + public boolean isEmpty() { + return (bytes_[FLAGS_BYTE] & EMPTY_FLAG_MASK) > 0; + } + + @Override + public boolean isOrdered() { + return (bytes_[FLAGS_BYTE] & ORDERED_FLAG_MASK) > 0; + } + + @Override + public HashIterator iterator() { + return new BytesCompactHashIterator( + bytes_, + bytes_[PREAMBLE_LONGS_BYTE] << 3, + getRetainedEntries() + ); + } + + @Override + public byte[] toByteArray() { + return Arrays.copyOf(bytes_, getCurrentBytes()); + } + + //restricted methods + + @Override + long[] getCache() { + final long[] cache = new long[getRetainedEntries()]; + int i = 0; + final HashIterator it = iterator(); + while (it.next()) { + cache[i++] = it.get(); + } + return cache; + } + + @Override + int getCompactPreambleLongs() { + return bytes_[PREAMBLE_LONGS_BYTE]; + } + + @Override + int getCurrentPreambleLongs() { + return bytes_[PREAMBLE_LONGS_BYTE]; + } + + @Override + MemorySegment getMemorySegment() { + return null; + } + + @Override + short getSeedHash() { + return getShortLE(bytes_, SEED_HASH_SHORT); + } +} diff --git a/src/main/java/org/apache/datasketches/theta2/package-info.java b/src/main/java/org/apache/datasketches/theta2/package-info.java new file mode 100644 index 000000000..71c333bb5 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/package-info.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/** + * The theta package contains the basic sketch classes that are members of the + * Theta Sketch Framework. + * + *

There is a separate Tuple package for many of the sketches that are derived from the + * same algorithms defined in the Theta Sketch Framework paper.

+ */ +package org.apache.datasketches.theta2; diff --git a/src/main/java/org/apache/datasketches/thetacommon/HashOperations.java b/src/main/java/org/apache/datasketches/thetacommon/HashOperations.java index f6b22cb50..2cdb99a0a 100644 --- a/src/main/java/org/apache/datasketches/thetacommon/HashOperations.java +++ b/src/main/java/org/apache/datasketches/thetacommon/HashOperations.java @@ -19,9 +19,12 @@ package org.apache.datasketches.thetacommon; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static java.lang.Math.max; import static org.apache.datasketches.common.Util.ceilingPowerOf2; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; import org.apache.datasketches.memory.Memory; @@ -284,6 +287,107 @@ public static int hashSearchOrInsertMemory(final WritableMemory wmem, final int throw new SketchesArgumentException("Key not found and no empty slot in table!"); } + //With MemorySegment + + /** + * This is a classical Knuth-style Open Addressing, Double Hash (OADH) search scheme for MemorySegment. + * Returns the index if found, -1 if not found. The input MemorySegment may be read only. + * + * @param seg The MemorySegment containing the hash table to search. + * The hash table portion must be a power of 2 in size. + * @param lgArrLongs The log_base2(hashTable.length). + * See lgArrLongs. + * @param hash The hash value to search for. Must not be zero. + * @param segOffsetBytes offset in the MemorySegment where the hashTable starts + * @return Current probe index if found, -1 if not found. 
+ */ + public static int hashSearchMemory(final MemorySegment seg, final int lgArrLongs, final long hash, + final int segOffsetBytes) { + if (hash == 0) { + throw new SketchesArgumentException("Given hash must not be zero: " + hash); + } + final int arrayMask = (1 << lgArrLongs) - 1; + final int stride = getStride(hash, lgArrLongs); + int curProbe = (int) (hash & arrayMask); + final int loopIndex = curProbe; + do { + final int curProbeOffsetBytes = (curProbe << 3) + segOffsetBytes; + final long curArrayHash = seg.get(JAVA_LONG_UNALIGNED, curProbeOffsetBytes); + if (curArrayHash == EMPTY) { return -1; } + else if (curArrayHash == hash) { return curProbe; } + curProbe = (curProbe + stride) & arrayMask; + } while (curProbe != loopIndex); + return -1; + } + + /** + * This is a classical Knuth-style Open Addressing, Double Hash (OADH) insert scheme for MemorySegment. + * This method assumes that the input hash is not a duplicate. + * Useful for rebuilding tables to avoid unnecessary comparisons. + * Returns the index of insertion, which is always positive or zero. + * Throws an exception if table has no empty slot. + * + * @param wseg The writable MemorySegment that contains the hashTable to insert into. + * The size of the hashTable portion must be a power of 2. + * @param lgArrLongs The log_base2(hashTable.length. + * See lgArrLongs. + * @param hash value that must not be zero and will be inserted into the array into an empty slot. + * @param memOffsetBytes offset in the writable MemorySegment where the hashTable starts + * @return index of insertion. Always positive or zero. 
+ */ + public static int hashInsertOnlyMemory(final MemorySegment wseg, final int lgArrLongs, + final long hash, final int memOffsetBytes) { + final int arrayMask = (1 << lgArrLongs) - 1; // current Size -1 + final int stride = getStride(hash, lgArrLongs); + int curProbe = (int) (hash & arrayMask); + // search for an empty slot only; duplicates are not checked + final int loopIndex = curProbe; + do { + final int curProbeOffsetBytes = (curProbe << 3) + memOffsetBytes; + final long curArrayHash = wseg.get(JAVA_LONG_UNALIGNED, curProbeOffsetBytes); + if (curArrayHash == EMPTY) { + wseg.set(JAVA_LONG_UNALIGNED, curProbeOffsetBytes, hash); + return curProbe; + } + curProbe = (curProbe + stride) & arrayMask; + } while (curProbe != loopIndex); + throw new SketchesArgumentException("No empty slot in table!"); + } + + /** + * This is a classical Knuth-style Open Addressing, Double Hash insert scheme, but inserts + * values directly into a writable MemorySegment. + * Returns index ≥ 0 if found (duplicate); < 0 if inserted, inserted at -(index + 1). + * Throws an exception if the value is not found and table has no empty slot. + * + * @param wseg The writable MemorySegment that contains the hashTable to insert into. + * @param lgArrLongs The log_base2(hashTable.length). + * See lgArrLongs. + * @param hash The hash value to be potentially inserted into an empty slot only if it is not + * a duplicate of any other hash value in the table. It must not be zero. + * @param memOffsetBytes offset in the writable MemorySegment where the hash array starts + * @return index ≥ 0 if found (duplicate); < 0 if inserted, inserted at -(index + 1).
+ */ + public static int hashSearchOrInsertMemory(final MemorySegment wseg, final int lgArrLongs, + final long hash, final int memOffsetBytes) { + final int arrayMask = (1 << lgArrLongs) - 1; // current Size -1 + final int stride = getStride(hash, lgArrLongs); + int curProbe = (int) (hash & arrayMask); + // search for duplicate or zero + final int loopIndex = curProbe; + do { + final int curProbeOffsetBytes = (curProbe << 3) + memOffsetBytes; + final long curArrayHash = wseg.get(JAVA_LONG_UNALIGNED, curProbeOffsetBytes); + if (curArrayHash == EMPTY) { + wseg.set(JAVA_LONG_UNALIGNED, curProbeOffsetBytes, hash); + return ~curProbe; + } else if (curArrayHash == hash) { return curProbe; } // curArrayHash is a duplicate + // curArrayHash is not a duplicate and not zero, continue searching + curProbe = (curProbe + stride) & arrayMask; + } while (curProbe != loopIndex); + throw new SketchesArgumentException("Key not found and no empty slot in table!"); + } + //Other related methods /** From 543e866b53ecd2c5462b10c3a3ed576547a1c676 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Tue, 10 Jun 2025 11:30:15 -0700 Subject: [PATCH 10/25] Partial progress on Theta tests. 
--- .../common/MemorySegmentStatus.java | 58 ++ .../org/apache/datasketches/common/Util.java | 131 ++- .../apache/datasketches/theta2/AnotBimpl.java | 9 + .../datasketches/theta2/CompactSketch.java | 15 + .../theta2/ConcurrentHeapThetaBuffer.java | 10 - .../theta2/ConcurrentSharedThetaSketch.java | 30 - .../theta2/DirectCompactSketch.java | 11 +- .../theta2/DirectQuickSelectSketchR.java | 10 +- .../theta2/EmptyCompactSketch.java | 6 +- .../datasketches/theta2/HeapAlphaSketch.java | 5 - .../theta2/HeapCompactSketch.java | 5 - .../theta2/HeapQuickSelectSketch.java | 5 - .../datasketches/theta2/IntersectionImpl.java | 29 +- .../MemoryCompactCompressedHashIterator.java | 23 +- .../datasketches/theta2/PreambleUtil.java | 30 +- .../datasketches/theta2/SetOperation.java | 45 +- .../datasketches/theta2/SingleItemSketch.java | 5 - .../apache/datasketches/theta2/Sketch.java | 42 +- .../apache/datasketches/theta2/Sketches.java | 407 ++++++++ .../apache/datasketches/theta2/UnionImpl.java | 41 +- .../datasketches/theta2/UpdateSketch.java | 15 + .../theta2/WrappedCompactSketch.java | 15 - .../datasketches/theta2/AnotBimplTest.java | 333 +++++++ .../theta2/BackwardConversions.java | 238 +++++ .../datasketches/theta2/BitPackingTest.java | 166 ++++ .../theta2/CompactSketchTest.java | 674 +++++++++++++ .../CornerCaseThetaSetOperationsTest.java | 518 ++++++++++ .../theta2/DirectIntersectionTest.java | 769 ++++++++++++++ .../theta2/DirectQuickSelectSketchTest.java | 936 ++++++++++++++++++ .../datasketches/theta2/DirectUnionTest.java | 827 ++++++++++++++++ .../apache/datasketches/theta2/EmptyTest.java | 169 ++++ .../datasketches/theta2/ExamplesTest.java | 124 +++ .../theta2/ForwardCompatibilityTest.java | 219 ++++ .../theta2/HeapAlphaSketchTest.java | 696 +++++++++++++ .../theta2/HeapIntersectionTest.java | 534 ++++++++++ .../theta2/HeapQuickSelectSketchTest.java | 642 ++++++++++++ .../datasketches/theta2/HeapUnionTest.java | 669 +++++++++++++ .../theta2/HeapifyWrapSerVer1and2Test.java 
| 609 ++++++++++++ .../datasketches/theta2/IteratorTest.java | 133 +++ .../theta2/SingleItemSketchTest.java | 377 +++++++ .../datasketches/theta2/SketchTest.java | 440 ++++++++ .../datasketches/theta2/UnionImplTest.java | 320 ++++++ .../datasketches/theta2/UpdateSketchTest.java | 237 +++++ 43 files changed, 10347 insertions(+), 230 deletions(-) create mode 100644 src/main/java/org/apache/datasketches/common/MemorySegmentStatus.java create mode 100644 src/main/java/org/apache/datasketches/theta2/Sketches.java create mode 100644 src/test/java/org/apache/datasketches/theta2/AnotBimplTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/BackwardConversions.java create mode 100644 src/test/java/org/apache/datasketches/theta2/BitPackingTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/CompactSketchTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/CornerCaseThetaSetOperationsTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/DirectIntersectionTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/DirectQuickSelectSketchTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/DirectUnionTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/EmptyTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/ExamplesTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/ForwardCompatibilityTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/HeapAlphaSketchTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/HeapIntersectionTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/HeapQuickSelectSketchTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/HeapUnionTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/HeapifyWrapSerVer1and2Test.java create mode 100644 
src/test/java/org/apache/datasketches/theta2/IteratorTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/SingleItemSketchTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/SketchTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/UnionImplTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/UpdateSketchTest.java diff --git a/src/main/java/org/apache/datasketches/common/MemorySegmentStatus.java b/src/main/java/org/apache/datasketches/common/MemorySegmentStatus.java new file mode 100644 index 000000000..889e8c0b8 --- /dev/null +++ b/src/main/java/org/apache/datasketches/common/MemorySegmentStatus.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.common; + +import java.lang.foreign.MemorySegment; + +/** + * Methods for inquiring the status of a backing MemorySegment. + */ +public interface MemorySegmentStatus { + + /** + * Returns true if this object's internal data is backed by a MemorySegment, + * which may be on-heap or off-heap. + * @return true if this object's internal data is backed by a MemorySegment. 
+ */ + boolean hasMemorySegment(); + + /** + * Returns true if this object's internal data is backed by an off-heap (direct or native) MemorySegment. + * @return true if this object's internal data is backed by an off-heap (direct or native) MemorySegment. + */ + boolean isDirect(); + + /** + * Returns true if the backing MemorySegment of this object refers to the same MemorySegment of that. + * They can either have the same off-heap memory location and size, or refer to the same on-heap array object. + * + *

If both segments are off-heap, they both must have the same starting address and the same size.

+ * + *

For on-heap segments, both segments must be based on or derived from the same array object and neither segment + * can be read-only.

+ * + *

Returns false if the given MemorySegment is null.

+ * + * @param that The given MemorySegment. + * @return true if the backing MemorySegment of this object hierarchy refers to the same MemorySegment of that. + */ + boolean isSameResource(final MemorySegment that); + +} diff --git a/src/main/java/org/apache/datasketches/common/Util.java b/src/main/java/org/apache/datasketches/common/Util.java index 969cdc389..11615a39a 100644 --- a/src/main/java/org/apache/datasketches/common/Util.java +++ b/src/main/java/org/apache/datasketches/common/Util.java @@ -28,7 +28,6 @@ import java.lang.foreign.MemorySegment; import java.util.Comparator; -import java.util.Objects; /** * Common utility functions. @@ -802,6 +801,80 @@ public static boolean le(final Object item1, final Object item2, final Compa //MemorySegment related + /** + * Clears all bytes of this MemorySegment to zero. + * @param seg the given MemorySegment + */ + public static void clear(final MemorySegment seg) { + seg.fill((byte)0); + } + + /** + * Clears a portion of this MemorySegment to zero. + * @param seg the given MemorySegment + * @param offsetBytes offset bytes relative to this MemorySegment start + * @param lengthBytes the length in bytes + */ + public static void clear(final MemorySegment seg, final long offsetBytes, final long lengthBytes) { + final MemorySegment slice = seg.asSlice(offsetBytes, lengthBytes); + slice.fill((byte)0); + } + + /** + * Clears the bits defined by the bitMask + * @param seg the given MemorySegment + * @param offsetBytes offset bytes relative to this Memory start. + * @param bitMask the bits set to one will be cleared + */ + public static void clearBits(final MemorySegment seg, final long offsetBytes, final byte bitMask) { + final byte b = seg.get(JAVA_BYTE, offsetBytes); + seg.set(JAVA_BYTE, offsetBytes, (byte)(b & ~bitMask)); + } + + /** + * Returns true if both segments have the same contents and the same length. 
+ * @param seg1 the given MemorySegment #1 + * @param seg2 the given MemorySegment #2 + * @return true if both segments have the same contents and the same length. + */ + public static boolean equalContents(final MemorySegment seg1, final MemorySegment seg2) { + if (seg1.byteSize() != seg2.byteSize()) { return false; } + return equalContents(seg1, 0, seg2, 0, seg1.byteSize()); + } + + /** + * Returns true if both segments have the same content for the specified region. + * @param seg1 the given MemorySegment #1 + * @param seg1offsetBytes the starting offset for MemorySegment #1 in bytes. + * @param seg2 the given MemorySegment #2 + * @param seg2offsetBytes the starting offset for MemorySegment #2 in bytes. + * @param lengthBytes the length of the region to be compared, in bytes. + * @return true, if both segments have the same content for the specified region. + */ + public static boolean equalContents( + final MemorySegment seg1, + final long seg1offsetBytes, + final MemorySegment seg2, + final long seg2offsetBytes, + final long lengthBytes) { + if (seg1.equals(seg2) && (seg1.byteSize() == seg2.byteSize()) && (seg1offsetBytes == seg2offsetBytes)) { return true; } //same segment and same region; different offsets must be compared + final long seg1EndOff = seg1offsetBytes + lengthBytes; + final long seg2EndOff = seg2offsetBytes + lengthBytes; + return MemorySegment.mismatch(seg1, seg1offsetBytes, seg1EndOff, seg2, seg2offsetBytes, seg2EndOff) == -1; + } + + /** + * Fills a portion of this Memory region to the given byte value.
+ * @param seg the given MemorySegment + * @param offsetBytes offset bytes relative to this Memory start + * @param lengthBytes the length in bytes + * @param value the given byte value + */ + public static void fill(final MemorySegment seg, final long offsetBytes, final long lengthBytes, final byte value) { + final MemorySegment slice = seg.asSlice(offsetBytes, lengthBytes); + slice.fill(value); + } + /** * Returns true if the two given MemorySegments refer to the same backing resource, * which is either an off-heap memory location and size, or the same on-heap array object. @@ -811,13 +884,14 @@ public static boolean le(final Object item1, final Object item2, final Compa *

For on-heap segments, both segments must be based on or derived from the same array object and neither segment * can be read-only.

* + *

Returns false if either argument is null.

+ * * @param seg1 The first given MemorySegment * @param seg2 The second given MemorySegment * @return true if both MemorySegments are determined to be the same backing memory. */ public static boolean isSameResource(final MemorySegment seg1, final MemorySegment seg2) { - Objects.requireNonNull(seg1, "seg1 must not be null."); - Objects.requireNonNull(seg2, "seg2 must not be null."); + if ((seg1 == null) || (seg2 == null)) { return false; } if (!seg1.scope().isAlive() || !seg2.scope().isAlive()) { throw new IllegalArgumentException("Both arguments must be alive."); } @@ -842,59 +916,16 @@ public static boolean isSameResource(final MemorySegment seg1, final MemorySegme * If the requested capacity is not divisible by eight, the returned size * will be rolled up to the next multiple of eight.

* - * @param capacityBytes The new capacity being requested. + * @param capacityBytes The new capacity being requested. It must not be negative. * @return a new MemorySegment with the requested capacity. */ public static MemorySegment newHeapSegment(final int capacityBytes) { - if (capacityBytes < 0) { - throw new IllegalArgumentException("Requested capacity must be positive."); - } - final long[] array = ((capacityBytes * 0x7) == 0) ? new long[capacityBytes >>> 3] : new long[(capacityBytes >>> 3) + 1]; + final long[] array = ((capacityBytes & 0x7) == 0) + ? new long[capacityBytes >>> 3] + : new long[(capacityBytes >>> 3) + 1]; return MemorySegment.ofArray(array); } - /** - * Clears all bytes of this MemorySegment to zero. - * @param seg the given MemorySegment - */ - public static void clear(final MemorySegment seg) { - seg.fill((byte)0); - } - - /** - * Clears a portion of this MemorySegment to zero. - * @param seg the given MemorySegment - * @param offsetBytes offset bytes relative to this MemorySegment start - * @param lengthBytes the length in bytes - */ - public static void clear(final MemorySegment seg, final long offsetBytes, final long lengthBytes) { - final MemorySegment slice = seg.asSlice(offsetBytes, lengthBytes); - slice.fill((byte)0); - } - - /** - * Fills a portion of this Memory region to the given byte value. - * @param seg the given MemorySegment - * @param offsetBytes offset bytes relative to this Memory start - * @param lengthBytes the length in bytes - * @param value the given byte value - */ - public static void fill(final MemorySegment seg, final long offsetBytes, final long lengthBytes, final byte value) { - final MemorySegment slice = seg.asSlice(offsetBytes, lengthBytes); - slice.fill(value); - } - - /** - * Clears the bits defined by the bitMask - * @param seg the given MemorySegment - * @param offsetBytes offset bytes relative to this Memory start. 
- * @param bitMask the bits set to one will be cleared - */ - public static void clearBits(final MemorySegment seg, final long offsetBytes, final byte bitMask) { - final byte b = seg.get(JAVA_BYTE, offsetBytes); - seg.set(JAVA_BYTE, offsetBytes, (byte)(b & ~bitMask)); - } - /** * Sets the bits defined by the bitMask * @param seg the given MemorySegment diff --git a/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java b/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java index 4931bb680..21872ecd6 100644 --- a/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java +++ b/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java @@ -233,6 +233,15 @@ long getThetaLong() { return thetaLong_; } + @Override + public boolean hasMemorySegment() { return false; } + + @Override + public boolean isDirect() { return false; } + + @Override + public boolean isSameResource( final MemorySegment that) { return false; } + @Override boolean isEmpty() { return empty_; diff --git a/src/main/java/org/apache/datasketches/theta2/CompactSketch.java b/src/main/java/org/apache/datasketches/theta2/CompactSketch.java index 0498eed34..6ee764678 100644 --- a/src/main/java/org/apache/datasketches/theta2/CompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/CompactSketch.java @@ -343,11 +343,26 @@ public Family getFamily() { return Family.COMPACT; } + @Override + public boolean hasMemorySegment() { + return (this instanceof DirectCompactSketch && ((DirectCompactSketch)this).hasMemorySegment()); + } + @Override public boolean isCompact() { return true; } + @Override + public boolean isDirect() { + return (this instanceof DirectCompactSketch && ((DirectCompactSketch)this).isDirect()); + } + + @Override + public boolean isSameResource(final MemorySegment that) { + return (this instanceof DirectCompactSketch && ((DirectCompactSketch)this).isSameResource(that)); + } + @Override public double getEstimate() { return Sketch.estimate(getThetaLong(), 
getRetainedEntries()); diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java index ab1d41a65..c93ed892b 100644 --- a/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java +++ b/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java @@ -147,16 +147,6 @@ public double getUpperBound(final int numStdDev) { return shared.getUpperBound(numStdDev); } - @Override - public boolean hasMemorySegment() { - return false; - } - - @Override - public boolean isDirect() { - return false; - } - @Override public boolean isEmpty() { return shared.isEmpty(); diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java index 5bf147049..40746c3e6 100644 --- a/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java @@ -22,8 +22,6 @@ import java.lang.foreign.MemorySegment; import java.util.concurrent.atomic.AtomicBoolean; -import org.apache.datasketches.common.Util; - /** * An internal interface to define the API of a concurrent shared theta sketch. * It reflects all data processed by a single or multiple update threads, and can serve queries at @@ -140,34 +138,6 @@ boolean propagate(final AtomicBoolean localPropagationInProgress, final Sketch s double getUpperBound(int numStdDev); - /** - * Returns true if this object's internal data is backed by a Memory object, - * which may be on-heap or off-heap. - * @return true if this object's internal data is backed by a Memory object. - */ - boolean hasMemorySegment(); - - /** - * Returns true if this object's internal data is backed by direct (off-heap) Memory. - * @return true if this object's internal data is backed by direct (off-heap) Memory. 
- */ - boolean isDirect(); - - /** - * Returns true if the two given MemorySegments refer to the same backing resource, - * which is either an off-heap memory location and size, or the same on-heap array object. - * - *

This is a convenient delegate of - * {@link org.apache.datasketches.common.Util#isSameResource(MemorySegment, MemorySegment) isSameResource()}

- * - * @param seg1 The first given MemorySegment - * @param seg2 The second given MemorySegment - * @return true if both MemorySegments are determined to be the same backing memory. - */ - default boolean isSameResource(final MemorySegment seg1, final MemorySegment seg2) { - return Util.isSameResource(seg1, seg2); - } - boolean isEmpty(); boolean isEstimationMode(); diff --git a/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java index 188f2cd73..ef0e4b604 100644 --- a/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java @@ -33,6 +33,7 @@ import java.lang.foreign.MemorySegment; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon.ThetaUtil; /** @@ -102,12 +103,12 @@ public long getThetaLong() { @Override public boolean hasMemorySegment() { - return seg_ != null; + return seg_ != null && seg_.scope().isAlive(); } @Override public boolean isDirect() { - return hasMemorySegment() ? 
seg_.isNative() : false; + return hasMemorySegment() && seg_.isNative(); } @Override @@ -123,6 +124,12 @@ public boolean isOrdered() { return (extractFlags(seg_) & ORDERED_FLAG_MASK) > 0; } + @Override + public boolean isSameResource(final MemorySegment that) { + return hasMemorySegment() && Util.isSameResource(seg_, that); + + } + @Override public HashIterator iterator() { return new MemoryHashIterator(seg_, getRetainedEntries(true), getThetaLong()); diff --git a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java index 33d371554..c0db75b16 100644 --- a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java +++ b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java @@ -47,6 +47,7 @@ import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesReadOnlyException; import org.apache.datasketches.common.SuppressFBWarnings; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon.ThetaUtil; /** @@ -147,12 +148,12 @@ public long getThetaLong() { @Override public boolean hasMemorySegment() { - return wseg_ != null; + return wseg_ != null && wseg_.scope().isAlive(); } @Override public boolean isDirect() { - return hasMemorySegment() ? 
wseg_.isNative() : false; + return hasMemorySegment() && wseg_.isNative(); } @Override @@ -160,6 +161,11 @@ public boolean isEmpty() { return PreambleUtil.isEmptyFlag(wseg_); } + @Override + public boolean isSameResource(final MemorySegment that) { + return hasMemorySegment() && Util.isSameResource(wseg_, that); + } + @Override public HashIterator iterator() { return new MemoryHashIterator(wseg_, 1 << getLgArrLongs(), getThetaLong()); diff --git a/src/main/java/org/apache/datasketches/theta2/EmptyCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/EmptyCompactSketch.java index c9c6dd609..c4679374b 100644 --- a/src/main/java/org/apache/datasketches/theta2/EmptyCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/EmptyCompactSketch.java @@ -23,6 +23,7 @@ import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.SketchesArgumentException; /** @@ -134,11 +135,6 @@ int getCurrentPreambleLongs() { return 1; } - @Override - MemorySegment getMemorySegment() { - return null; - } - @Override short getSeedHash() { return 0; diff --git a/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java index 5bc11d712..a3b7bd145 100644 --- a/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java @@ -293,11 +293,6 @@ int getCurrentPreambleLongs() { return Family.ALPHA.getMinPreLongs(); } - @Override - MemorySegment getMemorySegment() { - return null; - } - @Override long[] getCache() { return cache_; diff --git a/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java index 065213191..6cffd9818 100644 --- a/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java +++ 
b/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java @@ -128,11 +128,6 @@ int getCompactPreambleLongs() { return preLongs_; } - @Override - MemorySegment getMemorySegment() { - return null; - } - @Override short getSeedHash() { return seedHash_; diff --git a/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java index 3096e5e1a..082259c21 100644 --- a/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java @@ -230,11 +230,6 @@ int getLgArrLongs() { return lgArrLongs_; } - @Override - MemorySegment getMemorySegment() { - return null; - } - @Override UpdateReturnState hashUpdate(final long hash) { HashOperations.checkHashCorruption(hash); diff --git a/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java b/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java index ba7bcd8ac..92ca096c3 100644 --- a/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java +++ b/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java @@ -65,6 +65,7 @@ import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesReadOnlyException; import org.apache.datasketches.common.SketchesStateException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon.ThetaUtil; /** @@ -93,9 +94,9 @@ class IntersectionImpl extends Intersection { /** * Constructor: Sets the class finals and computes, sets and checks the seedHash. - * @param wseg Can be either a Source(e.g. wrap) or Destination (new Direct) MemorySegment. + * @param wseg Can be either a Source(e.g. wrap) or Destination (new offHeap) MemorySegment. * @param seed Used to validate incoming sketch arguments. - * @param dstMemFlag The given MemorySegment is a Destination (new Direct) MemorySegment. 
+ * @param dstMemFlag The given MemorySegment is a Destination (new offHeap) MemorySegment. * @param readOnly True if MemorySegment is to be treated as read only. */ protected IntersectionImpl(final MemorySegment wseg, final long seed, final boolean dstMemFlag, @@ -103,7 +104,7 @@ protected IntersectionImpl(final MemorySegment wseg, final long seed, final bool readOnly_ = readOnly; if (wseg != null) { wseg_ = wseg; - if (dstMemFlag) { //DstMem: compute & store seedHash, no seedhash checking + if (dstMemFlag) { //DstMem: compute & store seedHash, no seedHash checking checkMinSizeMemory(wseg); maxLgArrLongs_ = !readOnly ? getMaxLgArrLongs(wseg) : 0; //Only Off Heap seedHash_ = ThetaUtil.computeSeedHash(seed); @@ -275,17 +276,17 @@ else if (curCount_ < 0 && sketchInEntries > 0) { final int priorLgArrLongs = lgArrLongs_; //prior only used in error message lgArrLongs_ = requiredLgArrLongs; - if (wseg_ != null) { //Off heap, check if current dstMem is large enough + if (wseg_ != null) { //Off heap, check if current dstSeg is large enough insertCurCount(wseg_, curCount_); insertLgArrLongs(wseg_, lgArrLongs_); if (requiredLgArrLongs <= maxLgArrLongs_) { wseg_.asSlice(CONST_PREAMBLE_LONGS << 3, 8 << lgArrLongs_).fill((byte)0); } - else { //not enough space in dstMem + else { //not enough space in dstSeg final int requiredBytes = (8 << requiredLgArrLongs) + 24; final int givenBytes = (8 << priorLgArrLongs) + 24; throw new SketchesArgumentException( - "Insufficient internal Memory space: " + requiredBytes + " > " + givenBytes); + "Insufficient internal MemorySegment space: " + requiredBytes + " > " + givenBytes); } } else { //On the heap, allocate a HT @@ -305,6 +306,9 @@ else if (curCount_ > 0 && sketchInEntries > 0) { } } + @Override + MemorySegment getMemorySegment() { return wseg_; } + @Override public CompactSketch getResult(final boolean dstOrdered, final MemorySegment dstSeg) { if (curCount_ < 0) { @@ -327,7 +331,7 @@ public CompactSketch getResult(final boolean 
dstOrdered, final MemorySegment dst if (wseg_ != null) { final int htLen = 1 << lgArrLongs_; hashTable = new long[htLen]; - MemorySegment.copy(dstSeg, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, hashTable, 0, htLen); + MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, hashTable, 0, htLen); } else { hashTable = hashTable_; } @@ -341,7 +345,7 @@ public CompactSketch getResult(final boolean dstOrdered, final MemorySegment dst @Override public boolean hasMemorySegment() { - return wseg_ != null; + return wseg_ != null && wseg_.scope().isAlive(); } @Override @@ -351,7 +355,12 @@ public boolean hasResult() { @Override public boolean isDirect() { - return hasMemorySegment() ? wseg_.isNative() : false; + return hasMemorySegment() && wseg_.isNative(); + } + + @Override + public boolean isSameResource(final MemorySegment that) { + return hasMemorySegment() && Util.isSameResource(wseg_, that); } @Override @@ -412,7 +421,7 @@ long[] getCache() { if (wseg_ == null) { return hashTable_ != null ? 
hashTable_ : new long[0]; } - //Direct + //offHeap final int arrLongs = 1 << lgArrLongs_; final long[] outArr = new long[arrLongs]; MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, outArr, 0, arrLongs); diff --git a/src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java b/src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java index 96ccb41e8..31aa6ff92 100644 --- a/src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java +++ b/src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java @@ -24,10 +24,13 @@ import java.lang.foreign.MemorySegment; +import org.apache.datasketches.common.MemorySegmentStatus; +import org.apache.datasketches.common.Util; + /* * This is to uncompress serial version 4 sketch incrementally */ -class MemoryCompactCompressedHashIterator implements HashIterator { +class MemoryCompactCompressedHashIterator implements HashIterator, MemorySegmentStatus { private MemorySegment seg; private int offset; private int entryBits; @@ -44,8 +47,7 @@ class MemoryCompactCompressedHashIterator implements HashIterator { final MemorySegment srcSeg, final int offset, final int entryBits, - final int numEntries - ) { + final int numEntries) { this.seg = srcSeg; this.offset = offset; this.entryBits = entryBits; @@ -64,6 +66,21 @@ public long get() { return buffer[index & 7]; } + @Override + public boolean hasMemorySegment() { + return seg != null && seg.scope().isAlive(); + } + + @Override + public boolean isDirect() { + return hasMemorySegment() && seg.isNative(); + } + + @Override + public boolean isSameResource(final MemorySegment that) { + return hasMemorySegment() && Util.isSameResource(seg, that); + } + @Override public boolean next() { if (++index == numEntries) { return false; } diff --git a/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java b/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java 
index 55035a456..564446ed4 100644 --- a/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java +++ b/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java @@ -20,10 +20,10 @@ package org.apache.datasketches.theta2; import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_FLOAT; -import static java.lang.foreign.ValueLayout.JAVA_INT; -import static java.lang.foreign.ValueLayout.JAVA_LONG; -import static java.lang.foreign.ValueLayout.JAVA_SHORT; +import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; import static org.apache.datasketches.common.Util.LS; import static org.apache.datasketches.common.Util.zeroPad; @@ -387,23 +387,23 @@ static int extractFlagsV1(final MemorySegment seg) { } static int extractSeedHash(final MemorySegment seg) { - return seg.get(JAVA_SHORT, SEED_HASH_SHORT) & 0XFFFF; + return seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT) & 0XFFFF; } static int extractCurCount(final MemorySegment seg) { - return seg.get(JAVA_INT, RETAINED_ENTRIES_INT); + return seg.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT); } static float extractP(final MemorySegment seg) { - return seg.get(JAVA_FLOAT, P_FLOAT); + return seg.get(JAVA_FLOAT_UNALIGNED, P_FLOAT); } static long extractThetaLong(final MemorySegment seg) { - return seg.get(JAVA_LONG, THETA_LONG); + return seg.get(JAVA_LONG_UNALIGNED, THETA_LONG); } static long extractUnionThetaLong(final MemorySegment seg) { - return seg.get(JAVA_LONG, UNION_THETA_LONG); + return seg.get(JAVA_LONG_UNALIGNED, UNION_THETA_LONG); } static int extractEntryBitsV4(final MemorySegment seg) { @@ -415,7 +415,7 @@ static int extractNumEntriesBytesV4(final MemorySegment seg) { } static long extractThetaLongV4(final MemorySegment seg) { - return seg.get(JAVA_LONG, 
THETA_LONG_V4); + return seg.get(JAVA_LONG_UNALIGNED, THETA_LONG_V4); } /** @@ -462,23 +462,23 @@ static void insertFlags(final MemorySegment seg, final int flags) { } static void insertSeedHash(final MemorySegment seg, final int seedHash) { - seg.set(JAVA_SHORT, SEED_HASH_SHORT, (short) seedHash); + seg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, (short) seedHash); } static void insertCurCount(final MemorySegment seg, final int curCount) { - seg.set(JAVA_INT, RETAINED_ENTRIES_INT, curCount); + seg.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, curCount); } static void insertP(final MemorySegment seg, final float p) { - seg.set(JAVA_FLOAT, P_FLOAT, p); + seg.set(JAVA_FLOAT_UNALIGNED, P_FLOAT, p); } static void insertThetaLong(final MemorySegment seg, final long thetaLong) { - seg.set(JAVA_LONG, THETA_LONG, thetaLong); + seg.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong); } static void insertUnionThetaLong(final MemorySegment seg, final long unionThetaLong) { - seg.set(JAVA_LONG, UNION_THETA_LONG, unionThetaLong); + seg.set(JAVA_LONG_UNALIGNED, UNION_THETA_LONG, unionThetaLong); } static void setEmpty(final MemorySegment seg) { diff --git a/src/main/java/org/apache/datasketches/theta2/SetOperation.java b/src/main/java/org/apache/datasketches/theta2/SetOperation.java index 5c959cde6..55dda17b0 100644 --- a/src/main/java/org/apache/datasketches/theta2/SetOperation.java +++ b/src/main/java/org/apache/datasketches/theta2/SetOperation.java @@ -28,8 +28,9 @@ import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.MemorySegmentStatus; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; +//import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon.ThetaUtil; /** @@ -37,7 +38,7 @@ * * @author Lee Rhodes */ -public abstract class SetOperation { +public abstract class SetOperation implements MemorySegmentStatus { static final 
int CONST_PREAMBLE_LONGS = 3; /** @@ -140,7 +141,7 @@ public static SetOperation wrap(final MemorySegment srcSeg, final long expectedS return UnionImpl.wrapInstance(srcSeg, expectedSeed); } case INTERSECTION : { - return IntersectionImpl.wrapInstance(srcSeg, expectedSeed, true); + return IntersectionImpl.wrapInstance(srcSeg, expectedSeed, srcSeg.isReadOnly() ); } default: throw new SketchesArgumentException("SetOperation cannot wrap family: " + family.toString()); @@ -198,6 +199,12 @@ public static int getMaxAnotBResultBytes(final int nomEntries) { */ abstract long[] getCache(); + /** + * Returns the backing MemorySegment object if it exists, otherwise null. + * @return the backing MemorySegment object if it exists, otherwise null. + */ + MemorySegment getMemorySegment() { return null; } + /** * Gets the current count of retained entries. * This is only useful during stateful operations. @@ -221,18 +228,11 @@ public static int getMaxAnotBResultBytes(final int nomEntries) { */ abstract long getThetaLong(); - /** - * Returns true if this object's internal data is backed by a Memory object, - * which may be on-heap or off-heap. - * @return true if this object's internal data is backed by a Memory object. - */ - public boolean hasMemorySegment() { return false; } + @Override + public abstract boolean hasMemorySegment(); - /** - * Returns true if this object's internal data is backed by an off-heap MemorySegment. - * @return true if this object's internal data is backed by an off-heap MemorySegment. - */ - public boolean isDirect() { return false; } + @Override + public abstract boolean isDirect(); /** * Returns true if this set operator is empty. @@ -242,18 +242,7 @@ public static int getMaxAnotBResultBytes(final int nomEntries) { */ abstract boolean isEmpty(); - /** - * Returns true if the two given MemorySegments refer to the same backing resource, - * which is either an off-heap memory location and size, or the same on-heap array object. - * - *

This is a convenient delegate of - * {@link org.apache.datasketches.common.Util#isSameResource(MemorySegment, MemorySegment) isSameResource()}

- * - * @param seg1 The first given MemorySegment - * @param seg2 The second given MemorySegment - * @return true if both MemorySegments are determined to be the same backing memory. - */ - public boolean isSameResource(final MemorySegment seg1, final MemorySegment seg2) { - return Util.isSameResource(seg1, seg2); - } + @Override + public abstract boolean isSameResource(final MemorySegment seg); + } diff --git a/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java b/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java index 2659df84b..222eef877 100644 --- a/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java @@ -379,11 +379,6 @@ int getCurrentPreambleLongs() { return 1; } - @Override - MemorySegment getMemorySegment() { - return null; - } - @Override short getSeedHash() { return (short) (pre0_ >>> 48); diff --git a/src/main/java/org/apache/datasketches/theta2/Sketch.java b/src/main/java/org/apache/datasketches/theta2/Sketch.java index 3c5650a91..82661aa27 100644 --- a/src/main/java/org/apache/datasketches/theta2/Sketch.java +++ b/src/main/java/org/apache/datasketches/theta2/Sketch.java @@ -35,8 +35,8 @@ import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.MemorySegmentStatus; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon.BinomialBoundsN; import org.apache.datasketches.thetacommon.ThetaUtil; @@ -46,7 +46,7 @@ * * @author Lee Rhodes */ -public abstract class Sketch { +public abstract class Sketch implements MemorySegmentStatus { Sketch() {} @@ -382,25 +382,12 @@ public double getUpperBound(final int numStdDev) { : getRetainedEntries(true); } - /** - * Returns true if this object's internal data is backed by a MemorySegment object, - * which may be on-heap or off-heap. 
- * @return true if this object's internal data is backed by a MemorySegment object. - */ - public boolean hasMemorySegment() { return false; } - /** * Returns true if this sketch is in compact form. * @return true if this sketch is in compact form. */ public abstract boolean isCompact(); - /** - * Returns true if this object's internal data is backed by an off-heap MemorySegment. - * @return true if this object's internal data is backed by an off-heap MemorySegment. - */ - public boolean isDirect() { return false; } - /** * See Empty * @return true if empty. @@ -423,19 +410,21 @@ public boolean isEstimationMode() { public abstract boolean isOrdered(); /** - * Returns true if the two given MemorySegments refer to the same backing resource, - * which is either an off-heap memory location and size, or the same on-heap array object. + * Returns true if the backing MemorySegment of this object refers to the same MemorySegment of that. + * They can either have the same off-heap memory location and size, or refer to the same on-heap array object. + * + *

If both segments are off-heap, they must have the same starting address and the same size.

+ * + *

For on-heap segments, both segments must be based on or derived from the same array object and neither segment + * can be read-only.

* - *

This is a convenient delegate of - * {@link org.apache.datasketches.common.Util#isSameResource(MemorySegment, MemorySegment) isSameResource()}

+ *

Returns false if either segment is null.

* - * @param seg1 The first given MemorySegment - * @param seg2 The second given MemorySegment - * @return true if both MemorySegments are determined to be the same backing memory. + * @param that The given MemorySegment. + * @return true if the backing MemorySegment of this object hierarchy refers to the same MemorySegment of that. */ - public boolean isSameResource(final MemorySegment seg1, final MemorySegment seg2) { - return Util.isSameResource(seg1, seg2); - } + @Override + public abstract boolean isSameResource(final MemorySegment that); /** * Returns a HashIterator that can be used to iterate over the retained hash values of the @@ -601,9 +590,10 @@ public static String toString(final MemorySegment mem) { /** * Returns the backing MemorySegment object if it exists, otherwise null. + * This is overridden where relevant. * @return the backing MemorySegment object if it exists, otherwise null. */ - abstract MemorySegment getMemorySegment(); + MemorySegment getMemorySegment() { return null; } /** * Gets the 16-bit seed hash diff --git a/src/main/java/org/apache/datasketches/theta2/Sketches.java b/src/main/java/org/apache/datasketches/theta2/Sketches.java new file mode 100644 index 000000000..cbcfac7d0 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/Sketches.java @@ -0,0 +1,407 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.FLAGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.RETAINED_ENTRIES_INT; +import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.THETA_LONG; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * This class brings together the common sketch and set operation creation methods and + * the public static methods into one place. + * + * @author Lee Rhodes + */ +public final class Sketches { + + private Sketches() {} + + /** + * Gets the unique count estimate from a valid MemorySegment image of a Sketch + * @param srcSeg the source MemorySegment + * @return the sketch's best estimate of the cardinality of the input stream. 
+ */ + public static double getEstimate(final MemorySegment srcSeg) { + checkIfValidThetaSketch(srcSeg); + return Sketch.estimate(getThetaLong(srcSeg), getRetainedEntries(srcSeg)); + } + + /** + * Gets the approximate lower error bound from a valid MemorySegment image of a Sketch + * given the specified number of Standard Deviations. + * This will return getEstimate() if isEmpty() is true. + * + * @param numStdDev + * See Number of Standard Deviations + * @param srcSeg the source MemorySegment + * @return the lower bound. + */ + public static double getLowerBound(final int numStdDev, final MemorySegment srcSeg) { + return Sketch.lowerBound(getRetainedEntries(srcSeg), getThetaLong(srcSeg), numStdDev, getEmpty(srcSeg)); + } + + /** + * Ref: {@link SetOperation#getMaxAnotBResultBytes(int)}. + * Returns the maximum number of bytes for the returned CompactSketch, given the maximum + * value of nomEntries of the first sketch A of AnotB. + * @param maxNomEntries the given value + * @return the maximum number of bytes. + */ + public static int getMaxAnotBResultBytes(final int maxNomEntries) { + return SetOperation.getMaxAnotBResultBytes(maxNomEntries); + } + + /** + * Returns the maximum number of storage bytes required for a CompactSketch with the given + * number of actual entries. + * @param numberOfEntries the actual number of retained entries stored in the sketch. + * @return the maximum number of storage bytes required for a CompactSketch with the given number + * of retained entries. + */ + public static int getMaxCompactSketchBytes(final int numberOfEntries) { + return Sketch.getMaxCompactSketchBytes(numberOfEntries); + } + + /** + * Returns the maximum number of storage bytes required for a CompactSketch given the configured + * log_base2 of the number of nominal entries, which is a power of 2. + * @param lgNomEntries Nominal Entries + * @return the maximum number of storage bytes required for a CompactSketch with the given + * lgNomEntries. 
+ * @see Sketch#getCompactSketchMaxBytes(int) + */ + public static int getCompactSketchMaxBytes(final int lgNomEntries) { + return Sketch.getCompactSketchMaxBytes(lgNomEntries); + } + + /** + * Ref: {@link SetOperation#getMaxIntersectionBytes(int)} + * @param nomEntries Ref: {@link SetOperation#getMaxIntersectionBytes(int)}, {@code nomEntries} + * @return Ref: {@link SetOperation#getMaxIntersectionBytes(int)} + */ + public static int getMaxIntersectionBytes(final int nomEntries) { + return SetOperation.getMaxIntersectionBytes(nomEntries); + } + + /** + * Ref: {@link SetOperation#getMaxUnionBytes(int)} + * @param nomEntries Ref: {@link SetOperation#getMaxUnionBytes(int)}, {@code nomEntries} + * @return Ref: {@link SetOperation#getMaxUnionBytes(int)} + */ + public static int getMaxUnionBytes(final int nomEntries) { + return SetOperation.getMaxUnionBytes(nomEntries); + } + + /** + * Ref: {@link Sketch#getMaxUpdateSketchBytes(int)} + * @param nomEntries Ref: {@link Sketch#getMaxUpdateSketchBytes(int)}, {@code nomEntries} + * @return Ref: {@link Sketch#getMaxUpdateSketchBytes(int)} + */ + public static int getMaxUpdateSketchBytes(final int nomEntries) { + return Sketch.getMaxUpdateSketchBytes(nomEntries); + } + + /** + * Ref: {@link Sketch#getSerializationVersion(MemorySegment)} + * @param srcSeg Ref: {@link Sketch#getSerializationVersion(MemorySegment)}, {@code srcSeg} + * @return Ref: {@link Sketch#getSerializationVersion(MemorySegment)} + */ + public static int getSerializationVersion(final MemorySegment srcSeg) { + return Sketch.getSerializationVersion(srcSeg); + } + + /** + * Gets the approximate upper error bound from a valid MemorySegment image of a Sketch + * given the specified number of Standard Deviations. + * This will return getEstimate() if isEmpty() is true. + * + * @param numStdDev + * See Number of Standard Deviations + * @param srcSeg the source MemorySegment + * @return the upper bound. 
+ */ + public static double getUpperBound(final int numStdDev, final MemorySegment srcSeg) { + return Sketch.upperBound(getRetainedEntries(srcSeg), getThetaLong(srcSeg), numStdDev, getEmpty(srcSeg)); + } + + //Heapify Operations + + /** + * Ref: {@link CompactSketch#heapify(MemorySegment) CompactSketch.heapify(MemorySegment)} + * @param srcSeg Ref: {@link CompactSketch#heapify(MemorySegment) CompactSketch.heapify(MemorySegment)}, {@code srcSeg} + * @return {@link CompactSketch CompactSketch} + */ + public static CompactSketch heapifyCompactSketch(final MemorySegment srcSeg) { + return CompactSketch.heapify(srcSeg); + } + + /** + * Ref: {@link CompactSketch#heapify(MemorySegment, long) CompactSketch.heapify(MemorySegment, long)} + * @param srcSeg Ref: {@link CompactSketch#heapify(MemorySegment, long) CompactSketch.heapify(MemorySegment, long)}, {@code srcSeg} + * @param expectedSeed Ref: {@link CompactSketch#heapify(MemorySegment, long) CompactSketch.heapify(MemorySegment, long)}, + * {@code expectedSeed} + * @return {@link CompactSketch CompactSketch} + */ + public static CompactSketch heapifyCompactSketch(final MemorySegment srcSeg, final long expectedSeed) { + return CompactSketch.heapify(srcSeg, expectedSeed); + } + + /** + * Ref: {@link CompactSketch#wrap(MemorySegment) CompactSketch.wrap(MemorySegment)} + * @param srcSeg Ref: {@link CompactSketch#wrap(MemorySegment) CompactSketch.wrap(MemorySegment)}, {@code srcSeg} + * @return {@link CompactSketch CompactSketch} + */ + public static CompactSketch wrapCompactSketch(final MemorySegment srcSeg) { + return CompactSketch.wrap(srcSeg); + } + + /** + * Ref: {@link CompactSketch#wrap(MemorySegment, long) CompactSketch.wrap(MemorySegment, long)} + * @param srcSeg Ref: {@link CompactSketch#wrap(MemorySegment, long) CompactSketch.wrap(MemorySegment, long)}, {@code srcSeg} + * @param expectedSeed Ref: {@link CompactSketch#wrap(MemorySegment, long) CompactSketch.wrap(MemorySegment, long)}, + * {@code expectedSeed} + * 
@return {@link CompactSketch CompactSketch} + */ + public static CompactSketch wrapCompactSketch(final MemorySegment srcSeg, final long expectedSeed) { + return CompactSketch.wrap(srcSeg, expectedSeed); + } + + /** + * Ref: {@link SetOperation#heapify(MemorySegment) SetOperation.heapify(MemorySegment)} + * @param srcSeg Ref: {@link SetOperation#heapify(MemorySegment) SetOperation.heapify(MemorySegment)}, {@code srcSeg} + * @return {@link SetOperation SetOperation} + */ + public static SetOperation heapifySetOperation(final MemorySegment srcSeg) { + return SetOperation.heapify(srcSeg); + } + + /** + * Ref: {@link SetOperation#heapify(MemorySegment, long) SetOperation.heapify(MemorySegment, long)} + * @param srcSeg Ref: {@link SetOperation#heapify(MemorySegment, long) SetOperation.heapify(MemorySegment, long)}, + * {@code srcSeg} + * @param expectedSeed the seed used to validate the given Memory image. + * Ref: {@link SetOperation#heapify(MemorySegment, long) SetOperation.heapify(MemorySegment, long)}, + * {@code expectedSeed} + * @return {@link SetOperation SetOperation} + */ + public static SetOperation heapifySetOperation(final MemorySegment srcSeg, final long expectedSeed) { + return SetOperation.heapify(srcSeg, expectedSeed); + } + + /** + * Ref: {@link Sketch#heapify(MemorySegment) Sketch.heapify(MemorySegment)} + * @param srcSeg Ref: {@link Sketch#heapify(MemorySegment) Sketch.heapify(MemorySegment)}, {@code srcSeg} + * @return {@link Sketch Sketch} + */ + public static Sketch heapifySketch(final MemorySegment srcSeg) { + return Sketch.heapify(srcSeg); + } + + /** + * Ref: {@link Sketch#heapify(MemorySegment, long) Sketch.heapify(MemorySegment, long)} + * @param srcSeg Ref: {@link Sketch#heapify(MemorySegment, long) Sketch.heapify(MemorySegment, long)}, {@code srcSeg} + * @param expectedSeed the seed used to validate the given MemorySegment image. 
+ * Ref: {@link Sketch#heapify(MemorySegment, long) Sketch.heapify(MemorySegment, long)}, {@code expectedSeed} + * @return {@link Sketch Sketch} + */ + public static Sketch heapifySketch(final MemorySegment srcSeg, final long expectedSeed) { + return Sketch.heapify(srcSeg, expectedSeed); + } + + /** + * Ref: {@link UpdateSketch#heapify(MemorySegment) UpdateSketch.heapify(MemorySegment)} + * @param srcSeg Ref: {@link UpdateSketch#heapify(MemorySegment) UpdateSketch.heapify(MemorySegment)}, {@code srcSeg} + * @return {@link UpdateSketch UpdateSketch} + */ + public static UpdateSketch heapifyUpdateSketch(final MemorySegment srcSeg) { + return UpdateSketch.heapify(srcSeg); + } + + /** + * Ref: {@link UpdateSketch#heapify(MemorySegment, long) UpdateSketch.heapify(MemorySegment, long)} + * @param srcSeg Ref: {@link UpdateSketch#heapify(MemorySegment, long) UpdateSketch.heapify(MemorySegment, long)}, + * {@code srcSeg} + * @param expectedSeed the seed used to validate the given MemorySegment image. 
+ * Ref: {@link UpdateSketch#heapify(MemorySegment, long) UpdateSketch.heapify(MemorySegment, long)}, + * {@code expectedSeed} + * @return {@link UpdateSketch UpdateSketch} + */ + public static UpdateSketch heapifyUpdateSketch(final MemorySegment srcSeg, final long expectedSeed) { + return UpdateSketch.heapify(srcSeg, expectedSeed); + } + + //Builders + + /** + * Ref: {@link SetOperationBuilder SetOperationBuilder} + * @return {@link SetOperationBuilder SetOperationBuilder} + */ + public static SetOperationBuilder setOperationBuilder() { + return new SetOperationBuilder(); + } + + /** + * Ref: {@link UpdateSketchBuilder UpdateSketchBuilder} + * @return {@link UpdateSketchBuilder UpdateSketchBuilder} + */ + public static UpdateSketchBuilder updateSketchBuilder() { + return new UpdateSketchBuilder(); + } + + //Wrap operations + + /** + * Convenience method, calls {@link SetOperation#wrap(MemorySegment)} and casts the result to a Intersection + * @param srcSeg Ref: {@link SetOperation#wrap(MemorySegment)}, {@code srcSeg} + * @return a Intersection backed by the given MemorySegment + */ + public static Intersection wrapIntersection(final MemorySegment srcSeg) { + return (Intersection) SetOperation.wrap(srcSeg); + } + + /** + * Ref: {@link SetOperation#wrap(MemorySegment) SetOperation.wrap(MemorySegment)} + * @param srcSeg Ref: {@link SetOperation#wrap(MemorySegment) SetOperation.wrap(MemorySegment)}, {@code srcSeg} + * @return {@link SetOperation SetOperation} + */ + public static SetOperation wrapSetOperation(final MemorySegment srcSeg) { + return wrapSetOperation(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + } + + /** + * Ref: {@link SetOperation#wrap(MemorySegment, long) SetOperation.wrap(MemorySegment, long)} + * @param srcSeg Ref: {@link SetOperation#wrap(MemorySegment, long) SetOperation.wrap(MemorySegment, long)}, {@code srcSeg} + * @param expectedSeed the seed used to validate the given MemorySegment image. 
+ * Ref: {@link SetOperation#wrap(MemorySegment, long) SetOperation.wrap(Memory, long)}, + * {@code expectedSeed} + * @return {@link SetOperation SetOperation} + */ + public static SetOperation wrapSetOperation(final MemorySegment srcSeg, final long expectedSeed) { + return SetOperation.wrap(srcSeg, expectedSeed); + } + + /** + * Ref: {@link Sketch#wrap(MemorySegment) Sketch.wrap(MemorySegment)} + * @param srcSeg Ref: {@link Sketch#wrap(MemorySegment) Sketch.wrap(MemorySegment)}, {@code srcSeg} + * @return {@link Sketch Sketch} + */ + public static Sketch wrapSketch(final MemorySegment srcSeg) { + return Sketch.wrap(srcSeg); + } + + /** + * Ref: {@link Sketch#wrap(MemorySegment, long) Sketch.wrap(MemorySegment, long)} + * @param srcSeg Ref: {@link Sketch#wrap(MemorySegment, long) Sketch.wrap(MemorySegment, long)}, {@code srcSeg} + * @param expectedSeed the expectedSeed used to validate the given MemorySegment image. + * Ref: {@link Sketch#wrap(MemorySegment, long) Sketch.wrap(MemorySegment, long)}, {@code expectedSeed} + * @return {@link Sketch Sketch} + */ + public static Sketch wrapSketch(final MemorySegment srcSeg, final long expectedSeed) { + return Sketch.wrap(srcSeg, expectedSeed); + } + + /** + * Convenience method, calls {@link SetOperation#wrap(MemorySegment)} and casts the result to a Union + * @param srcSeg Ref: {@link SetOperation#wrap(MemorySegment)}, {@code srcSeg} + * @return a Union backed by the given MemorySegment. 
+ */ + public static Union wrapUnion(final MemorySegment srcSeg) { + return (Union) SetOperation.wrap(srcSeg); + } + + /** + * Ref: {@link UpdateSketch#wrap(MemorySegment) UpdateSketch.wrap(MemorySegment)} + * @param srcSeg Ref: {@link UpdateSketch#wrap(MemorySegment) UpdateSketch.wrap(MemorySegment)}, {@code srcSeg} + * @return {@link UpdateSketch UpdateSketch} + */ + public static UpdateSketch wrapUpdateSketch(final MemorySegment srcSeg) { + return wrapUpdateSketch(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + } + + /** + * Ref: {@link UpdateSketch#wrap(MemorySegment, long) UpdateSketch.wrap(MemorySegment, long)} + * @param srcSeg Ref: {@link UpdateSketch#wrap(MemorySegment, long) UpdateSketch.wrap(MemorySegment, long)}, {@code srcSeg} + * @param expectedSeed the seed used to validate the given MemorySegment image. + * Ref: {@link UpdateSketch#wrap(MemorySegment, long) UpdateSketch.wrap(MemorySegment, long)}, {@code expectedSeed} + * @return {@link UpdateSketch UpdateSketch} + */ + public static UpdateSketch wrapUpdateSketch(final MemorySegment srcSeg, final long expectedSeed) { + return UpdateSketch.wrap(srcSeg, expectedSeed); + } + + //Restricted static methods + + static void checkIfValidThetaSketch(final MemorySegment srcSeg) { + final int fam = srcSeg.get(JAVA_BYTE, FAMILY_BYTE); + if (!Sketch.isValidSketchID(fam)) { + throw new SketchesArgumentException("Source Memory not a valid Sketch. 
Family: " + + Family.idToFamily(fam).toString()); + } + } + + static boolean getEmpty(final MemorySegment srcSeg) { + final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE); + if (serVer == 1) { + return ((getThetaLong(srcSeg) == Long.MAX_VALUE) && (getRetainedEntries(srcSeg) == 0)); + } + return (srcSeg.get(JAVA_BYTE, FLAGS_BYTE) & EMPTY_FLAG_MASK) != 0; //for SerVer 2 & 3 + } + + static int getPreambleLongs(final MemorySegment srcSeg) { + return srcSeg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; //for SerVer 1,2,3 + } + + static int getRetainedEntries(final MemorySegment srcSeg) { + final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE); + if (serVer == 1) { + final int entries = srcSeg.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT); + if ((getThetaLong(srcSeg) == Long.MAX_VALUE) && (entries == 0)) { + return 0; + } + return entries; + } + //SerVer 2 or 3 + final int preLongs = getPreambleLongs(srcSeg); + final boolean empty = (srcSeg.get(JAVA_BYTE, FLAGS_BYTE) & EMPTY_FLAG_MASK) != 0; //for SerVer 2 & 3 + if (preLongs == 1) { + return empty ? 0 : 1; + } + //preLongs > 1 + return srcSeg.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT); //for SerVer 1,2,3 + } + + static long getThetaLong(final MemorySegment srcSeg) { + final int preLongs = getPreambleLongs(srcSeg); + return (preLongs < 3) ? 
Long.MAX_VALUE : srcSeg.get(JAVA_LONG_UNALIGNED, THETA_LONG); //for SerVer 1,2,3 + } +} diff --git a/src/main/java/org/apache/datasketches/theta2/UnionImpl.java b/src/main/java/org/apache/datasketches/theta2/UnionImpl.java index a86365c7d..f3bdbe2f4 100644 --- a/src/main/java/org/apache/datasketches/theta2/UnionImpl.java +++ b/src/main/java/org/apache/datasketches/theta2/UnionImpl.java @@ -30,6 +30,7 @@ import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; +import java.util.Objects; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; @@ -87,7 +88,7 @@ static UnionImpl initNewHeapInstance( } /** - * Construct a new Direct Union in the off-heap destination MemorySegment. + * Construct a new Direct Union in the destination MemorySegment. * Called by SetOperationBuilder. * * @param lgNomLongs See lgNomLongs. @@ -121,11 +122,12 @@ static UnionImpl initNewDirectInstance( * @return this class */ static UnionImpl heapifyInstance(final MemorySegment srcSeg, final long expectedSeed) { - Family.UNION.checkFamilyID(extractFamilyID(srcSeg)); - final UpdateSketch gadget = HeapQuickSelectSketch.heapifyInstance(srcSeg, expectedSeed); + final MemorySegment srcSegRO = srcSeg.asReadOnly(); + Family.UNION.checkFamilyID(extractFamilyID(srcSegRO)); + final UpdateSketch gadget = HeapQuickSelectSketch.heapifyInstance(srcSegRO, expectedSeed); final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed); - unionImpl.unionThetaLong_ = extractUnionThetaLong(srcSeg); - unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcSeg); + unionImpl.unionThetaLong_ = extractUnionThetaLong(srcSegRO); + unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcSegRO); return unionImpl; } @@ -139,7 +141,9 @@ static UnionImpl heapifyInstance(final MemorySegment srcSeg, final long expected */ static UnionImpl fastWrap(final MemorySegment srcSeg, final long expectedSeed) { Family.UNION.checkFamilyID(extractFamilyID(srcSeg)); - final UpdateSketch gadget = 
DirectQuickSelectSketch.fastWritableWrap(srcSeg, expectedSeed); + final UpdateSketch gadget = srcSeg.isReadOnly() + ? DirectQuickSelectSketchR.fastReadOnlyWrap(srcSeg, expectedSeed) + : DirectQuickSelectSketch.fastWritableWrap(srcSeg, expectedSeed); final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed); unionImpl.unionThetaLong_ = extractUnionThetaLong(srcSeg); unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcSeg); @@ -156,7 +160,9 @@ static UnionImpl fastWrap(final MemorySegment srcSeg, final long expectedSeed) { */ static UnionImpl wrapInstance(final MemorySegment srcSeg, final long expectedSeed) { Family.UNION.checkFamilyID(extractFamilyID(srcSeg)); - final UpdateSketch gadget = DirectQuickSelectSketchR.readOnlyWrap(srcSeg, expectedSeed); + final UpdateSketch gadget = srcSeg.isReadOnly() + ? DirectQuickSelectSketchR.readOnlyWrap(srcSeg, expectedSeed) + : DirectQuickSelectSketch.writableWrap(srcSeg, expectedSeed); final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed); unionImpl.unionThetaLong_ = extractUnionThetaLong(srcSeg); unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcSeg); @@ -174,6 +180,11 @@ public int getMaxUnionBytes() { return (16 << lgK) + (Family.UNION.getMaxPreLongs() << 3); } + @Override + MemorySegment getMemorySegment() { + return hasMemorySegment() ? gadget_.getMemorySegment() : null; + } + @Override public CompactSketch getResult() { return getResult(true, null); @@ -212,14 +223,17 @@ public CompactSketch getResult(final boolean dstOrdered, final MemorySegment dst @Override public boolean hasMemorySegment() { - return gadget_ instanceof DirectQuickSelectSketchR - ? gadget_.hasMemorySegment() : false; + return gadget_.hasMemorySegment(); } @Override public boolean isDirect() { - return gadget_ instanceof DirectQuickSelectSketchR - ? 
gadget_.isDirect() : false; + return gadget_.isDirect(); + } + + @Override + public boolean isSameResource(final MemorySegment that) { + return gadget_.isSameResource(that); } @Override @@ -290,9 +304,8 @@ public void union(final Sketch sketchIn) { @Override public void union(final MemorySegment seg) { - if (seg != null) { - union(Sketch.wrap(seg)); - } + Objects.requireNonNull(seg, "MemorySegment must be non-null"); + union(Sketch.wrap(seg.asReadOnly())); } @Override diff --git a/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java b/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java index ee5f93ea2..8a8dc5fd3 100644 --- a/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java @@ -158,16 +158,31 @@ int getCurrentDataLongs() { return 1 << getLgArrLongs(); } + @Override + public boolean hasMemorySegment() { + return (this instanceof DirectQuickSelectSketchR && ((DirectQuickSelectSketchR)this).hasMemorySegment()); + } + @Override public boolean isCompact() { return false; } + @Override + public boolean isDirect() { + return (this instanceof DirectQuickSelectSketchR && ((DirectQuickSelectSketchR)this).isDirect()); + } + @Override public boolean isOrdered() { return false; } + @Override + public boolean isSameResource(final MemorySegment that) { + return (this instanceof DirectQuickSelectSketchR && ((DirectQuickSelectSketchR)this).isSameResource(that)); + } + //UpdateSketch interface /** diff --git a/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java index 08939ee41..09a20b19e 100644 --- a/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java @@ -90,16 +90,6 @@ public long getThetaLong() { return (preLongs > 2) ? 
getLongLE(bytes_, THETA_LONG) : Long.MAX_VALUE; } - @Override - public boolean hasMemorySegment() { - return false; - } - - @Override - public boolean isDirect() { - return false; - } - @Override public boolean isEmpty() { return (bytes_[FLAGS_BYTE] & EMPTY_FLAG_MASK) > 0; @@ -147,11 +137,6 @@ int getCurrentPreambleLongs() { return bytes_[PREAMBLE_LONGS_BYTE]; } - @Override - MemorySegment getMemorySegment() { - return null; - } - @Override short getSeedHash() { return getShortLE(bytes_, SEED_HASH_SHORT); diff --git a/src/test/java/org/apache/datasketches/theta2/AnotBimplTest.java b/src/test/java/org/apache/datasketches/theta2/AnotBimplTest.java new file mode 100644 index 000000000..a192e9875 --- /dev/null +++ b/src/test/java/org/apache/datasketches/theta2/AnotBimplTest.java @@ -0,0 +1,333 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import java.lang.foreign.MemorySegment; +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.thetacommon.ThetaUtil; +import org.testng.annotations.Test; + +/** + * @author Lee Rhodes + */ +public class AnotBimplTest { + + @Test + public void checkExactAnotB_AvalidNoOverlap() { + final int k = 512; + + final UpdateSketch usk1 = UpdateSketch.builder().setNominalEntries(k).build(); + final UpdateSketch usk2 = UpdateSketch.builder().setNominalEntries(k).build(); + + for (int i=0; iV1 dates from roughly Aug 2014 to about May 2015. + * The library at that time had an early Theta sketch with set operations based on ByteBuffer, + * the Alpha sketch, and an early HLL sketch. It also had an early adaptor for Pig. + * It also had code for the even earlier CountUniqueSketch (for backward compatibility), + * which was the bucket sketch based on Giroire. + * + *

Serialization Version 1:

+ *
+   * Long || Start Byte Adr:
+   * Adr:
+   *      ||  7 |   6   |     5    |   4   |   3   |    2   |    1   |     0    |
+   *  0   ||    | Flags | LgResize | LgArr | lgNom | SkType | SerVer | MD_LONGS |
+   *
+   *      || 15 |  14   |    13    |  12   |  11   |   10   |    9   |     8    |
+   *  1   ||                               | ------------CurCount-------------- |
+   *
+   *      || 23 |  22   |    21    |  20   |  19   |   18   |   17   |    16    |
+   *  2   || --------------------------THETA_LONG------------------------------ |
+   *
+   *      ||                                                         |    24    |
+   *  3   || ----------------------Start of Long Array------------------------  |
+   * 
+ * + *
    + *
  • The serialization for V1 was always to a compact form (no hash table spaces).
  • + *
  • MD_LONGS (Metadata Longs, now Preamble Longs) was always 3.
  • + *
  • SerVer is always 1.
  • + *
  • The SkType had three values: 1,2,3 for Alpha, QuickSelect, and SetSketch, + * respectively.
  • + *
  • Bytes lgNom and lgArr were only used by the QS and Alpha sketches.
  • + *
  • V1 LgResize (2 bits) was only relevant to the Alpha and QS sketches.
  • + *
  • The flags byte is in byte 6 (moved to 5 in V2).
  • + *
  • The only flag bits are BE(bit0)=0, and Read-Only(bit1)=1. Read-only was only set for the + * SetSketch.
  • + *
  • There is no seedHash.
  • + *
  • There is no concept of p-sampling so bytes 12-15 of Pre1 are empty.
  • + *
  • The determination of empty is when both curCount=0 and thetaLong = Long.MAX_VALUE.
  • + *
+ * + * @param skV3 a SerVer3, ordered CompactSketch + * @return a SerVer1 SetSketch as MemorySegment object. + */ + public static MemorySegment convertSerVer3toSerVer1(final CompactSketch skV3) { + //Check input sketch + final boolean validIn = skV3.isCompact() && skV3.isOrdered() && !skV3.hasMemorySegment(); + if (!validIn) { + throw new SketchesArgumentException("Invalid input sketch."); + } + + //Build V1 SetSketch in MemorySegment + final int curCount = skV3.getRetainedEntries(true); + final int bytes = (3 + curCount) << 3; + final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]);//Util.newHeapSegment(bytes); + //Pre0 + wseg.set(JAVA_BYTE, 0, (byte) 3); //preLongs + wseg.set(JAVA_BYTE, 1, (byte) 1); //SerVer + wseg.set(JAVA_BYTE, 2, (byte) 3); //Compact (SetSketch) + wseg.set(JAVA_BYTE, 6, (byte) 2); //Flags ReadOnly, LittleEndian + //Pre1 + wseg.set(JAVA_INT_UNALIGNED, 8, curCount); + //Pre2 + wseg.set(JAVA_LONG_UNALIGNED, 16, skV3.getThetaLong()); + //Data + if (curCount > 0) { + MemorySegment.copy(skV3.getCache(), 0, wseg, JAVA_LONG_UNALIGNED, 24, curCount); + } + return wseg; + } + + /** + * Converts a SerVer3 ordered, heap CompactSketch to a SerVer2 ordered, SetSketch in MemorySegment. + * This is exclusively for testing purposes. + * + *

V2 is short-lived and dates from roughly Mid May 2015 to about June 1st, 2015. + * (V3 was created about June 15th in preparation for OpenSource in July.) + * The Theta sketch had evolved but was still based on ByteBuffer. There was an UpdateSketch, + * the Alpha sketch, and the early HLL sketch. It also had an early adaptor for Pig. + * + * + *

Serialization Version 2:

+ *
+   * Long || Start Byte Adr:
+   * Adr:
+   *      ||  7 |   6   |     5    |   4   |   3   |    2   |    1   |     0         |
+   *  0   || Seed Hash  |  Flags   | lgArr | lgNom | SkType | SerVer | MD_LONGS + RR |
+   *
+   *      || 15 |  14   |    13    |  12   |  11   |   10   |    9   |     8         |
+   *  1   || --------------p-------------- | ---------Retained Entries Count-------- |
+   *
+   *      || 23 |  22   |    21    |  20   |  19   |   18   |   17   |    16         |
+   *  2   || --------------------------THETA_LONG----------------------------------- |
+   *
+   *      ||                                                         |    24         |
+   *  3   || ----------Start of Long Array, could be at 2 or 3 --------------------  |
+   *  
+ * + *
    + *
  • The serialization for V2 was always to a compact form (no hash table spaces).
  • + *
  • MD_LONGS low 6 bits: 1 (Empty), 2 (Exact), 3 (Estimating).
  • + *
  • SerVer is always 2.
  • + *
  • The SkType had 4 values: 1,2,3,4; see below.
  • + *
  • Bytes lgNom and lgArr were only used by the QS and Alpha sketches.
  • + *
  • V2 LgResize top 2 bits of byte 0. Only relevant to the Alpha and QS sketches.
  • + *
  • The flags byte is in byte 5.
  • + *
  • The flag bits are specified below.
  • + *
  • There is a seedHash in bytes 6-7.
  • + *
  • p-sampling is bytes 12-15 of Pre1.
  • + *
  • The determination of empty is based on the sketch field empty_.
  • + *
+ *
+   *   // Metadata byte Addresses
+   *   private static final int METADATA_LONGS_BYTE        = 0; //low 6 bits
+   *   private static final int LG_RESIZE_RATIO_BYTE       = 0; //upper 2 bits
+   *   private static final int SER_VER_BYTE               = 1;
+   *   private static final int SKETCH_TYPE_BYTE           = 2;
+   *   private static final int LG_NOM_LONGS_BYTE          = 3;
+   *   private static final int LG_ARR_LONGS_BYTE          = 4;
+   *   private static final int FLAGS_BYTE                 = 5;
+   *   private static final int SEED_HASH_SHORT            = 6;  //byte 6,7
+   *   private static final int RETAINED_ENTRIES_COUNT_INT = 8;  //4 byte aligned
+   *   private static final int P_FLOAT                    = 12; //4 byte aligned
+   *   private static final int THETA_LONG                 = 16; //8-byte aligned
+   *   //Backward compatibility
+   *   private static final int FLAGS_BYTE_V1              = 6;
+   *   private static final int LG_RESIZE_RATIO_BYTE_V1    = 5;
+   *
+   *   // Constant Values
+   *   static final int SER_VER                        = 2;
+   *   static final int ALPHA_SKETCH                   = 1; //SKETCH_TYPE_BYTE
+   *   static final int QUICK_SELECT_SKETCH            = 2;
+   *   static final int SET_SKETCH                     = 3;
+   *   static final int BUFFERED_QUICK_SELECT_SKETCH   = 4;
+   *   static final String[] SKETCH_TYPE_STR     =
+   *       { "None", "AlphaSketch", "QuickSelectSketch", "SetSketch", "BufferedQuickSelectSketch" };
+   *
+   *   // flag bit masks
+   *   static final int BIG_ENDIAN_FLAG_MASK     = 1;
+   *   static final int READ_ONLY_FLAG_MASK      = 2;
+   *   static final int EMPTY_FLAG_MASK          = 4;
+   *   static final int NO_REBUILD_FLAG_MASK     = 8;
+   *   static final int UNORDERED_FLAG_MASK     = 16;
+   * 
+ * + * @param skV3 a SerVer3, ordered CompactSketch + * @param seed used for checking the seed hash (if one exists). + * @return a SerVer2 SetSketch as MemorySegment object. + */ + public static MemorySegment convertSerVer3toSerVer2(final CompactSketch skV3, final long seed) { + final short seedHash = ThetaUtil.computeSeedHash(seed); + MemorySegment wseg = null; + + if (skV3 instanceof EmptyCompactSketch) { + wseg = MemorySegment.ofArray(new long[1]); + wseg.set(JAVA_BYTE, 0, (byte) 1); //preLongs + wseg.set(JAVA_BYTE, 1, (byte) 2); //SerVer + wseg.set(JAVA_BYTE, 2, (byte) 3); //SetSketch + final byte flags = (byte) 0xE; //NoRebuild, Empty, ReadOnly, LE + wseg.set(JAVA_BYTE, 5, flags); + wseg.set(JAVA_SHORT_UNALIGNED, 6, seedHash); + return wseg; + } + if (skV3 instanceof SingleItemSketch) { + final SingleItemSketch sis = (SingleItemSketch) skV3; + wseg = MemorySegment.ofArray(new long[3]); + wseg.set(JAVA_BYTE, 0, (byte) 2); //preLongs + wseg.set(JAVA_BYTE, 1, (byte) 2); //SerVer + wseg.set(JAVA_BYTE, 2, (byte) 3); //SetSketch + final byte flags = (byte) 0xA; //NoRebuild, notEmpty, ReadOnly, LE + wseg.set(JAVA_BYTE, 5, flags); + wseg.set(JAVA_SHORT_UNALIGNED, 6, seedHash); + wseg.set(JAVA_INT_UNALIGNED, 8, 1); + final long[] arr = sis.getCache(); + wseg.set(JAVA_LONG_UNALIGNED, 16, arr[0]); + return wseg; + } + //General CompactSketch + final int preLongs = skV3.getCompactPreambleLongs(); + final int entries = skV3.getRetainedEntries(true); + final boolean unordered = !(skV3.isOrdered()); + final byte flags = (byte) (0xA | (unordered ? 
16 : 0)); //Unordered, NoRebuild, notEmpty, ReadOnly, LE + wseg = Util.newHeapSegment((preLongs + entries) << 3); + wseg.set(JAVA_BYTE, 0, (byte) preLongs); //preLongs + wseg.set(JAVA_BYTE, 1, (byte) 2); //SerVer + wseg.set(JAVA_BYTE, 2, (byte) 3); //SetSketch + + wseg.set(JAVA_BYTE, 5, flags); + wseg.set(JAVA_SHORT_UNALIGNED, 6, seedHash); + wseg.set(JAVA_INT_UNALIGNED, 8, entries); + if (preLongs == 3) { + wseg.set(JAVA_LONG_UNALIGNED, 16, skV3.getThetaLong()); + } + final long[] arr = skV3.getCache(); + MemorySegment.copy(arr, 0, wseg, JAVA_LONG_UNALIGNED, preLongs << 3, entries); + return wseg; + } +} diff --git a/src/test/java/org/apache/datasketches/theta2/BitPackingTest.java b/src/test/java/org/apache/datasketches/theta2/BitPackingTest.java new file mode 100644 index 000000000..d6a68bbd5 --- /dev/null +++ b/src/test/java/org/apache/datasketches/theta2/BitPackingTest.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import static org.testng.Assert.assertEquals; + +import org.apache.datasketches.common.Util; +import org.testng.annotations.Test; + +public class BitPackingTest { + private final static boolean enablePrinting = false; +//for every number of bits from 1 to 63 +//generate pseudo-random data, pack, unpack and compare + + @Test + public void packUnpackBits() { + long value = 0xaa55aa55aa55aa55L; // arbitrary starting value + for (int n = 0; n < 10000; n++) { + for (int bits = 1; bits <= 63; bits++) { + final long mask = (1L << bits) - 1; // long shift: an int shift wraps for bits >= 32 + long[] input = new long[8]; + for (int i = 0; i < 8; ++i) { + input[i] = value & mask; + value += Util.INVERSE_GOLDEN_U64; + } + + byte[] bytes = new byte[8 * Long.BYTES]; + int bitOffset = 0; + int bufOffset = 0; + for (int i = 0; i < 8; ++i) { + BitPacking.packBits(input[i], bits, bytes, bufOffset, bitOffset); + bufOffset += (bitOffset + bits) >>> 3; + bitOffset = (bitOffset + bits) & 7; + } + + long[] output = new long[8]; + bitOffset = 0; + bufOffset = 0; + for (int i = 0; i < 8; ++i) { + BitPacking.unpackBits(output, i, bits, bytes, bufOffset, bitOffset); + bufOffset += (bitOffset + bits) >>> 3; + bitOffset = (bitOffset + bits) & 7; + } + + for (int i = 0; i < 8; ++i) { + assertEquals(output[i], input[i]); + } + } + } + } + + @Test + public void packUnpackBlocks() { + long value = 0xaa55aa55aa55aa55L; // arbitrary starting value + for (int n = 0; n < 10000; n++) { + for (int bits = 1; bits <= 63; bits++) { + if (enablePrinting) { System.out.println("bits " + bits); } + final long mask = (1L << bits) - 1; + long[] input = new long[8]; + for (int i = 0; i < 8; ++i) { + input[i] = value & mask; + value += Util.INVERSE_GOLDEN_U64; + } + + byte[] bytes = new byte[8 * Long.BYTES]; + BitPacking.packBitsBlock8(input, 0, bytes, 0, bits); + if (enablePrinting) { hexDump(bytes); } + + long[] output = new long[8]; + BitPacking.unpackBitsBlock8(output, 0, bytes, 0, bits); + + for (int i = 0; 
i < 8; ++i) { + if (enablePrinting) { System.out.println("checking value " + i); } + assertEquals(output[i], input[i]); + } + } + } + } + + @Test + public void packBitsUnpackBlocks() { + long value = 0; // arbitrary starting value + for (int n = 0; n < 10000; n++) { + for (int bits = 1; bits <= 63; bits++) { + final long mask = (1L << bits) - 1; // long shift: an int shift wraps for bits >= 32 + long[] input = new long[8]; + for (int i = 0; i < 8; ++i) { + input[i] = value & mask; + value += Util.INVERSE_GOLDEN_U64; + } + + byte[] bytes = new byte[8 * Long.BYTES]; + int bitOffset = 0; + int bufOffset = 0; + for (int i = 0; i < 8; ++i) { + BitPacking.packBits(input[i], bits, bytes, bufOffset, bitOffset); + bufOffset += (bitOffset + bits) >>> 3; + bitOffset = (bitOffset + bits) & 7; + } + + long[] output = new long[8]; + BitPacking.unpackBitsBlock8(output, 0, bytes, 0, bits); + + for (int i = 0; i < 8; ++i) { + assertEquals(output[i], input[i]); + } + } + } + } + + @Test + public void packBlocksUnpackBits() { + long value = 123L; // arbitrary starting value + for (int n = 0; n < 10000; n++) { + for (int bits = 1; bits <= 63; bits++) { + final long mask = (1L << bits) - 1; // long shift: an int shift wraps for bits >= 32 + long[] input = new long[8]; + for (int i = 0; i < 8; ++i) { + input[i] = value & mask; + value += Util.INVERSE_GOLDEN_U64; + } + + byte[] bytes = new byte[8 * Long.BYTES]; + BitPacking.packBitsBlock8(input, 0, bytes, 0, bits); + + long[] output = new long[8]; + int bitOffset = 0; + int bufOffset = 0; + for (int i = 0; i < 8; ++i) { + BitPacking.unpackBits(output, i, bits, bytes, bufOffset, bitOffset); + bufOffset += (bitOffset + bits) >>> 3; + bitOffset = (bitOffset + bits) & 7; + } + + for (int i = 0; i < 8; ++i) { + assertEquals(output[i], input[i]); + } + } + } + } + + void hexDump(byte[] bytes) { + for (int i = 0; i < bytes.length; i++) { + System.out.print(String.format("%02x ", bytes[i])); + } + System.out.println(); + } + +} diff --git a/src/test/java/org/apache/datasketches/theta2/CompactSketchTest.java
b/src/test/java/org/apache/datasketches/theta2/CompactSketchTest.java new file mode 100644 index 000000000..6ad5e8cdc --- /dev/null +++ b/src/test/java/org/apache/datasketches/theta2/CompactSketchTest.java @@ -0,0 +1,674 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertNull; +import static org.testng.Assert.assertTrue; + +import java.lang.foreign.MemorySegment; +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.SketchesArgumentException; +import org.testng.annotations.Test; + +import java.lang.foreign.Arena; + +/** + * @author Lee Rhodes + */ +public class CompactSketchTest { + + @Test + public void checkHeapifyWrap() { + int k = 4096; + final boolean ordered = true; + checkHeapifyWrap(k, 0, ordered); + checkHeapifyWrap(k, 1, ordered); + checkHeapifyWrap(k, 1, !ordered); + checkHeapifyWrap(k, k, ordered); //exact + checkHeapifyWrap(k, k, !ordered); //exact + checkHeapifyWrap(k, 4 * k, ordered); //estimating + checkHeapifyWrap(k, 4 * k, !ordered); //estimating + } + + //test combinations of compact ordered/not ordered and heap/direct + public void checkHeapifyWrap(int k, int u, boolean ordered) { + UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(); + + for (int i=0; i>> 1 GT_MIDP + * 4611686018427387904 Theta for p = 0.5f = MIDP + * + * 1206007004353599230 hash(6L)[0] >>> 1 GT_LOWP_V + * 922337217429372928 Theta for p = 0.1f = LOWP + * 593872385995628096 hash(4L)[0] >>> 1 LT_LOWP_V + */ + + private static final long GT_MIDP_V = 3L; + private static final float MIDP = 0.5f; + + private static final long GT_LOWP_V = 6L; + private static final float LOWP = 0.1f; + private static final long LT_LOWP_V = 4L; + + private static final double LOWP_THETA = LOWP; + + private enum SkType { + EMPTY, // { 1.0, 0, T} Bin: 101 Oct: 05 + EXACT, // { 1.0, >0, F} Bin: 110 Oct: 06, specify only value + ESTIMATION, // {<1.0, >0, F} Bin: 010 Oct: 02, specify only value + DEGENERATE // {<1.0, 0, F} Bin: 000 Oct: 0, specify 
p, value + } + + //================================= + + @Test + public void emptyEmpty() { + UpdateSketch thetaA = getSketch(SkType.EMPTY, 0, 0); + UpdateSketch thetaB = getSketch(SkType.EMPTY, 0, 0); + final double expectedIntersectTheta = 1.0; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = true; + final double expectedAnotbTheta = 1.0; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = true; + final double expectedUnionTheta = 1.0; + final int expectedUnionCount = 0; + final boolean expectedUnionEmpty = true; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void emptyExact() { + UpdateSketch thetaA = getSketch(SkType.EMPTY, 0, 0); + UpdateSketch thetaB = getSketch(SkType.EXACT, 0, GT_MIDP_V); + final double expectedIntersectTheta = 1.0; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = true; + final double expectedAnotbTheta = 1.0; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = true; + final double expectedUnionTheta = 1.0; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void emptyDegenerate() { + UpdateSketch thetaA = getSketch(SkType.EMPTY, 0, 0); + UpdateSketch thetaB = getSketch(SkType.DEGENERATE, LOWP, GT_LOWP_V); + final double expectedIntersectTheta = 1.0; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = true; + final double expectedAnotbTheta = 1.0; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = true; + final 
double expectedUnionTheta = LOWP; + final int expectedUnionCount = 0; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void emptyEstimation() { + UpdateSketch thetaA = getSketch(SkType.EMPTY, 0, 0); + UpdateSketch thetaB = getSketch(SkType.ESTIMATION, LOWP, LT_LOWP_V); + final double expectedIntersectTheta = 1.0; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = true; + final double expectedAnotbTheta = 1.0; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = true; + final double expectedUnionTheta = LOWP; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + //================================= + + @Test + public void exactEmpty() { + UpdateSketch thetaA = getSketch(SkType.EXACT, 0, GT_MIDP_V); + UpdateSketch thetaB = getSketch(SkType.EMPTY, 0, 0); + final double expectedIntersectTheta = 1.0; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = true; + final double expectedAnotbTheta = 1.0; + final int expectedAnotbCount = 1; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = 1.0; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void exactExact() { + UpdateSketch thetaA = 
getSketch(SkType.EXACT, 0, GT_MIDP_V); + UpdateSketch thetaB = getSketch(SkType.EXACT, 0, GT_MIDP_V); + final double expectedIntersectTheta = 1.0; + final int expectedIntersectCount = 1; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = 1.0; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = true; + final double expectedUnionTheta = 1.0; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void exactDegenerate() { + UpdateSketch thetaA = getSketch(SkType.EXACT, 0, LT_LOWP_V); + UpdateSketch thetaB = getSketch(SkType.DEGENERATE, LOWP, GT_LOWP_V); //entries = 0 + final double expectedIntersectTheta = LOWP_THETA; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = LOWP_THETA; + final int expectedAnotbCount = 1; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void exactEstimation() { + UpdateSketch thetaA = getSketch(SkType.EXACT, 0, LT_LOWP_V); + UpdateSketch thetaB = getSketch(SkType.ESTIMATION, LOWP, LT_LOWP_V); + final double expectedIntersectTheta = LOWP_THETA; + final int expectedIntersectCount = 1; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = LOWP_THETA; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = 
LOWP; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + //================================= + + @Test + public void estimationEmpty() { + UpdateSketch thetaA = getSketch(SkType.ESTIMATION, LOWP, LT_LOWP_V); + UpdateSketch thetaB = getSketch(SkType.EMPTY, 0, 0); + final double expectedIntersectTheta = 1.0; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = true; + final double expectedAnotbTheta = LOWP_THETA; + final int expectedAnotbCount = 1; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void estimationExact() { + UpdateSketch thetaA = getSketch(SkType.ESTIMATION, LOWP, LT_LOWP_V); + UpdateSketch thetaB = getSketch(SkType.EXACT, 0, LT_LOWP_V); + final double expectedIntersectTheta = LOWP_THETA; + final int expectedIntersectCount = 1; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = LOWP_THETA; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void estimationDegenerate() { + UpdateSketch 
thetaA = getSketch(SkType.ESTIMATION, MIDP, LT_LOWP_V); + UpdateSketch thetaB = getSketch(SkType.DEGENERATE, LOWP, GT_LOWP_V); + final double expectedIntersectTheta = LOWP_THETA; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = LOWP_THETA; + final int expectedAnotbCount = 1; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void estimationEstimation() { + UpdateSketch thetaA = getSketch(SkType.ESTIMATION, MIDP, LT_LOWP_V); + UpdateSketch thetaB = getSketch(SkType.ESTIMATION, LOWP, LT_LOWP_V); + final double expectedIntersectTheta = LOWP_THETA; + final int expectedIntersectCount = 1; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = LOWP_THETA; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + //================================= + + @Test + public void degenerateEmpty() { + UpdateSketch thetaA = getSketch(SkType.DEGENERATE, LOWP, GT_LOWP_V); //entries = 0 + UpdateSketch thetaB = getSketch(SkType.EMPTY, 0, 0); + final double expectedIntersectTheta = 1.0; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = true; + final double expectedAnotbTheta = LOWP_THETA; + final int expectedAnotbCount = 0; + 
final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP; + final int expectedUnionCount = 0; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void degenerateExact() { + UpdateSketch thetaA = getSketch(SkType.DEGENERATE, LOWP, GT_LOWP_V); //entries = 0 + UpdateSketch thetaB = getSketch(SkType.EXACT, 0, LT_LOWP_V); + final double expectedIntersectTheta = LOWP_THETA; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = LOWP_THETA; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void degenerateDegenerate() { + UpdateSketch thetaA = getSketch(SkType.DEGENERATE, MIDP, GT_MIDP_V); //entries = 0 + UpdateSketch thetaB = getSketch(SkType.DEGENERATE, LOWP, GT_LOWP_V); + final double expectedIntersectTheta = LOWP_THETA; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = LOWP_THETA; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP; + final int expectedUnionCount = 0; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, 
expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void degenerateEstimation() { + UpdateSketch thetaA = getSketch(SkType.DEGENERATE, MIDP, GT_MIDP_V); //entries = 0 + UpdateSketch thetaB = getSketch(SkType.ESTIMATION, LOWP, LT_LOWP_V); + final double expectedIntersectTheta = LOWP_THETA; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = LOWP_THETA; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + //================================= + //================================= + + private static void checks( + UpdateSketch thetaA, + UpdateSketch thetaB, + double expectedIntersectTheta, + int expectedIntersectCount, + boolean expectedIntersectEmpty, + double expectedAnotbTheta, + int expectedAnotbCount, + boolean expectedAnotbEmpty, + double expectedUnionTheta, + int expectedUnionCount, + boolean expectedUnionEmpty) { + CompactSketch csk; + Intersection inter = SetOperation.builder().buildIntersection(); + AnotB anotb = SetOperation.builder().buildANotB(); + Union union = new SetOperationBuilder().buildUnion(); + + //Intersection Stateless Theta, Theta Updatable + csk = inter.intersect(thetaA, thetaB); + checkResult("Intersect Stateless Theta, Theta", csk, expectedIntersectTheta, expectedIntersectCount, + expectedIntersectEmpty); + //Intersection Stateless Theta, Theta Compact + csk = inter.intersect(thetaA.compact(), thetaB.compact()); + checkResult("Intersect Stateless Theta, Theta", csk, expectedIntersectTheta, expectedIntersectCount, + expectedIntersectEmpty); + + //AnotB Stateless Theta, Theta Updatable 
+ csk = anotb.aNotB(thetaA, thetaB); + checkResult("AnotB Stateless Theta, Theta", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); + //AnotB Stateless Theta, Theta Compact + csk = anotb.aNotB(thetaA.compact(), thetaB.compact()); + checkResult("AnotB Stateless Theta, Theta", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); + + //AnotB Stateful Theta, Theta Updatable + anotb.setA(thetaA); + anotb.notB(thetaB); + csk = anotb.getResult(true); + checkResult("AnotB Stateful Theta, Theta", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); + //AnotB Stateful Theta, Theta Compact + anotb.setA(thetaA.compact()); + anotb.notB(thetaB.compact()); + csk = anotb.getResult(true); + checkResult("AnotB Stateful Theta, Theta", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); + + //Union Stateful Theta, Theta Updatable + union.union(thetaA); + union.union(thetaB); + csk = union.getResult(); + union.reset(); + checkResult("Union Stateless Theta, Theta", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + //Union Stateful Theta, Theta Compact + union.union(thetaA.compact()); + union.union(thetaB.compact()); + csk = union.getResult(); + union.reset(); + checkResult("Union Stateless Theta, Theta", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + + } + + private static void checkResult( + String comment, + CompactSketch csk, + double expectedTheta, + int expectedEntries, + boolean expectedEmpty) { + double actualTheta = csk.getTheta(); + int actualEntries = csk.getRetainedEntries(); + boolean actualEmpty = csk.isEmpty(); + + boolean thetaOk = actualTheta == expectedTheta; + boolean entriesOk = actualEntries == expectedEntries; + boolean emptyOk = actualEmpty == expectedEmpty; + if (!thetaOk || !entriesOk || !emptyOk) { + StringBuilder sb = new StringBuilder(); + sb.append(comment + ": "); + if (!thetaOk) { sb.append("Theta: expected " + expectedTheta + ", got " + actualTheta + "; "); } + 
if (!entriesOk) { sb.append("Entries: expected " + expectedEntries + ", got " + actualEntries + "; "); } + if (!emptyOk) { sb.append("Empty: expected " + expectedEmpty + ", got " + actualEmpty + "."); } + throw new IllegalArgumentException(sb.toString()); + } + } + + private static UpdateSketch getSketch(SkType skType, float p, long value) { + UpdateSketchBuilder bldr = UpdateSketch.builder(); + bldr.setLogNominalEntries(4); + UpdateSketch sk; + switch(skType) { + case EMPTY: { // { 1.0, 0, T} p and value are not used + sk = bldr.build(); + break; + } + case EXACT: { // { 1.0, >0, F} p is not used + sk = bldr.build(); + sk.update(value); + break; + } + case ESTIMATION: { // {<1.0, >0, F} + bldr.setP(p); + sk = bldr.build(); + sk.update(value); + break; + } + case DEGENERATE: { // {<1.0, 0, F} + bldr.setP(p); + sk = bldr.build(); + sk.update(value); + break; + } + + default: { return null; } // should not happen + } + return sk; + } + +// private static void println(Object o) { +// System.out.println(o.toString()); +// } +// +// @Test +// public void printHash() { +// long seed = DEFAULT_UPDATE_SEED; +// long v = 6; +// long hash = (hash(v, seed)[0]) >>> 1; +// println(v + ", " + hash); +// } +// +// @Test +// public void printPAsLong() { +// float p = 0.5f; +// println("p = " + p + ", " + (long)(Long.MAX_VALUE * p)); +// } + +} diff --git a/src/test/java/org/apache/datasketches/theta2/DirectIntersectionTest.java b/src/test/java/org/apache/datasketches/theta2/DirectIntersectionTest.java new file mode 100644 index 000000000..c27c3b085 --- /dev/null +++ b/src/test/java/org/apache/datasketches/theta2/DirectIntersectionTest.java @@ -0,0 +1,769 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE; +import static org.apache.datasketches.theta2.SetOperation.CONST_PREAMBLE_LONGS; +import static org.apache.datasketches.theta2.SetOperation.getMaxIntersectionBytes; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import java.lang.foreign.MemorySegment; +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.Util; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.SketchesReadOnlyException; +import org.apache.datasketches.common.SketchesStateException; +import org.apache.datasketches.thetacommon.ThetaUtil; +import org.testng.annotations.Test; + +/** + * @author Lee Rhodes + */ +public class DirectIntersectionTest { + private static final int PREBYTES = CONST_PREAMBLE_LONGS << 3; //24 + + @Test + public void checkExactIntersectionNoOverlap() { + final int lgK = 9; + final int k = 1< k); + println("Est: "+est); + } + + @SuppressWarnings("unused") + 
@Test(expectedExceptions = SketchesArgumentException.class) + public void checkOverflow() { + final int lgK = 9; //512 + final int k = 1< k); + println("Est: "+est); + + final byte[] byteArray = inter.toByteArray(); + final MemorySegment seg = MemorySegment.ofArray(byteArray); + final Intersection inter2 = (Intersection) SetOperation.heapify(seg); + comp2 = inter2.getResult(false, null); + est2 = comp2.getEstimate(); + println("Est2: "+est2); + } + + /** + * This proves that the hash of 7 is < 0.5. This fact will be used in other tests involving P. + */ + @Test + public void checkPreject() { + final UpdateSketch sk = UpdateSketch.builder().setP((float) .5).build(); + sk.update(7); + assertEquals(sk.getRetainedEntries(), 0); + } + + @Test + public void checkWrapVirginEmpty() { + final int lgK = 5; + final int k = 1 << lgK; + Intersection inter1, inter2; + UpdateSketch sk1; + + final int segBytes = getMaxIntersectionBytes(k); + MemorySegment iMem = MemorySegment.ofArray(new byte[segBytes]); + + inter1 = SetOperation.builder().buildIntersection(iMem); //virgin off-heap + inter2 = Sketches.wrapIntersection(iMem); //virgin off-heap, identical to inter1 + //both in virgin state, empty = false + //note: both inter1 and inter2 are tied to the same MemorySegment, + // so an intersect to one also affects the other. Don't do what I do! + assertFalse(inter1.hasResult()); + assertFalse(inter2.hasResult()); + + //This constructs a sketch with 0 entries and theta < 1.0 + sk1 = UpdateSketch.builder().setP((float) .5).setNominalEntries(k).build(); + sk1.update(7); //will be rejected by P, see proof above. + + //A virgin intersection (empty = false) intersected with a not-empty zero cache sketch + //remains empty = false! 
+ inter1.intersect(sk1); + assertFalse(inter1.isEmpty()); + assertTrue(inter1.hasResult()); + //note that inter2 is not independent + assertFalse(inter2.isEmpty()); + assertTrue(inter2.hasResult()); + + //test the path via toByteArray, now in a different state + iMem = MemorySegment.ofArray(inter1.toByteArray()); + inter2 = Sketches.wrapIntersection(iMem); + assertTrue(inter2.hasResult()); //still true + + //test the compaction path + final CompactSketch comp = inter2.getResult(true, null); + assertEquals(comp.getRetainedEntries(false), 0); + assertFalse(comp.isEmpty()); + } + + @Test + public void checkWrapNullEmpty2() { + final int lgK = 5; + final int k = 1< k); + println("Est: "+est); + + final byte[] segArr3 = inter2.toByteArray(); + final MemorySegment srcMem2 = MemorySegment.ofArray(segArr3); + inter3 = Sketches.wrapIntersection(srcMem2); + resultComp2 = inter3.getResult(false, null); + est2 = resultComp2.getEstimate(); + println("Est2: "+est2); + + inter.reset(); + inter2.reset(); + inter3.reset(); + } + + @Test + public void checkDefaultMinSize() { + final int k = 32; + final MemorySegment seg = MemorySegment.ofArray(new byte[k*8 + PREBYTES]); + IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, seg); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkExceptionMinSize() { + final int k = 16; + final MemorySegment seg = MemorySegment.ofArray(new byte[k*8 + PREBYTES]); + IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, seg); + } + + @Test + public void checkGetResult() { + final int k = 1024; + final UpdateSketch sk = Sketches.updateSketchBuilder().build(); + + final int segBytes = getMaxIntersectionBytes(k); + final byte[] segArr = new byte[segBytes]; + final MemorySegment iMem = MemorySegment.ofArray(segArr); + + final Intersection inter = Sketches.setOperationBuilder().buildIntersection(iMem); + inter.intersect(sk); + final CompactSketch csk = inter.getResult(); + 
assertEquals(csk.getCompactBytes(), 8); + } + + @Test + public void checkFamily() { + //cheap trick + final int k = 16; + final MemorySegment seg = MemorySegment.ofArray(new byte[k*16 + PREBYTES]); + final IntersectionImpl impl = IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, seg); + assertEquals(impl.getFamily(), Family.INTERSECTION); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkExceptions1() { + final int k = 16; + final MemorySegment seg = MemorySegment.ofArray(new byte[k*16 + PREBYTES]); + IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, seg); + //corrupt SerVer + seg.set(JAVA_BYTE, PreambleUtil.SER_VER_BYTE, (byte) 2); + IntersectionImpl.wrapInstance(seg, ThetaUtil.DEFAULT_UPDATE_SEED, false); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkExceptions2() { + final int k = 16; + final MemorySegment seg = MemorySegment.ofArray(new byte[k*16 + PREBYTES]); + IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, seg); + //seg now has non-empty intersection + //corrupt empty and CurCount + Util.setBits(seg, PreambleUtil.FLAGS_BYTE, (byte) PreambleUtil.EMPTY_FLAG_MASK); + seg.set(JAVA_INT_UNALIGNED, PreambleUtil.RETAINED_ENTRIES_INT, 2); + IntersectionImpl.wrapInstance(seg, ThetaUtil.DEFAULT_UPDATE_SEED, false); + } + + //Check Alex's bug intersecting 2 direct full sketches with only overlap of 2 + // + @Test + public void checkOverlappedDirect() { + final int k = 1 << 4; + final int segBytes = 2*k*16 +PREBYTES; //plenty of room + final UpdateSketch sk1 = Sketches.updateSketchBuilder().setNominalEntries(k).build(); + final UpdateSketch sk2 = Sketches.updateSketchBuilder().setNominalEntries(k).build(); + for (int i=0; i k); + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + + @Test + public void checkSamplingMode() { + int k = 4096; + float p = (float)0.5; + + try (Arena arena = Arena.ofConfined()) { + 
MemorySegment wseg = makeNativeMemorySegment(k, arena); + + UpdateSketch usk = UpdateSketch.builder().setP(p).setNominalEntries(k).build(wseg); + DirectQuickSelectSketch sk1 = (DirectQuickSelectSketch)usk; //for internal checks + + for (int i = 0; i < k; i++ ) { usk.update(i); } + + double p2 = sk1.getP(); + double theta = sk1.getTheta(); + assertTrue(theta <= p2); + + double est = usk.getEstimate(); + assertEquals(k, est, k *.05); + double ub = usk.getUpperBound(1); + assertTrue(ub > est); + double lb = usk.getLowerBound(1); + assertTrue(lb < est); + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + + @Test + public void checkErrorBounds() { + int k = 512; + try (Arena arena = Arena.ofConfined()) { + MemorySegment wseg = makeNativeMemorySegment(k, arena); + + UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wseg); + + //Exact mode + for (int i = 0; i < k; i++ ) { usk.update(i); } + + double est = usk.getEstimate(); + double lb = usk.getLowerBound(2); + double ub = usk.getUpperBound(2); + assertEquals(est, ub, 0.0); + assertEquals(est, lb, 0.0); + + //Est mode + int u = 100*k; + for (int i = k; i < u; i++ ) { + usk.update(i); + usk.update(i); //test duplicate rejection + } + est = usk.getEstimate(); + lb = usk.getLowerBound(2); + ub = usk.getUpperBound(2); + assertTrue(est <= ub); + assertTrue(est >= lb); + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + + //Empty Tests + @Test + public void checkEmptyAndP() { + //virgin, p = 1.0 + int k = 1024; + float p = (float)1.0; + try (Arena arena = Arena.ofConfined()) { + MemorySegment wseg = makeNativeMemorySegment(k, arena); + + UpdateSketch usk = UpdateSketch.builder().setP(p).setNominalEntries(k).build(wseg); + DirectQuickSelectSketch sk1 = (DirectQuickSelectSketch)usk; //for internal checks + + assertTrue(usk.isEmpty()); + usk.update(1); + assertEquals(sk1.getRetainedEntries(true), 1); + assertFalse(usk.isEmpty()); + + //virgin, p = .001 + p = 
(float)0.001; + byte[] segArr2 = new byte[(int) wseg.byteSize()]; + MemorySegment seg2 = MemorySegment.ofArray(segArr2); + UpdateSketch usk2 = UpdateSketch.builder().setP(p).setNominalEntries(k).build(seg2); + sk1 = (DirectQuickSelectSketch)usk2; + + assertTrue(usk2.isEmpty()); + usk2.update(1); //will be rejected + assertEquals(sk1.getRetainedEntries(true), 0); + assertFalse(usk2.isEmpty()); + double est = usk2.getEstimate(); + //println("Est: "+est); + assertEquals(est, 0.0, 0.0); //because curCount = 0 + double ub = usk2.getUpperBound(2); //huge because theta is tiny! + //println("UB: "+ub); + assertTrue(ub > 0.0); + double lb = usk2.getLowerBound(2); + assertTrue(lb <= est); + //println("LB: "+lb); + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + + @Test + public void checkUpperAndLowerBounds() { + int k = 512; + int u = 2*k; + try (Arena arena = Arena.ofConfined()) { + MemorySegment wseg = makeNativeMemorySegment(k, arena); + + UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wseg); + + for (int i = 0; i < u; i++ ) { usk.update(i); } + + double est = usk.getEstimate(); + double ub = usk.getUpperBound(1); + double lb = usk.getLowerBound(1); + assertTrue(ub > est); + assertTrue(lb < est); + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + + @Test + public void checkRebuild() { + int k = 512; + int u = 4*k; + try (Arena arena = Arena.ofConfined()) { + MemorySegment wseg = makeNativeMemorySegment(k, arena); + + UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wseg); + DirectQuickSelectSketch sk1 = (DirectQuickSelectSketch)usk; //for internal checks + + assertTrue(usk.isEmpty()); + + for (int i = 0; i< u; i++) { usk.update(i); } + + assertFalse(usk.isEmpty()); + assertTrue(usk.getEstimate() > 0.0); + assertTrue(sk1.getRetainedEntries(false) > k); + + sk1.rebuild(); + assertEquals(sk1.getRetainedEntries(false), k); + assertEquals(sk1.getRetainedEntries(true), k); + 
sk1.rebuild(); + assertEquals(sk1.getRetainedEntries(false), k); + assertEquals(sk1.getRetainedEntries(true), k); + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + + @Test + public void checkResetAndStartingSubMultiple() { + int k = 512; + int u = 4*k; + try (Arena arena = Arena.ofConfined()) { + MemorySegment wseg = makeNativeMemorySegment(k, arena); + + UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wseg); + DirectQuickSelectSketch sk1 = (DirectQuickSelectSketch)usk; //for internal checks + + assertTrue(usk.isEmpty()); + + for (int i = 0; i< u; i++) { usk.update(i); } + + assertFalse(usk.isEmpty()); + assertTrue(sk1.getRetainedEntries(false) > k); + assertTrue(sk1.getThetaLong() < Long.MAX_VALUE); + + sk1.reset(); + assertTrue(usk.isEmpty()); + assertEquals(sk1.getRetainedEntries(false), 0); + assertEquals(usk.getEstimate(), 0.0, 0.0); + assertEquals(sk1.getThetaLong(), Long.MAX_VALUE); + + assertNotNull(sk1.getMemorySegment()); + assertFalse(sk1.isOrdered()); + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + + @Test + public void checkExactModeMemoryArr() { + int k = 4096; + int u = 4096; + try (Arena arena = Arena.ofConfined()) { + MemorySegment wseg = makeNativeMemorySegment(k, arena); + + UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wseg); + DirectQuickSelectSketch sk1 = (DirectQuickSelectSketch)usk; //for internal checks + assertTrue(usk.isEmpty()); + + for (int i = 0; i< u; i++) { usk.update(i); } + + assertEquals(usk.getEstimate(), u, 0.0); + assertEquals(sk1.getRetainedEntries(false), u); + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + + @Test + public void checkEstModeMemoryArr() { + int k = 4096; + int u = 2*k; + + try (Arena arena = Arena.ofConfined()) { + MemorySegment wseg = makeNativeMemorySegment(k, arena); + + UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wseg); + DirectQuickSelectSketch sk1 = 
(DirectQuickSelectSketch)usk; //for internal checks + assertTrue(usk.isEmpty()); + + for (int i = 0; i< u; i++) { usk.update(i); } + + assertEquals(usk.getEstimate(), u, u*.05); + assertTrue(sk1.getRetainedEntries(false) > k); + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + + @Test + public void checkEstModeNativeMemory() { + int k = 4096; + int u = 2*k; + int segCapacity = (k << 4) + (Family.QUICKSELECT.getMinPreLongs() << 3); + + try (Arena arena = Arena.ofConfined()) { + MemorySegment wseg = makeNativeMemorySegment(segCapacity, arena); + + UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wseg); + DirectQuickSelectSketch sk1 = (DirectQuickSelectSketch)usk; //for internal checks + assertTrue(usk.isEmpty()); + + for (int i = 0; i< u; i++) { usk.update(i); } + double est = usk.getEstimate(); + println(""+est); + assertEquals(usk.getEstimate(), u, u*.05); + assertTrue(sk1.getRetainedEntries(false) > k); + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + + @Test + public void checkConstructReconstructFromMemory() { + int k = 4096; + int u = 2*k; + try (Arena arena = Arena.ofConfined()) { + MemorySegment wseg = makeNativeMemorySegment(k, arena); + + UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wseg); + assertTrue(usk.isEmpty()); + + for (int i = 0; i< u; i++) { usk.update(i); } //force estimation + + double est1 = usk.getEstimate(); + int count1 = usk.getRetainedEntries(false); + assertEquals(est1, u, u*.05); + assertTrue(count1 >= k); + + byte[] serArr; + double est2; + int count2; + + serArr = usk.toByteArray(); + + MemorySegment seg2 = MemorySegment.ofArray(serArr); + + //reconstruct to Native/Direct + UpdateSketch usk2 = Sketches.wrapUpdateSketch(seg2); + + est2 = usk2.getEstimate(); + count2 = usk2.getRetainedEntries(false); + + assertEquals(count2, count1); + assertEquals(est2, est1, 0.0); + } catch (final Exception e) { + throw new RuntimeException(e); + } + } + + 
@Test(expectedExceptions = SketchesReadOnlyException.class) + public void updateAfterReadOnlyWrap() { + UpdateSketch usk1 = UpdateSketch.builder().build(); + UpdateSketch usk2 = (UpdateSketch) Sketch.wrap(MemorySegment.ofArray(usk1.toByteArray())); + usk2.update(0); + } + + public void updateAfterWritableWrap() { + UpdateSketch usk1 = UpdateSketch.builder().build(); + UpdateSketch usk2 = UpdateSketch.wrap(MemorySegment.ofArray(usk1.toByteArray())); + usk2.update(0); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkNegativeHashes() { + int k = 512; + UpdateSketch qs = UpdateSketch.builder().setFamily(QUICKSELECT).setNominalEntries(k).build(); + qs.hashUpdate(-1L); + } + + @Test + public void checkConstructorSrcMemCorruptions() { + int k = 1024; //lgNomLongs = 10 + int u = k; //exact mode, lgArrLongs = 11 + + int bytes = Sketches.getMaxUpdateSketchBytes(k); + byte[] arr1 = new byte[bytes]; + MemorySegment seg1 = MemorySegment.ofArray(arr1); + ResizeFactor rf = ResizeFactor.X1; //0 + UpdateSketch usk1 = UpdateSketch.builder().setNominalEntries(k).setResizeFactor(rf).build(seg1); + for (int i=0; i>> 1); //corrupt theta and + seg1.set(JAVA_BYTE, LG_ARR_LONGS_BYTE, (byte) 10); //corrupt lgArrLongs + try { + usk2 = DirectQuickSelectSketch.writableWrap(seg1, ThetaUtil.DEFAULT_UPDATE_SEED); + fail("Expected SketchesArgumentException"); + } catch (SketchesArgumentException e) { + //pass + } + seg1.set(JAVA_LONG_UNALIGNED, THETA_LONG, Long.MAX_VALUE); //fix theta and + seg1.set(JAVA_BYTE, LG_ARR_LONGS_BYTE, (byte) 11); //fix lgArrLongs + byte badFlags = (byte) (BIG_ENDIAN_FLAG_MASK | COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | ORDERED_FLAG_MASK); + seg1.set(JAVA_BYTE, FLAGS_BYTE, badFlags); + try { + usk2 = DirectQuickSelectSketch.writableWrap(seg1, ThetaUtil.DEFAULT_UPDATE_SEED); + fail("Expected SketchesArgumentException"); + } catch (SketchesArgumentException e) { + //pass + } + + byte[] arr2 = Arrays.copyOfRange(arr1, 0, bytes-1); 
//corrupt length + MemorySegment seg2 = MemorySegment.ofArray(arr2); + try { + usk2 = DirectQuickSelectSketch.writableWrap(seg2, ThetaUtil.DEFAULT_UPDATE_SEED); + fail("Expected SketchesArgumentException"); + } catch (SketchesArgumentException e) { + //pass + } + } + + @Test + public void checkCorruptRFWithInsufficientArray() { + int k = 1024; //lgNomLongs = 10 + + int bytes = Sketches.getMaxUpdateSketchBytes(k); + byte[] arr = new byte[bytes]; + MemorySegment seg = MemorySegment.ofArray(arr); + ResizeFactor rf = ResizeFactor.X8; // 3 + UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).setResizeFactor(rf).build(seg); + usk.update(0); + + insertLgResizeFactor(seg, 0); // corrupt RF: X1 + UpdateSketch dqss = DirectQuickSelectSketch.writableWrap(seg, ThetaUtil.DEFAULT_UPDATE_SEED); + assertEquals(dqss.getResizeFactor(), ResizeFactor.X2); // force-promote to X2 + } + + @Test + public void checkFamilyAndRF() { + int k = 16; + MemorySegment seg = MemorySegment.ofArray(new byte[(k*16) + 24]); + UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(seg); + assertEquals(sketch.getFamily(), Family.QUICKSELECT); + assertEquals(sketch.getResizeFactor(), ResizeFactor.X8); + } + + //checks Alex's bug where lgArrLongs > lgNomLongs +1. 
+ @Test + public void checkResizeInBigMem() { + int k = 1 << 14; + int u = 1 << 20; + MemorySegment seg = MemorySegment.ofArray(new byte[(8*k*16) +24]); + UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(seg); + for (int i=0; i 98663.0); + assertTrue(est < 101530.0); + } + + @Test + public void checkForDruidBug2() { //update union with just sketch memory reference + final int k = 16384; + final UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(); + for (int i = 0; i < 100000; i++) { + usk.update(Integer.toString(i)); + } + usk.rebuild(); //optional but created the symptom + final MemorySegment memIn = MemorySegment.ofArray(new byte[usk.getCompactBytes()]); + usk.compact(true, memIn); //side effect of loading the memIn + + //create empty target union in off-heap mem + final MemorySegment mem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); + final Union union1 = SetOperation.builder().setNominalEntries(k).buildUnion(mem); + + union1.union(memIn); + + final CompactSketch csk = union1.getResult(); + + assertTrue(csk.getTheta() < 0.2); + assertEquals(csk.getRetainedEntries(true), 16384); + final double est = csk.getEstimate(); + assertTrue(est > 98663.0); + assertTrue(est < 101530.0); + } + + @Test + public void printlnTest() { + println("PRINTING: "+this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(final String s) { + //System.out.println(s); //Disable here + } + +} diff --git a/src/test/java/org/apache/datasketches/theta2/EmptyTest.java b/src/test/java/org/apache/datasketches/theta2/EmptyTest.java new file mode 100644 index 000000000..c5492cf34 --- /dev/null +++ b/src/test/java/org/apache/datasketches/theta2/EmptyTest.java @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static org.apache.datasketches.theta2.PreambleUtil.COMPACT_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.READ_ONLY_FLAG_MASK; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import java.lang.foreign.MemorySegment; +import org.testng.annotations.Test; + + +/** + * Empty essentially means that the sketch has never seen data. + * + * @author Lee Rhodes + */ +public class EmptyTest { + + @Test + public void checkEmpty() { + final UpdateSketch sk1 = Sketches.updateSketchBuilder().build(); + final UpdateSketch sk2 = Sketches.updateSketchBuilder().build(); + final Intersection inter = Sketches.setOperationBuilder().buildIntersection(); + + final int u = 100; + for (int i = 0; i < u; i++) { //disjoint + sk1.update(i); + sk2.update(i + u); + } + inter.intersect(sk1); + inter.intersect(sk2); + + final CompactSketch csk = inter.getResult(); + //The intersection of two disjoint, exact-mode sketches is empty, T == 1.0. 
+ println(csk.toString()); + assertTrue(csk.isEmpty()); + + final AnotB aNotB = Sketches.setOperationBuilder().buildANotB(); + final CompactSketch csk2 = aNotB.aNotB(csk, sk1); + //The AnotB of an empty, T == 1.0 sketch with another exact-mode sketch is empty, T == 1.0 + assertTrue(csk2.isEmpty()); + } + + @Test + public void checkNotEmpty() { + final UpdateSketch sk1 = Sketches.updateSketchBuilder().build(); + final UpdateSketch sk2 = Sketches.updateSketchBuilder().build(); + final Intersection inter = Sketches.setOperationBuilder().buildIntersection(); + + final int u = 10000; //estimating + for (int i = 0; i < u; i++) { //disjoint + sk1.update(i); + sk2.update(i + u); + } + inter.intersect(sk1); + inter.intersect(sk2); + + final CompactSketch csk = inter.getResult(); + println(csk.toString()); + //The intersection of two disjoint, est-mode sketches is not-empty, T < 1.0. + assertFalse(csk.isEmpty()); + + AnotB aNotB = Sketches.setOperationBuilder().buildANotB(); + final CompactSketch csk2 = aNotB.aNotB(csk, sk1); //empty, T < 1.0; with est-mode sketch + println(csk2.toString()); + //The AnotB of an empty, T < 1.0 sketch with another exact-mode sketch is not-empty. + assertFalse(csk2.isEmpty()); + + final UpdateSketch sk3 = Sketches.updateSketchBuilder().build(); + aNotB = Sketches.setOperationBuilder().buildANotB(); + final CompactSketch csk3 = aNotB.aNotB(sk3, sk1); //empty, T == 1.0; with est-mode sketch + println(csk3.toString()); + //the AnotB of an empty, T == 1.0 sketch with another est-mode sketch is empty, T < 1.0 + assertTrue(csk3.isEmpty()); + } + + @Test + public void checkPsampling() { + final UpdateSketch sk1 = Sketches.updateSketchBuilder().setP(.5F).build(); + assertTrue(sk1.isEmpty()); + //An empty P-sampling sketch where T < 1.0 and has never seen data is also empty + // and will have a full preamble of 24 bytes. But when compacted, theta returns to 1.0, so + // it will be stored as only 8 bytes. 
+ assertEquals(sk1.compact().toByteArray().length, 8); + } + + //These 3 tests reproduce a failure mode where an "old" empty sketch of 8 bytes without + // its empty-flag bit set is read. + @Test + public void checkBackwardCompatibility1() { + final int k = 16; + final int bytes = Sketches.getMaxUnionBytes(k); //288 + final Union union = SetOperation.builder().buildUnion(MemorySegment.ofArray(new byte[bytes])); + final MemorySegment mem = badEmptySk(); + final Sketch wsk = Sketches.wrapSketch(mem); + union.union(wsk); //union has memory + } + + @Test + public void checkBackwardCompatibility2() { + final Union union = SetOperation.builder().setNominalEntries(16).buildUnion(); + final MemorySegment mem = badEmptySk(); + final Sketch wsk = Sketches.wrapSketch(mem); + union.union(wsk); //heap union + } + + @Test + public void checkBackwardCompatibility3() { + final MemorySegment mem = badEmptySk(); + Sketches.heapifySketch(mem); + } + + @Test + public void checkEmptyToCompact() { + final UpdateSketch sk1 = Sketches.updateSketchBuilder().build(); + final CompactSketch csk = sk1.compact(); + assertTrue(csk instanceof EmptyCompactSketch); + final CompactSketch csk2 = csk.compact(); + assertTrue(csk2 instanceof EmptyCompactSketch); + final CompactSketch csk3 = csk.compact(true, MemorySegment.ofArray(new byte[8])); + assertTrue(csk3 instanceof DirectCompactSketch); + assertEquals(csk2.getCurrentPreambleLongs(), 1); + } + + + //SerVer 2 had an empty sketch where preLongs = 1, but empty bit was not set. 
+ private static MemorySegment badEmptySk() { + final long preLongs = 1; + final long serVer = 2; + final long family = 3; //compact + final long flags = ORDERED_FLAG_MASK | COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK; + final long seedHash = 0x93CC; + final long badEmptySk = seedHash << 48 | flags << 40 + | family << 16 | serVer << 8 | preLongs; + final MemorySegment wmem = MemorySegment.ofArray(new byte[8]); + wmem.set(JAVA_LONG_UNALIGNED, 0, badEmptySk); + return wmem; + } + + /** + * @param s value to print + */ + static void println(final String s) { + //System.out.println(s); //disable here + } + +} diff --git a/src/test/java/org/apache/datasketches/theta2/ExamplesTest.java b/src/test/java/org/apache/datasketches/theta2/ExamplesTest.java new file mode 100644 index 000000000..faba72701 --- /dev/null +++ b/src/test/java/org/apache/datasketches/theta2/ExamplesTest.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import org.testng.annotations.Test; + +/** + * @author Lee Rhodes + */ +public class ExamplesTest { + + @Test + public void simpleCountingSketch() { + final int k = 4096; + final int u = 1000000; + + final UpdateSketch sketch = UpdateSketch.builder().setNominalEntries(k).build(); + for (int i = 0; i < u; i++) { + sketch.update(i); + } + + println(sketch.toString()); + } + /* +### HeapQuickSelectSketch SUMMARY: + Nominal Entries (k) : 4096 + Estimate : 1002714.745231455 + Upper Bound, 95% conf : 1027777.3354974985 + Lower Bound, 95% conf : 978261.4472857157 + p : 1.0 + Theta (double) : 0.00654223948655085 + Theta (long) : 60341508738660257 + Theta (long, hex : 00d66048519437a1 + EstMode? : true + Empty? : false + Resize Factor : 8 + Array Size Entries : 8192 + Retained Entries : 6560 + Update Seed : 9001 + Seed Hash : ffff93cc +### END SKETCH SUMMARY + */ + + @Test + public void theta2dot0Examples() { + //Load source sketches + final UpdateSketchBuilder bldr = UpdateSketch.builder(); + final UpdateSketch skA = bldr.build(); + final UpdateSketch skB = bldr.build(); + for (int i = 1; i <= 1000; i++) { + skA.update(i); + skB.update(i + 250); + } + + //Union Stateless: + Union union = SetOperation.builder().buildUnion(); + CompactSketch csk = union.union(skA, skB); + assert csk.getEstimate() == 1250; + + //Union Stateful: + union = SetOperation.builder().buildUnion(); + union.union(skA); //first call + union.union(skB); //2nd through nth calls + //... + csk = union.getResult(); + assert csk.getEstimate() == 1250; + + //Intersection Stateless: + Intersection inter = SetOperation.builder().buildIntersection(); + csk = inter.intersect(skA, skB); + assert csk.getEstimate() == 750; + + //Intersection Stateful: + inter = SetOperation.builder().buildIntersection(); + inter.intersect(skA); //first call + inter.intersect(skB); //2nd through nth calls + //... 
+ csk = inter.getResult(); + assert csk.getEstimate() == 750; + + //AnotB Stateless: + AnotB diff = SetOperation.builder().buildANotB(); + csk = diff.aNotB(skA, skB); + assert csk.getEstimate() == 250; + + //AnotB Stateful: + diff = SetOperation.builder().buildANotB(); + diff.setA(skA); //first call + diff.notB(skB); //2nd through nth calls + //... + csk = diff.getResult(true); + assert csk.getEstimate() == 250; + } + + @Test + public void printlnTest() { + println("PRINTING: "+this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(final String s) { + //System.out.println(s); //enable/disable here + } + +} diff --git a/src/test/java/org/apache/datasketches/theta2/ForwardCompatibilityTest.java b/src/test/java/org/apache/datasketches/theta2/ForwardCompatibilityTest.java new file mode 100644 index 000000000..f04b01829 --- /dev/null +++ b/src/test/java/org/apache/datasketches/theta2/ForwardCompatibilityTest.java @@ -0,0 +1,219 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static org.apache.datasketches.theta2.BackwardConversions.convertSerVer3toSerVer1; +import static org.apache.datasketches.theta2.BackwardConversions.convertSerVer3toSerVer2; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import java.lang.foreign.MemorySegment; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.thetacommon.ThetaUtil; +import org.testng.annotations.Test; + +/** + * @author Lee Rhodes + */ +public class ForwardCompatibilityTest { + + @Test + public void checkSerVer1_Empty() { + CompactSketch csk = EmptyCompactSketch.getInstance(); + MemorySegment srcSeg = convertSerVer3toSerVer1(csk).asReadOnly(); + Sketch sketch = Sketch.heapify(srcSeg); + assertEquals(sketch.isEmpty(), true); + assertEquals(sketch.isEstimationMode(), false); + assertEquals(sketch.isDirect(), false); + assertEquals(sketch.hasMemorySegment(), false); + assertEquals(sketch.isCompact(), true); + assertEquals(sketch.isOrdered(), true); + assertTrue(sketch instanceof EmptyCompactSketch); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkSerVer1_badPrelongs() { + CompactSketch csk = EmptyCompactSketch.getInstance(); + + MemorySegment srcWseg = convertSerVer3toSerVer1(csk); + MemorySegment srcseg = srcWseg.asReadOnly(); + srcWseg.set(JAVA_BYTE, 0, (byte) 1); + Sketch.heapify(srcWseg); //throws because bad preLongs + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkSerVer1_tooSmall() { + UpdateSketch usk = Sketches.updateSketchBuilder().build(); + usk.update(1); + usk.update(2); + CompactSketch csk = usk.compact(true, null); + MemorySegment srcSeg = 
convertSerVer3toSerVer1(csk).asReadOnly(); + MemorySegment srcSeg2 = srcSeg.asSlice(0, srcSeg.byteSize() - 8); + Sketch.heapify(srcSeg2); //throws because too small + } + + + @Test + public void checkSerVer1_1Value() { + UpdateSketch usk = Sketches.updateSketchBuilder().build(); + usk.update(1); + CompactSketch csk = usk.compact(true, null); + MemorySegment srcSeg = convertSerVer3toSerVer1(csk).asReadOnly(); + Sketch sketch = Sketch.heapify(srcSeg); + assertEquals(sketch.isEmpty(), false); + assertEquals(sketch.isEstimationMode(), false); + assertEquals(sketch.isDirect(), false); + assertEquals(sketch.hasMemorySegment(), false); + assertEquals(sketch.isCompact(), true); + assertEquals(sketch.isOrdered(), true); + assertEquals(sketch.getEstimate(), 1.0); + assertTrue(sketch instanceof SingleItemSketch); + } + + @Test + public void checkSerVer2_1PreLong_Empty() { + CompactSketch csk = EmptyCompactSketch.getInstance(); + MemorySegment srcSeg = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly(); + Sketch sketch = Sketch.heapify(srcSeg); + assertEquals(sketch.isEmpty(), true); + assertEquals(sketch.isEstimationMode(), false); + assertEquals(sketch.isDirect(), false); + assertEquals(sketch.hasMemorySegment(), false); + assertEquals(sketch.isCompact(), true); + assertEquals(sketch.isOrdered(), true); + assertTrue(sketch instanceof EmptyCompactSketch); + } + + @Test + public void checkSerVer2_2PreLongs_Empty() { + UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build(); + for (int i = 0; i < 2; i++) { usk.update(i); } //exact mode + CompactSketch csk = usk.compact(true, null); + MemorySegment srcSeg = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly(); + + MemorySegment srcWseg = MemorySegment.ofArray(new byte[16]); + MemorySegment.copy(srcSeg, 0, srcWseg, 0, 16); + PreambleUtil.setEmpty(srcWseg); //Force + assertTrue(PreambleUtil.isEmptyFlag(srcWseg)); + srcWseg.set(JAVA_INT_UNALIGNED, 8, 0); //corrupt 
curCount = 0 + + Sketch sketch = Sketch.heapify(srcWseg); + assertTrue(sketch instanceof EmptyCompactSketch); + } + + @Test + public void checkSerVer2_3PreLongs_Empty() { + UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build(); + for (int i = 0; i < 32; i++) { usk.update(i); } //est mode + CompactSketch csk = usk.compact(true, null); + MemorySegment srcSeg = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly(); + + MemorySegment srcWseg = MemorySegment.ofArray(new byte[24]); + MemorySegment.copy(srcSeg, 0, srcWseg, 0, 24); + PreambleUtil.setEmpty(srcWseg); //Force + assertTrue(PreambleUtil.isEmptyFlag(srcWseg)); + srcWseg.set(JAVA_INT_UNALIGNED, 8, 0); //corrupt curCount = 0 + srcWseg.set(JAVA_LONG_UNALIGNED, 16, Long.MAX_VALUE); //corrupt to make it look empty + + Sketch sketch = Sketch.heapify(srcWseg); //now serVer=3, EmptyCompactSketch + assertTrue(sketch instanceof EmptyCompactSketch); + } + + @Test + public void checkSerVer2_2PreLongs_1Value() { + UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build(); + usk.update(1); //exact mode + CompactSketch csk = usk.compact(true, null); + MemorySegment srcSeg = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly(); + + Sketch sketch = Sketch.heapify(srcSeg); + assertEquals(sketch.isEmpty(), false); + assertEquals(sketch.isEstimationMode(), false); + assertEquals(sketch.isDirect(), false); + assertEquals(sketch.hasMemorySegment(), false); + assertEquals(sketch.isCompact(), true); + assertEquals(sketch.isOrdered(), true); + assertTrue(sketch instanceof SingleItemSketch); + } + + @Test + public void checkSerVer2_3PreLongs_1Value() { + UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build(); + for (int i = 0; i < 32; i++) { usk.update(i); } //est mode + CompactSketch csk = usk.compact(true, null); + MemorySegment srcSeg = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly(); + + 
MemorySegment srcWseg = MemorySegment.ofArray(new byte[32]); + MemorySegment.copy(srcSeg, 0, srcWseg, 0, 32); + srcWseg.set(JAVA_INT_UNALIGNED, 8, 1); //corrupt curCount = 1 + srcWseg.set(JAVA_LONG_UNALIGNED, 16, Long.MAX_VALUE); //corrupt theta to make it look exact + long[] cache = csk.getCache(); + srcWseg.set(JAVA_LONG_UNALIGNED, 24, cache[0]); //corrupt cache with only one value + + Sketch sketch = Sketch.heapify(srcWseg); + assertEquals(sketch.isEmpty(), false); + assertEquals(sketch.isEstimationMode(), false); + assertEquals(sketch.isDirect(), false); + assertEquals(sketch.hasMemorySegment(), false); + assertEquals(sketch.isCompact(), true); + assertEquals(sketch.isOrdered(), true); + assertTrue(sketch instanceof SingleItemSketch); + } + + @Test + public void checkSerVer2_3PreLongs_1Value_ThLessthan1() { + UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build(); + for (int i = 0; i < 32; i++) { usk.update(i); } //est mode + CompactSketch csk = usk.compact(true, null); + MemorySegment srcSeg = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly(); + + MemorySegment srcWseg = MemorySegment.ofArray(new byte[32]); + MemorySegment.copy(srcSeg, 0, srcWseg, 0, 32); + srcWseg.set(JAVA_INT_UNALIGNED, 8, 1); //corrupt curCount = 1 + //srcMemW.putLong(16, Long.MAX_VALUE); + long[] cache = csk.getCache(); + srcWseg.set(JAVA_LONG_UNALIGNED, 24, cache[0]); //corrupt cache with only one value + + Sketch sketch = Sketch.heapify(srcWseg); + assertEquals(sketch.isEmpty(), false); + assertEquals(sketch.isEstimationMode(), true); + assertEquals(sketch.isDirect(), false); + assertEquals(sketch.hasMemorySegment(), false); + assertEquals(sketch.isCompact(), true); + assertEquals(sketch.isOrdered(), true); + assertTrue(sketch instanceof HeapCompactSketch); + } + + @Test + public void printlnTest() { + println("PRINTING: "+this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(String s) { + 
//System.out.println(s); //disable here + } + +} diff --git a/src/test/java/org/apache/datasketches/theta2/HeapAlphaSketchTest.java b/src/test/java/org/apache/datasketches/theta2/HeapAlphaSketchTest.java new file mode 100644 index 000000000..47420ae0e --- /dev/null +++ b/src/test/java/org/apache/datasketches/theta2/HeapAlphaSketchTest.java @@ -0,0 +1,696 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static org.apache.datasketches.common.Family.ALPHA; +import static org.apache.datasketches.common.ResizeFactor.X1; +import static org.apache.datasketches.common.ResizeFactor.X2; +import static org.apache.datasketches.common.ResizeFactor.X8; +import static org.apache.datasketches.common.Util.clear; +import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.FLAGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.LG_NOM_LONGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.THETA_LONG; +import static org.apache.datasketches.theta2.PreambleUtil.insertLgResizeFactor; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotEquals; +import static org.testng.Assert.assertNull; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import java.lang.foreign.MemorySegment; +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.thetacommon.ThetaUtil; +import org.testng.annotations.Test; + +/** + * @author Lee Rhodes + */ +public class HeapAlphaSketchTest { + private Family fam_ = ALPHA; + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkBadSerVer() { + int k = 512; + int u = k; + long seed = ThetaUtil.DEFAULT_UPDATE_SEED; + UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed) + .setNominalEntries(k).build(); + HeapAlphaSketch sk1 = 
(HeapAlphaSketch)usk; //for internal checks + + assertTrue(usk.isEmpty()); + + for (int i = 0; i< u; i++) { + sk1.update(i); + } + + assertFalse(usk.isEmpty()); + assertEquals(usk.getEstimate(), u, 0.0); + assertEquals(sk1.getRetainedEntries(false), u); + + byte[] byteArray = usk.toByteArray(); + MemorySegment seg = MemorySegment.ofArray(byteArray); + seg.set(JAVA_BYTE, SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte + + Sketch.heapify(seg, seed); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkConstructorKtooSmall() { + int k = 256; + UpdateSketch.builder().setFamily(fam_).setNominalEntries(k).build(); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkAlphaIncompatibleWithMem() { + MemorySegment seg = MemorySegment.ofArray(new byte[(512*16)+24]); + UpdateSketch.builder().setFamily(Family.ALPHA).setNominalEntries(512).build(seg); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkIllegalSketchID_UpdateSketch() { + int k = 512; + int u = k; + long seed = ThetaUtil.DEFAULT_UPDATE_SEED; + UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed) + .setNominalEntries(k).build(); + HeapAlphaSketch sk1 = (HeapAlphaSketch)usk; //for internal checks + assertTrue(usk.isEmpty()); + + for (int i = 0; i< u; i++) { + usk.update(i); + } + + assertFalse(usk.isEmpty()); + assertEquals(usk.getEstimate(), u, 0.0); + assertEquals(sk1.getRetainedEntries(false), u); + byte[] byteArray = usk.toByteArray(); + MemorySegment seg = MemorySegment.ofArray(byteArray); + seg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte + + //try to heapify the corrupted mem + Sketch.heapify(seg, seed); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkHeapifySeedConflict() { + int k = 512; + long seed1 = 1021; + long seed2 = ThetaUtil.DEFAULT_UPDATE_SEED; + UpdateSketch usk = 
UpdateSketch.builder().setFamily(fam_).setSeed(seed1) + .setNominalEntries(k).build(); + byte[] byteArray = usk.toByteArray(); + MemorySegment srcSeg = MemorySegment.ofArray(byteArray).asReadOnly(); + Sketch.heapify(srcSeg, seed2); + } + + @Test + public void checkHeapifyByteArrayExact() { + int k = 512; + int u = k; + long seed = ThetaUtil.DEFAULT_UPDATE_SEED; + UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed) + .setNominalEntries(k).build(); + + for (int i=0; i k); + } + + @Test + public void checkSamplingMode() { + int k = 4096; + int u = k; + float p = (float)0.5; + + UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setP(p) + .setNominalEntries(k).build(); + HeapAlphaSketch sk1 = (HeapAlphaSketch)usk; //for internal checks + + for (int i = 0; i < u; i++ ) { + usk.update(i); + } + + double p2 = sk1.getP(); + double theta = sk1.getTheta(); + assertTrue(theta <= p2); + + double est = usk.getEstimate(); + double kdbl = k; + assertEquals(kdbl, est, kdbl*.05); + double ub = usk.getUpperBound(1); + assertTrue(ub > est); + double lb = usk.getLowerBound(1); + assertTrue(lb < est); + } + + @Test + public void checkErrorBounds() { + int k = 512; + + UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setResizeFactor(X1) + .setNominalEntries(k).build(); + + //Exact mode + for (int i = 0; i < k; i++ ) { + usk.update(i); + } + + double est = usk.getEstimate(); + double lb = usk.getLowerBound(2); + double ub = usk.getUpperBound(2); + assertEquals(est, ub, 0.0); + assertEquals(est, lb, 0.0); + + //Est mode + int u = 10*k; + for (int i = k; i < u; i++ ) { + usk.update(i); + usk.update(i); //test duplicate rejection + } + est = usk.getEstimate(); + lb = usk.getLowerBound(2); + ub = usk.getUpperBound(2); + assertTrue(est <= ub); + assertTrue(est >= lb); + } + + //Empty Tests + @Test + public void checkEmptyAndP() { + //virgin, p = 1.0 + int k = 1024; + + UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setNominalEntries(k).build(); + 
HeapAlphaSketch sk1 = (HeapAlphaSketch)usk; //for internal checks + + assertTrue(usk.isEmpty()); + usk.update(1); + assertEquals(sk1.getRetainedEntries(true), 1); + assertFalse(usk.isEmpty()); + + //virgin, p = .001 + UpdateSketch usk2 = UpdateSketch.builder().setFamily(fam_).setP((float)0.001) + .setNominalEntries(k).build(); + sk1 = (HeapAlphaSketch)usk2; + assertTrue(usk2.isEmpty()); + usk2.update(1); //will be rejected + assertEquals(sk1.getRetainedEntries(true), 0); + assertFalse(usk2.isEmpty()); + double est = usk2.getEstimate(); + //println("Est: "+est); + assertEquals(est, 0.0, 0.0); //because curCount = 0 + double ub = usk2.getUpperBound(2); //huge because theta is tiny! + //println("UB: "+ub); + assertTrue(ub > 0.0); + double lb = usk2.getLowerBound(2); + assertTrue(lb <= est); + //println("LB: "+lb); + } + + @Test + public void checkUpperAndLowerBounds() { + int k = 512; + int u = 2*k; + + UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setResizeFactor(X2) + .setNominalEntries(k).build(); + + for (int i = 0; i < u; i++ ) { + usk.update(i); + } + + double est = usk.getEstimate(); + double ub = usk.getUpperBound(1); + double lb = usk.getLowerBound(1); + assertTrue(ub > est); + assertTrue(lb < est); + } + + @Test + public void checkRebuild() { + int k = 512; + int u = 4*k; + + UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setNominalEntries(k).build(); + HeapAlphaSketch sk1 = (HeapAlphaSketch)usk; //for internal checks + + assertTrue(usk.isEmpty()); + + for (int i = 0; i< u; i++) { + usk.update(i); + } + + assertFalse(usk.isEmpty()); + assertTrue(usk.getEstimate() > 0.0); + assertNotEquals(sk1.getRetainedEntries(false), sk1.getRetainedEntries(true)); + + sk1.rebuild(); + assertEquals(sk1.getRetainedEntries(false), sk1.getRetainedEntries(true)); + sk1.rebuild(); + assertEquals(sk1.getRetainedEntries(false), sk1.getRetainedEntries(true)); + } + + @Test + public void checkResetAndStartingSubMultiple() { + int k = 1024; + int u = 4*k; + + 
UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setResizeFactor(X8) + .setNominalEntries(k).build(); + HeapAlphaSketch sk1 = (HeapAlphaSketch)usk; //for internal checks + + assertTrue(usk.isEmpty()); + + for (int i=0; i k); + println("Est: "+est); + } + + @Test + public void checkHeapifyAndWrap() { + final int lgK = 9; + final int k = 1< k); // in general it might be exactly k, but in this case must be greater + } + + @Test + public void checkSamplingMode() { + int k = 4096; + int u = k; + float p = (float)0.5; + + UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setP(p).setNominalEntries(k).build(); + HeapQuickSelectSketch sk1 = (HeapQuickSelectSketch)usk; //for internal checks + + for (int i = 0; i < u; i++ ) { + usk.update(i); + } + + double p2 = sk1.getP(); + double theta = sk1.getTheta(); + assertTrue(theta <= p2); + + double est = usk.getEstimate(); + double kdbl = k; + assertEquals(kdbl, est, kdbl*.05); + double ub = usk.getUpperBound(1); + assertTrue(ub > est); + double lb = usk.getLowerBound(1); + assertTrue(lb < est); + } + + @Test + public void checkErrorBounds() { + int k = 512; + + UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setResizeFactor(X1).setNominalEntries(k).build(); + + //Exact mode + for (int i = 0; i < k; i++ ) { + usk.update(i); + } + + double est = usk.getEstimate(); + double lb = usk.getLowerBound(2); + double ub = usk.getUpperBound(2); + assertEquals(est, ub, 0.0); + assertEquals(est, lb, 0.0); + + //Est mode + int u = 10*k; + for (int i = k; i < u; i++ ) { + usk.update(i); + usk.update(i); //test duplicate rejection + } + est = usk.getEstimate(); + lb = usk.getLowerBound(2); + ub = usk.getUpperBound(2); + assertTrue(est <= ub); + assertTrue(est >= lb); + } + + //Empty Tests + @Test + public void checkEmptyAndP() { + //virgin, p = 1.0 + int k = 1024; + + UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setNominalEntries(k).build(); + HeapQuickSelectSketch sk1 = (HeapQuickSelectSketch)usk; //for 
internal checks + + assertTrue(usk.isEmpty()); + usk.update(1); + assertEquals(sk1.getRetainedEntries(true), 1); + assertFalse(usk.isEmpty()); + + //virgin, p = .001 + UpdateSketch usk2 = UpdateSketch.builder().setFamily(fam_).setP((float)0.001).setNominalEntries(k).build(); + sk1 = (HeapQuickSelectSketch)usk2; + assertTrue(usk2.isEmpty()); + usk2.update(1); //will be rejected + assertEquals(sk1.getRetainedEntries(true), 0); + assertFalse(usk2.isEmpty()); + double est = usk2.getEstimate(); + //println("Est: "+est); + assertEquals(est, 0.0, 0.0); //because curCount = 0 + double ub = usk2.getUpperBound(2); //huge because theta is tiny! + //println("UB: "+ub); + assertTrue(ub > 0.0); + double lb = usk2.getLowerBound(2); + assertTrue(lb <= est); + //println("LB: "+lb); + } + + @Test + public void checkUpperAndLowerBounds() { + int k = 512; + int u = 2*k; + + UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setResizeFactor(X2).setNominalEntries(k).build(); + + for (int i = 0; i < u; i++ ) { + usk.update(i); + } + + double est = usk.getEstimate(); + double ub = usk.getUpperBound(1); + double lb = usk.getLowerBound(1); + assertTrue(ub > est); + assertTrue(lb < est); + } + + @Test + public void checkRebuild() { + int k = 16; + int u = 4*k; + + UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setNominalEntries(k).build(); + HeapQuickSelectSketch sk1 = (HeapQuickSelectSketch)usk; //for internal checks + + assertTrue(usk.isEmpty()); + + for (int i = 0; i< u; i++) { + usk.update(i); + } + + assertFalse(usk.isEmpty()); + assertTrue(usk.getEstimate() > 0.0); + assertTrue(sk1.getRetainedEntries(false) > k); + + sk1.rebuild(); + assertEquals(sk1.getRetainedEntries(false), k); + assertEquals(sk1.getRetainedEntries(true), k); + sk1.rebuild(); + assertEquals(sk1.getRetainedEntries(false), k); + assertEquals(sk1.getRetainedEntries(true), k); + } + + @Test + public void checkResetAndStartingSubMultiple() { + int k = 1024; + int u = 4*k; + + UpdateSketch usk = 
UpdateSketch.builder().setFamily(fam_).setResizeFactor(X8).setNominalEntries(k).build(); + HeapQuickSelectSketch sk1 = (HeapQuickSelectSketch)usk; //for internal checks + + assertTrue(usk.isEmpty()); + + for (int i=0; i>> 1; + SingleItemSketch sis = SingleItemSketch.create(i); + long halfMax = Long.MAX_VALUE >> 1; + int count = sis.getCountLessThanThetaLong(halfMax); + assertEquals(count, (h < halfMax) ? 1 : 0); + } + } + + @Test + public void checkSerDe() { + SingleItemSketch sis = SingleItemSketch.create(1); + byte[] byteArr = sis.toByteArray(); + MemorySegment mem = MemorySegment.ofArray(byteArr); + final short defaultSeedHash = ThetaUtil.computeSeedHash(ThetaUtil.DEFAULT_UPDATE_SEED); + SingleItemSketch sis2 = SingleItemSketch.heapify(mem, defaultSeedHash); + assertEquals(sis2.getEstimate(), 1.0); + + SingleItemSketch sis3 = SingleItemSketch.heapify(mem, defaultSeedHash); + assertEquals(sis3.getEstimate(), 1.0); + + Union union = Sketches.setOperationBuilder().buildUnion(); + union.union(sis); + union.union(sis2); + union.union(sis3); + CompactSketch csk = union.getResult(); + assertTrue(csk instanceof SingleItemSketch); + assertEquals(union.getResult().getEstimate(), 1.0); + } + + @Test + public void checkRestricted() { + SingleItemSketch sis = SingleItemSketch.create(1); + assertNull(sis.getMemorySegment()); + assertEquals(sis.getCompactPreambleLongs(), 1); + } + + @Test + public void unionWrapped() { + Sketch sketch = SingleItemSketch.create(1); + Union union = Sketches.setOperationBuilder().buildUnion(); + MemorySegment mem = MemorySegment.ofArray(sketch.toByteArray()); + union.union(mem); + assertEquals(union.getResult().getEstimate(), 1, 0); + } + + @Test + public void buildAndCompact() { + UpdateSketch sk1; + CompactSketch csk; + int bytes; + //On-heap + sk1 = Sketches.updateSketchBuilder().setNominalEntries(32).build(); + sk1.update(1); + csk = sk1.compact(true, null); + assertTrue(csk instanceof SingleItemSketch); + csk = sk1.compact(false, null); + 
assertTrue(csk instanceof SingleItemSketch); + + //Off-heap + bytes = Sketches.getMaxUpdateSketchBytes(32); + MemorySegment wmem = MemorySegment.ofArray(new byte[bytes]); + sk1= Sketches.updateSketchBuilder().setNominalEntries(32).build(wmem); + sk1.update(1); + csk = sk1.compact(true, null); + assertTrue(csk instanceof SingleItemSketch); + csk = sk1.compact(false, null); + assertTrue(csk instanceof SingleItemSketch); + + bytes = Sketches.getMaxCompactSketchBytes(1); + wmem = MemorySegment.ofArray(new byte[bytes]); + csk = sk1.compact(true, wmem); + assertTrue(csk.isOrdered()); + csk = sk1.compact(false, wmem); + assertTrue(csk.isOrdered()); + } + + @Test + public void intersection() { + UpdateSketch sk1, sk2; + CompactSketch csk; + int bytes; + //Intersection on-heap + sk1 = Sketches.updateSketchBuilder().setNominalEntries(32).build(); + sk2 = Sketches.updateSketchBuilder().setNominalEntries(32).build(); + sk1.update(1); + sk1.update(2); + sk2.update(1); + Intersection inter = Sketches.setOperationBuilder().buildIntersection(); + inter.intersect(sk1); + inter.intersect(sk2); + csk = inter.getResult(true, null); + assertTrue(csk instanceof SingleItemSketch); + + //Intersection off-heap + bytes = Sketches.getMaxIntersectionBytes(32); + MemorySegment wmem = MemorySegment.ofArray(new byte[bytes]); + inter = Sketches.setOperationBuilder().buildIntersection(wmem); + inter.intersect(sk1); + inter.intersect(sk2); + csk = inter.getResult(true, null); + assertTrue(csk instanceof SingleItemSketch); + csk = inter.getResult(false, null); + assertTrue(csk instanceof SingleItemSketch); + } + + @Test + public void union() { + UpdateSketch sk1, sk2; + CompactSketch csk; + int bytes; + //Union on-heap + sk1 = Sketches.updateSketchBuilder().setNominalEntries(32).build(); + sk2 = Sketches.updateSketchBuilder().setNominalEntries(32).build(); + sk1.update(1); + sk2.update(1); + Union union = Sketches.setOperationBuilder().buildUnion(); + union.union(sk1); + union.union(sk2); + csk = 
union.getResult(true, null); + assertTrue(csk instanceof SingleItemSketch); + + //Union off-heap + bytes = Sketches.getMaxUnionBytes(32); + MemorySegment wmem = MemorySegment.ofArray(new byte[bytes]); + union = Sketches.setOperationBuilder().buildUnion(wmem); + union.union(sk1); + union.union(sk2); + csk = union.getResult(true, null); + assertTrue(csk instanceof SingleItemSketch); + csk = union.getResult(false, null); + assertTrue(csk instanceof SingleItemSketch); + } + + @Test + public void aNotB() { + UpdateSketch sk1, sk2; + CompactSketch csk; + //AnotB on-heap + sk1 = Sketches.updateSketchBuilder().setNominalEntries(32).build(); + sk2 = Sketches.updateSketchBuilder().setNominalEntries(32).build(); + sk1.update(1); + sk2.update(2); + AnotB aNotB = Sketches.setOperationBuilder().buildANotB(); + aNotB.setA(sk1); + aNotB.notB(sk2); + csk = aNotB.getResult(true, null, true); + assertTrue(csk instanceof SingleItemSketch); + //not AnotB off-heap form + } + + @Test + public void checkHeapifyInstance() { + UpdateSketch sk1 = new UpdateSketchBuilder().build(); + sk1.update(1); + UpdateSketch sk2 = new UpdateSketchBuilder().build(); + sk2.update(1); + Intersection inter = Sketches.setOperationBuilder().buildIntersection(); + inter.intersect(sk1); + inter.intersect(sk2); + MemorySegment wmem = MemorySegment.ofArray(new byte[16]); + CompactSketch csk = inter.getResult(false, wmem); + assertTrue(csk.isOrdered()); + Sketch csk2 = Sketches.heapifySketch(wmem); + assertTrue(csk2 instanceof SingleItemSketch); + println(csk2.toString(true, true, 1, true)); + } + + @Test + public void checkSingleItemBadFlags() { + final short defaultSeedHash = ThetaUtil.computeSeedHash(ThetaUtil.DEFAULT_UPDATE_SEED); + UpdateSketch sk1 = new UpdateSketchBuilder().build(); + sk1.update(1); + MemorySegment wmem = MemorySegment.ofArray(new byte[16]); + sk1.compact(true, wmem); + wmem.set(JAVA_BYTE, 5, (byte) 0); //corrupt flags to zero + try { + SingleItemSketch.heapify(wmem, defaultSeedHash); 
//fails due to corrupted flags bytes + fail(); + } catch (SketchesArgumentException e) { } + } + + @Test + public void checkDirectUnionSingleItem2() { + Sketch sk = Sketch.wrap(siSkWoutSiFlag24Bytes()); + assertEquals(sk.getEstimate(), 1.0, 0.0); + //println(sk.toString()); + sk = Sketch.wrap(siSkWithSiFlag24Bytes()); + assertEquals(sk.getEstimate(), 1.0, 0.0); + //println(sk.toString()); + } + + @Test + public void checkSingleItemCompact() { + UpdateSketch sk1 = new UpdateSketchBuilder().build(); + sk1.update(1); + CompactSketch csk = sk1.compact(); + assertTrue(csk instanceof SingleItemSketch); + CompactSketch csk2 = csk.compact(); + assertEquals(csk, csk2); + CompactSketch csk3 = csk.compact(true, MemorySegment.ofArray(new byte[16])); + assertTrue(csk3 instanceof DirectCompactSketch); + assertEquals(csk2.getCurrentPreambleLongs(), 1); + assertEquals(csk3.getCurrentPreambleLongs(), 1); + } + + + static final long SiSkPre0WithSiFlag = 0x93cc3a0000030301L; + static final long SiSkPre0WoutSiFlag = 0x93cc1a0000030301L; + static final long Hash = 0x05a186bdcb7df915L; + + static MemorySegment siSkWithSiFlag24Bytes() { + int cap = 24; //8 extra bytes + MemorySegment wmem = MemorySegment.ofArray(new byte[cap]); + wmem.set(JAVA_LONG_UNALIGNED, 0, SiSkPre0WithSiFlag); + wmem.set(JAVA_LONG_UNALIGNED, 8, Hash); + return wmem; + } + + static MemorySegment siSkWoutSiFlag24Bytes() { + int cap = 24; //8 extra bytes + MemorySegment wmem = MemorySegment.ofArray(new byte[cap]); + wmem.set(JAVA_LONG_UNALIGNED, 0, SiSkPre0WoutSiFlag); + wmem.set(JAVA_LONG_UNALIGNED, 8, Hash); + return wmem; + } + + @Test + public void printlnTest() { + println("PRINTING: "+this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(String s) { + //System.out.println(s); //disable here + } + +} diff --git a/src/test/java/org/apache/datasketches/theta2/SketchTest.java b/src/test/java/org/apache/datasketches/theta2/SketchTest.java new file mode 100644 index 
000000000..686ad7ff9 --- /dev/null +++ b/src/test/java/org/apache/datasketches/theta2/SketchTest.java @@ -0,0 +1,440 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import static org.apache.datasketches.common.Family.ALPHA; +import static org.apache.datasketches.common.Family.COMPACT; +import static org.apache.datasketches.common.Family.QUICKSELECT; +import static org.apache.datasketches.common.ResizeFactor.X1; +import static org.apache.datasketches.common.ResizeFactor.X2; +import static org.apache.datasketches.common.ResizeFactor.X4; +import static org.apache.datasketches.common.ResizeFactor.X8; +import static org.apache.datasketches.common.Util.isSameResource; +import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; +import static org.apache.datasketches.theta2.BackwardConversions.convertSerVer3toSerVer1; +import static org.apache.datasketches.theta2.BackwardConversions.convertSerVer3toSerVer2; +import static org.apache.datasketches.theta2.CompactOperations.computeCompactPreLongs; +import static org.apache.datasketches.theta2.PreambleUtil.COMPACT_FLAG_MASK; +import static org.apache.datasketches.theta2.PreambleUtil.FLAGS_BYTE; +import 
static org.apache.datasketches.theta2.PreambleUtil.READ_ONLY_FLAG_MASK; +import static org.apache.datasketches.theta2.Sketch.getMaxCompactSketchBytes; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import java.lang.foreign.MemorySegment; +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; +//import org.apache.datasketches.theta2.Skectches; +import org.apache.datasketches.thetacommon.ThetaUtil; +import org.testng.annotations.Test; + +/** + * @author Lee Rhodes + */ +public class SketchTest { + + @Test + public void checkGetMaxBytesWithEntries() { + assertEquals(getMaxCompactSketchBytes(10), (10*8) + (Family.COMPACT.getMaxPreLongs() << 3) ); + } + + @Test + public void checkGetCurrentBytes() { + int k = 64; + int lowQSPreLongs = Family.QUICKSELECT.getMinPreLongs(); + int lowCompPreLongs = Family.COMPACT.getMinPreLongs(); + UpdateSketch sketch = UpdateSketch.builder().setNominalEntries(k).build(); // QS Sketch + assertEquals(sketch.getCurrentPreambleLongs(), lowQSPreLongs); + assertEquals(sketch.getCompactPreambleLongs(), 1); //compact form + assertEquals(sketch.getCurrentDataLongs(), k*2); + assertEquals(sketch.getCurrentBytes(), (k*2*8) + (lowQSPreLongs << 3)); + assertEquals(sketch.getCompactBytes(), lowCompPreLongs << 3); + + CompactSketch compSk = sketch.compact(false, null); + assertEquals(compSk.getCompactBytes(), 8); + assertEquals(compSk.getCurrentBytes(), 8); + assertEquals(compSk.getCurrentDataLongs(), 0); + + int compPreLongs = computeCompactPreLongs(sketch.isEmpty(), sketch.getRetainedEntries(true), + sketch.getThetaLong()); + assertEquals(compPreLongs, 1); + + for (int i=0; i 1) { assertEquals(maxCompBytes, 24 + (i * 8)); } //assumes maybe estimation mode + } + } + + 
@Test + public void checkBuilder() { + int k = 2048; + int lgK = Integer.numberOfTrailingZeros(k); + long seed = 1021; + float p = (float)0.5; + ResizeFactor rf = X4; + Family fam = Family.ALPHA; + + UpdateSketch sk1 = UpdateSketch.builder().setSeed(seed) + .setP(p).setResizeFactor(rf).setFamily(fam).setNominalEntries(k).build(); + String nameS1 = sk1.getClass().getSimpleName(); + assertEquals(nameS1, "HeapAlphaSketch"); + assertEquals(sk1.getLgNomLongs(), lgK); + assertEquals(sk1.getSeed(), seed); + assertEquals(sk1.getP(), p); + + //check reset of defaults + + sk1 = UpdateSketch.builder().build(); + nameS1 = sk1.getClass().getSimpleName(); + assertEquals(nameS1, "HeapQuickSelectSketch"); + assertEquals(sk1.getLgNomLongs(), Integer.numberOfTrailingZeros(ThetaUtil.DEFAULT_NOMINAL_ENTRIES)); + assertEquals(sk1.getSeed(), ThetaUtil.DEFAULT_UPDATE_SEED); + assertEquals(sk1.getP(), (float)1.0); + assertEquals(sk1.getResizeFactor(), ResizeFactor.X8); + } + + @Test + public void checkBuilderNonPowerOf2() { + int k = 1000; + UpdateSketch sk = UpdateSketch.builder().setNominalEntries(k).build(); + assertEquals(sk.getLgNomLongs(), 10); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkBuilderIllegalP() { + float p = (float)1.5; + UpdateSketch.builder().setP(p).build(); + } + + @Test + public void checkBuilderResizeFactor() { + ResizeFactor rf; + rf = X1; + assertEquals(rf.getValue(), 1); + assertEquals(rf.lg(), 0); + assertEquals(ResizeFactor.getRF(0), X1); + rf = X2; + assertEquals(rf.getValue(), 2); + assertEquals(rf.lg(), 1); + assertEquals(ResizeFactor.getRF(1), X2); + rf = X4; + assertEquals(rf.getValue(), 4); + assertEquals(rf.lg(), 2); + assertEquals(ResizeFactor.getRF(2), X4); + rf = X8; + assertEquals(rf.getValue(), 8); + assertEquals(rf.lg(), 3); + assertEquals(ResizeFactor.getRF(3), X8); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkWrapBadFamily() { + UpdateSketch sketch = 
UpdateSketch.builder().setFamily(Family.ALPHA).setNominalEntries(1024).build(); + byte[] byteArr = sketch.toByteArray(); + MemorySegment srcSeg = MemorySegment.ofArray(byteArr); + Sketch.wrap(srcSeg); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkBadFamily() { + UpdateSketch.builder().setFamily(Family.INTERSECTION).setNominalEntries(1024).build(); + } + + @SuppressWarnings("static-access") + @Test + public void checkSerVer() { + UpdateSketch sketch = UpdateSketch.builder().setNominalEntries(1024).build(); + byte[] sketchArray = sketch.toByteArray(); + MemorySegment seg = MemorySegment.ofArray(sketchArray); + int serVer = Sketch.getSerializationVersion(seg); + assertEquals(serVer, 3); + MemorySegment wseg = MemorySegment.ofArray(sketchArray); + UpdateSketch sk2 = UpdateSketch.wrap(wseg); + serVer = sk2.getSerializationVersion(wseg); + assertEquals(serVer, 3); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkHeapifyAlphaCompactExcep() { + int k = 512; + Sketch sketch1 = UpdateSketch.builder().setFamily(ALPHA).setNominalEntries(k).build(); + byte[] byteArray = sketch1.toByteArray(); + MemorySegment seg = MemorySegment.ofArray(byteArray); + //corrupt: + Util.setBits(seg, FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); + Sketch.heapify(seg); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkHeapifyQSCompactExcep() { + int k = 512; + Sketch sketch1 = UpdateSketch.builder().setFamily(QUICKSELECT).setNominalEntries(k).build(); + byte[] byteArray = sketch1.toByteArray(); + MemorySegment seg = MemorySegment.ofArray(byteArray); + //corrupt: + Util.setBits(seg, FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); + Sketch.heapify(seg); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkHeapifyNotCompactExcep() { + int k = 512; + UpdateSketch sketch1 = UpdateSketch.builder().setFamily(QUICKSELECT).setNominalEntries(k).build(); + int bytes = 
Sketch.getMaxCompactSketchBytes(0); + byte[] byteArray = new byte[bytes]; + MemorySegment seg = MemorySegment.ofArray(byteArray); + sketch1.compact(false, seg); + //corrupt: + Util.clearBits(seg, FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); + Sketch.heapify(seg); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkHeapifyFamilyExcep() { + int k = 512; + Union union = SetOperation.builder().setNominalEntries(k).buildUnion(); + byte[] byteArray = union.toByteArray(); + MemorySegment seg = MemorySegment.ofArray(byteArray); + //Improper use + Sketch.heapify(seg); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkWrapAlphaCompactExcep() { + int k = 512; + Sketch sketch1 = UpdateSketch.builder().setFamily(ALPHA).setNominalEntries(k).build(); + byte[] byteArray = sketch1.toByteArray(); + MemorySegment seg = MemorySegment.ofArray(byteArray); + //corrupt: + Util.setBits(seg, FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); + Sketch.wrap(seg); + + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkWrapQSCompactExcep() { + int k = 512; + Sketch sketch1 = UpdateSketch.builder().setFamily(QUICKSELECT).setNominalEntries(k).build(); + byte[] byteArray = sketch1.toByteArray(); + MemorySegment seg = MemorySegment.ofArray(byteArray); + //corrupt: + Util.setBits(seg, FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); + Sketch.wrap(seg); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkWrapNotCompactExcep() { + int k = 512; + UpdateSketch sketch1 = UpdateSketch.builder().setFamily(QUICKSELECT).setNominalEntries(k).build(); + int bytes = Sketch.getMaxCompactSketchBytes(0); + byte[] byteArray = new byte[bytes]; + MemorySegment seg = MemorySegment.ofArray(byteArray); + sketch1.compact(false, seg); + //corrupt: + Util.clearBits(seg, FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); + Sketch.wrap(seg); + } + + @Test + public void checkValidSketchID() { + 
assertFalse(Sketch.isValidSketchID(0)); + assertTrue(Sketch.isValidSketchID(ALPHA.getID())); + assertTrue(Sketch.isValidSketchID(QUICKSELECT.getID())); + assertTrue(Sketch.isValidSketchID(COMPACT.getID())); + } + + @Test + public void checkWrapToHeapifyConversion1() { + int k = 512; + UpdateSketch sketch1 = UpdateSketch.builder().setNominalEntries(k).build(); + for (int i = 0; i < k; i++) { + sketch1.update(i); + } + double uest1 = sketch1.getEstimate(); + + CompactSketch csk = sketch1.compact(); + assertEquals(csk.getEstimate(), uest1); + + MemorySegment v1seg = convertSerVer3toSerVer1(csk); + Sketch csk2 = Sketch.wrap(v1seg); //fails + assertFalse(csk2.isDirect()); + assertFalse(csk2.hasMemorySegment()); + assertEquals(uest1, csk2.getEstimate(), 0.0); + + MemorySegment v2seg = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED); + csk2 = Sketch.wrap(v2seg); + assertFalse(csk2.isDirect()); + assertFalse(csk2.hasMemorySegment()); + assertEquals(uest1, csk2.getEstimate(), 0.0); + } + + @Test + public void checkIsSameResource() { + int k = 16; + MemorySegment seg = MemorySegment.ofArray(new byte[(k*16) + 24]); //280 + MemorySegment cseg = MemorySegment.ofArray(new byte[32]); + UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(seg); + sketch.update(1); + sketch.update(2); + assertTrue(sketch.isSameResource(seg)); + DirectCompactSketch dcos = (DirectCompactSketch) sketch.compact(true, cseg); + assertTrue(isSameResource(dcos.getMemorySegment(), cseg)); + assertTrue(dcos.isOrdered()); + //never create 2 sketches with the same memory, so don't do as I do :) + DirectCompactSketch dcs = (DirectCompactSketch) sketch.compact(false, cseg); + assertTrue(isSameResource(dcs.getMemorySegment(), cseg)); + assertFalse(dcs.isOrdered()); + + Sketch sk = Sketches.updateSketchBuilder().setNominalEntries(k).build(); + assertFalse(isSameResource(sk.getMemorySegment(),seg)); + } + + @Test + public void checkCountLessThanTheta() { + int k = 512; + 
UpdateSketch sketch1 = UpdateSketch.builder().setNominalEntries(k).build(); + for (int i = 0; i < (2*k); i++) { sketch1.update(i); } + + double theta = sketch1.rebuild().getTheta(); + final long thetaLong = (long) (LONG_MAX_VALUE_AS_DOUBLE * theta); + int count = sketch1.getCountLessThanThetaLong(thetaLong); + assertEquals(count, k); + } + + private static MemorySegment createCompactSketchMemory(int k, int u) { + UpdateSketch usk = Sketches.updateSketchBuilder().setNominalEntries(k).build(); + for (int i = 0; i < u; i++) { usk.update(i); } + int bytes = Sketch.getMaxCompactSketchBytes(usk.getRetainedEntries(true)); + MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); + usk.compact(true, wseg); + return wseg; + } + + @Test + public void checkCompactFlagsOnWrap() { + MemorySegment wseg = createCompactSketchMemory(16, 32); + Sketch sk = Sketch.wrap(wseg); + assertTrue(sk instanceof CompactSketch); + int flags = PreambleUtil.extractFlags(wseg); + + int flagsNoCompact = flags & ~COMPACT_FLAG_MASK; + PreambleUtil.insertFlags(wseg, flagsNoCompact); + try { + sk = Sketch.wrap(wseg); + fail(); + } catch (SketchesArgumentException e) { } + + int flagsNoReadOnly = flags & ~READ_ONLY_FLAG_MASK; + PreambleUtil.insertFlags(wseg, flagsNoReadOnly); + try { + sk = Sketch.wrap(wseg); + fail(); + } catch (SketchesArgumentException e) { } + PreambleUtil.insertFlags(wseg, flags); //repair to original + PreambleUtil.insertSerVer(wseg, 5); + try { + sk = Sketch.wrap(wseg); + fail(); + } catch (SketchesArgumentException e) { } + } + + @Test + public void checkCompactSizeAndFlagsOnHeapify() { + MemorySegment wseg = createCompactSketchMemory(16, 32); + Sketch sk = Sketch.heapify(wseg); + assertTrue(sk instanceof CompactSketch); + int flags = PreambleUtil.extractFlags(wseg); + + int flagsNoCompact = flags & ~READ_ONLY_FLAG_MASK; + PreambleUtil.insertFlags(wseg, flagsNoCompact); + try { + sk = Sketch.heapify(wseg); + fail(); + } catch (SketchesArgumentException e) { } + + wseg = 
MemorySegment.ofArray(new byte[7]); + PreambleUtil.insertSerVer(wseg, 3); + //PreambleUtil.insertFamilyID(wseg, 3); + try { + sk = Sketch.heapify(wseg); + fail(); + } catch (SketchesArgumentException e) { } + } + + @Test + public void check2Methods() { + int k = 16; + Sketch sk = Sketches.updateSketchBuilder().setNominalEntries(k).build(); + int bytes1 = sk.getCompactBytes(); + int bytes2 = sk.getCurrentBytes(); + assertEquals(bytes1, 8); + assertEquals(bytes2, 280); //32*8 + 24 + int retEnt = sk.getRetainedEntries(); + assertEquals(retEnt, 0); + } + + @Test + public void printlnTest() { + println("PRINTING: "+this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(String s) { + //System.out.println(s); //disable here + } + +} diff --git a/src/test/java/org/apache/datasketches/theta2/UnionImplTest.java b/src/test/java/org/apache/datasketches/theta2/UnionImplTest.java new file mode 100644 index 000000000..09daaad11 --- /dev/null +++ b/src/test/java/org/apache/datasketches/theta2/UnionImplTest.java @@ -0,0 +1,320 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static org.apache.datasketches.common.Util.isSameResource; +import static org.apache.datasketches.theta2.BackwardConversions.convertSerVer3toSerVer1; +import static org.apache.datasketches.theta2.BackwardConversions.convertSerVer3toSerVer2; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import java.lang.foreign.Arena; +import java.nio.ByteOrder; + +import java.lang.foreign.MemorySegment; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; +import org.apache.datasketches.thetacommon.ThetaUtil; +import org.testng.annotations.Test; + +public class UnionImplTest { + + @Test + public void checkGetCurrentAndMaxBytes() { + final int lgK = 10; + final Union union = Sketches.setOperationBuilder().setLogNominalEntries(lgK).buildUnion(); + assertEquals(union.getCurrentBytes(), 288); + assertEquals(union.getMaxUnionBytes(), 16416); + } + + @Test + public void checkUpdateWithSketch() { + final int k = 16; + final MemorySegment mem = MemorySegment.ofArray(new byte[k*8 + 24]); + final MemorySegment mem2 = MemorySegment.ofArray(new byte[k*8 + 24]); + final UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(); + for (int i=0; i 0) && !(((lgT - lgA) % lgR) == 0); + boolean rf0 = (lgR == 0) && (lgA != lgT); + assertTrue((lgRbad == rf0) || (lgRbad == rf123)); + } + } + } + } + + + @SuppressWarnings("unused") + @Test + public void checkCompactOpsMemoryToCompact() { + MemorySegment skwseg, cskwseg1, cskwseg2, cskwseg3; + CompactSketch csk1, csk2, csk3; + int lgK = 6; + UpdateSketch sk = Sketches.updateSketchBuilder().setLogNominalEntries(lgK).build(); + int n = 1 << (lgK + 1); + for (int i = 2; i < n; i++) { sk.update(i); } + int 
cbytes = sk.getCompactBytes(); + byte[] byteArr = sk.toByteArray(); + skwseg = MemorySegment.ofArray(byteArr); + cskwseg1 = MemorySegment.ofArray(new byte[cbytes]); + cskwseg2 = MemorySegment.ofArray(new byte[cbytes]); + cskwseg3 = MemorySegment.ofArray(new byte[cbytes]); + csk1 = sk.compact(true, cskwseg1); + csk2 = CompactOperations.memoryToCompact(skwseg, true, cskwseg2); + csk3 = CompactOperations.memoryToCompact(cskwseg1, true, cskwseg3); + assertTrue(equalContents(cskwseg1,cskwseg2)); + assertTrue(equalContents(cskwseg1, cskwseg3)); + } + + @Test + public void printlnTest() { + println("PRINTING: "+this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(String s) { + //System.out.println(s); //disable here + } +} From a3aa1419db766de9c56c154c76355687af59678d Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Tue, 10 Jun 2025 11:46:16 -0700 Subject: [PATCH 11/25] partial progress 2 --- .../theta2/PairwiseSetOperationsTest.java | 410 ++++++++++++++++++ .../datasketches/theta2/PreambleUtilTest.java | 231 ++++++++++ 2 files changed, 641 insertions(+) create mode 100644 src/test/java/org/apache/datasketches/theta2/PairwiseSetOperationsTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/PreambleUtilTest.java diff --git a/src/test/java/org/apache/datasketches/theta2/PairwiseSetOperationsTest.java b/src/test/java/org/apache/datasketches/theta2/PairwiseSetOperationsTest.java new file mode 100644 index 000000000..c0d9faeed --- /dev/null +++ b/src/test/java/org/apache/datasketches/theta2/PairwiseSetOperationsTest.java @@ -0,0 +1,410 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.fail; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.testng.annotations.Test; + +public class PairwiseSetOperationsTest { + + // Intersection + + @Test + public void checkIntersectionNoOverlap() { + int lgK = 9; + int k = 1< k); + println(quick1.toString()); + println(PreambleUtil.preambleToString(mem)); + + final MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); + final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); + union.union(quick1); + println(PreambleUtil.preambleToString(uMem)); + } + + @Test + public void checkToStringWithPrelongsOf2() { + final int k = 16; + final int u = k; + final UpdateSketch quick1 = UpdateSketch.builder().setNominalEntries(k).build(); + for (int i = 0; i< u; i++) { + quick1.update(i); + } + final byte[] bytes = quick1.compact().toByteArray(); + println(Sketch.toString(bytes)); + } + + @Test + public void checkPreambleToStringExceptions() { + byte[] byteArr = new byte[7]; + try { //check preLongs < 8 fails + Sketch.toString(byteArr); + fail("Did not throw SketchesArgumentException."); + } catch (final SketchesArgumentException e) { + //expected + } + byteArr = new byte[8]; + byteArr[0] = (byte) 2; //needs min capacity of 16 + try { //check 
preLongs == 2 fails + Sketch.toString(MemorySegment.ofArray(byteArr).asReadOnly()); + fail("Did not throw SketchesArgumentException."); + } catch (final SketchesArgumentException e) { + //expected + } + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkBadSeedHashFromSeed() { + //In the first 64K values 50541 produces a seedHash of 0, + ThetaUtil.computeSeedHash(50541); + } + + @Test + public void checkPreLongs() { + final UpdateSketch sketch = UpdateSketch.builder().setNominalEntries(16).build(); + CompactSketch comp = sketch.compact(false, null); + byte[] byteArr = comp.toByteArray(); + println(Sketch.toString(byteArr)); //PreLongs = 1 + + sketch.update(1); + comp = sketch.compact(false, null); + byteArr = comp.toByteArray(); + println(Sketch.toString(byteArr)); //PreLongs = 2 + + for (int i=2; i<=32; i++) { + sketch.update(i); + } + comp = sketch.compact(false, null); + byteArr = comp.toByteArray(); + println(Sketch.toString(MemorySegment.ofArray(byteArr).asReadOnly())); //PreLongs = 3 + } + + @Test + public void checkInsertsAndExtracts() { + final byte[] arr = new byte[32]; + final MemorySegment wmem = MemorySegment.ofArray(arr); + + int v = 0; + insertPreLongs(wmem, ++v); + assertEquals(extractPreLongs(wmem), v); + insertPreLongs(wmem, 0); + + insertLgResizeFactor(wmem, 3); //limited to 2 bits + assertEquals(extractLgResizeFactor(wmem), 3); + insertLgResizeFactor(wmem, 0); + + insertSerVer(wmem, ++v); + assertEquals(extractSerVer(wmem), v); + insertSerVer(wmem, 0); + + insertFamilyID(wmem, ++v); + assertEquals(extractFamilyID(wmem), v); + insertFamilyID(wmem, 0); + + insertLgNomLongs(wmem, ++v); + assertEquals(extractLgNomLongs(wmem), v); + insertLgNomLongs(wmem, 0); + + insertLgArrLongs(wmem, ++v); + assertEquals(extractLgArrLongs(wmem), v); + insertLgArrLongs(wmem, 0); + + insertFlags(wmem, 3); + assertEquals(extractFlags(wmem), 3); + assertEquals(extractLgResizeRatioV1(wmem), 3); //also at byte 5, limited to 2 bits + 
insertFlags(wmem, 0); + + insertSeedHash(wmem, ++v); + assertEquals(extractSeedHash(wmem), v); + assertEquals(extractFlagsV1(wmem), v); //also at byte 6 + insertSeedHash(wmem, 0); + + insertCurCount(wmem, ++v); + assertEquals(extractCurCount(wmem), v); + insertCurCount(wmem, 0); + + insertP(wmem, (float) 1.0); + assertEquals(extractP(wmem), (float) 1.0); + insertP(wmem, (float) 0.0); + + insertThetaLong(wmem, ++v); + assertEquals(extractThetaLong(wmem), v); + insertThetaLong(wmem, 0L); + + insertUnionThetaLong(wmem, ++v); + assertEquals(extractUnionThetaLong(wmem), v); + insertUnionThetaLong(wmem, 0L); + + setEmpty(wmem); + assertTrue(isEmptyFlag(wmem)); + + clearEmpty(wmem); + assertFalse(isEmptyFlag(wmem)); + } + + @Test + public void printlnTest() { + println("PRINTING: "+this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(final String s) { + //System.out.println(s); //disable here + } + +} From 5db9cccf23c47061daa3e5436993c0c776aa685b Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Mon, 16 Jun 2025 14:09:28 -0700 Subject: [PATCH 12/25] Theta Rework: completed all classes and tests. 
--- .../org/apache/datasketches/common/Util.java | 34 +- .../theta2/ConcurrentHeapThetaBuffer.java | 16 + .../theta2/ConcurrentSharedThetaSketch.java | 4 +- .../theta2/DirectQuickSelectSketch.java | 2 +- .../theta2/DirectQuickSelectSketchR.java | 3 +- .../theta2/JaccardSimilarity.java | 179 +++++ .../apache/datasketches/theta2/Sketch.java | 4 +- .../BoundsOnRatiosInThetaSketchedSets2.java | 121 +++ ...ConcurrentDirectQuickSelectSketchTest.java | 4 +- .../theta2/BackwardConversions.java | 2 +- ...ConcurrentDirectQuickSelectSketchTest.java | 718 +++++++++++++++++ .../ConcurrentHeapQuickSelectSketchTest.java | 745 ++++++++++++++++++ .../theta2/JaccardSimilarityTest.java | 248 ++++++ .../theta2/ReadOnlyMemoryTest.java | 211 +++++ .../datasketches/theta2/SetOperationTest.java | 438 ++++++++++ .../theta2/SetOpsCornerCasesTest.java | 501 ++++++++++++ .../datasketches/theta2/SketchesTest.java | 202 +++++ .../theta2/ThetaSketchCrossLanguageTest.java | 121 +++ ...oundsOnRatiosInThetaSketchedSets2Test.java | 94 +++ 19 files changed, 3626 insertions(+), 21 deletions(-) create mode 100644 src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java create mode 100644 src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2.java create mode 100644 src/test/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketchTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketchTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/JaccardSimilarityTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/ReadOnlyMemoryTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/SetOperationTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/SetOpsCornerCasesTest.java create mode 100644 src/test/java/org/apache/datasketches/theta2/SketchesTest.java create mode 100644 
src/test/java/org/apache/datasketches/theta2/ThetaSketchCrossLanguageTest.java create mode 100644 src/test/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2Test.java diff --git a/src/main/java/org/apache/datasketches/common/Util.java b/src/main/java/org/apache/datasketches/common/Util.java index 11615a39a..4701ddf9e 100644 --- a/src/main/java/org/apache/datasketches/common/Util.java +++ b/src/main/java/org/apache/datasketches/common/Util.java @@ -909,21 +909,29 @@ public static boolean isSameResource(final MemorySegment seg1, final MemorySegme } /** - * Request a new heap MemorySegment with the given capacityBytes. + * Request a new heap MemorySegment with the given capacityBytes and 8-byte aligned or one byte aligned. * - *

<p>The returned MemorySegment will be constructed from a long[] array. - * As a result, it will be on-heap and have a memory alignment of 8. - * If the requested capacity is not divisible by eight, the returned size - * will be rolled up to the next multiple of eight.</p>

+ *

<p>If aligned is true, the returned MemorySegment will be constructed from a long[] array, + * and, as a result, it will have a memory alignment of 8 bytes. + * If the requested capacity is not exactly divisible by eight, the returned size + * will be rolled up to the next multiple of eight bytes.</p>

* - * @param capacityBytes The new capacity being requested. It must not be negative. - * @return a new MemorySegment with the requested capacity. - */ - public static MemorySegment newHeapSegment(final int capacityBytes) { - final long[] array = ((capacityBytes & 0x7) == 0) - ? new long[capacityBytes >>> 3] - : new long[(capacityBytes >>> 3) + 1]; - return MemorySegment.ofArray(array); + *

If aligned is false, the returned MemorySegment will be constructed from a byte[] array, + * and have a memory alignment of 1 byte. + * + * @param capacityBytes The new capacity being requested. It must not be negative and cannot exceed Integer.MAX_VALUE. + * @param aligned if true, the new heap segment will have an alignment of 8 bytes, otherwise the alignment will be 1 byte. + * @return a new MemorySegment with the requested capacity and alignment. + */ + public static MemorySegment newHeapSegment(final int capacityBytes, final boolean aligned) { + if (aligned) { + final int lenLongs = capacityBytes >>> 3; + final long[] array = ((capacityBytes & 0x7) == 0) + ? new long[lenLongs] + : new long[lenLongs + 1]; + return MemorySegment.ofArray(array); + } + return MemorySegment.ofArray(new byte[capacityBytes]); } /** diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java index c93ed892b..f8f5a0947 100644 --- a/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java +++ b/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java @@ -23,6 +23,7 @@ import static org.apache.datasketches.theta2.UpdateReturnState.ConcurrentPropagated; import static org.apache.datasketches.theta2.UpdateReturnState.RejectedOverTheta; +import java.lang.foreign.MemorySegment; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.datasketches.common.ResizeFactor; @@ -147,6 +148,16 @@ public double getUpperBound(final int numStdDev) { return shared.getUpperBound(numStdDev); } + @Override + public boolean hasMemorySegment() { + return shared.hasMemorySegment(); + } + + @Override + public boolean isDirect() { + return shared.isDirect(); + } + @Override public boolean isEmpty() { return shared.isEmpty(); @@ -157,6 +168,11 @@ public boolean isEstimationMode() { return shared.isEstimationMode(); } + @Override + public boolean 
isSameResource(final MemorySegment that) { + return shared.isSameResource(that); + } + //End of proxies @Override diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java index 40746c3e6..5c89b3e68 100644 --- a/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java @@ -22,6 +22,8 @@ import java.lang.foreign.MemorySegment; import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.datasketches.common.MemorySegmentStatus; + /** * An internal interface to define the API of a concurrent shared theta sketch. * It reflects all data processed by a single or multiple update threads, and can serve queries at @@ -29,7 +31,7 @@ * * @author eshcar */ -interface ConcurrentSharedThetaSketch { +interface ConcurrentSharedThetaSketch extends MemorySegmentStatus { long NOT_SINGLE_HASH = -1L; double MIN_ERROR = 0.0000001; diff --git a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java index 32ae0d14d..193385a1f 100644 --- a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java @@ -323,7 +323,7 @@ UpdateReturnState hashUpdate(final long hash) { //} //final MemorySegment newDstSeg = memReqSvr_.request(wseg_, reqBytes); - final MemorySegment newDstSeg = newHeapSegment(reqBytes); + final MemorySegment newDstSeg = newHeapSegment(reqBytes, false); moveAndResize(wseg_, preambleLongs, lgArrLongs, newDstSeg, tgtLgArrLongs, thetaLong); wseg_ = newDstSeg; diff --git a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java index c0db75b16..b7c47de47 100644 --- 
a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java +++ b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java @@ -223,8 +223,7 @@ long[] getCache() { final long lgArrLongs = wseg_.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF; final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; final long[] cacheArr = new long[1 << lgArrLongs]; - final MemorySegment seg = MemorySegment.ofArray(cacheArr); - MemorySegment.copy(wseg_, preambleLongs << 3, seg, 0, 8 << lgArrLongs); + MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, preambleLongs << 3, cacheArr, 0, 1 << lgArrLongs); return cacheArr; } diff --git a/src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java b/src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java new file mode 100644 index 000000000..624dcc3d7 --- /dev/null +++ b/src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import static java.lang.Math.max; +import static java.lang.Math.min; +import static org.apache.datasketches.common.Util.ceilingPowerOf2; +import static org.apache.datasketches.thetacommon.BoundsOnRatiosInThetaSketchedSets2.getEstimateOfBoverA; +import static org.apache.datasketches.thetacommon.BoundsOnRatiosInThetaSketchedSets2.getLowerBoundForBoverA; +import static org.apache.datasketches.thetacommon.BoundsOnRatiosInThetaSketchedSets2.getUpperBoundForBoverA; + +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * Jaccard similarity of two Theta Sketches. + * + * @author Lee Rhodes + */ +public final class JaccardSimilarity { + private static final double[] ZEROS = {0.0, 0.0, 0.0}; // LB, Estimate, UB + private static final double[] ONES = {1.0, 1.0, 1.0}; + + /** + * Computes the Jaccard similarity index with upper and lower bounds. The Jaccard similarity index + * J(A,B) = (A ^ B)/(A U B) is used to measure how similar the two sketches are to each + * other. If J = 1.0, the sketches are considered equal. If J = 0, the two sketches are + * distinct from each other. A Jaccard of .95 means the overlap between the two + * populations is 95% of the union of the two populations. + * + *

Note: For very large pairs of sketches, where the configured nominal entries of the sketches + * are 2^25 or 2^26, this method may produce unpredictable results. + * + * @param sketchA given sketch A + * @param sketchB given sketch B + * @return a double array {LowerBound, Estimate, UpperBound} of the Jaccard index. + * The Upper and Lower bounds are for a confidence interval of 95.4% or +/- 2 standard deviations. + */ + public static double[] jaccard(final Sketch sketchA, final Sketch sketchB) { + //Corner case checks + if (sketchA == null || sketchB == null) { return ZEROS.clone(); } + if (sketchA == sketchB) { return ONES.clone(); } + if (sketchA.isEmpty() && sketchB.isEmpty()) { return ONES.clone(); } + if (sketchA.isEmpty() || sketchB.isEmpty()) { return ZEROS.clone(); } + + final int countA = sketchA.getRetainedEntries(true); + final int countB = sketchB.getRetainedEntries(true); + + //Create the Union + final int minK = 1 << ThetaUtil.MIN_LG_NOM_LONGS; + final int maxK = 1 << ThetaUtil.MAX_LG_NOM_LONGS; + final int newK = max(min(ceilingPowerOf2(countA + countB), maxK), minK); + final Union union = + SetOperation.builder().setNominalEntries(newK).buildUnion(); + union.union(sketchA); + union.union(sketchB); + final Sketch unionAB = union.getResult(false, null); + final long thetaLongUAB = unionAB.getThetaLong(); + final long thetaLongA = sketchA.getThetaLong(); + final long thetaLongB = sketchB.getThetaLong(); + final int countUAB = unionAB.getRetainedEntries(true); + + //Check for identical data + if (countUAB == countA && countUAB == countB + && thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) { + return ONES.clone(); + } + + //Create the Intersection + final Intersection inter = SetOperation.builder().buildIntersection(); + inter.intersect(sketchA); + inter.intersect(sketchB); + inter.intersect(unionAB); //ensures that intersection is a subset of the union + final Sketch interABU = inter.getResult(false, null); + + final double lb = 
getLowerBoundForBoverA(unionAB, interABU); + final double est = getEstimateOfBoverA(unionAB, interABU); + final double ub = getUpperBoundForBoverA(unionAB, interABU); + return new double[] {lb, est, ub}; + } + + /** + * Returns true if the two given sketches have exactly the same hash values and the same + * theta values. Thus, they are equivalent. + * @param sketchA the given sketch A + * @param sketchB the given sketch B + * @return true if the two given sketches have exactly the same hash values and the same + * theta values. + */ + public static boolean exactlyEqual(final Sketch sketchA, final Sketch sketchB) { + //Corner case checks + if (sketchA == null || sketchB == null) { return false; } + if (sketchA == sketchB) { return true; } + if (sketchA.isEmpty() && sketchB.isEmpty()) { return true; } + if (sketchA.isEmpty() || sketchB.isEmpty()) { return false; } + + final int countA = sketchA.getRetainedEntries(true); + final int countB = sketchB.getRetainedEntries(true); + + //Create the Union + final Union union = + SetOperation.builder().setNominalEntries(ceilingPowerOf2(countA + countB)).buildUnion(); + union.union(sketchA); + union.union(sketchB); + final Sketch unionAB = union.getResult(); + final long thetaLongUAB = unionAB.getThetaLong(); + final long thetaLongA = sketchA.getThetaLong(); + final long thetaLongB = sketchB.getThetaLong(); + final int countUAB = unionAB.getRetainedEntries(true); + + //Check for identical counts and thetas + if (countUAB == countA && countUAB == countB + && thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) { + return true; + } + return false; + } + + /** + * Tests similarity of a measured Sketch against an expected Sketch. + * Computes the lower bound of the Jaccard index JLB of the measured and + * expected sketches. + * if JLB ≥ threshold, then the sketches are considered to be + * similar with a confidence of 97.7%. 
+ * + * @param measured the sketch to be tested + * @param expected the reference sketch that is considered to be correct. + * @param threshold a real value between zero and one. + * @return if true, the similarity of the two sketches is greater than or equal to the given threshold + * with at least 97.7% confidence. + */ + public static boolean similarityTest(final Sketch measured, final Sketch expected, + final double threshold) { + //index 0: the lower bound + //index 1: the mean estimate + //index 2: the upper bound + final double jRatioLB = jaccard(measured, expected)[0]; //choosing the lower bound + return jRatioLB >= threshold; + } + + /** + * Tests dissimilarity of a measured Sketch against an expected Sketch. + * Computes the upper bound of the Jaccard index JUB of the measured and + * expected sketches. + * if JUB ≤ threshold, then the sketches are considered to be + * dissimilar with a confidence of 97.7%. + * + * @param measured the sketch to be tested + * @param expected the reference sketch that is considered to be correct. + * @param threshold a real value between zero and one. + * @return if true, the Jaccard index of the two sketches is less than or equal to the given threshold + * with at least 97.7% confidence. + */ + public static boolean dissimilarityTest(final Sketch measured, final Sketch expected, + final double threshold) { + //index 0: the lower bound + //index 1: the mean estimate + //index 2: the upper bound + final double jRatioUB = jaccard(measured, expected)[2]; //choosing the upper bound + return jRatioUB <= threshold; + } + +} diff --git a/src/main/java/org/apache/datasketches/theta2/Sketch.java b/src/main/java/org/apache/datasketches/theta2/Sketch.java index 82661aa27..e98396842 100644 --- a/src/main/java/org/apache/datasketches/theta2/Sketch.java +++ b/src/main/java/org/apache/datasketches/theta2/Sketch.java @@ -222,7 +222,7 @@ public CompactSketch compact() { * *

A new CompactSketch object is created:

*
  • if dstMem != null
  • - *
  • if dstMem == null and this.hasMemory() == true
  • + *
  • if dstMem == null and this.hasMemorySegment() == true
  • *
  • if dstMem == null and this has more than 1 item and this.isOrdered() == false * and dstOrdered == true.
  • *
@@ -564,7 +564,7 @@ public static String toString(final MemorySegment mem) { /** * Gets the internal cache array. For on-heap sketches this will return a reference to the actual - * cache array. For Memory-based sketches this returns a copy. + * cache array. For MemorySegment-based sketches this returns a copy. * @return the internal cache array. */ abstract long[] getCache(); diff --git a/src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2.java b/src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2.java new file mode 100644 index 000000000..f8199cc4f --- /dev/null +++ b/src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.thetacommon; + +import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; + +import org.apache.datasketches.common.BoundsOnRatiosInSampledSets; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.theta2.Sketch; + +/** + * This class is used to compute the bounds on the estimate of the ratio B / A, where: + *
    + *
  • A is a Theta Sketch of population PopA.
  • + *
  • B is a Theta Sketch of population PopB that is a subset of A, + * obtained by an intersection of A with some other Theta Sketch C, + * which acts like a predicate or selection clause.
  • + *
  • The estimate of the ratio PopB/PopA is + * BoundsOnRatiosInThetaSketchedSets2.getEstimateOfBoverA(A, B).
  • + *
  • The Upper Bound estimate on the ratio PopB/PopA is + * BoundsOnRatiosInThetaSketchedSets2.getUpperBoundForBoverA(A, B).
  • + *
  • The Lower Bound estimate on the ratio PopB/PopA is + * BoundsOnRatiosInThetaSketchedSets2.getLowerBoundForBoverA(A, B).
  • + *
+ * Note: The theta of B cannot be greater than the theta of A. + * If B is formed as an intersection of A and some other set C, + * then the theta of B is guaranteed to be less than or equal to the theta of A. + * + * @author Kevin Lang + * @author Lee Rhodes + */ +public final class BoundsOnRatiosInThetaSketchedSets2 { + + private BoundsOnRatiosInThetaSketchedSets2() {} + + /** + * Gets the approximate lower bound for B over A based on a 95% confidence interval + * @param sketchA the sketch A + * @param sketchB the sketch B + * @return the approximate lower bound for B over A + */ + public static double getLowerBoundForBoverA(final Sketch sketchA, final Sketch sketchB) { + final long thetaLongA = sketchA.getThetaLong(); + final long thetaLongB = sketchB.getThetaLong(); + checkThetas(thetaLongA, thetaLongB); + + final int countB = sketchB.getRetainedEntries(true); + final int countA = (thetaLongB == thetaLongA) + ? sketchA.getRetainedEntries(true) + : sketchA.getCountLessThanThetaLong(thetaLongB); + + if (countA <= 0) { return 0; } + final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; + return BoundsOnRatiosInSampledSets.getLowerBoundForBoverA(countA, countB, f); + } + + /** + * Gets the approximate upper bound for B over A based on a 95% confidence interval + * @param sketchA the sketch A + * @param sketchB the sketch B + * @return the approximate upper bound for B over A + */ + public static double getUpperBoundForBoverA(final Sketch sketchA, final Sketch sketchB) { + final long thetaLongA = sketchA.getThetaLong(); + final long thetaLongB = sketchB.getThetaLong(); + checkThetas(thetaLongA, thetaLongB); + + final int countB = sketchB.getRetainedEntries(true); + final int countA = (thetaLongB == thetaLongA) + ?
sketchA.getRetainedEntries(true) + : sketchA.getCountLessThanThetaLong(thetaLongB); + + if (countA <= 0) { return 1.0; } + final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; + return BoundsOnRatiosInSampledSets.getUpperBoundForBoverA(countA, countB, f); + } + + /** + * Gets the estimate for B over A + * @param sketchA the sketch A + * @param sketchB the sketch B + * @return the estimate for B over A + */ + public static double getEstimateOfBoverA(final Sketch sketchA, final Sketch sketchB) { + final long thetaLongA = sketchA.getThetaLong(); + final long thetaLongB = sketchB.getThetaLong(); + checkThetas(thetaLongA, thetaLongB); + + final int countB = sketchB.getRetainedEntries(true); + final int countA = (thetaLongB == thetaLongA) + ? sketchA.getRetainedEntries(true) + : sketchA.getCountLessThanThetaLong(thetaLongB); + + if (countA <= 0) { return 0.5; } + + return (double) countB / (double) countA; + } + + static void checkThetas(final long thetaLongA, final long thetaLongB) { + if (thetaLongB > thetaLongA) { + throw new SketchesArgumentException("ThetaLongB cannot be > ThetaLongA."); + } + } +} diff --git a/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java index 6d6af7047..fe2b138ca 100644 --- a/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java @@ -696,7 +696,9 @@ static void println(String s) { } private static void checkMemoryDirectProxyMethods(Sketch local, Sketch shared) { - assertEquals(local.hasMemory(), shared.hasMemory()); + assertEquals( + local.hasMemory(), + shared.hasMemory()); assertEquals(local.isDirect(), shared.isDirect()); } diff --git a/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java b/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java index 
0e1348684..bec67b219 100644 --- a/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java +++ b/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java @@ -220,7 +220,7 @@ public static MemorySegment convertSerVer3toSerVer2(final CompactSketch skV3, fi final int entries = skV3.getRetainedEntries(true); final boolean unordered = !(skV3.isOrdered()); final byte flags = (byte) (0xA | (unordered ? 16 : 0)); //Unordered, NoRebuild, notEmpty, ReadOnly, LE - wseg = Util.newHeapSegment((preLongs + entries) << 3); + wseg = Util.newHeapSegment((preLongs + entries) << 3, false); wseg.set(JAVA_BYTE, 0, (byte) preLongs); //preLongs wseg.set(JAVA_BYTE, 1, (byte) 2); //SerVer wseg.set(JAVA_BYTE, 2, (byte) 3); //SetSketch diff --git a/src/test/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketchTest.java new file mode 100644 index 000000000..7a7b89cef --- /dev/null +++ b/src/test/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketchTest.java @@ -0,0 +1,718 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static org.apache.datasketches.theta2.ConcurrentHeapQuickSelectSketchTest.waitForBgPropagationToComplete; +import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.LG_NOM_LONGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; +import org.apache.datasketches.theta2.ConcurrentHeapQuickSelectSketchTest.SharedLocal; +import org.apache.datasketches.thetacommon.HashOperations; +import org.apache.datasketches.thetacommon.ThetaUtil; +import org.testng.annotations.Test; + +/** + * @author eshcar + */ +public class ConcurrentDirectQuickSelectSketchTest { + private static final long SEED = ThetaUtil.DEFAULT_UPDATE_SEED; + + @Test + public void checkDirectCompactConversion() { + int lgK = 9; + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); + assertTrue(sl.shared instanceof ConcurrentDirectQuickSelectSketch); + assertTrue(sl.shared.compact().isCompact()); + } + + @Test + public void checkHeapifyMemoryEstimating() { + int lgK = 9; + int k = 1 << lgK; + int u = 2*k; + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); + + UpdateSketch shared = sl.shared; //off-heap + UpdateSketch local = sl.local; + + for (int i=0; i k); + + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); + UpdateSketch shared = sl.shared; + UpdateSketch local = sl.local; + + for (int i=0; i k); + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); + 
UpdateSketch shared = sl.shared; + UpdateSketch local = sl.local; + + assertEquals(local.getClass().getSimpleName(), "ConcurrentHeapThetaBuffer"); + assertFalse(local.isDirect()); + assertTrue(local.hasMemorySegment()); + + for (int i=0; i k); + } + + @Test + public void checkErrorBounds() { + int lgK = 9; + int k = 1 << lgK; + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); + UpdateSketch shared = sl.shared; + UpdateSketch local = sl.local; + + //Exact mode + for (int i = 0; i < k; i++ ) { local.update(i); } + waitForBgPropagationToComplete(shared); + + double est = local.getEstimate(); + double lb = local.getLowerBound(2); + double ub = local.getUpperBound(2); + assertEquals(est, ub, 0.0); + assertEquals(est, lb, 0.0); + + //Est mode + int u = 100*k; + for (int i = k; i < u; i++ ) { + local.update(i); + local.update(i); //test duplicate rejection + } + waitForBgPropagationToComplete(shared); + est = local.getEstimate(); + lb = local.getLowerBound(2); + ub = local.getUpperBound(2); + assertTrue(est <= ub); + assertTrue(est >= lb); + } + + + @Test + public void checkUpperAndLowerBounds() { + int lgK = 9; + int k = 1 << lgK; + int u = 2*k; + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); + UpdateSketch shared = sl.shared; + UpdateSketch local = sl.local; + + for (int i = 0; i < u; i++ ) { local.update(i); } + waitForBgPropagationToComplete(shared); + + double est = local.getEstimate(); + double ub = local.getUpperBound(1); + double lb = local.getLowerBound(1); + assertTrue(ub > est); + assertTrue(lb < est); + } + + @Test + public void checkRebuild() { + int lgK = 9; + int k = 1 << lgK; + int u = 4*k; + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); + UpdateSketch shared = sl.shared; + UpdateSketch local = sl.local; + + assertTrue(local.isEmpty()); + + for (int i = 0; i< u; i++) { local.update(i); } + waitForBgPropagationToComplete(shared); + + assertFalse(local.isEmpty()); + 
assertTrue(local.getEstimate() > 0.0); + assertTrue(shared.getRetainedEntries(false) >= k); + + shared.rebuild(); + assertEquals(shared.getRetainedEntries(false), k); + assertEquals(shared.getRetainedEntries(true), k); + local.rebuild(); + assertEquals(shared.getRetainedEntries(false), k); + assertEquals(shared.getRetainedEntries(true), k); + } + + @Test + public void checkResetAndStartingSubMultiple() { + int lgK = 9; + int k = 1 << lgK; + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); + UpdateSketch shared = sl.shared; + UpdateSketch local = sl.local; + + assertTrue(local.isEmpty()); + + int u = 4*k; + for (int i = 0; i< u; i++) { local.update(i); } + waitForBgPropagationToComplete(shared); + + assertFalse(local.isEmpty()); + assertTrue(shared.getRetainedEntries(false) >= k); + assertTrue(local.getThetaLong() < Long.MAX_VALUE); + + shared.reset(); + local.reset(); + assertTrue(local.isEmpty()); + assertEquals(shared.getRetainedEntries(false), 0); + assertEquals(local.getEstimate(), 0.0, 0.0); + assertEquals(local.getThetaLong(), Long.MAX_VALUE); + } + + @Test + public void checkExactModeMemoryArr() { + int lgK = 12; + int k = 1 << lgK; + int u = k; + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); + UpdateSketch shared = sl.shared; + UpdateSketch local = sl.local; + assertTrue(local.isEmpty()); + + for (int i = 0; i< u; i++) { local.update(i); } + waitForBgPropagationToComplete(shared); + + assertEquals(local.getEstimate(), u, 0.0); + assertEquals(shared.getRetainedEntries(false), u); + } + + @Test + public void checkEstModeMemoryArr() { + int lgK = 12; + int k = 1 << lgK; + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); + UpdateSketch shared = sl.shared; + UpdateSketch local = sl.local; + assertTrue(local.isEmpty()); + + int u = 3*k; + for (int i = 0; i< u; i++) { local.update(i); } + waitForBgPropagationToComplete(shared); + + double est = local.getEstimate(); + 
assertTrue((est < (u * 1.05)) && (est > (u * 0.95))); + assertTrue(shared.getRetainedEntries(false) >= k); + } + + @Test + public void checkEstModeNativeMemory() { + int lgK = 12; + int k = 1 << lgK; + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); + UpdateSketch shared = sl.shared; + UpdateSketch local = sl.local; + assertTrue(local.isEmpty()); + + int u = 3*k; + for (int i = 0; i< u; i++) { local.update(i); } + waitForBgPropagationToComplete(shared); + double est = local.getEstimate(); + assertTrue((est < (u * 1.05)) && (est > (u * 0.95))); + assertTrue(shared.getRetainedEntries(false) >= k); + } + + @Test + public void checkConstructReconstructFromMemory() { + int lgK = 12; + int k = 1 << lgK; + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); + UpdateSketch shared = sl.shared; + UpdateSketch local = sl.local; + + assertTrue(local.isEmpty()); + int u = 3*k; + + for (int i = 0; i< u; i++) { local.update(i); } //force estimation + waitForBgPropagationToComplete(shared); + + double est1 = local.getEstimate(); + int count1 = shared.getRetainedEntries(false); + assertTrue((est1 < (u * 1.05)) && (est1 > (u * 0.95))); + assertTrue(count1 >= k); + + byte[] serArr; + double est2; + + serArr = shared.toByteArray(); + MemorySegment seg = MemorySegment.ofArray(serArr); + UpdateSketch recoveredShared = Sketches.wrapUpdateSketch(seg); + + //reconstruct to Native/Direct + final int bytes = Sketch.getMaxUpdateSketchBytes(k); + final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); + shared = sl.bldr.buildSharedFromSketch(recoveredShared, wseg); + UpdateSketch local2 = sl.bldr.buildLocal(shared); + est2 = local2.getEstimate(); + + assertEquals(est2, est1, 0.0); + } + + @Test + public void checkNullMemory() { + UpdateSketchBuilder bldr = new UpdateSketchBuilder(); + final UpdateSketch sk = bldr.build(); + for (int i = 0; i < 1000; i++) { sk.update(i); } + final UpdateSketch shared = 
bldr.buildSharedFromSketch(sk, null); + assertEquals(shared.getRetainedEntries(true), 1000); + assertFalse(shared.hasMemorySegment()); + } + + //checks Alex's bug where lgArrLongs > lgNomLongs +1. + @Test + public void checkResizeInBigMem() { + int lgK = 14; + int u = 1 << 20; + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, SEED, useSeg, true, 8); //seg is 8X larger than needed + UpdateSketch local = sl.local; + + for (int i = 0; i < u; i++) { local.update(i); } + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkConstructorKtooSmall() { + int lgK = 3; + boolean useSeg = true; + new SharedLocal(lgK, lgK, useSeg); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkConstructorMemTooSmall() { + int lgK = 4; + int k = 1 << lgK; + MemorySegment wseg = MemorySegment.ofArray(new byte[k/2]); + UpdateSketchBuilder bldr = new UpdateSketchBuilder(); + bldr.setLogNominalEntries(lgK); + bldr.buildShared(wseg); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkHeapifyIllegalFamilyID_heapify() { + int lgK = 9; + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); + sl.wseg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Family ID byte + //try to heapify the corrupted seg + Sketch.heapify(sl.wseg); //catch in Sketch.constructHeapSketch + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkBadLgNomLongs() { + int lgK = 4; + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); + sl.wseg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte) 3); //Corrupt LgNomLongs byte + DirectQuickSelectSketch.writableWrap(sl.wseg, ThetaUtil.DEFAULT_UPDATE_SEED); + } + + @Test + public void checkBackgroundPropagation() { + int lgK = 4; + int k = 1 << lgK; + int u = 10*k; + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); + UpdateSketch shared = sl.shared; + UpdateSketch 
local = sl.local; + assertTrue(local.isEmpty()); + ConcurrentHeapThetaBuffer sk1 = (ConcurrentHeapThetaBuffer)local; //for internal checks + + int i = 0; + for (; i< k; i++) { + local.update(i); + } + waitForBgPropagationToComplete(shared); + assertFalse(local.isEmpty()); + assertTrue(local.getEstimate() > 0.0); + long theta1 = ((ConcurrentSharedThetaSketch)shared).getVolatileTheta(); + + for (; i< u; i++) { + local.update(i); + } + waitForBgPropagationToComplete(shared); + + long theta2 = ((ConcurrentSharedThetaSketch)shared).getVolatileTheta(); + int entries = shared.getRetainedEntries(false); + assertTrue((entries > k) || (theta2 < theta1), + "entries="+entries+" k="+k+" theta1="+theta1+" theta2="+theta2); + + shared.rebuild(); + assertEquals(shared.getRetainedEntries(false), k); + assertEquals(shared.getRetainedEntries(true), k); + sk1.rebuild(); + assertEquals(shared.getRetainedEntries(false), k); + assertEquals(shared.getRetainedEntries(true), k); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkBadSerVer() { + int lgK = 9; + int k = 1 << lgK; + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); + UpdateSketch shared = sl.shared; + UpdateSketch local = sl.local; + assertTrue(local.isEmpty()); + + for (int i = 0; i< k; i++) { local.update(i); } + waitForBgPropagationToComplete(shared); + + assertFalse(local.isEmpty()); + assertEquals(local.getEstimate(), k, 0.0); + assertEquals(shared.getRetainedEntries(false), k); + + sl.wseg.set(JAVA_BYTE, SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte + Sketch.wrap(sl.wseg); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkWrapIllegalFamilyID_wrap() { + int lgK = 9; + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); + + sl.wseg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte + //try to wrap the corrupted seg + Sketch.wrap(sl.wseg); //catch in Sketch.constructDirectSketch + 
} + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkWrapIllegalFamilyID_direct() { + int lgK = 9; + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); + + sl.wseg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte + //try to wrap the corrupted seg + DirectQuickSelectSketch.writableWrap(sl.wseg, ThetaUtil.DEFAULT_UPDATE_SEED); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkHeapifySeedConflict() { + int lgK = 9; + long seed1 = 1021; + long seed2 = ThetaUtil.DEFAULT_UPDATE_SEED; + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, seed1, useSeg, true, 1); + UpdateSketch shared = sl.shared; + + MemorySegment srcSeg = MemorySegment.ofArray(shared.toByteArray()).asReadOnly(); + Sketch.heapify(srcSeg, seed2); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkCorruptLgNomLongs() { + int lgK = 4; + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); + + sl.wseg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte)2); //corrupt + Sketch.heapify(sl.wseg, ThetaUtil.DEFAULT_UPDATE_SEED); + } + + @Test(expectedExceptions = UnsupportedOperationException.class) + public void checkIllegalHashUpdate() { + int lgK = 4; + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); + UpdateSketch shared = sl.shared; + shared.hashUpdate(1); + } + + @Test + public void printlnTest() { + println("PRINTING: "+this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(String s) { + //System.out.println(s); //disable here + } + + private static void checkMemoryDirectProxyMethods(Sketch local, Sketch shared) { + assertEquals( + local.hasMemorySegment(), + shared.hasMemorySegment()); + assertEquals(local.isDirect(), shared.isDirect()); + } + + //Does not check hasMemorySegment(), isDirect() + private static void checkOtherProxyMethods(Sketch local, Sketch shared) 
{ + assertEquals(local.getCompactBytes(), shared.getCompactBytes()); + assertEquals(local.getCurrentBytes(), shared.getCurrentBytes()); + assertEquals(local.getEstimate(), shared.getEstimate()); + assertEquals(local.getLowerBound(2), shared.getLowerBound(2)); + assertEquals(local.getUpperBound(2), shared.getUpperBound(2)); + assertEquals(local.isEmpty(), shared.isEmpty()); + assertEquals(local.isEstimationMode(), shared.isEstimationMode()); + } + +} diff --git a/src/test/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketchTest.java new file mode 100644 index 000000000..4685639ec --- /dev/null +++ b/src/test/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketchTest.java @@ -0,0 +1,745 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.LG_NOM_LONGS_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import java.lang.foreign.MemorySegment; +import java.util.Arrays; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; +import org.apache.datasketches.thetacommon.ThetaUtil; +import org.testng.annotations.Test; + +/** + * @author eshcar + */ +public class ConcurrentHeapQuickSelectSketchTest { + + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkBadSerVer() { + int lgK = 9; + int k = 1 << lgK; + int u = k; + SharedLocal sl = new SharedLocal(lgK); + UpdateSketch shared = sl.shared; + UpdateSketch local = sl.local; + + assertTrue(local.isEmpty()); + + for (int i = 0; i< u; i++) { + local.update(i); + } + waitForBgPropagationToComplete(shared); + + assertFalse(local.isEmpty()); + assertEquals(local.getEstimate(), u, 0.0); + assertEquals(shared.getRetainedEntries(false), u); + + byte[] serArr = shared.toByteArray(); + MemorySegment seg = MemorySegment.ofArray(serArr); + Sketch sk = Sketch.heapify(seg, sl.seed); + assertTrue(sk instanceof HeapQuickSelectSketch); //Intentional promotion to Parent + + seg.set(JAVA_BYTE, SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte + Sketch.heapify(seg, sl.seed); + } + + @Test + public void checkPropagationNotOrdered() { + int lgK = 8; + int k = 1 << lgK; + int u = 200*k; + SharedLocal sl = new SharedLocal(lgK, 4, false, false); + UpdateSketch shared = sl.shared; + UpdateSketch local = sl.local; + 
assertEquals((sl.bldr.getLocalLgNominalEntries()), 4); + assertTrue(local.isEmpty()); + + for (int i = 0; i < u; i++) { + local.update(i); + } + waitForBgPropagationToComplete(shared); + + assertFalse(local.isEmpty()); + assertTrue(shared.getRetainedEntries(true) <= u); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkIllegalSketchID_UpdateSketch() { + int lgK = 9; + int k = 1 << lgK; + int u = k; + SharedLocal sl = new SharedLocal(lgK); + + UpdateSketch shared = sl.shared; + UpdateSketch local = sl.local; + assertTrue(local.isEmpty()); + assertTrue(shared instanceof ConcurrentHeapQuickSelectSketch); + for (int i = 0; i< u; i++) { + local.update(i); + } + assertTrue(shared.compact().isCompact()); + + assertFalse(local.isEmpty()); + assertEquals(local.getEstimate(), u, 0.0); + assertEquals(shared.getRetainedEntries(false), u); + byte[] byteArray = shared.toByteArray(); + MemorySegment seg = MemorySegment.ofArray(byteArray); + seg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte + + //try to heapify the corrupted seg + Sketch.heapify(seg, sl.seed); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkHeapifySeedConflict() { + int lgK = 9; + long seed = 1021; + long seed2 = ThetaUtil.DEFAULT_UPDATE_SEED; + SharedLocal sl = new SharedLocal(lgK, lgK, seed); + byte[] byteArray = sl.shared.toByteArray(); + MemorySegment srcSeg = MemorySegment.ofArray(byteArray); + Sketch.heapify(srcSeg, seed2); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkHeapifyCorruptLgNomLongs() { + int lgK = 4; + SharedLocal sl = new SharedLocal(lgK); + byte[] serArr = sl.shared.toByteArray(); + MemorySegment srcSeg = MemorySegment.ofArray(serArr); + srcSeg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte)2); //corrupt + Sketch.heapify(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + } + + @Test(expectedExceptions = UnsupportedOperationException.class) + public void 
checkIllegalHashUpdate() { + int lgK = 4; + SharedLocal sl = new SharedLocal(lgK); + sl.shared.hashUpdate(1); + } + + @Test + public void checkHeapifyByteArrayExact() { + int lgK = 9; + int k = 1 << lgK; + int u = k; + SharedLocal sl = new SharedLocal(lgK); + UpdateSketch shared = sl.shared; + UpdateSketch local = sl.local; + + for (int i=0; i k); + // it could be exactly k, but in this case must be greater + } + + @Test + public void checkErrorBounds() { + int lgK = 9; + int k = 1 << lgK; + SharedLocal sl = new SharedLocal(lgK); + UpdateSketch local = sl.local; + UpdateSketch shared = sl.shared; + + //Exact mode + //int limit = (int)ConcurrentSharedThetaSketch.computeExactLimit(lim, 0); //? ask Eshcar + for (int i = 0; i < k; i++ ) { + local.update(i); + } + + double est = local.getEstimate(); + double lb = local.getLowerBound(2); + double ub = local.getUpperBound(2); + assertEquals(est, ub, 0.0); + assertEquals(est, lb, 0.0); + + //Est mode + int u = 2 * k; + for (int i = k; i < u; i++ ) { + local.update(i); + local.update(i); //test duplicate rejection + } + waitForBgPropagationToComplete(shared); + est = local.getEstimate(); + lb = local.getLowerBound(2); + ub = local.getUpperBound(2); + assertTrue(est <= ub); + assertTrue(est >= lb); + } + + @Test + public void checkRebuild() { + int lgK = 4; + int k = 1 << lgK; + SharedLocal sl = new SharedLocal(lgK); + //must build shared first + UpdateSketch shared = sl.shared; + UpdateSketch local = sl.local; + + assertTrue(local.isEmpty()); + int t = ((ConcurrentHeapThetaBuffer)local).getHashTableThreshold(); + + for (int i = 0; i< t; i++) { + local.update(i); + } + waitForBgPropagationToComplete(shared); + + assertFalse(local.isEmpty()); + assertTrue(local.getEstimate() > 0.0); + assertTrue(shared.getRetainedEntries(false) > k); + + shared.rebuild(); + assertEquals(shared.getRetainedEntries(false), k); + assertEquals(shared.getRetainedEntries(true), k); + shared.rebuild(); + assertEquals(shared.getRetainedEntries(false), 
k); + assertEquals(shared.getRetainedEntries(true), k); + } + + @Test + public void checkBuilder() { + int lgK = 4; + SharedLocal sl = new SharedLocal(lgK); + assertEquals(sl.bldr.getLocalLgNominalEntries(), lgK); + assertEquals(sl.bldr.getLgNominalEntries(), lgK); + println(sl.bldr.toString()); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkBuilderSmallNominal() { + int lgK = 2; //too small + new SharedLocal(lgK); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkNegativeHashes() { + int lgK = 9; + SharedLocal sl = new SharedLocal(lgK); + UpdateSketch local = sl.local; + local.hashUpdate(-1L); + } + + @Test + public void checkResetAndStartingSubMultiple() { + int lgK = 9; + int k = 1 << lgK; + SharedLocal sl = new SharedLocal(lgK); + UpdateSketch shared = sl.shared; + UpdateSketch local = sl.local; + + assertTrue(local.isEmpty()); + int u = 3*k; + + for (int i = 0; i< u; i++) { local.update(i); } + waitForBgPropagationToComplete(shared); + + assertFalse(local.isEmpty()); + assertTrue(shared.getRetainedEntries(false) >= k); + assertTrue(local.getThetaLong() < Long.MAX_VALUE); + + shared.reset(); + local.reset(); + assertTrue(local.isEmpty()); + assertEquals(shared.getRetainedEntries(false), 0); + assertEquals(local.getEstimate(), 0.0, 0.0); + assertEquals(local.getThetaLong(), Long.MAX_VALUE); + } + + @Test + public void checkDQStoCompactEmptyForms() { + int lgK = 9; + SharedLocal sl = new SharedLocal(lgK); + UpdateSketch local = sl.local; + UpdateSketch shared = sl.shared; + + //empty + local.toString(false, true, 0, false); //exercise toString + assertTrue(local instanceof ConcurrentHeapThetaBuffer); + double localEst = local.getEstimate(); + double localLB = local.getLowerBound(2); + double uskUB = local.getUpperBound(2); + assertFalse(local.isEstimationMode()); + + int bytes = local.getCompactBytes(); + assertEquals(bytes, 8); + byte[] segArr2 = new byte[bytes]; + MemorySegment seg2 = 
MemorySegment.ofArray(segArr2); + + CompactSketch csk2 = shared.compact(false, seg2); + assertEquals(csk2.getEstimate(), localEst); + assertEquals(csk2.getLowerBound(2), localLB); + assertEquals(csk2.getUpperBound(2), uskUB); + assertTrue(csk2.isEmpty()); + assertFalse(csk2.isEstimationMode()); + assertTrue(csk2.isOrdered()); + + CompactSketch csk3 = shared.compact(true, seg2); + csk3.toString(false, true, 0, false); + csk3.toString(); + assertEquals(csk3.getEstimate(), localEst); + assertEquals(csk3.getLowerBound(2), localLB); + assertEquals(csk3.getUpperBound(2), uskUB); + assertTrue(csk3.isEmpty()); + assertFalse(csk3.isEstimationMode()); + assertTrue(csk3.isOrdered()); //assert on csk3, the compact(true, ...) result + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkMinReqBytes() { + int lgK = 4; + int k = 1 << lgK; + SharedLocal sl = new SharedLocal(lgK); + for (int i = 0; i < (4 * k); i++) { sl.local.update(i); } + waitForBgPropagationToComplete(sl.shared); + byte[] byteArray = sl.shared.toByteArray(); + byte[] badBytes = Arrays.copyOfRange(byteArray, 0, 24); //corrupt no. 
bytes + MemorySegment seg = MemorySegment.ofArray(badBytes).asReadOnly(); + Sketch.heapify(seg); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkThetaAndLgArrLongs() { + int lgK = 4; + int k = 1 << lgK; + SharedLocal sl = new SharedLocal(lgK); + for (int i = 0; i < k; i++) { sl.local.update(i); } + waitForBgPropagationToComplete(sl.shared); + byte[] badArray = sl.shared.toByteArray(); + MemorySegment seg = MemorySegment.ofArray(badArray); + PreambleUtil.insertLgArrLongs(seg, 4); //corrupt + PreambleUtil.insertThetaLong(seg, Long.MAX_VALUE / 2); //corrupt + Sketch.heapify(seg); + } + + @Test + public void checkFamily() { + SharedLocal sl = new SharedLocal(); + UpdateSketch local = sl.local; + assertEquals(local.getFamily(), Family.QUICKSELECT); + } + + @Test + public void checkBackgroundPropagation() { + int lgK = 4; + int k = 1 << lgK; + int u = 5*k; + SharedLocal sl = new SharedLocal(lgK); + assertTrue(sl.local.isEmpty()); + + int i = 0; + for (; i < k; i++) { sl.local.update(i); } //exact + waitForBgPropagationToComplete(sl.shared); + + assertFalse(sl.local.isEmpty()); + assertTrue(sl.local.getEstimate() > 0.0); + long theta1 = sl.sharedIf.getVolatileTheta(); + + for (; i < u; i++) { sl.local.update(i); } //continue, make it estimating + waitForBgPropagationToComplete(sl.shared); + + long theta2 = sl.sharedIf.getVolatileTheta(); + int entries = sl.shared.getRetainedEntries(false); + assertTrue((entries > k) || (theta2 < theta1), + "entries= " + entries + " k= " + k + " theta1= " + theta1 + " theta2= " + theta2); + + sl.shared.rebuild(); + assertEquals(sl.shared.getRetainedEntries(false), k); + assertEquals(sl.shared.getRetainedEntries(true), k); + sl.local.rebuild(); + assertEquals(sl.shared.getRetainedEntries(false), k); + assertEquals(sl.shared.getRetainedEntries(true), k); + } + + @Test + public void checkBuilderExceptions() { + UpdateSketchBuilder bldr = new UpdateSketchBuilder(); + try { + bldr.setNominalEntries(8); + 
fail(); + } catch (SketchesArgumentException e) { } + try { + bldr.setLocalNominalEntries(8); + fail(); + } catch (SketchesArgumentException e) { } + try { + bldr.setLocalLogNominalEntries(3); + fail(); + } catch (SketchesArgumentException e) { } + bldr.setNumPoolThreads(4); + assertEquals(bldr.getNumPoolThreads(), 4); + bldr.setMaxConcurrencyError(0.04); + assertEquals(bldr.getMaxConcurrencyError(), 0.04); + bldr.setMaxNumLocalThreads(4); + assertEquals(bldr.getMaxNumLocalThreads(), 4); + } + + @Test(expectedExceptions = UnsupportedOperationException.class) + public void checkToByteArray() { + SharedLocal sl = new SharedLocal(); + sl.local.toByteArray(); + } + + @Test + public void printlnTest() { + println("PRINTING: "+this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(String s) { + //System.out.println(s); //disable here + } + + static class SharedLocal { + static final long DefaultSeed = ThetaUtil.DEFAULT_UPDATE_SEED; + final UpdateSketch shared; + final ConcurrentSharedThetaSketch sharedIf; + final UpdateSketch local; + final int sharedLgK; + final int localLgK; + final long seed; + final MemorySegment wseg; + final UpdateSketchBuilder bldr = new UpdateSketchBuilder(); + + SharedLocal() { + this(9, 9, DefaultSeed, false, true, 1); + } + + SharedLocal(int lgK) { + this(lgK, lgK, DefaultSeed, false, true, 1); + } + + SharedLocal(int sharedLgK, int localLgK) { + this(sharedLgK, localLgK, DefaultSeed, false, true, 1); + } + + SharedLocal(int sharedLgK, int localLgK, long seed) { + this(sharedLgK, localLgK, seed, false, true, 1); + } + + SharedLocal(int sharedLgK, int localLgK, boolean useSeg) { + this(sharedLgK, localLgK, DefaultSeed, useSeg, true, 1); + } + + SharedLocal(int sharedLgK, int localLgK, boolean useSeg, boolean ordered) { + this(sharedLgK, localLgK, DefaultSeed, useSeg, ordered, 1); + } + + SharedLocal(int sharedLgK, int localLgK, long seed, boolean useSeg, boolean ordered, int segMult) { + this.sharedLgK = 
sharedLgK; + this.localLgK = localLgK; + this.seed = seed; + if (useSeg) { + int bytes = (((4 << sharedLgK) * segMult) + (Family.QUICKSELECT.getMaxPreLongs())) << 3; + wseg = MemorySegment.ofArray(new byte[bytes]); + } else { + wseg = null; + } + bldr.setLogNominalEntries(sharedLgK); + bldr.setLocalLogNominalEntries(localLgK); + bldr.setPropagateOrderedCompact(ordered); + bldr.setSeed(this.seed); + shared = bldr.buildShared(wseg); + local = bldr.buildLocal(shared); + sharedIf = (ConcurrentSharedThetaSketch) shared; + } + } + + static void waitForBgPropagationToComplete(UpdateSketch shared) { + try { + Thread.sleep(10); + } catch (InterruptedException e) { + e.printStackTrace(); + } + ConcurrentSharedThetaSketch csts = (ConcurrentSharedThetaSketch)shared; + csts.awaitBgPropagationTermination(); + ConcurrentPropagationService.resetExecutorService(Thread.currentThread().getId()); + csts.initBgPropagationService(); + } + +} diff --git a/src/test/java/org/apache/datasketches/theta2/JaccardSimilarityTest.java b/src/test/java/org/apache/datasketches/theta2/JaccardSimilarityTest.java new file mode 100644 index 000000000..5d0e42176 --- /dev/null +++ b/src/test/java/org/apache/datasketches/theta2/JaccardSimilarityTest.java @@ -0,0 +1,248 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import static org.apache.datasketches.theta2.JaccardSimilarity.exactlyEqual; +import static org.apache.datasketches.theta2.JaccardSimilarity.jaccard; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import org.testng.annotations.Test; + +/** + * @author Lee Rhodes + */ +public class JaccardSimilarityTest { + + @Test + public void checkNullsEmpties() { + int minK = 1 << 12; + double threshold = 0.95; + println("Check nulls & empties, minK: " + minK + "\t Th: " + threshold); + //check both null + double[] jResults = jaccard(null, null); + boolean state = jResults[1] > threshold; + println("null \t null:\t" + state + "\t" + jaccardString(jResults)); + assertFalse(state); + + state = exactlyEqual(null, null); + assertFalse(state); + + UpdateSketch measured = UpdateSketch.builder().setNominalEntries(minK).build(); + UpdateSketch expected = UpdateSketch.builder().setNominalEntries(minK).build(); + + //check both empty + jResults = jaccard(measured, expected); + state = jResults[1] > threshold; + println("empty\tempty:\t" + state + "\t" + jaccardString(jResults)); + assertTrue(state); + + state = exactlyEqual(measured, expected); + assertTrue(state); + + state = exactlyEqual(measured, measured); + assertTrue(state); + + //adjust one + expected.update(1); + jResults = jaccard(measured, expected); + state = jResults[1] > threshold; + println("empty\t 1:\t" + state + "\t" + jaccardString(jResults)); + assertFalse(state); + + state = exactlyEqual(measured, expected); + assertFalse(state); + + println(""); + } + + @Test + public void checkExactMode() { + int k = 1 << 12; + int u = k; + double threshold = 0.9999; + println("Exact Mode, minK: " + k + "\t Th: " + threshold); + + UpdateSketch measured = UpdateSketch.builder().setNominalEntries(k).build(); + UpdateSketch expected = 
UpdateSketch.builder().setNominalEntries(k).build(); + + for (int i = 0; i < (u-1); i++) { //one short + measured.update(i); + expected.update(i); + } + + double[] jResults = jaccard(measured, expected); + boolean state = jResults[1] > threshold; + println(state + "\t" + jaccardString(jResults)); + assertTrue(state); + + state = exactlyEqual(measured, expected); + assertTrue(state); + + measured.update(u-1); //now exactly k entries + expected.update(u); //now exactly k entries but differs by one + jResults = jaccard(measured, expected); + state = jResults[1] > threshold; + println(state + "\t" + jaccardString(jResults)); + assertFalse(state); + + state = exactlyEqual(measured, expected); + assertFalse(state); + + println(""); + } + + @Test + public void checkEstMode() { + int k = 1 << 12; + int u = 1 << 20; + double threshold = 0.9999; + println("Estimation Mode, minK: " + k + "\t Th: " + threshold); + + UpdateSketch measured = UpdateSketch.builder().setNominalEntries(k).build(); + UpdateSketch expected = UpdateSketch.builder().setNominalEntries(k).build(); + + for (int i = 0; i < u; i++) { + measured.update(i); + expected.update(i); + } + + double[] jResults = jaccard(measured, expected); + boolean state = jResults[1] > threshold; + println(state + "\t" + jaccardString(jResults)); + assertTrue(state); + + state = exactlyEqual(measured, expected); + assertTrue(state); + + for (int i = u; i < (u + 50); i++) { //empirically determined + measured.update(i); + } + + jResults = jaccard(measured, expected); + state = jResults[1] >= threshold; + println(state + "\t" + jaccardString(jResults)); + assertFalse(state); + + state = exactlyEqual(measured, expected); + assertFalse(state); + + println(""); + } + + /** + * Enable printing on this test and you will see that the distribution is pretty tight, + * about +/- 0.7%, which is pretty good since the accuracy of the underlying sketch is about + * +/- 1.56%. 
+ */ + @Test + public void checkSimilarity() { + int minK = 1 << 12; + int u1 = 1 << 20; + int u2 = (int) (u1 * 0.95); + double threshold = 0.943; + println("Estimation Mode, minK: " + minK + "\t Th: " + threshold); + + UpdateSketch expected = UpdateSketch.builder().setNominalEntries(minK).build(); + UpdateSketch measured = UpdateSketch.builder().setNominalEntries(minK).build(); + + for (int i = 0; i < u1; i++) { + expected.update(i); + } + + for (int i = 0; i < u2; i++) { + measured.update(i); + } + + double[] jResults = JaccardSimilarity.jaccard(measured, expected); + boolean state = JaccardSimilarity.similarityTest(measured, expected, threshold); + println(state + "\t" + jaccardString(jResults)); + assertTrue(state); + //check identity case + state = JaccardSimilarity.similarityTest(measured, measured, threshold); + assertTrue(state); + } + + /** + * Enable printing on this test and you will see that the distribution is much looser, + * about +/- 14%. This is due to the fact that intersections lose accuracy as the ratio of + * intersection to the union becomes a small number. 
+ */ + @Test + public void checkDissimilarity() { + int minK = 1 << 12; + int u1 = 1 << 20; + int u2 = (int) (u1 * 0.05); + double threshold = 0.061; + println("Estimation Mode, minK: " + minK + "\t Th: " + threshold); + + UpdateSketch expected = UpdateSketch.builder().setNominalEntries(minK).build(); + UpdateSketch measured = UpdateSketch.builder().setNominalEntries(minK).build(); + + for (int i = 0; i < u1; i++) { + expected.update(i); + } + + for (int i = 0; i < u2; i++) { + measured.update(i); + } + + double[] jResults = JaccardSimilarity.jaccard(measured, expected); + boolean state = JaccardSimilarity.dissimilarityTest(measured, expected, threshold); + println(state + "\t" + jaccardString(jResults)); + assertTrue(state); + } + + private static String jaccardString(double[] jResults) { + double lb = jResults[0]; + double est = jResults[1]; + double ub = jResults[2]; + return lb + "\t" + est + "\t" + ub + "\t" + ((lb/est) - 1.0) + "\t" + ((ub/est) - 1.0); + } + + @Test + public void checkMinK() { + UpdateSketch skA = UpdateSketch.builder().build(); //4096 + UpdateSketch skB = UpdateSketch.builder().build(); //4096 + skA.update(1); + skB.update(1); + double[] result = JaccardSimilarity.jaccard(skA, skB); + println(result[0] + ", " + result[1] + ", " + result[2]); + for (int i = 1; i < 4096; i++) { + skA.update(i); + skB.update(i); + } + result = JaccardSimilarity.jaccard(skA, skB); + println(result[0] + ", " + result[1] + ", " + result[2]); + } + + @Test + public void printlnTest() { + println("PRINTING: "+this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(String s) { + //System.out.println(s); //disable here + } + +} diff --git a/src/test/java/org/apache/datasketches/theta2/ReadOnlyMemoryTest.java b/src/test/java/org/apache/datasketches/theta2/ReadOnlyMemoryTest.java new file mode 100644 index 000000000..ab0ed1495 --- /dev/null +++ b/src/test/java/org/apache/datasketches/theta2/ReadOnlyMemoryTest.java @@ -0,0 +1,211 @@ 
+/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import java.lang.foreign.MemorySegment; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import org.apache.datasketches.common.SketchesReadOnlyException; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class ReadOnlyMemoryTest { + + @Test + public void wrapAndTryUpdatingUpdateSketch() { + UpdateSketch updateSketch = UpdateSketch.builder().build(); + updateSketch.update(1); + MemorySegment seg = MemorySegment.ofBuffer( + ByteBuffer.wrap(updateSketch.toByteArray()).asReadOnlyBuffer().order(ByteOrder.nativeOrder())); + UpdateSketch sketch = (UpdateSketch) Sketch.wrap(seg); + assertEquals(sketch.getEstimate(), 1.0); + assertTrue(seg.isReadOnly()); + + boolean thrown = false; + try { + sketch.update(2); + } catch (SketchesReadOnlyException e) { + thrown = true; + } + Assert.assertTrue(thrown); + } + + @Test + public void wrapCompactUnorderedSketch() { + UpdateSketch updateSketch = UpdateSketch.builder().build(); + updateSketch.update(1); + MemorySegment 
seg = MemorySegment.ofBuffer( + ByteBuffer.wrap(updateSketch.compact(false, null).toByteArray()).asReadOnlyBuffer().order(ByteOrder.nativeOrder())); + Sketch sketch = Sketch.wrap(seg); + assertEquals(sketch.getEstimate(), 1.0); + assertTrue(seg.isReadOnly()); + } + + @Test + public void wrapCompactOrderedSketch() { + UpdateSketch updateSketch = UpdateSketch.builder().build(); + updateSketch.update(1); + MemorySegment seg = MemorySegment.ofBuffer(ByteBuffer.wrap(updateSketch.compact().toByteArray()) + .asReadOnlyBuffer().order(ByteOrder.nativeOrder())); + Sketch sketch = Sketch.wrap(seg); + assertEquals(sketch.getEstimate(), 1.0); + assertTrue(seg.isReadOnly()); + } + + @Test + public void heapifyUpdateSketch() { + UpdateSketch us1 = UpdateSketch.builder().build(); + us1.update(1); + MemorySegment seg = MemorySegment.ofBuffer( + ByteBuffer.wrap(us1.toByteArray()).asReadOnlyBuffer().order(ByteOrder.nativeOrder())); + // downcasting is not recommended, for testing only + UpdateSketch us2 = (UpdateSketch) Sketch.heapify(seg); + us2.update(2); + assertEquals(us2.getEstimate(), 2.0); + assertTrue(seg.isReadOnly()); + } + + @Test + public void heapifyCompactUnorderedSketch() { + UpdateSketch updateSketch = UpdateSketch.builder().build(); + updateSketch.update(1); + MemorySegment seg = MemorySegment.ofBuffer( + ByteBuffer.wrap(updateSketch.compact(false, null).toByteArray()).asReadOnlyBuffer().order(ByteOrder.nativeOrder())); + Sketch sketch = Sketch.heapify(seg); + assertEquals(sketch.getEstimate(), 1.0); + assertTrue(seg.isReadOnly()); + } + + @Test + public void heapifyCompactOrderedSketch() { + UpdateSketch updateSketch = UpdateSketch.builder().build(); + updateSketch.update(1); + MemorySegment seg = MemorySegment.ofBuffer( + ByteBuffer.wrap(updateSketch.compact().toByteArray()).asReadOnlyBuffer().order(ByteOrder.nativeOrder())); + Sketch sketch = Sketch.heapify(seg); + assertEquals(sketch.getEstimate(), 1.0); + assertTrue(seg.isReadOnly()); + } + + @Test + public void 
heapifyUnion() { + Union u1 = SetOperation.builder().buildUnion(); + u1.update(1); + MemorySegment seg = MemorySegment.ofBuffer( + ByteBuffer.wrap(u1.toByteArray()).asReadOnlyBuffer().order(ByteOrder.nativeOrder())); + Union u2 = (Union) SetOperation.heapify(seg); + u2.update(2); + Assert.assertEquals(u2.getResult().getEstimate(), 2.0); + assertTrue(seg.isReadOnly()); + } + + @Test + public void wrapAndTryUpdatingUnion() { + Union u1 = SetOperation.builder().buildUnion(); + u1.update(1); + MemorySegment seg = MemorySegment.ofBuffer( + ByteBuffer.wrap(u1.toByteArray()).asReadOnlyBuffer().order(ByteOrder.nativeOrder())); + + Union u2 = (Union) Sketches.wrapSetOperation(seg); + Union u3 = Sketches.wrapUnion(seg); + Assert.assertEquals(u2.getResult().getEstimate(), 1.0); + Assert.assertEquals(u3.getResult().getEstimate(), 1.0); + assertTrue(seg.isReadOnly()); + + try { + u2.update(2); + fail(); + } catch (SketchesReadOnlyException e) { + //expected + } + + try { + u3.update(2); + fail(); + } catch (SketchesReadOnlyException e) { + //expected + } + } + + @Test + public void heapifyIntersection() { + UpdateSketch us1 = UpdateSketch.builder().build(); + us1.update(1); + us1.update(2); + UpdateSketch us2 = UpdateSketch.builder().build(); + us2.update(2); + us2.update(3); + + Intersection i1 = SetOperation.builder().buildIntersection(); + i1.intersect(us1); + i1.intersect(us2); + MemorySegment seg = MemorySegment.ofBuffer( + ByteBuffer.wrap(i1.toByteArray()).asReadOnlyBuffer().order(ByteOrder.nativeOrder())); + Intersection i2 = (Intersection) SetOperation.heapify(seg); + i2.intersect(us1); + Assert.assertEquals(i2.getResult().getEstimate(), 1.0); + assertTrue(seg.isReadOnly()); + } + + @Test + public void wrapIntersection() { + UpdateSketch us1 = UpdateSketch.builder().build(); + us1.update(1); + us1.update(2); + UpdateSketch us2 = UpdateSketch.builder().build(); + us2.update(2); + us2.update(3); + + Intersection i1 = SetOperation.builder().buildIntersection(); + 
i1.intersect(us1); + i1.intersect(us2); + MemorySegment seg = MemorySegment.ofBuffer( + ByteBuffer.wrap(i1.toByteArray()).asReadOnlyBuffer().order(ByteOrder.nativeOrder())); + Intersection i2 = (Intersection) SetOperation.wrap(seg); + Assert.assertEquals(i2.getResult().getEstimate(), 1.0); + + boolean thrown = false; + try { + i2.intersect(us1); + } catch (SketchesReadOnlyException e) { + thrown = true; + } + Assert.assertTrue(thrown); + assertTrue(seg.isReadOnly()); + } + + @Test + public void printlnTest() { + println("PRINTING: "+this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(String s) { + //System.out.println(s); //disable here + } + +} diff --git a/src/test/java/org/apache/datasketches/theta2/SetOperationTest.java b/src/test/java/org/apache/datasketches/theta2/SetOperationTest.java new file mode 100644 index 000000000..02efffd75 --- /dev/null +++ b/src/test/java/org/apache/datasketches/theta2/SetOperationTest.java @@ -0,0 +1,438 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import static org.apache.datasketches.common.ResizeFactor.X4; +import static org.apache.datasketches.theta2.Sketch.getMaxUpdateSketchBytes; +import static org.apache.datasketches.thetacommon.HashOperations.minLgHashTableSize; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import java.lang.foreign.MemorySegment; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; +import org.apache.datasketches.thetacommon.ThetaUtil; +import org.testng.annotations.Test; + +/** + * @author Lee Rhodes + */ +public class SetOperationTest { + + @Test + public void checkBuilder() { + final int k = 2048; + final long seed = 1021; + + final UpdateSketch usk1 = UpdateSketch.builder().setSeed(seed).setNominalEntries(k).build(); + final UpdateSketch usk2 = UpdateSketch.builder().setSeed(seed).setNominalEntries(k).build(); + + for (int i=0; i Next, we recover the Union SetOp and the 3 sketches and the space for the result. Then + * recompute the union using a Union of the same size as the input sketches, where the end result + * will be an estimate. + */ + @Test + public void checkDirectUnionExample() { + //The first task is to compute how much direct memory we need and set the heap large enough. + //For the first trial, we will set the Union large enough for an exact result for THIS example. + final int sketchNomEntries = 1 << 14; //16K + int unionNomEntries = 1 << 15; //32K + final int[] heapLayout = getHeapLayout(sketchNomEntries, unionNomEntries); + + //This BB belongs to you and you always retain a link to it until you are completely + // done and then let java garbage collect it. 
+ //I use a heap backing array, because for this example it is easier to peek into it and + // see what is going on. + final byte[] backingArr = new byte[heapLayout[5]]; + final ByteBuffer heapBuf = ByteBuffer.wrap(backingArr).order(ByteOrder.nativeOrder()); + + // Attaches a MemorySegment object to the underlying memory of heapBuf. + // heapMem will have a Read/Write view of the complete backing memory of heapBuf (direct or not). + // Any R/W action from heapMem will be visible via heapBuf and vice versa. + // + // However, if you had created this MemorySegment directly in raw, off-heap "native" memory + // you would have the responsibility to close it when you are done. + // But, since it was allocated via BB, it closes it for you. + final MemorySegment heapMem = MemorySegment.ofBuffer(heapBuf); + + double result = directUnionTrial1(heapMem, heapLayout, sketchNomEntries, unionNomEntries); + println("1st est: "+result); + final int expected = sketchNomEntries*2; + assertEquals(result, expected, 0.0); //est must be exact. + + //For trial 2, we will use the same union space but use only part of it. 
+ unionNomEntries = 1 << 14; //16K + result = directUnionTrial2(heapMem, heapLayout, sketchNomEntries, unionNomEntries); + + //intentionally loose bounds + assertEquals(result, expected, expected*0.05); + println("2nd est: "+result); + println("Error %: "+(result/expected -1.0)*100); + } + + @Test + public void setOpsExample() { + println("Set Operations Example:"); + final int k = 4096; + final UpdateSketch skA = Sketches.updateSketchBuilder().setNominalEntries(k).build(); + final UpdateSketch skB = Sketches.updateSketchBuilder().setNominalEntries(k).build(); + final UpdateSketch skC = Sketches.updateSketchBuilder().setNominalEntries(k).build(); + + for (int i=1; i<=10; i++) { skA.update(i); } + for (int i=1; i<=20; i++) { skB.update(i); } + for (int i=6; i<=15; i++) { skC.update(i); } //overlapping set + + final Union union = Sketches.setOperationBuilder().setNominalEntries(k).buildUnion(); + union.union(skA); + union.union(skB); + // ... continue to iterate on the input sketches to union + + final CompactSketch unionSk = union.getResult(); //the result union sketch + println("A U B : "+unionSk.getEstimate()); //the estimate of the union + + //Intersection is similar + + final Intersection inter = Sketches.setOperationBuilder().buildIntersection(); + inter.intersect(unionSk); + inter.intersect(skC); + // ... 
continue to iterate on the input sketches to intersect + + final CompactSketch interSk = inter.getResult(); //the result intersection sketch + println("(A U B) ^ C: "+interSk.getEstimate()); //the estimate of the intersection + + //The AnotB operation is a little different as it is stateless: + + final AnotB aNotB = Sketches.setOperationBuilder().buildANotB(); + final CompactSketch not = aNotB.aNotB(skA, skC); + + println("A \\ C : "+not.getEstimate()); //the estimate of the AnotB operation + } + + @Test + public void checkIsSameResource() { + final int k = 16; + final MemorySegment wmem = MemorySegment.ofArray(new byte[k*16 + 32]);//288 + final MemorySegment emptyMem = MemorySegment.ofArray(new byte[8]); + final Union union = Sketches.setOperationBuilder().setNominalEntries(k).buildUnion(wmem); + assertTrue(union.isSameResource(wmem)); + assertFalse(union.isSameResource(emptyMem)); + + final Intersection inter = Sketches.setOperationBuilder().buildIntersection(wmem); + assertTrue(inter.isSameResource(wmem)); + assertFalse(inter.isSameResource(emptyMem)); + + final AnotB aNotB = Sketches.setOperationBuilder().buildANotB(); + + assertFalse(aNotB.isSameResource(emptyMem)); + } + + @Test + public void printlnTest() { + println("PRINTING: "+this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(final String s) { + //System.out.println(s); //disable here + } + + /** + * Compute offsets for MyHeap for Union, sketch1, sketch2, sketch3, resultSketch, total layout. 
+ * @param sketchNomEntries the configured nominal entries of the sketch + * @param unionNomEntries configured nominal entries of the union + * @return array of offsets for Union, sketch1, sketch2, sketch3, resultSketch, total layout + */ + private static int[] getHeapLayout(final int sketchNomEntries, final int unionNomEntries) { + final int[] heapLayout = new int[6]; + final int unionBytes = SetOperation.getMaxUnionBytes(unionNomEntries); + final int sketchBytes = getMaxUpdateSketchBytes(sketchNomEntries); + final int resultBytes = Sketch.getMaxCompactSketchBytes(unionNomEntries); + heapLayout[0] = 0; //offset for Union + heapLayout[1] = unionBytes; //offset for sketch1 + heapLayout[2] = unionBytes + sketchBytes; //offset for sketch2 + heapLayout[3] = unionBytes + 2*sketchBytes; //offset for sketch3 + heapLayout[4] = unionBytes + 3*sketchBytes; //offset for result + heapLayout[5] = unionBytes + 3*sketchBytes + resultBytes; //total + return heapLayout; + } + + private static double directUnionTrial1( + final MemorySegment heapMem, final int[] heapLayout, final int sketchNomEntries, final int unionNomEntries) { + + final int offset = heapLayout[0]; + final int bytes = heapLayout[1] - offset; + final MemorySegment unionMem = heapMem.asSlice(offset, bytes); + + Union union = SetOperation.builder().setNominalEntries(unionNomEntries).buildUnion(unionMem); + + final MemorySegment sketch1mem = heapMem.asSlice(heapLayout[1], heapLayout[2]-heapLayout[1]); + final MemorySegment sketch2mem = heapMem.asSlice(heapLayout[2], heapLayout[3]-heapLayout[2]); + final MemorySegment sketch3mem = heapMem.asSlice(heapLayout[3], heapLayout[4]-heapLayout[3]); + final MemorySegment resultMem = heapMem.asSlice(heapLayout[4], heapLayout[5]-heapLayout[4]); + + //Initialize the 3 sketches + final UpdateSketch sk1 = UpdateSketch.builder().setNominalEntries(sketchNomEntries).build(sketch1mem); + final UpdateSketch sk2 = 
UpdateSketch.builder().setNominalEntries(sketchNomEntries).build(sketch2mem); + final UpdateSketch sk3 = UpdateSketch.builder().setNominalEntries(sketchNomEntries).build(sketch3mem); + + //This little trial has sk1 and sk2 distinct and sk2 overlap both. + //Build the sketches. + for (int i=0; i< sketchNomEntries; i++) { + sk1.update(i); + sk2.update(i + sketchNomEntries/2); + sk3.update(i + sketchNomEntries); + } + + //confirm that each of these 3 sketches is exact. + assertEquals(sk1.getEstimate(), sketchNomEntries, 0.0); + assertEquals(sk2.getEstimate(), sketchNomEntries, 0.0); + assertEquals(sk3.getEstimate(), sketchNomEntries, 0.0); + + //Let's union the first 2 sketches + union.union(sk1); + union.union(sk2); + + //Let's recover the union and the 3rd sketch + union = Sketches.wrapUnion(unionMem); + union.union(Sketch.wrap(sketch3mem)); + + final Sketch resSk = union.getResult(true, resultMem); + final double est = resSk.getEstimate(); + + return est; + } + + private static double directUnionTrial2( + final MemorySegment heapMem, final int[] heapLayout, final int sketchNomEntries, final int unionNomEntries) { + + final MemorySegment unionMem = heapMem.asSlice(heapLayout[0], heapLayout[1]-heapLayout[0]); + final MemorySegment sketch1mem = heapMem.asSlice(heapLayout[1], heapLayout[2]-heapLayout[1]); + final MemorySegment sketch2mem = heapMem.asSlice(heapLayout[2], heapLayout[3]-heapLayout[2]); + final MemorySegment sketch3mem = heapMem.asSlice(heapLayout[3], heapLayout[4]-heapLayout[3]); + final MemorySegment resultMem = heapMem.asSlice(heapLayout[4], heapLayout[5]-heapLayout[4]); + + //Recover the 3 sketches + final UpdateSketch sk1 = (UpdateSketch) Sketch.wrap(sketch1mem); + final UpdateSketch sk2 = (UpdateSketch) Sketch.wrap(sketch2mem); + final UpdateSketch sk3 = (UpdateSketch) Sketch.wrap(sketch3mem); + + //confirm that each of these 3 sketches is exact. 
+ assertEquals(sk1.getEstimate(), sketchNomEntries, 0.0); + assertEquals(sk2.getEstimate(), sketchNomEntries, 0.0); + assertEquals(sk3.getEstimate(), sketchNomEntries, 0.0); + + //Create a new union in the same space with a smaller size. + Util.clear(unionMem); + final Union union = SetOperation.builder().setNominalEntries(unionNomEntries).buildUnion(unionMem); + union.union(sk1); + union.union(sk2); + union.union(sk3); + + final Sketch resSk = union.getResult(true, resultMem); + final double est = resSk.getEstimate(); + + return est; + } + +} diff --git a/src/test/java/org/apache/datasketches/theta2/SetOpsCornerCasesTest.java b/src/test/java/org/apache/datasketches/theta2/SetOpsCornerCasesTest.java new file mode 100644 index 000000000..6848c224e --- /dev/null +++ b/src/test/java/org/apache/datasketches/theta2/SetOpsCornerCasesTest.java @@ -0,0 +1,501 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.theta2; + +import static org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.EMPTY; +import static org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.EST_HEAP; +import static org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.EST_MEMORY_UNORDERED; +import static org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.EXACT; +import static org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.NULL; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNull; + +import java.lang.foreign.MemorySegment; +import java.util.Random; + +import org.testng.Assert; +import org.testng.annotations.Test; + +public class SetOpsCornerCasesTest { + + /*******************************************/ + Random rand = new Random(9001); //deterministic + + @Test + public void checkSetOpsRandom() { + int hiA = 0, loB = 0, hiB = 0; + for (int i = 0; i < 1000; i++) { + hiA = rand.nextInt(128); //skA fed values between 0 and 127 + loB = rand.nextInt(64); + hiB = loB + rand.nextInt(64); //skB fed up to 63 values starting at loB + compareSetOpsRandom(64, 0, hiA, loB, hiB); + } + } + + private static void compareSetOpsRandom(int k, int loA, int hiA, int loB, int hiB) { + UpdateSketch tskA = Sketches.updateSketchBuilder().setNominalEntries(k).build(); + UpdateSketch tskB = Sketches.updateSketchBuilder().setNominalEntries(k).build(); + + for (int i = loA; i < hiA; i++) { tskA.update(i); } + for (int i = loB; i < hiB; i++) { tskB.update(i); } + + CompactSketch rcskStdU = doStdUnion(tskA, tskB, k, null); + CompactSketch rcskPwU = doPwUnion(tskA, tskB, k); + checkCornerCase(rcskPwU, rcskStdU); + + CompactSketch rcskStdPairU = doStdPairUnion(tskA, tskB, k, null); + checkCornerCase(rcskStdPairU, rcskStdU); + + CompactSketch rcskStdI = doStdIntersection(tskA, tskB, null); + CompactSketch rcskPwI = doPwIntersection(tskA, tskB); + checkCornerCase(rcskPwI, rcskStdI); + + CompactSketch rcskStdPairI = 
doStdPairIntersection(tskA, tskB, null); + checkCornerCase(rcskStdPairI, rcskStdI); + + CompactSketch rcskStdAnotB = doStdAnotB(tskA, tskB, null); + CompactSketch rcskPwAnotB = doPwAnotB(tskA, tskB); + checkCornerCase(rcskPwAnotB, rcskStdAnotB); + + CompactSketch rcskStdStatefulAnotB = doStdStatefulAnotB(tskA, tskB, null); + checkCornerCase(rcskStdStatefulAnotB, rcskStdAnotB); + } + + /*******************************************/ + + @Test + //Check all corner cases against standard Union, Intersection, and AnotB. + //The unordered case is not tested + public void compareCornerCases() { + int k = 64; + for (State stateA : State.values()) { + for (State stateB : State.values()) { + if ((stateA == EST_MEMORY_UNORDERED) || (stateB == EST_MEMORY_UNORDERED)) { continue; } + if ((stateA == NULL) || (stateB == NULL)) { continue; } + cornerCaseChecks(stateA, stateB, k); + cornerCaseChecksMemory(stateA, stateB, k); + } + } + } + +// @Test +// public void checkExactNullSpecificCase() { +// cornerCaseChecksMemory(State.EXACT, State.NULL, 64); +// } + + private static void cornerCaseChecksMemory(State stateA, State stateB, int k) { + println("StateA: " + stateA + ", StateB: " + stateB); + CompactSketch tcskA = generate(stateA, k); + CompactSketch tcskB = generate(stateB, k); + + MemorySegment wseg = MemorySegment.ofArray(new byte[SetOperation.getMaxUnionBytes(k)]); + + CompactSketch rcskStdU = doStdUnion(tcskA, tcskB, k, null); + CompactSketch rcskPwU = doPwUnion(tcskA, tcskB, k); + checkCornerCase(rcskPwU, rcskStdU); //heap, heap + + rcskStdU = doStdUnion(tcskA, tcskB, k, wseg); + CompactSketch rcskStdPairU = doStdPairUnion(tcskA, tcskB, k, wseg); + checkCornerCase(rcskStdPairU, rcskStdU); //direct, direct + + wseg = MemorySegment.ofArray(new byte[SetOperation.getMaxIntersectionBytes(k)]); + + CompactSketch rcskStdI = doStdIntersection(tcskA, tcskB, null); + CompactSketch rcskPwI = doPwIntersection(tcskA, tcskB); + checkCornerCase(rcskPwI, rcskStdI); //empty, empty + + 
rcskStdI = doStdIntersection(tcskA, tcskB, wseg); + CompactSketch rcskStdPairI = doStdPairIntersection(tcskA, tcskB, wseg); + checkCornerCase(rcskStdPairI, rcskStdI); //empty, empty //direct, direct??? + + wseg = MemorySegment.ofArray(new byte[SetOperation.getMaxAnotBResultBytes(k)]); + + CompactSketch rcskStdAnotB = doStdAnotB(tcskA, tcskB, null); + CompactSketch rcskPwAnotB = doPwAnotB(tcskA, tcskB); + checkCornerCase(rcskPwAnotB, rcskStdAnotB); //heap, heap + + rcskStdAnotB = doStdAnotB(tcskA, tcskB, wseg); + CompactSketch rcskStdStatefulAnotB = doStdStatefulAnotB(tcskA, tcskB, wseg); + checkCornerCase(rcskStdStatefulAnotB, rcskStdAnotB); //direct, heap + } + + private static void cornerCaseChecks(State stateA, State stateB, int k) { + println("StateA: " + stateA + ", StateB: " + stateB); + CompactSketch tcskA = generate(stateA, k); + CompactSketch tcskB = generate(stateB, k); + + CompactSketch rcskStdU = doStdUnion(tcskA, tcskB, k, null); + CompactSketch rcskPwU = doPwUnion(tcskA, tcskB, k); + checkCornerCase(rcskPwU, rcskStdU); + + CompactSketch rcskStdPairU = doStdPairUnion(tcskA, tcskB, k, null); + checkCornerCase(rcskStdPairU, rcskStdU); + + CompactSketch rcskStdI = doStdIntersection(tcskA, tcskB, null); + CompactSketch rcskPwI = doPwIntersection(tcskA, tcskB); + checkCornerCase(rcskPwI, rcskStdI); + + CompactSketch rcskStdPairI = doStdPairIntersection(tcskA, tcskB, null); + checkCornerCase(rcskStdPairI, rcskStdI); + + CompactSketch rcskStdAnotB = doStdAnotB(tcskA, tcskB, null); + CompactSketch rcskPwAnotB = doPwAnotB(tcskA, tcskB); + checkCornerCase(rcskPwAnotB, rcskStdAnotB); + + CompactSketch rcskStdStatefulAnotB = doStdStatefulAnotB(tcskA, tcskB, null); + checkCornerCase(rcskStdStatefulAnotB, rcskStdAnotB); + } + + private static CompactSketch doStdUnion(Sketch tskA, Sketch tskB, int k, MemorySegment wseg) { + Union union = Sketches.setOperationBuilder().setNominalEntries(k).buildUnion(); + union.union(tskA); + union.union(tskB); + return 
union.getResult(true, wseg); + } + + private static CompactSketch doStdPairUnion(Sketch tskA, Sketch tskB, int k, MemorySegment wseg) { + Union union = Sketches.setOperationBuilder().setNominalEntries(k).buildUnion(); + return union.union(tskA, tskB, true, wseg); + } + + private static CompactSketch doStdIntersection(Sketch tskA, Sketch tskB, MemorySegment wseg) { + Intersection inter = Sketches.setOperationBuilder().buildIntersection(); + inter.intersect(tskA); + inter.intersect(tskB); + return inter.getResult(true, wseg); + } + + private static CompactSketch doStdPairIntersection(Sketch tskA, Sketch tskB, MemorySegment wseg) { + Intersection inter = Sketches.setOperationBuilder().buildIntersection(); + return inter.intersect(tskA, tskB, true, wseg); + } + + private static CompactSketch doStdAnotB(Sketch tskA, Sketch tskB, MemorySegment wseg) { + AnotB anotb = Sketches.setOperationBuilder().buildANotB(); + return anotb.aNotB(tskA, tskB, true, wseg); + } + + private static CompactSketch doStdStatefulAnotB(Sketch tskA, Sketch tskB, MemorySegment wseg) { + AnotB anotb = Sketches.setOperationBuilder().buildANotB(); + anotb.setA(tskA); + anotb.notB(tskB); + anotb.getResult(false); + return anotb.getResult(true, wseg, true); + } + + private static CompactSketch doPwUnion(Sketch tskA, Sketch tskB, int k) { + CompactSketch tcskA, tcskB; + if (tskA == null) { tcskA = null; } + else { tcskA = (tskA instanceof CompactSketch) ? (CompactSketch) tskA : tskA.compact(); } + if (tskB == null) { tcskB = null; } + else { tcskB = (tskB instanceof CompactSketch) ? 
(CompactSketch) tskB : tskB.compact(); } + Union union = SetOperation.builder().setNominalEntries(k).buildUnion(); + return union.union(tcskA, tcskB); + } + + private static CompactSketch doPwIntersection(Sketch tskA, Sketch tskB) { + Intersection inter = SetOperation.builder().buildIntersection(); + return inter.intersect(tskA, tskB); + } + + private static CompactSketch doPwAnotB(Sketch tskA, Sketch tskB) { + AnotB aNotB = SetOperation.builder().buildANotB(); + return aNotB.aNotB(tskA, tskB); + } + + + private static void checkCornerCase(Sketch rskA, Sketch rskB) { + double estA = rskA.getEstimate(); + double estB = rskB.getEstimate(); + boolean emptyA = rskA.isEmpty(); + boolean emptyB = rskB.isEmpty(); + long thetaLongA = rskA.getThetaLong(); + long thetaLongB = rskB.getThetaLong(); + int countA = rskA.getRetainedEntries(true); + int countB = rskB.getRetainedEntries(true); + Assert.assertEquals(estB, estA, 0.0); + Assert.assertEquals(emptyB, emptyA); + Assert.assertEquals(thetaLongB, thetaLongA); + Assert.assertEquals(countB, countA); + Assert.assertEquals(rskA.getClass().getSimpleName(), rskB.getClass().getSimpleName()); + } + + /*******************************************/ + + @Test + public void checkUnionNotOrdered() { + int k = 64; + CompactSketch skNull = generate(NULL, k); + CompactSketch skEmpty = generate(EMPTY, k); + CompactSketch skHeap = generate(EST_HEAP, k); + CompactSketch skHeapUO = generate(EST_MEMORY_UNORDERED, k); + Union union = SetOperation.builder().setNominalEntries(k).buildUnion(); + union.union(skNull, skHeapUO); + union.union(skEmpty, skHeapUO); + union.union(skHeapUO, skNull); + union.union(skHeapUO, skEmpty); + union.union(skHeapUO, skHeap); + union.union(skHeap, skHeapUO); + } + + @Test + public void checkSeedHash() { + int k = 64; + UpdateSketch tmp1 = Sketches.updateSketchBuilder().setNominalEntries(k).setSeed(123).build(); + tmp1.update(1); + tmp1.update(3); + CompactSketch skSmallSeed2A = tmp1.compact(true, null); + + 
UpdateSketch tmp2 = Sketches.updateSketchBuilder().setNominalEntries(k).setSeed(123).build(); + tmp2.update(1); + tmp2.update(2); + CompactSketch skSmallSeed2B = tmp2.compact(true, null); + + CompactSketch skExact = generate(EXACT, k); + CompactSketch skHeap = generate(EST_HEAP, 2 * k); + + Intersection inter = SetOperation.builder().buildIntersection(); + AnotB aNotB = SetOperation.builder().buildANotB(); + Union union = SetOperation.builder().setNominalEntries(k).buildUnion(); + + //Intersect + try { + inter.intersect(skExact, skSmallSeed2A); + Assert.fail(); + } catch (Exception e) { } //pass + try { + inter.intersect(skExact, skSmallSeed2B); + Assert.fail(); + } catch (Exception e) { } //pass + try { + inter.intersect(skSmallSeed2B, skExact); + Assert.fail(); + } catch (Exception e) { } //pass + try { + inter.intersect(skHeap, skSmallSeed2B); + Assert.fail(); + } catch (Exception e) { } //pass + //A NOT B + try { + aNotB.aNotB(skExact, skSmallSeed2A); + Assert.fail(); + } catch (Exception e) { } //pass + try { + aNotB.aNotB(skExact, skSmallSeed2B); + Assert.fail(); + } catch (Exception e) { } //pass + try { + aNotB.aNotB(skSmallSeed2B, skExact); + Assert.fail(); + } catch (Exception e) { } //pass + try { + aNotB.aNotB(skHeap, skSmallSeed2B); + Assert.fail(); + } catch (Exception e) { } //pass + //Union + try { + union.union(skExact, skSmallSeed2A); + Assert.fail(); + } catch (Exception e) { } //pass + try { + union.union(skExact, skSmallSeed2B); + Assert.fail(); + } catch (Exception e) { } //pass + try { + union.union(skSmallSeed2B, skExact); + Assert.fail(); + } catch (Exception e) { } //pass + try { + union.union(skHeap, skSmallSeed2B); + Assert.fail(); + } catch (Exception e) { } //pass + } + + @Test + public void checkPwUnionReduceToK() { + int k = 16; + CompactSketch skNull = generate(NULL, k); + CompactSketch skEmpty = generate(EMPTY, k); + CompactSketch skHeap1 = generate(EST_HEAP, k); + CompactSketch skHeap2 = generate(EST_HEAP, k); + Union union = 
SetOperation.builder().setNominalEntries(k).buildUnion(); + CompactSketch csk; + csk = union.union(skNull, skHeap1); + Assert.assertEquals(csk.getRetainedEntries(true), k); + csk = union.union(skEmpty, skHeap1); + Assert.assertEquals(csk.getRetainedEntries(true), k); + csk = union.union(skHeap1, skNull); + Assert.assertEquals(csk.getRetainedEntries(true), k); + csk = union.union(skHeap1, skEmpty); + Assert.assertEquals(csk.getRetainedEntries(true), k); + csk = union.union(skHeap1, skHeap2); + Assert.assertEquals(csk.getRetainedEntries(true), k); + } + + @Test + public void printlnTest() { + println("PRINTING: "+this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(String s) { + //System.out.println(s); //disable here + } + + @Test + public void checkGenerator() { + int k = 16; + CompactSketch csk; + + csk = generate(State.NULL, 0); + assertNull(csk); + + csk = generate(State.EMPTY, k); + assertEquals(csk.isEmpty(), true); + assertEquals(csk.isEstimationMode(), false); + assertEquals(csk.getRetainedEntries(true), 0); + assertEquals(csk.getThetaLong(), Long.MAX_VALUE); + assertEquals(csk.isDirect(), false); + assertEquals(csk.hasMemorySegment(), false); + assertEquals(csk.isOrdered(), true); + + csk = generate(State.SINGLE, k); + assertEquals(csk.isEmpty(), false); + assertEquals(csk.isEstimationMode(), false); + assertEquals(csk.getRetainedEntries(true), 1); + assertEquals(csk.getThetaLong(), Long.MAX_VALUE); + assertEquals(csk.isDirect(), false); + assertEquals(csk.hasMemorySegment(), false); + assertEquals(csk.isOrdered(), true); + + csk = generate(State.EXACT, k); + assertEquals(csk.isEmpty(), false); + assertEquals(csk.isEstimationMode(), false); + assertEquals(csk.getRetainedEntries(true), k); + assertEquals(csk.getThetaLong(), Long.MAX_VALUE); + assertEquals(csk.isDirect(), false); + assertEquals(csk.hasMemorySegment(), false); + assertEquals(csk.isOrdered(), true); + + csk = generate(State.EST_HEAP, k); + 
assertEquals(csk.isEmpty(), false); + assertEquals(csk.isEstimationMode(), true); + assertEquals(csk.getRetainedEntries(true) > k, true); + assertEquals(csk.getThetaLong() < Long.MAX_VALUE, true); + assertEquals(csk.isDirect(), false); + assertEquals(csk.hasMemorySegment(), false); + assertEquals(csk.isOrdered(), true); + + csk = generate(State.THLT1_CNT0_FALSE, k); + assertEquals(csk.isEmpty(), false); + assertEquals(csk.isEstimationMode(), true); + assertEquals(csk.getRetainedEntries(true), 0); + assertEquals(csk.getThetaLong() < Long.MAX_VALUE, true); + assertEquals(csk.isDirect(), false); + assertEquals(csk.hasMemorySegment(), false); + assertEquals(csk.isOrdered(), true); + + csk = generate(State.THEQ1_CNT0_TRUE, k); + assertEquals(csk.isEmpty(), true); + assertEquals(csk.isEstimationMode(), false); + assertEquals(csk.getRetainedEntries(true), 0); + assertEquals(csk.getThetaLong() < Long.MAX_VALUE, false); + assertEquals(csk.isDirect(), false); + assertEquals(csk.hasMemorySegment(), false); + assertEquals(csk.isOrdered(), true); + + csk = generate(State.EST_MEMORY_UNORDERED, k); + assertEquals(csk.isEmpty(), false); + assertEquals(csk.isEstimationMode(), true); + assertEquals(csk.getRetainedEntries(true) > k, true); + assertEquals(csk.getThetaLong() < Long.MAX_VALUE, true); + assertEquals(csk.isDirect(), false); + assertEquals(csk.hasMemorySegment(), true); + assertEquals(csk.isOrdered(), false); + } + + enum State {NULL, EMPTY, SINGLE, EXACT, EST_HEAP, THLT1_CNT0_FALSE, THEQ1_CNT0_TRUE, EST_MEMORY_UNORDERED} + + private static CompactSketch generate(State state, int k) { + UpdateSketch sk = null; + CompactSketch csk = null; + + switch(state) { + case NULL : { + //already null + break; + } + case EMPTY : { //results in EmptyCompactSketch + csk = Sketches.updateSketchBuilder().setNominalEntries(k).build().compact(true, null); + break; + } + case SINGLE : { //results in SingleItemSketches most of the time + sk = 
Sketches.updateSketchBuilder().setNominalEntries(k).build(); + sk.update(1); + csk = sk.compact(true, null); + break; + } + case EXACT : { + sk = Sketches.updateSketchBuilder().setNominalEntries(k).build(); + for (int i = 0; i < k; i++) { + sk.update(i); + } + csk = sk.compact(true, null); + break; + } + case EST_HEAP : { + sk = Sketches.updateSketchBuilder().setNominalEntries(k).build(); + for (int i = 0; i < (4 * k); i++) { + sk.update(i); + } + csk = sk.compact(true, null); + break; + } + case THLT1_CNT0_FALSE : { + sk = Sketches.updateSketchBuilder().setP((float)0.5).setNominalEntries(k).build(); + sk.update(7); //above theta + assert(sk.getRetainedEntries(true) == 0); + csk = sk.compact(true, null); //compact as {Th < 1.0, 0, F} + break; + } + case THEQ1_CNT0_TRUE : { + sk = Sketches.updateSketchBuilder().setP((float)0.5).setNominalEntries(k).build(); + assert(sk.getRetainedEntries(true) == 0); + csk = sk.compact(true, null); //compact as {Th < 1.0, 0, T} + break; + } + case EST_MEMORY_UNORDERED : { + sk = Sketches.updateSketchBuilder().setNominalEntries(k).build(); + for (int i = 0; i < (4 * k); i++) { + sk.update(i); + } + int bytes = Sketch.getMaxCompactSketchBytes(sk.getRetainedEntries(true)); + byte[] byteArr = new byte[bytes]; + MemorySegment wseg = MemorySegment.ofArray(byteArr); + csk = sk.compact(false, wseg); + break; + } + } + return csk; + } + +} diff --git a/src/test/java/org/apache/datasketches/theta2/SketchesTest.java b/src/test/java/org/apache/datasketches/theta2/SketchesTest.java new file mode 100644 index 000000000..277aae961 --- /dev/null +++ b/src/test/java/org/apache/datasketches/theta2/SketchesTest.java @@ -0,0 +1,202 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.theta2; + +import static org.apache.datasketches.theta2.BackwardConversions.convertSerVer3toSerVer1; +import static org.apache.datasketches.theta2.Sketches.getCompactSketchMaxBytes; +import static org.apache.datasketches.theta2.Sketches.getMaxCompactSketchBytes; +import static org.apache.datasketches.theta2.Sketches.getMaxIntersectionBytes; +import static org.apache.datasketches.theta2.Sketches.getMaxUnionBytes; +import static org.apache.datasketches.theta2.Sketches.getMaxUpdateSketchBytes; +import static org.apache.datasketches.theta2.Sketches.getSerializationVersion; +import static org.apache.datasketches.theta2.Sketches.heapifySetOperation; +import static org.apache.datasketches.theta2.Sketches.heapifySketch; +import static org.apache.datasketches.theta2.Sketches.setOperationBuilder; +import static org.apache.datasketches.theta2.Sketches.updateSketchBuilder; +import static org.apache.datasketches.theta2.Sketches.wrapSetOperation; +import static org.apache.datasketches.theta2.Sketches.wrapSketch; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.thetacommon.ThetaUtil; 
+import org.testng.annotations.Test; + +/** + * @author Lee Rhodes + */ +public class SketchesTest { + + private static MemorySegment getCompactSketchMemory(final int k, final int from, final int to) { + final UpdateSketch sk1 = updateSketchBuilder().setNominalEntries(k).build(); + for (int i=from; i previous); + previous = it.get(); + } + } + } + + @Test(groups = {CHECK_CPP_FILES}) + public void deserializeFromCppCompressed() throws IOException { + final int[] nArr = {10, 100, 1000, 10000, 100000, 1000000}; + for (int n: nArr) { + final byte[] bytes = Files.readAllBytes(cppPath.resolve("theta_compressed_n" + n + "_cpp.sk")); + final CompactSketch sketch = CompactSketch.wrap(MemorySegment.ofArray(bytes)); + assertTrue(n == 0 ? sketch.isEmpty() : !sketch.isEmpty()); + assertEquals(sketch.getEstimate(), n, n * 0.03); + assertTrue(sketch.isOrdered()); + final HashIterator it = sketch.iterator(); + long previous = 0; + while (it.next()) { + assertTrue(it.get() < sketch.getThetaLong()); + assertTrue(it.get() > previous); + previous = it.get(); + } + } + } + + @Test(groups = {CHECK_CPP_FILES}) + public void deserializeFromCppNonEmptyNoEntries() throws IOException { + final byte[] bytes = Files.readAllBytes(cppPath.resolve("theta_non_empty_no_entries_cpp.sk")); + final CompactSketch sketch = CompactSketch.wrap(MemorySegment.ofArray(bytes)); + assertFalse(sketch.isEmpty()); + assertEquals(sketch.getRetainedEntries(), 0); + } + +} diff --git a/src/test/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2Test.java b/src/test/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2Test.java new file mode 100644 index 000000000..88dd009c0 --- /dev/null +++ b/src/test/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2Test.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.thetacommon; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.theta2.CompactSketch; +import org.apache.datasketches.theta2.Intersection; +import org.apache.datasketches.theta2.Sketches; +import org.apache.datasketches.theta2.UpdateSketch; +import org.testng.annotations.Test; + +public class BoundsOnRatiosInThetaSketchedSets2Test { + + @Test + public void checkNormalReturns() { + final UpdateSketch skA = Sketches.updateSketchBuilder().build(); //4K + final UpdateSketch skC = Sketches.updateSketchBuilder().build(); + final int uA = 10000; + final int uC = 100000; + for (int i = 0; i < uA; i++) { skA.update(i); } + for (int i = 0; i < uC; i++) { skC.update(i + (uA / 2)); } + final Intersection inter = Sketches.setOperationBuilder().buildIntersection(); + inter.intersect(skA); + inter.intersect(skC); + final CompactSketch skB = inter.getResult(); + + double est = BoundsOnRatiosInThetaSketchedSets2.getEstimateOfBoverA(skA, skB); + double lb = BoundsOnRatiosInThetaSketchedSets2.getLowerBoundForBoverA(skA, skB); + double ub = BoundsOnRatiosInThetaSketchedSets2.getUpperBoundForBoverA(skA, skB); + 
assertTrue(ub > est); + assertTrue(est > lb); + assertEquals(est, 0.5, .03); + println("ub : " + ub); + println("est: " + est); + println("lb : " + lb); + skA.reset(); //skA is now empty + est = BoundsOnRatiosInThetaSketchedSets2.getEstimateOfBoverA(skA, skB); + lb = BoundsOnRatiosInThetaSketchedSets2.getLowerBoundForBoverA(skA, skB); + ub = BoundsOnRatiosInThetaSketchedSets2.getUpperBoundForBoverA(skA, skB); + println("ub : " + ub); + println("est: " + est); + println("lb : " + lb); + skC.reset(); //Now both are empty + est = BoundsOnRatiosInThetaSketchedSets2.getEstimateOfBoverA(skA, skC); + lb = BoundsOnRatiosInThetaSketchedSets2.getLowerBoundForBoverA(skA, skC); + ub = BoundsOnRatiosInThetaSketchedSets2.getUpperBoundForBoverA(skA, skC); + println("ub : " + ub); + println("est: " + est); + println("lb : " + lb); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkAbnormalReturns() { + final UpdateSketch skA = Sketches.updateSketchBuilder().build(); //4K + final UpdateSketch skC = Sketches.updateSketchBuilder().build(); + final int uA = 100000; + final int uC = 10000; + for (int i = 0; i < uA; i++) { skA.update(i); } + for (int i = 0; i < uC; i++) { skC.update(i + (uA / 2)); } + BoundsOnRatiosInThetaSketchedSets2.getEstimateOfBoverA(skA, skC); + } + + @Test + public void printlnTest() { + println("PRINTING: " + this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(final String s) { + //System.out.println(s); //disable here + } +} From e0a9710ebde3030f6a40d1a134058c6e40250e6e Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Mon, 16 Jun 2025 16:54:17 -0700 Subject: [PATCH 13/25] Remove use of aligned heap segments. Consider in the future. 
--- src/main/java/org/apache/datasketches/common/Util.java | 4 ++-- .../datasketches/theta2/DirectQuickSelectSketch.java | 10 +--------- .../datasketches/theta2/BackwardConversions.java | 2 +- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/apache/datasketches/common/Util.java b/src/main/java/org/apache/datasketches/common/Util.java index 4701ddf9e..493ad7879 100644 --- a/src/main/java/org/apache/datasketches/common/Util.java +++ b/src/main/java/org/apache/datasketches/common/Util.java @@ -909,7 +909,7 @@ public static boolean isSameResource(final MemorySegment seg1, final MemorySegme } /** - * Request a new heap MemorySegment with the given capacityBytes and 8-byte aligned or one byte aligned. + * Request a new heap MemorySegment with the given capacityBytes and either 8-byte aligned or one byte aligned. * *

If aligned is true, the returned MemorySegment will be constructed from a long[] array, * and, as a result, it will have a memory alignment of 8 bytes. @@ -923,7 +923,7 @@ public static boolean isSameResource(final MemorySegment seg1, final MemorySegme * @param aligned if true, the new heap segment will have an alignment of 8 bytes, otherwise the alignment will be 1 byte. * @return a new MemorySegment with the requested capacity and alignment. */ - public static MemorySegment newHeapSegment(final int capacityBytes, final boolean aligned) { + public static MemorySegment alignedHeapSegment(final int capacityBytes, final boolean aligned) { if (aligned) { final int lenLongs = capacityBytes >>> 3; final long[] array = ((capacityBytes & 0x7) == 0) diff --git a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java index 193385a1f..213dd7f4a 100644 --- a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java @@ -24,7 +24,6 @@ import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; -import static org.apache.datasketches.common.Util.newHeapSegment; import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK; import static org.apache.datasketches.theta2.PreambleUtil.FLAGS_BYTE; import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; @@ -316,14 +315,7 @@ UpdateReturnState hashUpdate(final long hash) { tgtLgArrLongs = Math.min(lgArrLongs + lgRF, lgNomLongs + 1); final int tgtArrBytes = 8 << tgtLgArrLongs; final int reqBytes = tgtArrBytes + preBytes; - - //memReqSvr_ = (memReqSvr_ == null) ? 
wseg_.getMemoryRequestServer() : memReqSvr_; - //if (memReqSvr_ == null) { //in case the MRS is not enabled or null. - // throw new SketchesArgumentException("Out of Memory, MemoryRequestServer is null, cannot expand."); - //} - //final MemorySegment newDstSeg = memReqSvr_.request(wseg_, reqBytes); - - final MemorySegment newDstSeg = newHeapSegment(reqBytes, false); + final MemorySegment newDstSeg = MemorySegment.ofArray(new byte[reqBytes]); moveAndResize(wseg_, preambleLongs, lgArrLongs, newDstSeg, tgtLgArrLongs, thetaLong); wseg_ = newDstSeg; diff --git a/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java b/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java index bec67b219..74aec9bb8 100644 --- a/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java +++ b/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java @@ -220,7 +220,7 @@ public static MemorySegment convertSerVer3toSerVer2(final CompactSketch skV3, fi final int entries = skV3.getRetainedEntries(true); final boolean unordered = !(skV3.isOrdered()); final byte flags = (byte) (0xA | (unordered ? 
16 : 0)); //Unordered, NoRebuild, notEmpty, ReadOnly, LE - wseg = Util.newHeapSegment((preLongs + entries) << 3, false); + wseg = MemorySegment.ofArray(new byte[(preLongs + entries) << 3]); wseg.set(JAVA_BYTE, 0, (byte) preLongs); //preLongs wseg.set(JAVA_BYTE, 1, (byte) 2); //SerVer wseg.set(JAVA_BYTE, 2, (byte) 3); //SetSketch From 6b2d7ab2dd0fbb4e4e543c7c4c2f003ff81bed31 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Wed, 18 Jun 2025 15:01:56 -0700 Subject: [PATCH 14/25] Make classes Final where possible Make use of MemorySegmentStatus where required --- .../apache/datasketches/theta2/BitPacking.java | 4 +++- .../BytesCompactCompressedHashIterator.java | 2 +- .../theta2/BytesCompactHashIterator.java | 2 +- .../ConcurrentBackgroundThetaPropagation.java | 2 +- .../theta2/DirectCompactCompressedSketch.java | 2 +- .../theta2/ForwardCompatibility.java | 2 ++ .../theta2/HeapCompactHashIterator.java | 2 +- .../datasketches/theta2/HeapCompactSketch.java | 2 +- .../datasketches/theta2/HeapHashIterator.java | 2 +- .../datasketches/theta2/IntersectionImpl.java | 2 +- .../datasketches/theta2/JaccardSimilarity.java | 2 ++ .../MemoryCompactCompressedHashIterator.java | 2 +- .../datasketches/theta2/MemoryHashIterator.java | 2 +- .../theta2/SetOperationBuilder.java | 2 +- .../org/apache/datasketches/theta2/Sketch.java | 17 ----------------- .../theta2/UpdateSketchBuilder.java | 2 +- .../theta2/WrappedCompactCompressedSketch.java | 6 +++--- 17 files changed, 22 insertions(+), 33 deletions(-) diff --git a/src/main/java/org/apache/datasketches/theta2/BitPacking.java b/src/main/java/org/apache/datasketches/theta2/BitPacking.java index e2b6be2fd..66d5a245e 100644 --- a/src/main/java/org/apache/datasketches/theta2/BitPacking.java +++ b/src/main/java/org/apache/datasketches/theta2/BitPacking.java @@ -24,7 +24,9 @@ /** * Used as part of Theta compression. 
*/ -public class BitPacking { +public final class BitPacking { + + private BitPacking() { } /** * The bit packing operation diff --git a/src/main/java/org/apache/datasketches/theta2/BytesCompactCompressedHashIterator.java b/src/main/java/org/apache/datasketches/theta2/BytesCompactCompressedHashIterator.java index 81a985922..6a2ddddd7 100644 --- a/src/main/java/org/apache/datasketches/theta2/BytesCompactCompressedHashIterator.java +++ b/src/main/java/org/apache/datasketches/theta2/BytesCompactCompressedHashIterator.java @@ -22,7 +22,7 @@ /* * This is to uncompress serial version 4 sketch incrementally */ -class BytesCompactCompressedHashIterator implements HashIterator { +final class BytesCompactCompressedHashIterator implements HashIterator { private byte[] bytes; private int offset; private int entryBits; diff --git a/src/main/java/org/apache/datasketches/theta2/BytesCompactHashIterator.java b/src/main/java/org/apache/datasketches/theta2/BytesCompactHashIterator.java index 9a4754574..3586f54c4 100644 --- a/src/main/java/org/apache/datasketches/theta2/BytesCompactHashIterator.java +++ b/src/main/java/org/apache/datasketches/theta2/BytesCompactHashIterator.java @@ -24,7 +24,7 @@ /* * This is to iterate over serial version 3 sketch representation */ -class BytesCompactHashIterator implements HashIterator { +final class BytesCompactHashIterator implements HashIterator { final private byte[] bytes; final private int offset; final private int numEntries; diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentBackgroundThetaPropagation.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentBackgroundThetaPropagation.java index 2d529c4ce..f578dc6a1 100644 --- a/src/main/java/org/apache/datasketches/theta2/ConcurrentBackgroundThetaPropagation.java +++ b/src/main/java/org/apache/datasketches/theta2/ConcurrentBackgroundThetaPropagation.java @@ -29,7 +29,7 @@ * * @author eshcar */ -class ConcurrentBackgroundThetaPropagation implements Runnable { +final 
class ConcurrentBackgroundThetaPropagation implements Runnable { // Shared sketch to absorb the data private final ConcurrentSharedThetaSketch sharedThetaSketch; diff --git a/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java b/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java index 9be51c379..8ed907321 100644 --- a/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java @@ -40,7 +40,7 @@ * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

*/ -class DirectCompactCompressedSketch extends DirectCompactSketch { +final class DirectCompactCompressedSketch extends DirectCompactSketch { /** * Construct this sketch with the given MemorySegment. * @param seg Read-only MemorySegment object. diff --git a/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java b/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java index 9791a7902..a6635653c 100644 --- a/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java +++ b/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java @@ -38,6 +38,8 @@ */ final class ForwardCompatibility { + private ForwardCompatibility() { } + /** * Convert a serialization version (SerVer) 1 sketch (~Feb 2014) to a SerVer 3 sketch. * Note: SerVer 1 sketches always have (metadata) preamble-longs of 3 and are always stored diff --git a/src/main/java/org/apache/datasketches/theta2/HeapCompactHashIterator.java b/src/main/java/org/apache/datasketches/theta2/HeapCompactHashIterator.java index b10ffcaaf..bd06f6ecd 100644 --- a/src/main/java/org/apache/datasketches/theta2/HeapCompactHashIterator.java +++ b/src/main/java/org/apache/datasketches/theta2/HeapCompactHashIterator.java @@ -19,7 +19,7 @@ package org.apache.datasketches.theta2; -class HeapCompactHashIterator implements HashIterator { +final class HeapCompactHashIterator implements HashIterator { private long[] cache; private int index; diff --git a/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java index 6cffd9818..1c0cbb0cc 100644 --- a/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java @@ -38,7 +38,7 @@ * * @author Lee Rhodes */ -class HeapCompactSketch extends CompactSketch { +final class HeapCompactSketch extends CompactSketch { private final long thetaLong_; //computed private final int curCount_; 
private final int preLongs_; //computed diff --git a/src/main/java/org/apache/datasketches/theta2/HeapHashIterator.java b/src/main/java/org/apache/datasketches/theta2/HeapHashIterator.java index c2b098c25..29ae42a0e 100644 --- a/src/main/java/org/apache/datasketches/theta2/HeapHashIterator.java +++ b/src/main/java/org/apache/datasketches/theta2/HeapHashIterator.java @@ -22,7 +22,7 @@ /** * @author Lee Rhodes */ -class HeapHashIterator implements HashIterator { +final class HeapHashIterator implements HashIterator { private long[] cache; private long thetaLong; private int index; diff --git a/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java b/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java index 92ca096c3..74228b1fa 100644 --- a/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java +++ b/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java @@ -79,7 +79,7 @@ * @author Lee Rhodes * @author Kevin Lang */ -class IntersectionImpl extends Intersection { +final class IntersectionImpl extends Intersection { protected final short seedHash_; protected final boolean readOnly_; //True if this sketch is to be treated as read only protected final MemorySegment wseg_; diff --git a/src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java b/src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java index 624dcc3d7..de5fff58c 100644 --- a/src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java +++ b/src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java @@ -37,6 +37,8 @@ public final class JaccardSimilarity { private static final double[] ZEROS = {0.0, 0.0, 0.0}; // LB, Estimate, UB private static final double[] ONES = {1.0, 1.0, 1.0}; + private JaccardSimilarity() { } + /** * Computes the Jaccard similarity index with upper and lower bounds. 
The Jaccard similarity index * J(A,B) = (A ^ B)/(A U B) is used to measure how similar the two sketches are to each diff --git a/src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java b/src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java index 31aa6ff92..11d0168a0 100644 --- a/src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java +++ b/src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java @@ -30,7 +30,7 @@ /* * This is to uncompress serial version 4 sketch incrementally */ -class MemoryCompactCompressedHashIterator implements HashIterator, MemorySegmentStatus { +final class MemoryCompactCompressedHashIterator implements HashIterator, MemorySegmentStatus { private MemorySegment seg; private int offset; private int entryBits; diff --git a/src/main/java/org/apache/datasketches/theta2/MemoryHashIterator.java b/src/main/java/org/apache/datasketches/theta2/MemoryHashIterator.java index eb2137afd..3022d59ff 100644 --- a/src/main/java/org/apache/datasketches/theta2/MemoryHashIterator.java +++ b/src/main/java/org/apache/datasketches/theta2/MemoryHashIterator.java @@ -26,7 +26,7 @@ /** * @author Lee Rhodes */ -class MemoryHashIterator implements HashIterator { +final class MemoryHashIterator implements HashIterator { private MemorySegment seg; private int arrLongs; private long thetaLong; diff --git a/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java b/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java index cf64326b8..5a05a7d1f 100644 --- a/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java +++ b/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java @@ -35,7 +35,7 @@ * * @author Lee Rhodes */ -public class SetOperationBuilder { +public final class SetOperationBuilder { private int bLgNomLongs; private long bSeed; private ResizeFactor bRF; diff --git 
a/src/main/java/org/apache/datasketches/theta2/Sketch.java b/src/main/java/org/apache/datasketches/theta2/Sketch.java index e98396842..5d619c580 100644 --- a/src/main/java/org/apache/datasketches/theta2/Sketch.java +++ b/src/main/java/org/apache/datasketches/theta2/Sketch.java @@ -409,23 +409,6 @@ public boolean isEstimationMode() { */ public abstract boolean isOrdered(); - /** - * Returns true if the backing MemorySegment of this object refers to the same MemorySegment of that. - * They can either have the same off-heap memory location and size, or refer to the same on-heap array object. - * - *

If both segment are off-heap, they both must have the same starting address and the same size.

- * - *

For on-heap segments, both segments must be based on or derived from the same array object and neither segment - * can be read-only.

- * - *

Returns false if either argument is null;

- * - * @param that The given MemorySegment. - * @return true if the backing MemorySegment of this object hierarchy refers to the same MemorySegment of that. - */ - @Override - public abstract boolean isSameResource(final MemorySegment that); - /** * Returns a HashIterator that can be used to iterate over the retained hash values of the * Theta sketch. diff --git a/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java b/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java index e8353888f..0326ceb06 100644 --- a/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java +++ b/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java @@ -37,7 +37,7 @@ * * @author Lee Rhodes */ -public class UpdateSketchBuilder { +public final class UpdateSketchBuilder { private int bLgNomLongs; private long bSeed; private ResizeFactor bRF; diff --git a/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java b/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java index 1558c49e7..3ba16c3fa 100644 --- a/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java @@ -32,8 +32,8 @@ * *

This sketch can only be associated with a Serialization Version 4 format binary image.

*/ -class WrappedCompactCompressedSketch extends WrappedCompactSketch { - +final class WrappedCompactCompressedSketch extends WrappedCompactSketch { + /** * Construct this sketch with the given bytes. * @param bytes containing serialized compact compressed sketch. @@ -66,7 +66,7 @@ public int getCurrentBytes() { private static final int START_PACKED_DATA_EXACT_MODE = 8; private static final int START_PACKED_DATA_ESTIMATION_MODE = 16; - + @Override public int getRetainedEntries(final boolean valid) { //compact is always valid // number of entries is stored using variable length encoding From 8fffa8c36c940a0fda82e212408e31dd525e12e3 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Tue, 24 Jun 2025 15:46:49 -0700 Subject: [PATCH 15/25] All of Tuple2 converted to FFM --- .../org/apache/datasketches/common/Util.java | 2 +- .../BoundsOnRatiosInTupleSketchedSets2.java | 204 ++++++ .../org/apache/datasketches/tuple2/AnotB.java | 636 ++++++++++++++++++ .../datasketches/tuple2/CompactSketch.java | 256 +++++++ .../tuple2/DeserializeResult.java | 55 ++ .../apache/datasketches/tuple2/Filter.java | 76 +++ .../datasketches/tuple2/HashTables.java | 169 +++++ .../datasketches/tuple2/Intersection.java | 254 +++++++ .../tuple2/JaccardSimilarity.java | 370 ++++++++++ .../tuple2/QuickSelectSketch.java | 621 +++++++++++++++++ .../tuple2/SerializerDeserializer.java | 99 +++ .../apache/datasketches/tuple2/Sketch.java | 224 ++++++ .../apache/datasketches/tuple2/Sketches.java | 72 ++ .../apache/datasketches/tuple2/Summary.java | 46 ++ .../tuple2/SummaryDeserializer.java | 42 ++ .../datasketches/tuple2/SummaryFactory.java | 34 + .../tuple2/SummarySetOperations.java | 56 ++ .../tuple2/TupleSketchIterator.java | 75 +++ .../org/apache/datasketches/tuple2/Union.java | 225 +++++++ .../datasketches/tuple2/UpdatableSketch.java | 190 ++++++ .../tuple2/UpdatableSketchBuilder.java | 107 +++ .../datasketches/tuple2/UpdatableSummary.java | 36 + .../org/apache/datasketches/tuple2/Util.java | 172 +++++ 
.../tuple2/adouble/DoubleSketch.java | 85 +++ .../tuple2/adouble/DoubleSummary.java | 162 +++++ .../adouble/DoubleSummaryDeserializer.java | 38 ++ .../tuple2/adouble/DoubleSummaryFactory.java | 46 ++ .../adouble/DoubleSummarySetOperations.java | 83 +++ .../tuple2/adouble/package-info.java | 23 + .../tuple2/aninteger/IntegerSketch.java | 86 +++ .../tuple2/aninteger/IntegerSummary.java | 162 +++++ .../aninteger/IntegerSummaryDeserializer.java | 38 ++ .../aninteger/IntegerSummaryFactory.java | 46 ++ .../IntegerSummarySetOperations.java | 67 ++ .../tuple2/aninteger/package-info.java | 23 + .../arrayofdoubles/ArrayOfDoublesAnotB.java | 57 ++ .../ArrayOfDoublesAnotBImpl.java | 237 +++++++ .../ArrayOfDoublesCombiner.java | 35 + .../ArrayOfDoublesCompactSketch.java | 64 ++ .../ArrayOfDoublesIntersection.java | 184 +++++ .../ArrayOfDoublesQuickSelectSketch.java | 196 ++++++ .../ArrayOfDoublesSetOperationBuilder.java | 137 ++++ .../arrayofdoubles/ArrayOfDoublesSketch.java | 290 ++++++++ .../ArrayOfDoublesSketchIterator.java | 50 ++ .../ArrayOfDoublesSketches.java | 145 ++++ .../arrayofdoubles/ArrayOfDoublesUnion.java | 207 ++++++ .../ArrayOfDoublesUpdatableSketch.java | 229 +++++++ .../ArrayOfDoublesUpdatableSketchBuilder.java | 131 ++++ .../DirectArrayOfDoublesCompactSketch.java | 288 ++++++++ .../DirectArrayOfDoublesIntersection.java | 52 ++ ...DirectArrayOfDoublesQuickSelectSketch.java | 433 ++++++++++++ ...irectArrayOfDoublesQuickSelectSketchR.java | 42 ++ .../DirectArrayOfDoublesSketchIterator.java | 83 +++ .../DirectArrayOfDoublesUnion.java | 92 +++ .../DirectArrayOfDoublesUnionR.java | 47 ++ .../tuple2/arrayofdoubles/HashTables.java | 130 ++++ .../HeapArrayOfDoublesCompactSketch.java | 233 +++++++ .../HeapArrayOfDoublesIntersection.java | 42 ++ .../HeapArrayOfDoublesQuickSelectSketch.java | 363 ++++++++++ .../HeapArrayOfDoublesSketchIterator.java | 65 ++ .../HeapArrayOfDoublesUnion.java | 73 ++ .../tuple2/arrayofdoubles/package-info.java | 24 + 
.../datasketches/tuple2/package-info.java | 25 + .../tuple2/strings/ArrayOfStringsSketch.java | 103 +++ .../tuple2/strings/ArrayOfStringsSummary.java | 185 +++++ .../ArrayOfStringsSummaryDeserializer.java | 51 ++ .../strings/ArrayOfStringsSummaryFactory.java | 35 + .../ArrayOfStringsSummarySetOperations.java | 40 ++ .../tuple2/strings/package-info.java | 24 + .../CompactSketchWithDoubleSummaryTest.java | 189 ++++++ .../datasketches/tuple2/IntegerSummary.java | 81 +++ .../tuple2/IntegerSummaryDeserializer.java | 31 + .../tuple2/IntegerSummaryFactory.java | 32 + .../tuple2/JaccardSimilarityTest.java | 457 +++++++++++++ .../apache/datasketches/tuple2/MiscTest.java | 95 +++ .../tuple2/ReadOnlyMemoryTest.java | 121 ++++ .../tuple2/SerializerDeserializerTest.java | 59 ++ .../tuple2/TupleCrossLanguageTest.java | 126 ++++ .../tuple2/TupleExamples2Test.java | 287 ++++++++ .../tuple2/TupleExamplesTest.java | 191 ++++++ .../tuple2/adouble/AdoubleAnotBTest.java | 299 ++++++++ .../adouble/AdoubleIntersectionTest.java | 305 +++++++++ .../tuple2/adouble/AdoubleTest.java | 421 ++++++++++++ .../tuple2/adouble/AdoubleUnionTest.java | 173 +++++ .../tuple2/adouble/FilterTest.java | 152 +++++ .../CornerCaseTupleSetOperationsTest.java | 630 +++++++++++++++++ .../tuple2/aninteger/EngagementTest.java | 143 ++++ .../tuple2/aninteger/IntegerSketchTest.java | 140 ++++ .../aninteger/MikhailsBugTupleTest.java | 74 ++ .../aninteger/ParameterLeakageTest.java | 180 +++++ .../AodSketchCrossLanguageTest.java | 118 ++++ .../ArrayOfDoublesAnotBTest.java | 323 +++++++++ .../ArrayOfDoublesCompactSketchTest.java | 139 ++++ .../ArrayOfDoublesIntersectionTest.java | 311 +++++++++ .../ArrayOfDoublesQuickSelectSketchTest.java | 164 +++++ .../ArrayOfDoublesUnionTest.java | 513 ++++++++++++++ ...erCaseArrayOfDoublesSetOperationsTest.java | 581 ++++++++++++++++ ...DirectArrayOfDoublesCompactSketchTest.java | 134 ++++ ...ctArrayOfDoublesQuickSelectSketchTest.java | 281 ++++++++ 
.../HeapArrayOfDoublesCompactSketchTest.java | 137 ++++ ...apArrayOfDoublesQuickSelectSketchTest.java | 244 +++++++ 101 files changed, 16402 insertions(+), 1 deletion(-) create mode 100644 src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInTupleSketchedSets2.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/AnotB.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/CompactSketch.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/DeserializeResult.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/Filter.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/HashTables.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/Intersection.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/JaccardSimilarity.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/QuickSelectSketch.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/SerializerDeserializer.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/Sketch.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/Sketches.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/Summary.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/SummaryDeserializer.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/SummaryFactory.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/SummarySetOperations.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/TupleSketchIterator.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/Union.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/UpdatableSketch.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/UpdatableSketchBuilder.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/UpdatableSummary.java create mode 100644 
src/main/java/org/apache/datasketches/tuple2/Util.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSketch.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummary.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryDeserializer.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryFactory.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummarySetOperations.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/adouble/package-info.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSketch.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummary.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryDeserializer.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryFactory.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummarySetOperations.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/aninteger/package-info.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotB.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBImpl.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCombiner.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketch.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersection.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java 
create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketch.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketchIterator.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketches.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnion.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesIntersection.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchR.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnion.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnionR.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HashTables.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesIntersection.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java create mode 100644 
src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesSketchIterator.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesUnion.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/package-info.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/package-info.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSketch.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummary.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryDeserializer.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryFactory.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummarySetOperations.java create mode 100644 src/main/java/org/apache/datasketches/tuple2/strings/package-info.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/CompactSketchWithDoubleSummaryTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/IntegerSummary.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/IntegerSummaryDeserializer.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/IntegerSummaryFactory.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/JaccardSimilarityTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/MiscTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/ReadOnlyMemoryTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/SerializerDeserializerTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/TupleCrossLanguageTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/TupleExamples2Test.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/TupleExamplesTest.java 
create mode 100644 src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleAnotBTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleIntersectionTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleUnionTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/adouble/FilterTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/aninteger/CornerCaseTupleSetOperationsTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/aninteger/EngagementTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/aninteger/IntegerSketchTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/aninteger/MikhailsBugTupleTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/aninteger/ParameterLeakageTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/AodSketchCrossLanguageTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersectionTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketchTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnionTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java create mode 100644 
src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketchTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketchTest.java diff --git a/src/main/java/org/apache/datasketches/common/Util.java b/src/main/java/org/apache/datasketches/common/Util.java index 493ad7879..88b7ad7b7 100644 --- a/src/main/java/org/apache/datasketches/common/Util.java +++ b/src/main/java/org/apache/datasketches/common/Util.java @@ -937,7 +937,7 @@ public static MemorySegment alignedHeapSegment(final int capacityBytes, final bo /** * Sets the bits defined by the bitMask * @param seg the given MemorySegment - * @param offsetBytes offset bytes relative to this Memory start + * @param offsetBytes offset bytes relative to this MemorySegment start * @param bitMask the bits set to one will be set */ public static void setBits(final MemorySegment seg, final long offsetBytes, final byte bitMask) { diff --git a/src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInTupleSketchedSets2.java b/src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInTupleSketchedSets2.java new file mode 100644 index 000000000..ea8a20828 --- /dev/null +++ b/src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInTupleSketchedSets2.java @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.thetacommon; + +import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; + +import org.apache.datasketches.common.BoundsOnRatiosInSampledSets; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.tuple2.Sketch; +import org.apache.datasketches.tuple2.Summary; + +/** + * This class is used to compute the bounds on the estimate of the ratio B / A, where: + *
    + *
  • A is a Tuple Sketch of population PopA.
  • + *
  • B is a Tuple or Theta Sketch of population PopB that is a subset of A, + * obtained by an intersection of A with some other Tuple or Theta Sketch C, + * which acts like a predicate or selection clause.
  • + *
  • The estimate of the ratio PopB/PopA is + * BoundsOnRatiosInTupleSketchedSets2.getEstimateOfBoverA(A, B).
  • + *
  • The Upper Bound estimate on the ratio PopB/PopA is + * BoundsOnRatiosInTupleSketchedSets2.getUpperBoundForBoverA(A, B).
  • + *
  • The Lower Bound estimate on the ratio PopB/PopA is + * BoundsOnRatiosInTupleSketchedSets2.getLowerBoundForBoverA(A, B).
  • + *
+ * Note: The theta of B cannot be greater than the theta of A. + * If B is formed as an intersection of A and some other set C, + * then the theta of B is guaranteed to be less than or equal to the theta of A. + * + * @author Kevin Lang + * @author Lee Rhodes + * @author David Cromberge + */ +public final class BoundsOnRatiosInTupleSketchedSets2 { + + private BoundsOnRatiosInTupleSketchedSets2() {} + + /** + * Gets the approximate lower bound for B over A based on a 95% confidence interval + * @param sketchA the Tuple sketch A with summary type S + * @param sketchB the Tuple sketch B with summary type S + * @param Summary + * @return the approximate lower bound for B over A + */ + public static double getLowerBoundForBoverA( + final Sketch sketchA, + final Sketch sketchB) { + final long thetaLongA = sketchA.getThetaLong(); + final long thetaLongB = sketchB.getThetaLong(); + checkThetas(thetaLongA, thetaLongB); + + final int countB = sketchB.getRetainedEntries(); + final int countA = thetaLongB == thetaLongA + ? sketchA.getRetainedEntries() + : sketchA.getCountLessThanThetaLong(thetaLongB); + + if (countA <= 0) { return 0; } + final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; + return BoundsOnRatiosInSampledSets.getLowerBoundForBoverA(countA, countB, f); + } + + /** + * Gets the approximate lower bound for B over A based on a 95% confidence interval + * @param sketchA the Tuple sketch A with summary type S + * @param sketchB the Theta sketch B + * @param Summary + * @return the approximate lower bound for B over A + */ + public static double getLowerBoundForBoverA( + final Sketch sketchA, + final org.apache.datasketches.theta.Sketch sketchB) { + final long thetaLongA = sketchA.getThetaLong(); + final long thetaLongB = sketchB.getThetaLong(); + checkThetas(thetaLongA, thetaLongB); + + final int countB = sketchB.getRetainedEntries(); + final int countA = thetaLongB == thetaLongA + ? 
sketchA.getRetainedEntries() + : sketchA.getCountLessThanThetaLong(thetaLongB); + + if (countA <= 0) { return 0; } + final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; + return BoundsOnRatiosInSampledSets.getLowerBoundForBoverA(countA, countB, f); + } + + /** + * Gets the approximate upper bound for B over A based on a 95% confidence interval + * @param sketchA the Tuple sketch A with summary type S + * @param sketchB the Tuple sketch B with summary type S + * @param Summary + * @return the approximate upper bound for B over A + */ + public static double getUpperBoundForBoverA( + final Sketch sketchA, + final Sketch sketchB) { + final long thetaLongA = sketchA.getThetaLong(); + final long thetaLongB = sketchB.getThetaLong(); + checkThetas(thetaLongA, thetaLongB); + + final int countB = sketchB.getRetainedEntries(); + final int countA = thetaLongB == thetaLongA + ? sketchA.getRetainedEntries() + : sketchA.getCountLessThanThetaLong(thetaLongB); + + if (countA <= 0) { return 1.0; } + final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; + return BoundsOnRatiosInSampledSets.getUpperBoundForBoverA(countA, countB, f); + } + + /** + * Gets the approximate upper bound for B over A based on a 95% confidence interval + * @param sketchA the Tuple sketch A with summary type S + * @param sketchB the Theta sketch B + * @param Summary + * @return the approximate upper bound for B over A + */ + public static double getUpperBoundForBoverA( + final Sketch sketchA, + final org.apache.datasketches.theta.Sketch sketchB) { + final long thetaLongA = sketchA.getThetaLong(); + final long thetaLongB = sketchB.getThetaLong(); + checkThetas(thetaLongA, thetaLongB); + + final int countB = sketchB.getRetainedEntries(true); + final int countA = thetaLongB == thetaLongA + ? 
sketchA.getRetainedEntries() + : sketchA.getCountLessThanThetaLong(thetaLongB); + + if (countA <= 0) { return 1.0; } + final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; + return BoundsOnRatiosInSampledSets.getUpperBoundForBoverA(countA, countB, f); + } + + /** + * Gets the estimate for B over A + * @param sketchA the Tuple sketch A with summary type S + * @param sketchB the Tuple sketch B with summary type S + * @param Summary + * @return the estimate for B over A + */ + public static double getEstimateOfBoverA( + final Sketch sketchA, + final Sketch sketchB) { + final long thetaLongA = sketchA.getThetaLong(); + final long thetaLongB = sketchB.getThetaLong(); + checkThetas(thetaLongA, thetaLongB); + + final int countB = sketchB.getRetainedEntries(); + final int countA = thetaLongB == thetaLongA + ? sketchA.getRetainedEntries() + : sketchA.getCountLessThanThetaLong(thetaLongB); + + if (countA <= 0) { return 0.5; } + + return (double) countB / (double) countA; + } + + /** + * Gets the estimate for B over A + * @param sketchA the Tuple sketch A with summary type S + * @param sketchB the Theta sketch B + * @param Summary + * @return the estimate for B over A + */ + public static double getEstimateOfBoverA( + final Sketch sketchA, + final org.apache.datasketches.theta.Sketch sketchB) { + final long thetaLongA = sketchA.getThetaLong(); + final long thetaLongB = sketchB.getThetaLong(); + checkThetas(thetaLongA, thetaLongB); + + final int countB = sketchB.getRetainedEntries(true); + final int countA = thetaLongB == thetaLongA + ? 
sketchA.getRetainedEntries() + : sketchA.getCountLessThanThetaLong(thetaLongB); + + if (countA <= 0) { return 0.5; } + + return (double) countB / (double) countA; + } + + static void checkThetas(final long thetaLongA, final long thetaLongB) { + if (thetaLongB > thetaLongA) { + throw new SketchesArgumentException("ThetaLongB cannot be > ThetaLongA."); + } + } +} diff --git a/src/main/java/org/apache/datasketches/tuple2/AnotB.java b/src/main/java/org/apache/datasketches/tuple2/AnotB.java new file mode 100644 index 000000000..46ff084ae --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/AnotB.java @@ -0,0 +1,636 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple2; + +import static java.lang.Math.min; +import static org.apache.datasketches.common.Util.exactLog2OfLong; +import static org.apache.datasketches.thetacommon.HashOperations.convertToHashTable; +import static org.apache.datasketches.thetacommon.HashOperations.hashSearch; + +import java.lang.reflect.Method; +import java.util.Arrays; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.SketchesStateException; +import org.apache.datasketches.common.SuppressFBWarnings; +import org.apache.datasketches.thetacommon.SetOperationCornerCases; +import org.apache.datasketches.thetacommon.SetOperationCornerCases.AnotbAction; +import org.apache.datasketches.thetacommon.SetOperationCornerCases.CornerCase; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * Computes a set difference, A-AND-NOT-B, of two generic tuple sketches. + * This class includes both stateful and stateless operations. + * + *

The stateful operation is as follows:

+ *

+ * AnotB anotb = new AnotB();
+ *
+ * anotb.setA(Sketch skA); //The first argument.
+ * anotb.notB(Sketch skB); //The second (subtraction) argument.
+ * anotb.notB(Sketch skC); // ...any number of additional subtractions...
+ * anotb.getResult(false); //Get an interim result.
+ * anotb.notB(Sketch skD); //Additional subtractions.
+ * anotb.getResult(true);  //Final result and resets the AnotB operator.
+ * 
+ * + *

The stateless operation is as follows:

+ *

+ * AnotB anotb = new AnotB();
+ *
+ * CompactSketch csk = anotb.aNotB(Sketch skA, Sketch skB);
+ * 
+ * + *

Calling the setA operation a second time essentially clears the internal state and loads + * the new sketch.

+ * + *

The stateless and stateful operations are independent of each other.

+ * + * @param Type of Summary + * + * @author Lee Rhodes + */ +@SuppressFBWarnings(value = "DP_DO_INSIDE_DO_PRIVILEGED", justification = "Defer fix") +public final class AnotB { + private boolean empty_ = true; + private long thetaLong_ = Long.MAX_VALUE; + private long[] hashArr_ = null; //always in compact form, not necessarily sorted + private S[] summaryArr_ = null; //always in compact form, not necessarily sorted + private int curCount_ = 0; + + private static final Method GET_CACHE; + + static { + try { + GET_CACHE = org.apache.datasketches.theta2.Sketch.class.getDeclaredMethod("getCache"); + GET_CACHE.setAccessible(true); + } catch (final Exception e) { + throw new SketchesStateException("Could not reflect getCache(): " + e); + } + } + + /** + * This is part of a multistep, stateful AnotB operation and sets the given Tuple sketch as the + * first argument A of A-AND-NOT-B. This overwrites the internal state of this + * AnotB operator with the contents of the given sketch. + * This sets the stage for multiple following notB steps. + * + *

An input argument of null will throw an exception.

+ * + *

Rationale: In mathematics a "null set" is a set with no members, which we call an empty set. + * That is distinctly different from the java null, which represents a nonexistent object. + * In most cases it is a programming error due to some object that was not properly initialized. + * With a null as the first argument, we cannot know what the user's intent is. + * Since it is very likely that a null is a programming error, we throw an exception.

+ * + *

An empty input argument will set the internal state to empty.

+ * + *

Rationale: An empty set is a mathematically legal concept. Although it makes any subsequent, + * valid argument for B irrelevant, we must allow this and assume the user knows what they are + * doing.

+ * + *

Performing {@link #getResult(boolean)} just after this step will return a compact form of + * the given argument.

+ * + * @param skA The incoming sketch for the first argument, A. + */ + public void setA(final Sketch skA) { + if (skA == null) { + reset(); + throw new SketchesArgumentException("The input argument A may not be null"); + } + + empty_ = skA.isEmpty(); + thetaLong_ = skA.getThetaLong(); + final DataArrays da = getCopyOfDataArraysTuple(skA); + summaryArr_ = da.summaryArr; //it may be null + hashArr_ = da.hashArr; //it may be null + curCount_ = (hashArr_ == null) ? 0 : hashArr_.length; + } + + /** + * This is part of a multistep, stateful AnotB operation and sets the given Tuple sketch as the + * second (or n+1th) argument B of A-AND-NOT-B. + * Performs an AND NOT operation with the existing internal state of this AnotB operator. + * + *

An input argument of null or empty is ignored.

+ * + *

Rationale: A null for the second or following arguments is more tolerable because + * A NOT null is still A even if we don't know exactly what the null represents. It + * clearly does not have any content that overlaps with A. Also, because this can be part of + * a multistep operation with multiple notB steps, other following steps can still produce + * a valid result.

+ * + *

Use {@link #getResult(boolean)} to obtain the result.

+ * + * @param skB The incoming Tuple sketch for the second (or following) argument B. + */ + public void notB(final Sketch skB) { + if (skB == null) { return; } //ignore + + final long thetaLongB = skB.getThetaLong(); + final int countB = skB.getRetainedEntries(); + final boolean emptyB = skB.isEmpty(); + + final int id = + SetOperationCornerCases.createCornerCaseId(thetaLong_, curCount_, empty_, thetaLongB, countB, emptyB); + final CornerCase cCase = CornerCase.caseIdToCornerCase(id); + final AnotbAction anotbAction = cCase.getAnotbAction(); + + switch (anotbAction) { + case EMPTY_1_0_T: { + reset(); + break; + } + case DEGEN_MIN_0_F: { + reset(); + thetaLong_ = min(thetaLong_, thetaLongB); + empty_ = false; + break; + } + case DEGEN_THA_0_F: { + empty_ = false; + curCount_ = 0; + //thetaLong_ is ok + break; + } + case TRIM_A: { + thetaLong_ = min(thetaLong_, thetaLongB); + final DataArrays da = trimAndCopyDataArrays(hashArr_, summaryArr_, thetaLong_, true); + hashArr_ = da.hashArr; + curCount_ = (hashArr_ == null) ? 0 : hashArr_.length; + summaryArr_ = da.summaryArr; + //empty_ = is whatever SkA is, + break; + } + case SKETCH_A: { + break; //result is already in A + } + case FULL_ANOTB: { //both A and B should have valid entries. + thetaLong_ = min(thetaLong_, thetaLongB); + final DataArrays daR = getCopyOfResultArraysTuple(thetaLong_, curCount_, hashArr_, summaryArr_, skB); + hashArr_ = daR.hashArr; + curCount_ = (hashArr_ == null) ? 0 : hashArr_.length; + summaryArr_ = daR.summaryArr; + //empty_ = is whatever SkA is, + } + //default: not possible + } + } + + /** + * This is part of a multistep, stateful AnotB operation and sets the given Theta sketch as the + * second (or n+1th) argument B of A-AND-NOT-B. + * Performs an AND NOT operation with the existing internal state of this AnotB operator. + * Calls to this method can be intermingled with calls to + * {@link #notB(org.apache.datasketches.theta2.Sketch)}. + * + *

An input argument of null or empty is ignored.

+ * + *

Rationale: A null for the second or following arguments is more tolerable because + * A NOT null is still A even if we don't know exactly what the null represents. It + * clearly does not have any content that overlaps with A. Also, because this can be part of + * a multistep operation with multiple notB steps, other following steps can still produce + * a valid result.

+ * + *

Use {@link #getResult(boolean)} to obtain the result.

+ * + * @param skB The incoming Theta sketch for the second (or following) argument B. + */ + public void notB(final org.apache.datasketches.theta2.Sketch skB) { + if (skB == null) { return; } //ignore + + final long thetaLongB = skB.getThetaLong(); + final int countB = skB.getRetainedEntries(); + final boolean emptyB = skB.isEmpty(); + + final int id = + SetOperationCornerCases.createCornerCaseId(thetaLong_, curCount_, empty_, thetaLongB, countB, emptyB); + final CornerCase cCase = CornerCase.caseIdToCornerCase(id); + final AnotbAction anotbAction = cCase.getAnotbAction(); + + switch (anotbAction) { + case EMPTY_1_0_T: { + reset(); + break; + } + case DEGEN_MIN_0_F: { + reset(); + thetaLong_ = min(thetaLong_, thetaLongB); + empty_ = false; + break; + } + case DEGEN_THA_0_F: { + empty_ = false; + curCount_ = 0; + //thetaLong_ is ok + break; + } + case TRIM_A: { + thetaLong_ = min(thetaLong_, thetaLongB); + final DataArrays da = trimAndCopyDataArrays(hashArr_, summaryArr_,thetaLong_, true); + hashArr_ = da.hashArr; + curCount_ = (hashArr_ == null) ? 0 : hashArr_.length; + summaryArr_ = da.summaryArr; + break; + } + case SKETCH_A: { + break; //result is already in A + } + case FULL_ANOTB: { //both A and B should have valid entries. + thetaLong_ = min(thetaLong_, thetaLongB); + final DataArrays daB = getCopyOfResultArraysTheta(thetaLong_, curCount_, hashArr_, summaryArr_, skB); + hashArr_ = daB.hashArr; + curCount_ = (hashArr_ == null) ? 0 : hashArr_.length; + summaryArr_ = daB.summaryArr; + //empty_ = is whatever SkA is, + } + //default: not possible + } + } + + /** + * Gets the result of the multistep, stateful operation AnotB that have been executed with calls + * to {@link #setA(Sketch)} and ({@link #notB(Sketch)} or + * {@link #notB(org.apache.datasketches.theta2.Sketch)}). + * + * @param reset If true, clears this operator to the empty state after this result is + * returned. Set this to false if you wish to obtain an intermediate result. 
+ * @return the result of this operation as an unordered {@link CompactSketch}. + */ + public CompactSketch getResult(final boolean reset) { + final CompactSketch result; + if (curCount_ == 0) { + result = new CompactSketch<>(null, null, thetaLong_, thetaLong_ == Long.MAX_VALUE); + } else { + + result = new CompactSketch<>(hashArr_, Util.copySummaryArray(summaryArr_), thetaLong_, false); + } + if (reset) { reset(); } + return result; + } + + /** + * Returns the A-and-not-B set operation on the two given Tuple sketches. + * + *

This is a stateless operation and has no impact on the internal state of this operator. + * Thus, this is not an accumulating update and is independent of the {@link #setA(Sketch)}, + * {@link #notB(Sketch)}, {@link #notB(org.apache.datasketches.theta2.Sketch)}, and + * {@link #getResult(boolean)} methods.

+ * + *

If either argument is null an exception is thrown.

+ * + *

Rationale: In mathematics a "null set" is a set with no members, which we call an empty set. + * That is distinctly different from the java null, which represents a nonexistent object. + * In most cases it is a programming error due to some object that was not properly initialized. + * With a null as the first argument, we cannot know what the user's intent is. + * With a null as the second argument, we can't ignore it as we must return a result and there is + * no following possible viable arguments for the second argument. + * Since it is very likely that a null is a programming error, we throw an exception.

+ * + * @param skA The incoming Tuple sketch for the first argument + * @param skB The incoming Tuple sketch for the second argument + * @param Type of Summary + * @return the result as an unordered {@link CompactSketch} + */ + @SuppressFBWarnings(value = "UWF_FIELD_NOT_INITIALIZED_IN_CONSTRUCTOR", + justification = "hashArr and summaryArr are guaranteed to be valid due to the switch on CornerCase") + public static CompactSketch aNotB( + final Sketch skA, + final Sketch skB) { + if (skA == null || skB == null) { + throw new SketchesArgumentException("Neither argument may be null for this stateless operation."); + } + + final long thetaLongA = skA.getThetaLong(); + final int countA = skA.getRetainedEntries(); + final boolean emptyA = skA.isEmpty(); + + final long thetaLongB = skB.getThetaLong(); + final int countB = skB.getRetainedEntries(); + final boolean emptyB = skB.isEmpty(); + + final int id = + SetOperationCornerCases.createCornerCaseId(thetaLongA, countA, emptyA, thetaLongB, countB, emptyB); + final CornerCase cCase = CornerCase.caseIdToCornerCase(id); + final AnotbAction anotbAction = cCase.getAnotbAction(); + + CompactSketch result = null; + + switch (anotbAction) { + case EMPTY_1_0_T: { + result = new CompactSketch<>(null, null, Long.MAX_VALUE, true); + break; + } + case DEGEN_MIN_0_F: { + final long thetaLong = min(thetaLongA, thetaLongB); + result = new CompactSketch<>(null, null, thetaLong, false); + break; + } + case DEGEN_THA_0_F: { + result = new CompactSketch<>(null, null, thetaLongA, false); + break; + } + case TRIM_A: { + final DataArrays daA = getCopyOfDataArraysTuple(skA); + final long[] hashArrA = daA.hashArr; + final S[] summaryArrA = daA.summaryArr; + final long minThetaLong = min(thetaLongA, thetaLongB); + final DataArrays da = trimAndCopyDataArrays(hashArrA, summaryArrA, minThetaLong, false); + result = new CompactSketch<>(da.hashArr, da.summaryArr, minThetaLong, skA.empty_); + break; + } + case SKETCH_A: { + final DataArrays daA = 
getCopyOfDataArraysTuple(skA); + result = new CompactSketch<>(daA.hashArr, daA.summaryArr, thetaLongA, skA.empty_); + break; + } + case FULL_ANOTB: { //both A and B should have valid entries. + final DataArrays daA = getCopyOfDataArraysTuple(skA); + final long minThetaLong = min(thetaLongA, thetaLongB); + final DataArrays daR = + getCopyOfResultArraysTuple(minThetaLong, daA.hashArr.length, daA.hashArr, daA.summaryArr, skB); + final int countR = (daR.hashArr == null) ? 0 : daR.hashArr.length; + if (countR == 0) { + result = new CompactSketch<>(null, null, minThetaLong, minThetaLong == Long.MAX_VALUE); + } else { + result = new CompactSketch<>(daR.hashArr, daR.summaryArr, minThetaLong, false); + } + } + //default: not possible + } + return result; + } + + /** + * Returns the A-and-not-B set operation on a Tuple sketch and a Theta sketch. + * + *

This is a stateless operation and has no impact on the internal state of this operator. + * Thus, this is not an accumulating update and is independent of the {@link #setA(Sketch)}, + * {@link #notB(Sketch)}, {@link #notB(org.apache.datasketches.theta2.Sketch)}, and + * {@link #getResult(boolean)} methods.

+ * + *

If either argument is null an exception is thrown.

+ * + *

Rationale: In mathematics a "null set" is a set with no members, which we call an empty set. + * That is distinctly different from the java null, which represents a nonexistent object. + * In most cases it is a programming error due to some object that was not properly initialized. + * With a null as the first argument, we cannot know what the user's intent is. + * With a null as the second argument, we can't ignore it as we must return a result and there is + * no following possible viable arguments for the second argument. + * Since it is very likely that a null is a programming error for either argument, + * we throw an exception.

+ * + * @param skA The incoming Tuple sketch for the first argument + * @param skB The incoming Theta sketch for the second argument + * @param Type of Summary + * @return the result as an unordered {@link CompactSketch} + */ + @SuppressFBWarnings(value = "UWF_FIELD_NOT_INITIALIZED_IN_CONSTRUCTOR", + justification = "hashArr and summaryArr are guaranteed to be valid due to the switch on CornerCase") + public static CompactSketch aNotB( + final Sketch skA, + final org.apache.datasketches.theta2.Sketch skB) { + if (skA == null || skB == null) { + throw new SketchesArgumentException("Neither argument may be null for this stateless operation."); + } + + final long thetaLongA = skA.getThetaLong(); + final int countA = skA.getRetainedEntries(); + final boolean emptyA = skA.isEmpty(); + + final long thetaLongB = skB.getThetaLong(); + final int countB = skB.getRetainedEntries(); + final boolean emptyB = skB.isEmpty(); + + final int id = + SetOperationCornerCases.createCornerCaseId(thetaLongA, countA, emptyA, thetaLongB, countB, emptyB); + final CornerCase cCase = CornerCase.caseIdToCornerCase(id); + final AnotbAction anotbAction = cCase.getAnotbAction(); + + CompactSketch result = null; + + switch (anotbAction) { + case EMPTY_1_0_T: { + result = new CompactSketch<>(null, null, Long.MAX_VALUE, true); + break; + } + case DEGEN_MIN_0_F: { + final long thetaLong = min(thetaLongA, thetaLongB); + result = new CompactSketch<>(null, null, thetaLong, false); + break; + } + case DEGEN_THA_0_F: { + result = new CompactSketch<>(null, null, thetaLongA, false); + break; + } + case TRIM_A: { + final DataArrays daA = getCopyOfDataArraysTuple(skA); + final long[] hashArrA = daA.hashArr; + final S[] summaryArrA = daA.summaryArr; + final long minThetaLong = min(thetaLongA, thetaLongB); + final DataArrays da = trimAndCopyDataArrays(hashArrA, summaryArrA, minThetaLong, false); + result = new CompactSketch<>(da.hashArr, da.summaryArr, minThetaLong, skA.empty_); + break; + } + case SKETCH_A: { + 
final DataArrays daA = getCopyOfDataArraysTuple(skA); + result = new CompactSketch<>(daA.hashArr, daA.summaryArr, thetaLongA, skA.empty_); + break; + } + case FULL_ANOTB: { //both A and B have valid entries. + final DataArrays daA = getCopyOfDataArraysTuple(skA); + final long minThetaLong = min(thetaLongA, thetaLongB); + @SuppressFBWarnings(value = "UWF_FIELD_NOT_INITIALIZED_IN_CONSTRUCTOR", + justification = "hashArr and summaryArr are guaranteed to be valid due to the switch on CornerCase") + final DataArrays daR = + getCopyOfResultArraysTheta(minThetaLong, daA.hashArr.length, daA.hashArr, daA.summaryArr, skB); + final int countR = (daR.hashArr == null) ? 0 : daR.hashArr.length; + if (countR == 0) { + result = new CompactSketch<>(null, null, minThetaLong, minThetaLong == Long.MAX_VALUE); + } else { + result = new CompactSketch<>(daR.hashArr, daR.summaryArr, minThetaLong, false); + } + } + //default: not possible + } + return result; + } + + //restricted + + static class DataArrays { + DataArrays() {} + + long[] hashArr; + S[] summaryArr; + } + + private static DataArrays getCopyOfDataArraysTuple( + final Sketch sk) { + final CompactSketch csk; + final DataArrays da = new DataArrays<>(); + if (sk instanceof CompactSketch) { + csk = (CompactSketch) sk; + } else { + csk = ((QuickSelectSketch)sk).compact(); + } + final int count = csk.getRetainedEntries(); + if (count == 0) { + da.hashArr = null; + da.summaryArr = null; + } else { + da.hashArr = csk.getHashArr().clone(); //deep copy, may not be sorted + da.summaryArr = Util.copySummaryArray(csk.getSummaryArr()); + } + return da; + } + + @SuppressWarnings("unchecked") + //Both skA and skB must have entries (count > 0) + private static DataArrays getCopyOfResultArraysTuple( + final long minThetaLong, + final int countA, + final long[] hashArrA, + final S[] summaryArrA, + final Sketch skB) { + final DataArrays daR = new DataArrays<>(); + + //Rebuild/get hashtable of skB + final long[] hashTableB; + + if (skB instanceof 
CompactSketch) { + final CompactSketch cskB = (CompactSketch) skB; + final int countB = skB.getRetainedEntries(); + hashTableB = convertToHashTable(cskB.getHashArr(), countB, minThetaLong, ThetaUtil.REBUILD_THRESHOLD); + } else { + final QuickSelectSketch qskB = (QuickSelectSketch) skB; + hashTableB = qskB.getHashTable(); + } + + //build temporary arrays of skA + final long[] tmpHashArrA = new long[countA]; + final S[] tmpSummaryArrA = Util.newSummaryArray(summaryArrA, countA); + + //search for non matches and build temp arrays + final int lgHTBLen = exactLog2OfLong(hashTableB.length); + int nonMatches = 0; + for (int i = 0; i < countA; i++) { + final long hash = hashArrA[i]; + if (hash != 0 && hash < minThetaLong) { //skips hashes of A >= minTheta + final int index = hashSearch(hashTableB, lgHTBLen, hash); + if (index == -1) { + tmpHashArrA[nonMatches] = hash; + tmpSummaryArrA[nonMatches] = (S) summaryArrA[i].copy(); + nonMatches++; + } + } + } + daR.hashArr = Arrays.copyOfRange(tmpHashArrA, 0, nonMatches); + daR.summaryArr = Arrays.copyOfRange(tmpSummaryArrA, 0, nonMatches); + return daR; + } + + @SuppressWarnings("unchecked") + private static DataArrays getCopyOfResultArraysTheta( + final long minThetaLong, + final int countA, + final long[] hashArrA, + final S[] summaryArrA, + final org.apache.datasketches.theta2.Sketch skB) { + final DataArrays daB = new DataArrays<>(); + + //Rebuild/get hashtable of skB + final long[] hashTableB; //read only + + final long[] hashCacheB; + try { hashCacheB = (long[])GET_CACHE.invoke(skB); + } catch (final Exception e) { throw new SketchesStateException("Reflection Exception " + e); } + + if (skB instanceof org.apache.datasketches.theta2.CompactSketch) { + final int countB = skB.getRetainedEntries(true); + hashTableB = convertToHashTable(hashCacheB, countB, minThetaLong, ThetaUtil.REBUILD_THRESHOLD); + } else { + hashTableB = hashCacheB; + } + + //build temporary result arrays of skA + final long[] tmpHashArrA = new 
long[countA]; + final S[] tmpSummaryArrA = Util.newSummaryArray(summaryArrA, countA); + + //search for non matches and build temp arrays + final int lgHTBLen = exactLog2OfLong(hashTableB.length); + int nonMatches = 0; + for (int i = 0; i < countA; i++) { + final long hash = hashArrA[i]; + if (hash != 0 && hash < minThetaLong) { //skips hashes of A >= minTheta + final int index = hashSearch(hashTableB, lgHTBLen, hash); + if (index == -1) { //not found + tmpHashArrA[nonMatches] = hash; + tmpSummaryArrA[nonMatches] = (S) summaryArrA[i].copy(); + nonMatches++; + } + } + } + //trim the arrays + daB.hashArr = Arrays.copyOfRange(tmpHashArrA, 0, nonMatches); + daB.summaryArr = Arrays.copyOfRange(tmpSummaryArrA, 0, nonMatches); + return daB; + } + + @SuppressWarnings("unchecked") + private static DataArrays trimAndCopyDataArrays( + final long[] hashArr, + final S[] summaryArr, + final long minThetaLong, + final boolean copy) { + + //build temporary arrays + final int countIn = hashArr.length; + final long[] tmpHashArr = new long[countIn]; + final S[] tmpSummaryArr = Util.newSummaryArray(summaryArr, countIn); + int countResult = 0; + for (int i = 0; i < countIn; i++) { + final long hash = hashArr[i]; + if (hash < minThetaLong) { + tmpHashArr[countResult] = hash; + tmpSummaryArr[countResult] = (S) (copy ? summaryArr[i].copy() : summaryArr[i]); + countResult++; + } else { continue; } + } + //Remove empty slots + final DataArrays da = new DataArrays<>(); + da.hashArr = Arrays.copyOfRange(tmpHashArr, 0, countResult); + da.summaryArr = Arrays.copyOfRange(tmpSummaryArr, 0, countResult); + return da; + } + + /** + * Resets this operation back to the empty state. 
+ */ + public void reset() { + empty_ = true; + thetaLong_ = Long.MAX_VALUE; + hashArr_ = null; + summaryArr_ = null; + curCount_ = 0; + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/CompactSketch.java b/src/main/java/org/apache/datasketches/tuple2/CompactSketch.java new file mode 100644 index 000000000..36d7a9b5f --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/CompactSketch.java @@ -0,0 +1,256 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static org.apache.datasketches.thetacommon.HashOperations.count; + +import java.lang.foreign.MemorySegment; +import java.lang.reflect.Array; +import java.nio.ByteOrder; + +import org.apache.datasketches.common.ByteArrayUtil; +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.SketchesArgumentException; + +/** + * CompactSketches are never created directly. 
They are created as a result of + * the compact() method of an UpdatableSketch or as a result of the getResult() + * method of a set operation like Union, Intersection or AnotB. CompactSketch + * consists of a compact list (i.e. no intervening spaces) of hash values, + * corresponding list of Summaries, and a value for theta. The lists may or may + * not be ordered. CompactSketch is read-only. + * + * @param type of Summary + */ +public final class CompactSketch extends Sketch { + private static final byte serialVersionWithSummaryClassNameUID = 1; + private static final byte serialVersionUIDLegacy = 2; + private static final byte serialVersionUID = 3; + private static final short defaultSeedHash = (short) 37836; // for compatibility with C++ + private final long[] hashArr_; + private S[] summaryArr_; + + private enum FlagsLegacy { IS_BIG_ENDIAN, IS_EMPTY, HAS_ENTRIES, IS_THETA_INCLUDED } + + private enum Flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED } + + /** + * Create a CompactSketch from correct components + * @param hashArr compacted hash array + * @param summaryArr compacted summary array + * @param thetaLong long value of theta + * @param empty empty flag + */ + CompactSketch(final long[] hashArr, final S[] summaryArr, final long thetaLong, final boolean empty) { + super(thetaLong, empty, null); + super.thetaLong_ = thetaLong; + super.empty_ = empty; + hashArr_ = hashArr; + summaryArr_ = summaryArr; + } + + /** + * This is to create an instance of a CompactSketch given a serialized form + * + * @param seg MemorySegment object with serialized CompactSketch + * @param deserializer the SummaryDeserializer + */ + CompactSketch(final MemorySegment seg, final SummaryDeserializer deserializer) { + super(Long.MAX_VALUE, true, null); + int offset = 0; + final byte preambleLongs = seg.get(JAVA_BYTE, offset++); + final byte version = seg.get(JAVA_BYTE, offset++); + final byte familyId = seg.get(JAVA_BYTE, offset++); + 
SerializerDeserializer.validateFamily(familyId, preambleLongs); + if (version > serialVersionUID) { + throw new SketchesArgumentException( + "Unsupported serial version. Expected: " + serialVersionUID + " or lower, actual: " + version); + } + SerializerDeserializer + .validateType(seg.get(JAVA_BYTE, offset++), SerializerDeserializer.SketchType.CompactSketch); + if (version <= serialVersionUIDLegacy) { // legacy serial format + final byte flags = seg.get(JAVA_BYTE, offset++); + final boolean isBigEndian = (flags & 1 << FlagsLegacy.IS_BIG_ENDIAN.ordinal()) > 0; + if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { + throw new SketchesArgumentException("Byte order mismatch"); + } + empty_ = (flags & 1 << FlagsLegacy.IS_EMPTY.ordinal()) > 0; + final boolean isThetaIncluded = (flags & 1 << FlagsLegacy.IS_THETA_INCLUDED.ordinal()) > 0; + if (isThetaIncluded) { + thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, offset); + offset += Long.BYTES; + } else { + thetaLong_ = Long.MAX_VALUE; + } + final boolean hasEntries = (flags & 1 << FlagsLegacy.HAS_ENTRIES.ordinal()) > 0; + if (hasEntries) { + int classNameLength = 0; + if (version == serialVersionWithSummaryClassNameUID) { + classNameLength = seg.get(JAVA_BYTE, offset++); + } + final int count = seg.get(JAVA_INT_UNALIGNED, offset); + offset += Integer.BYTES; + if (version == serialVersionWithSummaryClassNameUID) { + offset += classNameLength; + } + hashArr_ = new long[count]; + + for (int i = 0; i < count; i++) { + hashArr_[i] = seg.get(JAVA_LONG_UNALIGNED, offset); + offset += Long.BYTES; + } + for (int i = 0; i < count; i++) { + offset += readSummary(seg, offset, i, count, deserializer); + } + } else { + hashArr_ = new long[0]; + summaryArr_ = null; + } + } else { // current serial format + offset++; //skip unused byte + final byte flags = seg.get(JAVA_BYTE, offset++); + offset += 2; //skip 2 unused bytes + empty_ = (flags & 1 << Flags.IS_EMPTY.ordinal()) > 0; + thetaLong_ = Long.MAX_VALUE; + int count = 
0; + if (!empty_) { + if (preambleLongs == 1) { + count = 1; + } else { + count = seg.get(JAVA_INT_UNALIGNED, offset); + offset += Integer.BYTES; + offset += 4; // unused + if (preambleLongs > 2) { + thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, offset); + offset += Long.BYTES; + } + } + } + hashArr_ = new long[count]; + + for (int i = 0; i < count; i++) { + hashArr_[i] = seg.get(JAVA_LONG_UNALIGNED, offset); + offset += Long.BYTES; + offset += readSummary(seg, offset, i, count, deserializer); + } + } + } + + @SuppressWarnings({"unchecked"}) + private int readSummary(final MemorySegment seg, final int offset, final int i, final int count, + final SummaryDeserializer deserializer) { + final MemorySegment segRegion = seg.asSlice(offset, seg.byteSize() - offset); + final DeserializeResult result = deserializer.heapifySummary(segRegion); + final S summary = result.getObject(); + final Class summaryType = (Class) result.getObject().getClass(); + if (summaryArr_ == null) { + summaryArr_ = (S[]) Array.newInstance(summaryType, count); + } + summaryArr_[i] = summary; + return result.getSize(); + } + + @Override + public CompactSketch compact() { + return this; + } + + long[] getHashArr() { + return hashArr_; + } + + S[] getSummaryArr() { + return summaryArr_; + } + + @Override + public int getRetainedEntries() { + return hashArr_ == null ? 0 : hashArr_.length; + } + + @Override + public int getCountLessThanThetaLong(final long thetaLong) { + return count(hashArr_, thetaLong); + } + + // Layout of first 8 bytes: + // Long || Start Byte Adr: + // Adr: + // || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | + // 0 || seed hash | Flags | unused | SkType | FamID | SerVer | Preamble_Longs | + @Override + public byte[] toByteArray() { + final int count = getRetainedEntries(); + final boolean isSingleItem = count == 1 && !isEstimationMode(); + final int preambleLongs = isEmpty() || isSingleItem ? 1 : isEstimationMode() ? 
3 : 2; + + int summariesSizeBytes = 0; + final byte[][] summariesBytes = new byte[count][]; + if (count > 0) { + for (int i = 0; i < count; i++) { + summariesBytes[i] = summaryArr_[i].toByteArray(); + summariesSizeBytes += summariesBytes[i].length; + } + } + + final int sizeBytes = Long.BYTES * preambleLongs + Long.BYTES * count + summariesSizeBytes; + final byte[] bytes = new byte[sizeBytes]; + int offset = 0; + bytes[offset++] = (byte) preambleLongs; + bytes[offset++] = serialVersionUID; + bytes[offset++] = (byte) Family.TUPLE.getID(); + bytes[offset++] = (byte) SerializerDeserializer.SketchType.CompactSketch.ordinal(); + offset++; // unused + bytes[offset++] = (byte) ( + (1 << Flags.IS_COMPACT.ordinal()) + | (1 << Flags.IS_READ_ONLY.ordinal()) + | (isEmpty() ? 1 << Flags.IS_EMPTY.ordinal() : 0) + ); + ByteArrayUtil.putShortLE(bytes, offset, defaultSeedHash); + offset += Short.BYTES; + if (!isEmpty()) { + if (!isSingleItem) { + ByteArrayUtil.putIntLE(bytes, offset, count); + offset += Integer.BYTES; + offset += 4; // unused + if (isEstimationMode()) { + ByteArrayUtil.putLongLE(bytes, offset, thetaLong_); + offset += Long.BYTES; + } + } + } + for (int i = 0; i < count; i++) { + ByteArrayUtil.putLongLE(bytes, offset, hashArr_[i]); + offset += Long.BYTES; + System.arraycopy(summariesBytes[i], 0, bytes, offset, summariesBytes[i].length); + offset += summariesBytes[i].length; + } + return bytes; + } + + @Override + public TupleSketchIterator iterator() { + return new TupleSketchIterator<>(hashArr_, summaryArr_); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/DeserializeResult.java b/src/main/java/org/apache/datasketches/tuple2/DeserializeResult.java new file mode 100644 index 000000000..a5fc38c31 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/DeserializeResult.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
/**
 * Returns an object and its size in bytes as a result of a deserialize operation
 * @param <T> Type of object
 */
public class DeserializeResult<T> {
  private final T object;
  private final int size;

  /**
   * Creates an instance.
   * @param object Deserialized object.
   * @param size Deserialized size in bytes.
   */
  public DeserializeResult(final T object, final int size) {
    this.object = object;
    this.size = size;
  }

  /**
   * Returns Deserialized object
   * @return Deserialized object
   */
  public T getObject() {
    return object;
  }

  /**
   * Returns size in bytes occupied by the object in the serialized form
   * @return size in bytes occupied by the object in the serialized form
   */
  public int getSize() {
    return size;
  }
}
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2; + +import java.lang.reflect.Array; +import java.util.Arrays; +import java.util.function.Predicate; + +/** + * Class for filtering entries from a {@link Sketch} given a {@link Summary} + * + * @param Summary type against which apply the {@link Predicate} + */ +public class Filter { + private final Predicate predicate; + + /** + * Filter constructor with a {@link Predicate} + * @param predicate Predicate to use in this filter. If the Predicate returns False, the + * element is discarded. 
If the Predicate returns True, then the element is kept in the + * {@link Sketch} + */ + public Filter(final Predicate predicate) { + this.predicate = predicate; + } + + /** + * Filters elements on the provided {@link Sketch} + * + * @param sketchIn The sketch against which apply the {@link Predicate} + * @return A new Sketch with some of the entries filtered out based on the {@link Predicate} + */ + @SuppressWarnings("unchecked") + public CompactSketch filter(final Sketch sketchIn) { + if (sketchIn == null) { + return new CompactSketch<>(null, null, Long.MAX_VALUE, true); + } + final long[] hashes = new long[sketchIn.getRetainedEntries()]; + T[] summaries = null; // lazy init to get class from the first entry + int i = 0; + final TupleSketchIterator it = sketchIn.iterator(); + while (it.next()) { + final T summary = it.getSummary(); + if (predicate.test(summary)) { + hashes[i] = it.getHash(); + if (summaries == null) { + summaries = (T[]) Array.newInstance(summary.getClass(), sketchIn.getRetainedEntries()); + } + summaries[i++] = (T) summary.copy(); + } + } + final boolean isEmpty = i == 0 && !sketchIn.isEstimationMode(); + if (i == 0) { + return new CompactSketch<>(null, null, sketchIn.getThetaLong(), isEmpty); + } + return new CompactSketch<>(Arrays.copyOf(hashes, i), Arrays.copyOf(summaries, i), sketchIn.getThetaLong(), isEmpty); + } +} + diff --git a/src/main/java/org/apache/datasketches/tuple2/HashTables.java b/src/main/java/org/apache/datasketches/tuple2/HashTables.java new file mode 100644 index 000000000..913c53196 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/HashTables.java @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2; + +import static java.lang.Math.ceil; +import static java.lang.Math.max; +import static java.lang.Math.min; +import static org.apache.datasketches.common.Util.ceilingPowerOf2; +import static org.apache.datasketches.thetacommon.HashOperations.hashInsertOnly; +import static org.apache.datasketches.thetacommon.HashOperations.hashSearch; + +import java.lang.reflect.Array; + +import org.apache.datasketches.thetacommon.ThetaUtil; + +@SuppressWarnings("unchecked") +class HashTables { + long[] hashTable = null; + S[] summaryTable = null; + int lgTableSize = 0; + int numKeys = 0; + + HashTables() { } + + //must have valid entries + void fromSketch(final Sketch sketch) { + numKeys = sketch.getRetainedEntries(); + lgTableSize = getLgTableSize(numKeys); + + hashTable = new long[1 << lgTableSize]; + final TupleSketchIterator it = sketch.iterator(); + while (it.next()) { + final long hash = it.getHash(); + final int index = hashInsertOnly(hashTable, lgTableSize, hash); + final S mySummary = (S)it.getSummary().copy(); + if (summaryTable == null) { + summaryTable = (S[]) Array.newInstance(mySummary.getClass(), 1 << lgTableSize); + } + summaryTable[index] = mySummary; + } + } + + //must have valid entries + void fromSketch(final org.apache.datasketches.theta2.Sketch sketch, final S summary) { + numKeys = sketch.getRetainedEntries(true); + lgTableSize = 
getLgTableSize(numKeys); + + hashTable = new long[1 << lgTableSize]; + final org.apache.datasketches.theta2.HashIterator it = sketch.iterator(); + while (it.next()) { + final long hash = it.get(); + final int index = hashInsertOnly(hashTable, lgTableSize, hash); + final S mySummary = (S)summary.copy(); + if (summaryTable == null) { + summaryTable = (S[]) Array.newInstance(mySummary.getClass(), 1 << lgTableSize); + } + summaryTable[index] = mySummary; + } + } + + private void fromArrays(final long[] hashArr, final S[] summaryArr, final int count) { + numKeys = count; + lgTableSize = getLgTableSize(count); + + summaryTable = null; + hashTable = new long[1 << lgTableSize]; + for (int i = 0; i < count; i++) { + final long hash = hashArr[i]; + final int index = hashInsertOnly(hashTable, lgTableSize, hash); + final S mySummary = summaryArr[i]; + if (summaryTable == null) { + summaryTable = (S[]) Array.newInstance(mySummary.getClass(), 1 << lgTableSize); + } + summaryTable[index] = summaryArr[i]; + } + } + + //For Tuple Sketches + HashTables getIntersectHashTables( + final Sketch nextTupleSketch, + final long thetaLong, + final SummarySetOperations summarySetOps) { + + //Match nextSketch data with local instance data, filtering by theta + final int maxMatchSize = min(numKeys, nextTupleSketch.getRetainedEntries()); + final long[] matchHashArr = new long[maxMatchSize]; + final S[] matchSummariesArr = Util.newSummaryArray(summaryTable, maxMatchSize); + int matchCount = 0; + final TupleSketchIterator it = nextTupleSketch.iterator(); + + while (it.next()) { + final long hash = it.getHash(); + if (hash >= thetaLong) { continue; } + final int index = hashSearch(hashTable, lgTableSize, hash); + if (index < 0) { continue; } + //Copy the intersecting items from local hashTables_ + // sequentially into local matchHashArr_ and matchSummaries_ + matchHashArr[matchCount] = hash; + matchSummariesArr[matchCount] = summarySetOps.intersection(summaryTable[index], it.getSummary()); + 
matchCount++; + } + final HashTables resultHT = new HashTables<>(); + resultHT.fromArrays(matchHashArr, matchSummariesArr, matchCount); + return resultHT; + } + + //For Theta Sketches + HashTables getIntersectHashTables( + final org.apache.datasketches.theta2.Sketch nextThetaSketch, + final long thetaLong, + final SummarySetOperations summarySetOps, + final S summary) { + + final Class summaryType = (Class) summary.getClass(); + + //Match nextSketch data with local instance data, filtering by theta + final int maxMatchSize = min(numKeys, nextThetaSketch.getRetainedEntries()); + final long[] matchHashArr = new long[maxMatchSize]; + final S[] matchSummariesArr = (S[]) Array.newInstance(summaryType, maxMatchSize); + int matchCount = 0; + final org.apache.datasketches.theta2.HashIterator it = nextThetaSketch.iterator(); + + //scan B & search A(hashTable) for match + while (it.next()) { + final long hash = it.get(); + if (hash >= thetaLong) { continue; } + final int index = hashSearch(hashTable, lgTableSize, hash); + if (index < 0) { continue; } + //Copy the intersecting items from local hashTables_ + // sequentially into local matchHashArr_ and matchSummaries_ + matchHashArr[matchCount] = hash; + matchSummariesArr[matchCount] = summarySetOps.intersection(summaryTable[index], summary); + matchCount++; + } + final HashTables resultHT = new HashTables<>(); + resultHT.fromArrays(matchHashArr, matchSummariesArr, matchCount); + return resultHT; + } + + void clear() { + hashTable = null; + summaryTable = null; + lgTableSize = 0; + numKeys = 0; + } + + static int getLgTableSize(final int count) { + final int tableSize = max(ceilingPowerOf2((int) ceil(count / 0.75)), 1 << ThetaUtil.MIN_LG_NOM_LONGS); + return Integer.numberOfTrailingZeros(tableSize); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/Intersection.java b/src/main/java/org/apache/datasketches/tuple2/Intersection.java new file mode 100644 index 000000000..8731df0d4 --- /dev/null +++ 
b/src/main/java/org/apache/datasketches/tuple2/Intersection.java @@ -0,0 +1,254 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2; + +import static java.lang.Math.ceil; +import static java.lang.Math.max; +import static java.lang.Math.min; +import static org.apache.datasketches.common.Util.ceilingPowerOf2; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.SketchesStateException; +import org.apache.datasketches.thetacommon.ThetaUtil; + + +/** + * Computes an intersection of two or more generic tuple sketches or generic tuple sketches + * combined with theta sketches. + * A new instance represents the Universal Set. Because the Universal Set + * cannot be realized a getResult() on a new instance will produce an error. + * Every update() computes an intersection with the internal state, which will never + * grow larger and may be reduced to zero. 
+ * + * @param Type of Summary + */ +@SuppressWarnings("unchecked") +public class Intersection { + private final SummarySetOperations summarySetOps_; + private boolean empty_; + private long thetaLong_; + private HashTables hashTables_; + private boolean firstCall_; + + /** + * Creates new Intersection instance with instructions on how to process two summaries that + * intersect. + * @param summarySetOps instance of SummarySetOperations + */ + public Intersection(final SummarySetOperations summarySetOps) { + summarySetOps_ = summarySetOps; + empty_ = false; // universal set at the start + thetaLong_ = Long.MAX_VALUE; + hashTables_ = new HashTables<>(); + firstCall_ = true; + } + + /** + * Perform a stateless intersect set operation on the two given tuple sketches and returns the + * result as an unordered CompactSketch on the heap. + * @param tupleSketchA The first sketch argument. It must not be null. + * @param tupleSketchB The second sketch argument. It must not be null. + * @return an unordered CompactSketch on the heap + */ + public CompactSketch intersect( + final Sketch tupleSketchA, + final Sketch tupleSketchB) { + reset(); + intersect(tupleSketchA); + intersect(tupleSketchB); + final CompactSketch csk = getResult(); + reset(); + return csk; + } + + /** + * Perform a stateless intersect set operation on a tuple sketch and a theta sketch and returns the + * result as an unordered CompactSketch on the heap. + * @param tupleSketch The first sketch argument. It must not be null. + * @param thetaSketch The second sketch argument. It must not be null. + * @param summary the given proxy summary for the theta sketch, which doesn't have one. + * This must not be null. 
+ * @return an unordered CompactSketch on the heap + */ + public CompactSketch intersect( + final Sketch tupleSketch, + final org.apache.datasketches.theta2.Sketch + thetaSketch, final S summary) { + reset(); + intersect(tupleSketch); + intersect(thetaSketch, summary); + final CompactSketch csk = getResult(); + reset(); + return csk; + } + + /** + * Performs a stateful intersection of the internal set with the given tupleSketch. + * @param tupleSketch input sketch to intersect with the internal state. It must not be null. + */ + public void intersect(final Sketch tupleSketch) { + if (tupleSketch == null) { throw new SketchesArgumentException("Sketch must not be null"); } + + final boolean firstCall = firstCall_; + firstCall_ = false; + + // input sketch could be first or next call + + final boolean emptyIn = tupleSketch.isEmpty(); + if (empty_ || emptyIn) { //empty rule + //Whatever the current internal state, we make our local empty. + resetToEmpty(); + return; + } + + final long thetaLongIn = tupleSketch.getThetaLong(); + thetaLong_ = min(thetaLong_, thetaLongIn); //Theta rule + + if (tupleSketch.getRetainedEntries() == 0) { + hashTables_.clear(); + return; + } + // input sketch will have valid entries > 0 + + if (firstCall) { + //Copy firstSketch data into local instance hashTables_ + hashTables_.fromSketch(tupleSketch); + } + + //Next Call + else { + if (hashTables_.numKeys == 0) { return; } + //process intersect with current hashTables + hashTables_ = hashTables_.getIntersectHashTables(tupleSketch, thetaLong_, summarySetOps_); + } + } + + /** + * Performs a stateful intersection of the internal set with the given thetaSketch by combining entries + * using the hashes from the theta sketch and summary values from the given summary and rules + * from the summarySetOps defined by the Intersection constructor. + * @param thetaSketch input theta sketch to intersect with the internal state. It must not be null. 
+ * @param summary the given proxy summary for the theta sketch, which doesn't have one. + * It will be copied for each matching index. It must not be null. + */ + public void intersect(final org.apache.datasketches.theta2.Sketch thetaSketch, final S summary) { + if (thetaSketch == null) { throw new SketchesArgumentException("Sketch must not be null"); } + if (summary == null) { throw new SketchesArgumentException("Summary cannot be null."); } + final boolean firstCall = firstCall_; + firstCall_ = false; + // input sketch is not null, could be first or next call + + final boolean emptyIn = thetaSketch.isEmpty(); + if (empty_ || emptyIn) { //empty rule + //Whatever the current internal state, we make our local empty. + resetToEmpty(); + return; + } + + final long thetaLongIn = thetaSketch.getThetaLong(); + thetaLong_ = min(thetaLong_, thetaLongIn); //Theta rule + + final int countIn = thetaSketch.getRetainedEntries(); + if (countIn == 0) { + hashTables_.clear(); + return; + } + // input sketch will have valid entries > 0 + + if (firstCall) { + final org.apache.datasketches.theta2.Sketch firstSketch = thetaSketch; + //Copy firstSketch data into local instance hashTables_ + hashTables_.fromSketch(firstSketch, summary); + } + + //Next Call + else { + if (hashTables_.numKeys == 0) { return; } + hashTables_ = hashTables_.getIntersectHashTables(thetaSketch, thetaLongIn, summarySetOps_, summary); + } + } + + /** + * Gets the internal set as an unordered CompactSketch + * @return result of the intersections so far + */ + public CompactSketch getResult() { + if (firstCall_) { + throw new SketchesStateException( + "getResult() with no intervening intersections is not a legal result."); + } + final int countIn = hashTables_.numKeys; + if (countIn == 0) { + return new CompactSketch<>(null, null, thetaLong_, empty_); + } + + final int tableSize = hashTables_.hashTable.length; + + final long[] hashArr = new long[countIn]; + final S[] summaryArr = 
Util.newSummaryArray(hashTables_.summaryTable, countIn); + + //compact the arrays + int cnt = 0; + for (int i = 0; i < tableSize; i++) { + final long hash = hashTables_.hashTable[i]; + if (hash == 0 || hash > thetaLong_) { continue; } + hashArr[cnt] = hash; + summaryArr[cnt] = (S) hashTables_.summaryTable[i].copy(); + cnt++; + } + assert cnt == countIn; + return new CompactSketch<>(hashArr, summaryArr, thetaLong_, empty_); + } + + /** + * Returns true if there is a valid intersection result available + * @return true if there is a valid intersection result available + */ + public boolean hasResult() { + return !firstCall_; + } + + /** + * Resets the internal set to the initial state, which represents the Universal Set + */ + public void reset() { + hardReset(); + } + + private void hardReset() { + empty_ = false; + thetaLong_ = Long.MAX_VALUE; + hashTables_.clear(); + firstCall_ = true; + } + + private void resetToEmpty() { + empty_ = true; + thetaLong_ = Long.MAX_VALUE; + hashTables_.clear(); + firstCall_ = false; + } + + static int getLgTableSize(final int count) { + final int tableSize = max(ceilingPowerOf2((int) ceil(count / 0.75)), 1 << ThetaUtil.MIN_LG_NOM_LONGS); + return Integer.numberOfTrailingZeros(tableSize); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/JaccardSimilarity.java b/src/main/java/org/apache/datasketches/tuple2/JaccardSimilarity.java new file mode 100644 index 000000000..161478d86 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/JaccardSimilarity.java @@ -0,0 +1,370 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2; + +import static java.lang.Math.max; +import static java.lang.Math.min; +import static org.apache.datasketches.common.Util.ceilingPowerOf2; +import static org.apache.datasketches.thetacommon.BoundsOnRatiosInTupleSketchedSets2.getEstimateOfBoverA; +import static org.apache.datasketches.thetacommon.BoundsOnRatiosInTupleSketchedSets2.getLowerBoundForBoverA; +import static org.apache.datasketches.thetacommon.BoundsOnRatiosInTupleSketchedSets2.getUpperBoundForBoverA; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * Jaccard similarity of two Tuple Sketches, or alternatively, of a Tuple and Theta Sketch. + * + *

<p>Note: only retained hash values are compared, and the Tuple summary values are not accounted for in the + * similarity measure.</p>

+ * + * @author Lee Rhodes + * @author David Cromberge + */ +public final class JaccardSimilarity { + private static final double[] ZEROS = {0.0, 0.0, 0.0}; // LB, Estimate, UB + private static final double[] ONES = {1.0, 1.0, 1.0}; + + /** + * Computes the Jaccard similarity index with upper and lower bounds. The Jaccard similarity index + * J(A,B) = (A ^ B)/(A U B) is used to measure how similar the two sketches are to each + * other. If J = 1.0, the sketches are considered equal. If J = 0, the two sketches are + * distinct from each other. A Jaccard of .95 means the overlap between the two + * populations is 95% of the union of the two populations. + * + *

Note: For very large pairs of sketches, where the configured nominal entries of the sketches + * are 2^25 or 2^26, this method may produce unpredictable results. + * + * @param sketchA The first argument, a Tuple sketch with summary type S + * @param sketchB The second argument, a Tuple sketch with summary type S + * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries. + * @param Summary + * @return a double array {LowerBound, Estimate, UpperBound} of the Jaccard index. + * The Upper and Lower bounds are for a confidence interval of 95.4% or +/- 2 standard deviations. + */ + public static double[] jaccard( + final Sketch sketchA, + final Sketch sketchB, + final SummarySetOperations summarySetOps) { + //Corner case checks + if (sketchA == null || sketchB == null) { return ZEROS.clone(); } + if (sketchA.isEmpty() && sketchB.isEmpty()) { return ONES.clone(); } + if (sketchA.isEmpty() || sketchB.isEmpty()) { return ZEROS.clone(); } + + final int countA = sketchA.getRetainedEntries(); + final int countB = sketchB.getRetainedEntries(); + + //Create the Union + final int minK = 1 << ThetaUtil.MIN_LG_NOM_LONGS; + final int maxK = 1 << ThetaUtil.MAX_LG_NOM_LONGS; + final int newK = max(min(ceilingPowerOf2(countA + countB), maxK), minK); + final Union union = new Union<>(newK, summarySetOps); + union.union(sketchA); + union.union(sketchB); + + final Sketch unionAB = union.getResult(); + final long thetaLongUAB = unionAB.getThetaLong(); + final long thetaLongA = sketchA.getThetaLong(); + final long thetaLongB = sketchB.getThetaLong(); + final int countUAB = unionAB.getRetainedEntries(); + + //Check for identical data + if (countUAB == countA && countUAB == countB + && thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) { + return ONES.clone(); + } + + //Create the Intersection + final Intersection inter = new Intersection<>(summarySetOps); + inter.intersect(sketchA); + inter.intersect(sketchB); + inter.intersect(unionAB); //ensures 
that intersection is a subset of the union + final Sketch interABU = inter.getResult(); + + final double lb = getLowerBoundForBoverA(unionAB, interABU); + final double est = getEstimateOfBoverA(unionAB, interABU); + final double ub = getUpperBoundForBoverA(unionAB, interABU); + return new double[] {lb, est, ub}; + } + + /** + * Computes the Jaccard similarity index with upper and lower bounds. The Jaccard similarity index + * J(A,B) = (A ^ B)/(A U B) is used to measure how similar the two sketches are to each + * other. If J = 1.0, the sketches are considered equal. If J = 0, the two sketches are + * distinct from each other. A Jaccard of .95 means the overlap between the two + * populations is 95% of the union of the two populations. + * + *

Note: For very large pairs of sketches, where the configured nominal entries of the sketches + * are 2^25 or 2^26, this method may produce unpredictable results. + * + * @param sketchA The first argument, a Tuple sketch with summary type S + * @param sketchB The second argument, a Theta sketch + * @param summary the given proxy summary for the theta sketch, which doesn't have one. + * This may not be null. + * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries. + * @param Summary + * @return a double array {LowerBound, Estimate, UpperBound} of the Jaccard index. + * The Upper and Lower bounds are for a confidence interval of 95.4% or +/- 2 standard deviations. + */ + public static double[] jaccard( + final Sketch sketchA, + final org.apache.datasketches.theta2.Sketch sketchB, + final S summary, final SummarySetOperations summarySetOps) { + // Null case checks + if (summary == null) { + throw new SketchesArgumentException("Summary cannot be null."); } + + //Corner case checks + if (sketchA == null || sketchB == null) { return ZEROS.clone(); } + if (sketchA.isEmpty() && sketchB.isEmpty()) { return ONES.clone(); } + if (sketchA.isEmpty() || sketchB.isEmpty()) { return ZEROS.clone(); } + + final int countA = sketchA.getRetainedEntries(); + final int countB = sketchB.getRetainedEntries(true); + + //Create the Union + final int minK = 1 << ThetaUtil.MIN_LG_NOM_LONGS; + final int maxK = 1 << ThetaUtil.MAX_LG_NOM_LONGS; + final int newK = max(min(ceilingPowerOf2(countA + countB), maxK), minK); + final Union union = new Union<>(newK, summarySetOps); + union.union(sketchA); + union.union(sketchB, summary); + + final Sketch unionAB = union.getResult(); + final long thetaLongUAB = unionAB.getThetaLong(); + final long thetaLongA = sketchA.getThetaLong(); + final long thetaLongB = sketchB.getThetaLong(); + final int countUAB = unionAB.getRetainedEntries(); + + //Check for identical data + if (countUAB == countA && countUAB == countB + && 
thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) { + return ONES.clone(); + } + + //Create the Intersection + final Intersection inter = new Intersection<>(summarySetOps); + inter.intersect(sketchA); + inter.intersect(sketchB, summary); + inter.intersect(unionAB); //ensures that intersection is a subset of the union + final Sketch interABU = inter.getResult(); + + final double lb = getLowerBoundForBoverA(unionAB, interABU); + final double est = getEstimateOfBoverA(unionAB, interABU); + final double ub = getUpperBoundForBoverA(unionAB, interABU); + return new double[] {lb, est, ub}; + } + + /** + * Returns true if the two given sketches have exactly the same hash values and the same + * theta values. Thus, they are equivalent. + * @param sketchA The first argument, a Tuple sketch with summary type S + * @param sketchB The second argument, a Tuple sketch with summary type S + * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries. + * @param Summary + * @return true if the two given sketches have exactly the same hash values and the same + * theta values. 
+ */ + public static boolean exactlyEqual( + final Sketch sketchA, + final Sketch sketchB, + final SummarySetOperations summarySetOps) { + //Corner case checks + if (sketchA == null || sketchB == null) { return false; } + if (sketchA == sketchB) { return true; } + if (sketchA.isEmpty() && sketchB.isEmpty()) { return true; } + if (sketchA.isEmpty() || sketchB.isEmpty()) { return false; } + + final int countA = sketchA.getRetainedEntries(); + final int countB = sketchB.getRetainedEntries(); + + //Create the Union + final Union union = new Union<>(ceilingPowerOf2(countA + countB), summarySetOps); + union.union(sketchA); + union.union(sketchB); + final Sketch unionAB = union.getResult(); + final long thetaLongUAB = unionAB.getThetaLong(); + final long thetaLongA = sketchA.getThetaLong(); + final long thetaLongB = sketchB.getThetaLong(); + final int countUAB = unionAB.getRetainedEntries(); + + //Check for identical counts and thetas + if (countUAB == countA && countUAB == countB + && thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) { + return true; + } + return false; + } + + /** + * Returns true if the two given sketches have exactly the same hash values and the same + * theta values. Thus, they are equivalent. + * @param sketchA The first argument, a Tuple sketch with summary type S + * @param sketchB The second argument, a Theta sketch + * @param summary the given proxy summary for the theta sketch, which doesn't have one. + * This may not be null. + * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries. + * @param Summary + * @return true if the two given sketches have exactly the same hash values and the same + * theta values. 
+ */ + public static boolean exactlyEqual( + final Sketch sketchA, + final org.apache.datasketches.theta2.Sketch sketchB, + final S summary, final SummarySetOperations summarySetOps) { + // Null case checks + if (summary == null) { + throw new SketchesArgumentException("Summary cannot be null."); } + + //Corner case checks + if (sketchA == null || sketchB == null) { return false; } + if (sketchA.isEmpty() && sketchB.isEmpty()) { return true; } + if (sketchA.isEmpty() || sketchB.isEmpty()) { return false; } + + final int countA = sketchA.getRetainedEntries(); + final int countB = sketchB.getRetainedEntries(true); + + //Create the Union + final Union union = new Union<>(ceilingPowerOf2(countA + countB), summarySetOps); + union.union(sketchA); + union.union(sketchB, summary); + final Sketch unionAB = union.getResult(); + final long thetaLongUAB = unionAB.getThetaLong(); + final long thetaLongA = sketchA.getThetaLong(); + final long thetaLongB = sketchB.getThetaLong(); + final int countUAB = unionAB.getRetainedEntries(); + + //Check for identical counts and thetas + if (countUAB == countA && countUAB == countB + && thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) { + return true; + } + return false; + } + + /** + * Tests similarity of a measured Sketch against an expected Sketch. + * Computes the lower bound of the Jaccard index JLB of the measured and + * expected sketches. + * if JLB ≥ threshold, then the sketches are considered to be + * similar with a confidence of 97.7%. + * + * @param measured a Tuple sketch with summary type S to be tested + * @param expected the reference Tuple sketch with summary type S that is considered to be correct. + * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries. + * @param threshold a real value between zero and one. + * @param Summary + * @return if true, the similarity of the two sketches is greater than the given threshold + * with at least 97.7% confidence. 
+ */ + public static boolean similarityTest( + final Sketch measured, final Sketch expected, + final SummarySetOperations summarySetOps, + final double threshold) { + //index 0: the lower bound + //index 1: the mean estimate + //index 2: the upper bound + final double jRatioLB = jaccard(measured, expected, summarySetOps)[0]; //choosing the lower bound + return jRatioLB >= threshold; + } + + /** + * Tests similarity of a measured Sketch against an expected Sketch. + * Computes the lower bound of the Jaccard index JLB of the measured and + * expected sketches. + * if JLB ≥ threshold, then the sketches are considered to be + * similar with a confidence of 97.7%. + * + * @param measured a Tuple sketch with summary type S to be tested + * @param expected the reference Theta sketch that is considered to be correct. + * @param summary the given proxy summary for the theta sketch, which doesn't have one. + * This may not be null. + * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries. + * @param threshold a real value between zero and one. + * @param Summary + * @return if true, the similarity of the two sketches is greater than the given threshold + * with at least 97.7% confidence. + */ + public static boolean similarityTest( + final Sketch measured, final org.apache.datasketches.theta2.Sketch expected, + final S summary, final SummarySetOperations summarySetOps, + final double threshold) { + //index 0: the lower bound + //index 1: the mean estimate + //index 2: the upper bound + final double jRatioLB = jaccard(measured, expected, summary, summarySetOps)[0]; //choosing the lower bound + return jRatioLB >= threshold; + } + + /** + * Tests dissimilarity of a measured Sketch against an expected Sketch. + * Computes the upper bound of the Jaccard index JUB of the measured and + * expected sketches. + * if JUB ≤ threshold, then the sketches are considered to be + * dissimilar with a confidence of 97.7%. 
+ * + * @param measured a Tuple sketch with summary type S to be tested + * @param expected the reference Tuple sketch with summary type S that is considered to be correct. + * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries. + * @param threshold a real value between zero and one. + * @param Summary + * @return if true, the dissimilarity of the two sketches is greater than the given threshold + * with at least 97.7% confidence. + */ + public static boolean dissimilarityTest( + final Sketch measured, final Sketch expected, + final SummarySetOperations summarySetOps, + final double threshold) { + //index 0: the lower bound + //index 1: the mean estimate + //index 2: the upper bound + final double jRatioUB = jaccard(measured, expected, summarySetOps)[2]; //choosing the upper bound + return jRatioUB <= threshold; + } + + /** + * Tests dissimilarity of a measured Sketch against an expected Sketch. + * Computes the upper bound of the Jaccard index JUB of the measured and + * expected sketches. + * if JUB ≤ threshold, then the sketches are considered to be + * dissimilar with a confidence of 97.7%. + * + * @param measured a Tuple sketch with summary type S to be tested + * @param expected the reference Theta sketch that is considered to be correct. + * @param summary the given proxy summary for the theta sketch, which doesn't have one. + * This may not be null. + * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries. + * @param threshold a real value between zero and one. + * @param Summary + * @return if true, the dissimilarity of the two sketches is greater than the given threshold + * with at least 97.7% confidence.
+ */ + public static boolean dissimilarityTest( + final Sketch measured, final org.apache.datasketches.theta2.Sketch expected, + final S summary, final SummarySetOperations summarySetOps, + final double threshold) { + //index 0: the lower bound + //index 1: the mean estimate + //index 2: the upper bound + final double jRatioUB = jaccard(measured, expected, summary, summarySetOps)[2]; //choosing the upper bound + return jRatioUB <= threshold; + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/QuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple2/QuickSelectSketch.java new file mode 100644 index 000000000..6823cec3d --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/QuickSelectSketch.java @@ -0,0 +1,621 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple2; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static org.apache.datasketches.common.Util.ceilingPowerOf2; +import static org.apache.datasketches.common.Util.checkBounds; +import static org.apache.datasketches.common.Util.exactLog2OfLong; +import static org.apache.datasketches.thetacommon.HashOperations.count; + +import java.lang.foreign.MemorySegment; +import java.lang.reflect.Array; +import java.nio.ByteOrder; +import java.util.Objects; + +import org.apache.datasketches.common.ByteArrayUtil; +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.thetacommon.HashOperations; +import org.apache.datasketches.thetacommon.QuickSelect; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * A generic tuple sketch using the QuickSelect algorithm. + * + * @param type of Summary + */ +class QuickSelectSketch extends Sketch { + private static final byte serialVersionUID = 2; + + private enum Flags { IS_BIG_ENDIAN, IS_IN_SAMPLING_MODE, IS_EMPTY, HAS_ENTRIES, IS_THETA_INCLUDED } + + private static final int DEFAULT_LG_RESIZE_FACTOR = ResizeFactor.X8.lg(); + private final int nomEntries_; + private final int lgResizeFactor_; + private final float samplingProbability_; + private int lgCurrentCapacity_; + private int retEntries_; + private int rebuildThreshold_; + private long[] hashTable_; + S[] summaryTable_; + + /** + * This is to create a new instance of a QuickSelectSketch with default resize factor. + * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than + * given value. 
+ * @param summaryFactory An instance of a SummaryFactory. + */ + QuickSelectSketch( + final int nomEntries, + final SummaryFactory summaryFactory) { + this(nomEntries, DEFAULT_LG_RESIZE_FACTOR, summaryFactory); + } + + /** + * This is to create a new instance of a QuickSelectSketch with custom resize factor + * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than + * or equal to the given value. + * @param lgResizeFactor log2(resizeFactor) - value from 0 to 3: + *

+   * 0 - no resizing (max size allocated),
+   * 1 - double internal hash table each time it reaches a threshold
+   * 2 - grow four times
+   * 3 - grow eight times (default)
+   * 
+ * @param summaryFactory An instance of a SummaryFactory. + */ + QuickSelectSketch( + final int nomEntries, + final int lgResizeFactor, + final SummaryFactory summaryFactory) { + this(nomEntries, lgResizeFactor, 1f, summaryFactory); + } + + /** + * This is to create a new instance of a QuickSelectSketch with custom resize factor and sampling + * probability + * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than + * or equal to the given value. + * @param lgResizeFactor log2(resizeFactor) - value from 0 to 3: + *
+   * 0 - no resizing (max size allocated),
+   * 1 - double internal hash table each time it reaches a threshold
+   * 2 - grow four times
+   * 3 - grow eight times (default)
+   * 
+ * @param samplingProbability the given sampling probability + * @param summaryFactory An instance of a SummaryFactory. + */ + QuickSelectSketch( + final int nomEntries, + final int lgResizeFactor, + final float samplingProbability, + final SummaryFactory summaryFactory) { + this( + nomEntries, + lgResizeFactor, + samplingProbability, + summaryFactory, + Util.getStartingCapacity(nomEntries, lgResizeFactor) + ); + } + + /** + * Target constructor for above constructors for a new instance. + * @param nomEntries Nominal number of entries. + * @param lgResizeFactor log2(resizeFactor) + * @param samplingProbability the given sampling probability + * @param summaryFactory An instance of a SummaryFactory. + * @param startingSize starting size of the sketch. + */ + private QuickSelectSketch( + final int nomEntries, + final int lgResizeFactor, + final float samplingProbability, + final SummaryFactory summaryFactory, + final int startingSize) { + super( + (long) (Long.MAX_VALUE * (double) samplingProbability), + true, + summaryFactory); + nomEntries_ = ceilingPowerOf2(nomEntries); + lgResizeFactor_ = lgResizeFactor; + samplingProbability_ = samplingProbability; + lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingSize); + retEntries_ = 0; + hashTable_ = new long[startingSize]; //must be before setRebuildThreshold + rebuildThreshold_ = setRebuildThreshold(hashTable_, nomEntries_); + summaryTable_ = null; // wait for the first summary to call Array.newInstance() + } + + /** + * Copy constructor + * @param sketch the QuickSelectSketch to be deep copied. 
+ */ + QuickSelectSketch(final QuickSelectSketch sketch) { + super( + sketch.thetaLong_, + sketch.empty_, + sketch.summaryFactory_); + nomEntries_ = sketch.nomEntries_; + lgResizeFactor_ = sketch.lgResizeFactor_; + samplingProbability_ = sketch.samplingProbability_; + lgCurrentCapacity_ = sketch.lgCurrentCapacity_; + retEntries_ = sketch.retEntries_; + hashTable_ = sketch.hashTable_.clone(); + rebuildThreshold_ = sketch.rebuildThreshold_; + summaryTable_ = Util.copySummaryArray(sketch.summaryTable_); + } + + /** + * This is to create an instance of a QuickSelectSketch given a serialized form + * @param seg MemorySegment object with serialized QuickSelectSketch + * @param deserializer the SummaryDeserializer + * @param summaryFactory the SummaryFactory + * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated. + * This capability will be removed in a future release. + * Heapifying a CompactSketch is not deprecated. + */ + @Deprecated + QuickSelectSketch( + final MemorySegment seg, + final SummaryDeserializer deserializer, + final SummaryFactory summaryFactory) { + this(new Validate<>(), seg, deserializer, summaryFactory); + } + + /* + * This private constructor is used to protect against "Finalizer attacks". + * The private static inner class Validate performs validation and deserialization + * from the input Memory and may throw exceptions. In order to protect against the attack, we must + * perform this validation prior to the constructor's super reaches the Object class. + * Making QuickSelectSketch final won't work here because UpdatableSketch is a subclass. + * Using an empty final finalizer() is not recommended and is deprecated as of Java9. 
+ */ + private QuickSelectSketch( + final Validate val, + final MemorySegment seg, + final SummaryDeserializer deserializer, + final SummaryFactory summaryFactory) { + super(val.validate(seg, deserializer), val.myEmpty, summaryFactory); + nomEntries_ = val.myNomEntries; + lgResizeFactor_ = val.myLgResizeFactor; + samplingProbability_ = val.mySamplingProbability; + lgCurrentCapacity_ = val.myLgCurrentCapacity; + retEntries_ = val.myRetEntries; + rebuildThreshold_ = val.myRebuildThreshold; + hashTable_ = val.myHashTable; + summaryTable_ = val.mySummaryTable; + } + + private static final class Validate { + //super fields + long myThetaLong; + boolean myEmpty; + //this fields + int myNomEntries; + int myLgResizeFactor; + float mySamplingProbability; + int myLgCurrentCapacity; + int myRetEntries; + int myRebuildThreshold; + long[] myHashTable; + S[] mySummaryTable; + + @SuppressWarnings("unchecked") + long validate( + final MemorySegment seg, + final SummaryDeserializer deserializer) { + Objects.requireNonNull(seg, "SourceMemory must not be null."); + Objects.requireNonNull(deserializer, "Deserializer must not be null."); + checkBounds(0, 8, seg.byteSize()); + + int offset = 0; + final byte preambleLongs = seg.get(JAVA_BYTE, offset++); //byte 0 PreLongs + final byte version = seg.get(JAVA_BYTE, offset++); //byte 1 SerVer + final byte familyId = seg.get(JAVA_BYTE, offset++); //byte 2 FamID + SerializerDeserializer.validateFamily(familyId, preambleLongs); + if (version > serialVersionUID) { + throw new SketchesArgumentException( + "Unsupported serial version. 
Expected: " + serialVersionUID + " or lower, actual: " + + version); + } + SerializerDeserializer.validateType(seg.get(JAVA_BYTE, offset++), //byte 3 + SerializerDeserializer.SketchType.QuickSelectSketch); + final byte flags = seg.get(JAVA_BYTE, offset++); //byte 4 + final boolean isBigEndian = (flags & 1 << Flags.IS_BIG_ENDIAN.ordinal()) > 0; + if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { + throw new SketchesArgumentException("Endian byte order mismatch"); + } + myNomEntries = 1 << seg.get(JAVA_BYTE, offset++); //byte 5 + myLgCurrentCapacity = seg.get(JAVA_BYTE, offset++); //byte 6 + myLgResizeFactor = seg.get(JAVA_BYTE, offset++); //byte 7 + + checkBounds(0, preambleLongs * 8L, seg.byteSize()); + final boolean isInSamplingMode = (flags & 1 << Flags.IS_IN_SAMPLING_MODE.ordinal()) > 0; + mySamplingProbability = isInSamplingMode ? seg.get(JAVA_FLOAT_UNALIGNED, offset) : 1f; //bytes 8 - 11 + if (isInSamplingMode) { + offset += Float.BYTES; + } + + final boolean isThetaIncluded = (flags & 1 << Flags.IS_THETA_INCLUDED.ordinal()) > 0; + if (isThetaIncluded) { + myThetaLong = seg.get(JAVA_LONG_UNALIGNED, offset); + offset += Long.BYTES; + } else { + myThetaLong = (long) (Long.MAX_VALUE * (double) mySamplingProbability); + } + + int count = 0; + final boolean hasEntries = (flags & (1 << Flags.HAS_ENTRIES.ordinal())) > 0; + if (hasEntries) { + count = seg.get(JAVA_INT_UNALIGNED, offset); + offset += Integer.BYTES; + } + final int currentCapacity = 1 << myLgCurrentCapacity; + myHashTable = new long[currentCapacity]; + for (int i = 0; i < count; i++) { + final long hash = seg.get(JAVA_LONG_UNALIGNED, offset); + offset += Long.BYTES; + final MemorySegment segRegion = seg.asSlice(offset, seg.byteSize() - offset); + final DeserializeResult summaryResult = deserializer.heapifySummary(segRegion); + final S summary = (S) summaryResult.getObject(); + offset += summaryResult.getSize(); + //in-place equivalent to insert(hash, summary): + final int index = 
HashOperations.hashInsertOnly(myHashTable, myLgCurrentCapacity, hash); + if (mySummaryTable == null) { + mySummaryTable = (S[]) Array.newInstance(summary.getClass(), myHashTable.length); + } + mySummaryTable[index] = summary; + myRetEntries++; + myEmpty = false; + } + myEmpty = (flags & 1 << Flags.IS_EMPTY.ordinal()) > 0; + myRebuildThreshold = setRebuildThreshold(myHashTable, myNomEntries); + return myThetaLong; + } + + } //end class Validate + + /** + * @return a deep copy of this sketch + */ + QuickSelectSketch copy() { + return new QuickSelectSketch<>(this); + } + + long[] getHashTable() { + return hashTable_; + } + + @Override + public int getRetainedEntries() { + return retEntries_; + } + + @Override + public int getCountLessThanThetaLong(final long thetaLong) { + return count(hashTable_, thetaLong); + } + + S[] getSummaryTable() { + return summaryTable_; + } + + /** + * Get configured nominal number of entries + * @return nominal number of entries + */ + public int getNominalEntries() { + return nomEntries_; + } + + /** + * Get log_base2 of Nominal Entries + * @return log_base2 of Nominal Entries + */ + public int getLgK() { + return exactLog2OfLong(nomEntries_); + } + + /** + * Get configured sampling probability + * @return sampling probability + */ + public float getSamplingProbability() { + return samplingProbability_; + } + + /** + * Get current capacity + * @return current capacity + */ + public int getCurrentCapacity() { + return 1 << lgCurrentCapacity_; + } + + /** + * Get configured resize factor + * @return resize factor + */ + public ResizeFactor getResizeFactor() { + return ResizeFactor.getRF(lgResizeFactor_); + } + + /** + * Rebuilds reducing the actual number of entries to the nominal number of entries if needed + */ + public void trim() { + if (retEntries_ > nomEntries_) { + updateTheta(); + resize(hashTable_.length); + } + } + + /** + * Resets this sketch an empty state. 
+ */ + public void reset() { + empty_ = true; + retEntries_ = 0; + thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability_); + final int startingCapacity = Util.getStartingCapacity(nomEntries_, lgResizeFactor_); + lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingCapacity); + hashTable_ = new long[startingCapacity]; + summaryTable_ = null; // wait for the first summary to call Array.newInstance() + rebuildThreshold_ = setRebuildThreshold(hashTable_, nomEntries_); + } + + /** + * Converts the current state of the sketch into a compact sketch + * @return compact sketch + */ + @Override + @SuppressWarnings("unchecked") + public CompactSketch compact() { + if (getRetainedEntries() == 0) { + if (empty_) { return new CompactSketch<>(null, null, Long.MAX_VALUE, true); } + return new CompactSketch<>(null, null, thetaLong_, false); + } + final long[] hashArr = new long[getRetainedEntries()]; + final S[] summaryArr = Util.newSummaryArray(summaryTable_, getRetainedEntries()); + int i = 0; + for (int j = 0; j < hashTable_.length; j++) { + if (summaryTable_[j] != null) { + hashArr[i] = hashTable_[j]; + summaryArr[i] = (S)summaryTable_[j].copy(); + i++; + } + } + return new CompactSketch<>(hashArr, summaryArr, thetaLong_, empty_); + } + + // Layout of first 8 bytes: + // Long || Start Byte Adr: + // Adr: + // || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | + // 0 || RF | lgArr | lgNom | Flags | SkType | FamID | SerVer | Preamble_Longs | + /** + * This serializes an UpdatableSketch (QuickSelectSketch). + * @return serialized representation of an UpdatableSketch (QuickSelectSketch). + * @deprecated As of 3.0.0, serializing an UpdatableSketch is deprecated. + * This capability will be removed in a future release. + * Serializing a CompactSketch is not deprecated. 
+ */ + @Deprecated + @Override + public byte[] toByteArray() { + byte[][] summariesBytes = null; + int summariesBytesLength = 0; + if (retEntries_ > 0) { + summariesBytes = new byte[retEntries_][]; + int i = 0; + for (int j = 0; j < summaryTable_.length; j++) { + if (summaryTable_[j] != null) { + summariesBytes[i] = summaryTable_[j].toByteArray(); + summariesBytesLength += summariesBytes[i].length; + i++; + } + } + } + int sizeBytes = + Byte.BYTES // preamble longs + + Byte.BYTES // serial version + + Byte.BYTES // family + + Byte.BYTES // sketch type + + Byte.BYTES // flags + + Byte.BYTES // log2(nomEntries) + + Byte.BYTES // log2(currentCapacity) + + Byte.BYTES; // log2(resizeFactor) + if (isInSamplingMode()) { + sizeBytes += Float.BYTES; // samplingProbability + } + final boolean isThetaIncluded = isInSamplingMode() + ? thetaLong_ < samplingProbability_ : thetaLong_ < Long.MAX_VALUE; + if (isThetaIncluded) { + sizeBytes += Long.BYTES; + } + if (retEntries_ > 0) { + sizeBytes += Integer.BYTES; // count + } + sizeBytes += Long.BYTES * retEntries_ + summariesBytesLength; + final byte[] bytes = new byte[sizeBytes]; + int offset = 0; + bytes[offset++] = PREAMBLE_LONGS; + bytes[offset++] = serialVersionUID; + bytes[offset++] = (byte) Family.TUPLE.getID(); + bytes[offset++] = (byte) SerializerDeserializer.SketchType.QuickSelectSketch.ordinal(); + final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN); + bytes[offset++] = (byte) ( + (isBigEndian ? 1 << Flags.IS_BIG_ENDIAN.ordinal() : 0) + | (isInSamplingMode() ? 1 << Flags.IS_IN_SAMPLING_MODE.ordinal() : 0) + | (empty_ ? 1 << Flags.IS_EMPTY.ordinal() : 0) + | (retEntries_ > 0 ? 1 << Flags.HAS_ENTRIES.ordinal() : 0) + | (isThetaIncluded ? 
1 << Flags.IS_THETA_INCLUDED.ordinal() : 0) + ); + bytes[offset++] = (byte) Integer.numberOfTrailingZeros(nomEntries_); + bytes[offset++] = (byte) lgCurrentCapacity_; + bytes[offset++] = (byte) lgResizeFactor_; + if (samplingProbability_ < 1f) { + ByteArrayUtil.putFloatLE(bytes, offset, samplingProbability_); + offset += Float.BYTES; + } + if (isThetaIncluded) { + ByteArrayUtil.putLongLE(bytes, offset, thetaLong_); + offset += Long.BYTES; + } + if (retEntries_ > 0) { + ByteArrayUtil.putIntLE(bytes, offset, retEntries_); + offset += Integer.BYTES; + } + if (retEntries_ > 0) { + int i = 0; + for (int j = 0; j < hashTable_.length; j++) { + if (summaryTable_[j] != null) { + ByteArrayUtil.putLongLE(bytes, offset, hashTable_[j]); + offset += Long.BYTES; + System.arraycopy(summariesBytes[i], 0, bytes, offset, summariesBytes[i].length); + offset += summariesBytes[i].length; + i++; + } + } + } + return bytes; + } + + // non-public methods below + + // this is a special back door insert for merging + // not sufficient by itself without keeping track of theta of another sketch + @SuppressWarnings("unchecked") + void merge(final long hash, final S summary, final SummarySetOperations summarySetOps) { + empty_ = false; + if (hash > 0 && hash < thetaLong_) { + final int index = findOrInsert(hash); + if (index < 0) { + insertSummary(~index, (S)summary.copy()); //did not find, so insert + } else { + insertSummary(index, summarySetOps.union(summaryTable_[index], (S) summary.copy())); + } + rebuildIfNeeded(); + } + } + + boolean isInSamplingMode() { + return samplingProbability_ < 1f; + } + + void setThetaLong(final long theta) { + thetaLong_ = theta; + } + + void setEmpty(final boolean value) { + empty_ = value; + } + + int findOrInsert(final long hash) { + final int index = HashOperations.hashSearchOrInsert(hashTable_, lgCurrentCapacity_, hash); + if (index < 0) { + retEntries_++; + } + return index; + } + + boolean rebuildIfNeeded() { + if (retEntries_ <= rebuildThreshold_) { + 
return false; + } + if (hashTable_.length > nomEntries_) { + updateTheta(); + rebuild(); + } else { + resize(hashTable_.length * (1 << lgResizeFactor_)); + } + return true; + } + + void rebuild() { + resize(hashTable_.length); + } + + void insert(final long hash, final S summary) { + final int index = HashOperations.hashInsertOnly(hashTable_, lgCurrentCapacity_, hash); + insertSummary(index, summary); + retEntries_++; + empty_ = false; + } + + private void updateTheta() { + final long[] hashArr = new long[retEntries_]; + int i = 0; + //Because of the association of the hashTable with the summaryTable we cannot destroy the + // hashTable structure. So we must copy. May as well compact at the same time. + // Might consider a whole table clone and use the selectExcludingZeros method instead. + // Not sure if there would be any speed advantage. + for (int j = 0; j < hashTable_.length; j++) { + if (summaryTable_[j] != null) { + hashArr[i++] = hashTable_[j]; + } + } + thetaLong_ = QuickSelect.select(hashArr, 0, retEntries_ - 1, nomEntries_); + } + + private void resize(final int newSize) { + final long[] oldHashTable = hashTable_; + final S[] oldSummaryTable = summaryTable_; + hashTable_ = new long[newSize]; + summaryTable_ = Util.newSummaryArray(summaryTable_, newSize); + lgCurrentCapacity_ = Integer.numberOfTrailingZeros(newSize); + retEntries_ = 0; + for (int i = 0; i < oldHashTable.length; i++) { + if (oldSummaryTable[i] != null && oldHashTable[i] < thetaLong_) { + insert(oldHashTable[i], oldSummaryTable[i]); + } + } + rebuildThreshold_ = setRebuildThreshold(hashTable_, nomEntries_); + } + + private static int setRebuildThreshold(final long[] hashTable, final int nomEntries) { + if (hashTable.length > nomEntries) { + return (int) (hashTable.length * ThetaUtil.REBUILD_THRESHOLD); + } else { + return (int) (hashTable.length * ThetaUtil.RESIZE_THRESHOLD); + } + } + + @SuppressWarnings("unchecked") + protected void insertSummary(final int index, final S summary) { + if 
(summaryTable_ == null) { + summaryTable_ = (S[]) Array.newInstance(summary.getClass(), hashTable_.length); + } + summaryTable_[index] = summary; + } + + @Override + public TupleSketchIterator iterator() { + return new TupleSketchIterator<>(hashTable_, summaryTable_); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/SerializerDeserializer.java b/src/main/java/org/apache/datasketches/tuple2/SerializerDeserializer.java new file mode 100644 index 000000000..cbe40ae4c --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/SerializerDeserializer.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.SketchesArgumentException; + +/** + * Multipurpose serializer-deserializer for a collection of sketches defined by the enum. + */ +public final class SerializerDeserializer { + + /** + * Defines the sketch classes that this SerializerDeserializer can handle. 
+ */ + public static enum SketchType { + /** QuickSelectSketch */ + QuickSelectSketch, + /** CompactSketch */ + CompactSketch, + /** ArrayOfDoublesQuickSelectSketch */ + ArrayOfDoublesQuickSelectSketch, + /** ArrayOfDoublesCompactSketch */ + ArrayOfDoublesCompactSketch, + /** ArrayOfDoublesUnion */ + ArrayOfDoublesUnion } + + static final int TYPE_BYTE_OFFSET = 3; + + /** + * Validates the preamble-Longs value given the family ID + * @param familyId the given family ID + * @param preambleLongs the given preambleLongs value + */ + public static void validateFamily(final byte familyId, final byte preambleLongs) { + final Family family = Family.idToFamily(familyId); + if (family.equals(Family.TUPLE)) { + if (preambleLongs < Family.TUPLE.getMinPreLongs() || preambleLongs > Family.TUPLE.getMaxPreLongs()) { + throw new SketchesArgumentException( + "Possible corruption: Invalid PreambleLongs value for family TUPLE: " + preambleLongs); + } + } else { + throw new SketchesArgumentException( + "Possible corruption: Invalid Family: " + family.toString()); + } + } + + /** + * Validates the sketch type byte versus the expected value + * @param sketchTypeByte the given sketch type byte + * @param expectedType the expected value + */ + public static void validateType(final byte sketchTypeByte, final SketchType expectedType) { + final SketchType sketchType = getSketchType(sketchTypeByte); + if (!sketchType.equals(expectedType)) { + throw new SketchesArgumentException("Sketch Type mismatch. 
Expected " + expectedType.name() + + ", got " + sketchType.name()); + } + } + + /** + * Gets the sketch type byte from the given MemorySegment image + * @param seg the given MemorySegment image + * @return the SketchType + */ + public static SketchType getSketchType(final MemorySegment seg) { + final byte sketchTypeByte = seg.get(JAVA_BYTE, TYPE_BYTE_OFFSET); + return getSketchType(sketchTypeByte); + } + + private static SketchType getSketchType(final byte sketchTypeByte) { + if ((sketchTypeByte < 0) || (sketchTypeByte >= SketchType.values().length)) { + throw new SketchesArgumentException("Invalid Sketch Type " + sketchTypeByte); + } + return SketchType.values()[sketchTypeByte]; + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/Sketch.java b/src/main/java/org/apache/datasketches/tuple2/Sketch.java new file mode 100644 index 000000000..20315b03d --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/Sketch.java @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple2; + +import static org.apache.datasketches.common.Util.LS; + +import org.apache.datasketches.thetacommon.BinomialBoundsN; + +/** + * This is an equivalent to org.apache.datasketches.theta2.Sketch with + * addition of a user-defined Summary object associated with every unique entry + * in the sketch. + * @param Type of Summary + */ +public abstract class Sketch { + + protected static final byte PREAMBLE_LONGS = 1; + + long thetaLong_; + boolean empty_ = true; + protected SummaryFactory summaryFactory_ = null; + + Sketch(final long thetaLong, final boolean empty, final SummaryFactory summaryFactory) { + this.thetaLong_ = thetaLong; + this.empty_ = empty; + this.summaryFactory_ = summaryFactory; + } + + /** + * Converts this sketch to a CompactSketch on the Java heap. + * + *

If this sketch is already in compact form this operation returns this. + * + * @return this sketch as a CompactSketch on the Java heap. + */ + public abstract CompactSketch compact(); + + /** + * Estimates the cardinality of the set (number of unique values presented to the sketch) + * @return best estimate of the number of unique values + */ + public double getEstimate() { + if (!isEstimationMode()) { return getRetainedEntries(); } + return getRetainedEntries() / getTheta(); + } + + /** + * Gets the approximate upper error bound given the specified number of Standard Deviations. + * This will return getEstimate() if isEmpty() is true. + * + * @param numStdDev + * See Number of Standard Deviations + * @return the upper bound. + */ + public double getUpperBound(final int numStdDev) { + if (!isEstimationMode()) { return getRetainedEntries(); } + return BinomialBoundsN.getUpperBound(getRetainedEntries(), getTheta(), numStdDev, empty_); + } + + /** + * Gets the approximate lower error bound given the specified number of Standard Deviations. + * This will return getEstimate() if isEmpty() is true. + * + * @param numStdDev + * See Number of Standard Deviations + * @return the lower bound. + */ + public double getLowerBound(final int numStdDev) { + if (!isEstimationMode()) { return getRetainedEntries(); } + return BinomialBoundsN.getLowerBound(getRetainedEntries(), getTheta(), numStdDev, empty_); + } + + /** + * Gets the estimate of the true distinct population of subset tuples represented by the count + * of entries in a subset of the total retained entries of the sketch. + * @param numSubsetEntries number of entries for a chosen subset of the sketch. + * @return the estimate of the true distinct population of subset tuples represented by the count + * of entries in a subset of the total retained entries of the sketch. 
+ */ + public double getEstimate(final int numSubsetEntries) { + if (!isEstimationMode()) { return numSubsetEntries; } + return numSubsetEntries / getTheta(); + } + + /** + * Gets the estimate of the lower bound of the true distinct population represented by the count + * of entries in a subset of the total retained entries of the sketch. + * @param numStdDev + * See Number of Standard Deviations + * @param numSubsetEntries number of entries for a chosen subset of the sketch. + * @return the estimate of the lower bound of the true distinct population represented by the count + * of entries in a subset of the total retained entries of the sketch. + */ + public double getLowerBound(final int numStdDev, final int numSubsetEntries) { + if (!isEstimationMode()) { return numSubsetEntries; } + return BinomialBoundsN.getLowerBound(numSubsetEntries, getTheta(), numStdDev, isEmpty()); + } + + /** + * Gets the estimate of the upper bound of the true distinct population represented by the count + * of entries in a subset of the total retained entries of the sketch. + * @param numStdDev + * See Number of Standard Deviations + * @param numSubsetEntries number of entries for a chosen subset of the sketch. + * @return the estimate of the upper bound of the true distinct population represented by the count + * of entries in a subset of the total retained entries of the sketch. + */ + public double getUpperBound(final int numStdDev, final int numSubsetEntries) { + if (!isEstimationMode()) { return numSubsetEntries; } + return BinomialBoundsN.getUpperBound(numSubsetEntries, getTheta(), numStdDev, isEmpty()); + } + + /** + * See Empty + * @return true if empty. + */ + public boolean isEmpty() { + return empty_; + } + + /** + * Returns true if the sketch is in Estimation Mode (as opposed to Exact Mode). + * This is true if theta < 1.0 AND isEmpty() is false. + * @return true if the sketch is in estimation mode.
+ */ + public boolean isEstimationMode() { + return thetaLong_ < Long.MAX_VALUE && !isEmpty(); + } + + /** + * Returns number of retained entries + * @return number of retained entries + */ + public abstract int getRetainedEntries(); + + /** + * Gets the number of hash values less than the given theta expressed as a long. + * @param thetaLong the given theta as a long between zero and Long.MAX_VALUE. + * @return the number of hash values less than the given thetaLong. + */ + public abstract int getCountLessThanThetaLong(final long thetaLong); + + /** + * Gets the Summary Factory class of type S + * @return the Summary Factory class of type S + */ + public SummaryFactory getSummaryFactory() { + return summaryFactory_; + } + + /** + * Gets the value of theta as a double between zero and one + * @return the value of theta as a double + */ + public double getTheta() { + return getThetaLong() / (double) Long.MAX_VALUE; + } + + /** + * This is to serialize a sketch instance to a byte array. + * + *

As of 3.0.0, serializing an UpdatableSketch is deprecated. + * This capability will be removed in a future release. + * Serializing a CompactSketch is not deprecated.

+ * @return serialized representation of the sketch + */ + public abstract byte[] toByteArray(); + + /** + * Returns a SketchIterator + * @return a SketchIterator + */ + public abstract TupleSketchIterator iterator(); + + /** + * Returns Theta as a long + * @return Theta as a long + */ + public long getThetaLong() { + return isEmpty() ? Long.MAX_VALUE : thetaLong_; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append("### ").append(this.getClass().getSimpleName()).append(" SUMMARY: ").append(LS); + sb.append(" Estimate : ").append(getEstimate()).append(LS); + sb.append(" Upper Bound, 95% conf : ").append(getUpperBound(2)).append(LS); + sb.append(" Lower Bound, 95% conf : ").append(getLowerBound(2)).append(LS); + sb.append(" Theta (double) : ").append(this.getTheta()).append(LS); + sb.append(" Theta (long) : ").append(this.getThetaLong()).append(LS); + sb.append(" EstMode? : ").append(isEstimationMode()).append(LS); + sb.append(" Empty? 
: ").append(isEmpty()).append(LS); + sb.append(" Retained Entries : ").append(this.getRetainedEntries()).append(LS); + if (this instanceof UpdatableSketch) { + @SuppressWarnings("rawtypes") + final UpdatableSketch updatable = (UpdatableSketch) this; + sb.append(" Nominal Entries (k) : ").append(updatable.getNominalEntries()).append(LS); + sb.append(" Current Capacity : ").append(updatable.getCurrentCapacity()).append(LS); + sb.append(" Resize Factor : ").append(updatable.getResizeFactor().getValue()).append(LS); + sb.append(" Sampling Probability (p): ").append(updatable.getSamplingProbability()).append(LS); + } + sb.append("### END SKETCH SUMMARY").append(LS); + return sb.toString(); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/Sketches.java b/src/main/java/org/apache/datasketches/tuple2/Sketches.java new file mode 100644 index 000000000..1fdc545dc --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/Sketches.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2; + +import java.lang.foreign.MemorySegment; + +/** + * Convenient static methods to instantiate generic tuple sketches. 
+ */ +@SuppressWarnings("deprecation") +public final class Sketches { + + /** + * Creates an empty sketch. + * @param Type of Summary + * @return an empty instance of Sketch + */ + public static Sketch createEmptySketch() { + return new CompactSketch<>(null, null, Long.MAX_VALUE, true); + } + + /** + * Instantiate a Sketch from a given MemorySegment. + * @param Type of Summary + * @param seg MemorySegment object representing a Sketch + * @param deserializer instance of SummaryDeserializer + * @return Sketch created from its MemorySegment representation + */ + public static Sketch heapifySketch( + final MemorySegment seg, + final SummaryDeserializer deserializer) { + final SerializerDeserializer.SketchType sketchType = SerializerDeserializer.getSketchType(seg); + if (sketchType == SerializerDeserializer.SketchType.QuickSelectSketch) { + return new QuickSelectSketch<>(seg, deserializer, null); + } + return new CompactSketch<>(seg, deserializer); + } + + /** + * Instantiate UpdatableSketch from a given MemorySegment + * @param Type of update value + * @param Type of Summary + * @param seg MemorySegment object representing a Sketch + * @param deserializer instance of SummaryDeserializer + * @param summaryFactory instance of SummaryFactory + * @return Sketch created from its MemorySegment representation + */ + public static > UpdatableSketch heapifyUpdatableSketch( + final MemorySegment seg, + final SummaryDeserializer deserializer, + final SummaryFactory summaryFactory) { + return new UpdatableSketch<>(seg, deserializer, summaryFactory); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/Summary.java b/src/main/java/org/apache/datasketches/tuple2/Summary.java new file mode 100644 index 000000000..8fb028401 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/Summary.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2; + +/** + * Interface for user-defined Summary, which is associated with every hash in a tuple sketch + */ +public interface Summary { + + /** + * Deep copy. + * + *

Caution: This must implement a deep copy. + * + * @return deep copy of the Summary + */ + public Summary copy(); + + /** + * This is to serialize a Summary instance to a byte array. + * + *

The user should encode in the byte array its total size, which is used during + * deserialization, especially if the Summary has variable sized elements. + * + * @return serialized representation of the Summary + */ + public byte[] toByteArray(); + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/SummaryDeserializer.java b/src/main/java/org/apache/datasketches/tuple2/SummaryDeserializer.java new file mode 100644 index 000000000..93d9f54d9 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/SummaryDeserializer.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2; + +import java.lang.foreign.MemorySegment; + +/** + * Interface for deserializing user-defined Summary + * @param type of Summary + */ +public interface SummaryDeserializer { + + /** + * This is to create an instance of a Summary given a serialized representation. + * The user may assume that the start of the given MemorySegment is the correct place to start + * deserializing. 
However, the user must be able to determine the number of bytes required to + * deserialize the summary as the capacity of the given MemorySegment may + * include multiple such summaries and may be much larger than required for a single summary. + * @param seg MemorySegment object with serialized representation of a Summary + * @return DeserializeResult object, which contains a Summary object and number of bytes read + * from the MemorySegment + */ + public DeserializeResult heapifySummary(MemorySegment seg); + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/SummaryFactory.java b/src/main/java/org/apache/datasketches/tuple2/SummaryFactory.java new file mode 100644 index 000000000..ea229a26c --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/SummaryFactory.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package org.apache.datasketches.tuple2; + +/** + * Interface for user-defined SummaryFactory + * @param type of Summary + */ +public interface SummaryFactory { + + /** + * Returns new instance of Summary + * @return new instance of Summary + */ + public S newSummary(); + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/SummarySetOperations.java b/src/main/java/org/apache/datasketches/tuple2/SummarySetOperations.java new file mode 100644 index 000000000..b0df75dae --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/SummarySetOperations.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2; + +/** + * This is to provide methods of producing unions and intersections of two Summary objects. + * @param type of Summary + */ +public interface SummarySetOperations { + + /** + * This is called by the union operator when both sketches have the same hash value. + * + *

Caution: Do not modify the input Summary objects. Also do not return them directly, + * unless they are immutable (most Summary objects are not). For mutable Summary objects, it is + * important to create a new Summary object with the correct contents to be returned. Do not + * return null summaries. + * + * @param a Summary from sketch A + * @param b Summary from sketch B + * @return union of Summary A and Summary B + */ + public S union(S a, S b); + + /** + * This is called by the intersection operator when both sketches have the same hash value. + * + *

Caution: Do not modify the input Summary objects. Also do not return them directly, + * unless they are immutable (most Summary objects are not). For mutable Summary objects, it is + * important to create a new Summary object with the correct contents to be returned. Do not + * return null summaries. + * + * @param a Summary from sketch A + * @param b Summary from sketch B + * @return intersection of Summary A and Summary B + */ + public S intersection(S a, S b); + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/TupleSketchIterator.java b/src/main/java/org/apache/datasketches/tuple2/TupleSketchIterator.java new file mode 100644 index 000000000..a4faa6018 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/TupleSketchIterator.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple2; + +/** + * Iterator over a generic tuple sketch + * @param Type of Summary + */ +public class TupleSketchIterator { + + private final long[] hashArrTbl_; //could be either hashArr or hashTable + private final S[] summaryArrTbl_; //could be either summaryArr or summaryTable + private int i_; + + TupleSketchIterator(final long[] hashes, final S[] summaries) { + hashArrTbl_ = hashes; + summaryArrTbl_ = summaries; + i_ = -1; + } + + /** + * Advancing the iterator and checking existence of the next entry + * is combined here for efficiency. This results in an undefined + * state of the iterator before the first call of this method. + * @return true if the next element exists + */ + public boolean next() { + if (hashArrTbl_ == null) { return false; } + i_++; + while (i_ < hashArrTbl_.length) { + if (hashArrTbl_[i_] > 0) { return true; } + i_++; + } + return false; + } + + /** + * Gets the hash from the current entry in the sketch, which is a hash + * of the original key passed to update(). The original keys are not + * retained. Don't call this before calling next() for the first time + * or after getting false from next(). + * @return hash from the current entry + */ + public long getHash() { + return hashArrTbl_[i_]; + } + + /** + * Gets a Summary object from the current entry in the sketch. + * Don't call this before calling next() for the first time + * or after getting false from next(). + * @return Summary object for the current entry (this is not a copy!) + */ + public S getSummary() { + return summaryArrTbl_[i_]; + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/Union.java b/src/main/java/org/apache/datasketches/tuple2/Union.java new file mode 100644 index 000000000..5945ad53c --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/Union.java @@ -0,0 +1,225 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2; + +import static java.lang.Math.min; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.thetacommon.QuickSelect; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * Compute the union of two or more generic tuple sketches or generic tuple sketches combined with + * theta sketches. A new instance represents an empty set. + * @param Type of Summary + */ +public class Union { + private final SummarySetOperations summarySetOps_; + private QuickSelectSketch qsk_; + private long unionThetaLong_; // need to maintain outside of the sketch + private boolean empty_; + + /** + * Creates new Union instance with instructions on how to process two summaries that + * overlap. This will have the default nominal entries (K). + * @param summarySetOps instance of SummarySetOperations + */ + public Union(final SummarySetOperations summarySetOps) { + this(ThetaUtil.DEFAULT_NOMINAL_ENTRIES, summarySetOps); + } + + /** + * Creates new Union instance. + * @param nomEntries nominal entries (K). Forced to the nearest power of 2 greater than + * given value. 
+ * @param summarySetOps instance of SummarySetOperations + */ + public Union(final int nomEntries, final SummarySetOperations summarySetOps) { + summarySetOps_ = summarySetOps; + qsk_ = new QuickSelectSketch<>(nomEntries, null); + unionThetaLong_ = qsk_.getThetaLong(); + empty_ = true; + } + + /** + * Perform a stateless, pair-wise union operation between two tuple sketches. + * The returned sketch will be cut back to the smaller of the two k values if required. + * + *

Nulls and empty sketches are ignored.

+ * + * @param tupleSketchA The first argument + * @param tupleSketchB The second argument + * @return the result ordered CompactSketch on the heap. + */ + public CompactSketch union(final Sketch tupleSketchA, final Sketch tupleSketchB) { + reset(); + union(tupleSketchA); + union(tupleSketchB); + final CompactSketch csk = getResult(true); + return csk; + } + + /** + * Perform a stateless, pair-wise union operation between a tupleSketch and a thetaSketch. + * The returned sketch will be cut back to the smaller of the two k values if required. + * + *

Nulls and empty sketches are ignored.

+ * + * @param tupleSketch The first argument + * @param thetaSketch The second argument + * @param summary the given proxy summary for the theta sketch, which doesn't have one. + * This may not be null. + * @return the result ordered CompactSketch on the heap. + */ + public CompactSketch union(final Sketch tupleSketch, + final org.apache.datasketches.theta2.Sketch thetaSketch, final S summary) { + reset(); + union(tupleSketch); + union(thetaSketch, summary); + final CompactSketch csk = getResult(true); + return csk; + } + + /** + * Performs a stateful union of the internal set with the given tupleSketch. + * @param tupleSketch input tuple sketch to merge with the internal set. + * Nulls and empty sketches are ignored. + */ + public void union(final Sketch tupleSketch) { + if (tupleSketch == null || tupleSketch.isEmpty()) { return; } + empty_ = false; + unionThetaLong_ = min(tupleSketch.thetaLong_, unionThetaLong_); + final TupleSketchIterator it = tupleSketch.iterator(); + while (it.next()) { + qsk_.merge(it.getHash(), it.getSummary(), summarySetOps_); + } + unionThetaLong_ = min(unionThetaLong_, qsk_.thetaLong_); + } + + /** + * Performs a stateful union of the internal set with the given thetaSketch by combining entries + * using the hashes from the theta sketch and summary values from the given summary. + * @param thetaSketch the given theta sketch input. If null or empty, it is ignored. + * @param summary the given proxy summary for the theta sketch, which doesn't have one. This may + * not be null. 
+ */ + public void union(final org.apache.datasketches.theta2.Sketch thetaSketch, final S summary) { + if (summary == null) { + throw new SketchesArgumentException("Summary cannot be null."); } + if (thetaSketch == null || thetaSketch.isEmpty()) { return; } + empty_ = false; + final long thetaIn = thetaSketch.getThetaLong(); + unionThetaLong_ = min(thetaIn, unionThetaLong_); + final org.apache.datasketches.theta2.HashIterator it = thetaSketch.iterator(); + while (it.next()) { + qsk_.merge(it.get(), summary, summarySetOps_); //copies summary + } + unionThetaLong_ = min(unionThetaLong_, qsk_.thetaLong_); + } + + /** + * Gets the result of a sequence of stateful union operations as an unordered CompactSketch + * @return result of the stateful unions so far. The state of this operation is not reset after the + * result is returned. + */ + public CompactSketch getResult() { + return getResult(false); + } + + /** + * Gets the result of a sequence of stateful union operations as an unordered CompactSketch. + * @param reset If true, clears this operator to the empty state after this result is + * returned. Set this to false if you wish to obtain an intermediate result. + * @return result of the stateful union + */ + @SuppressWarnings("unchecked") + public CompactSketch getResult(final boolean reset) { + final CompactSketch result; + if (empty_) { + result = qsk_.compact(); + } else if (unionThetaLong_ >= qsk_.thetaLong_ && qsk_.getRetainedEntries() <= qsk_.getNominalEntries()) { + //unionThetaLong_ >= qsk_.thetaLong_ means we can ignore unionThetaLong_. We don't need to rebuild. + //qsk_.getRetainedEntries() <= qsk_.getNominalEntries() means we don't need to pull back to k. 
+ result = qsk_.compact(); + } else { + final long tmpThetaLong = min(unionThetaLong_, qsk_.thetaLong_); + + //count the number of valid hashes in because Alpha can have dirty values + int numHashesIn = 0; + TupleSketchIterator it = qsk_.iterator(); + while (it.next()) { //counts valid hashes + if (it.getHash() < tmpThetaLong) { numHashesIn++; } + } + + if (numHashesIn == 0) { + //numHashes == 0 && empty == false means Theta < 1.0 + //Therefore, this is a degenerate sketch: theta < 1.0, count = 0, empty = false + result = new CompactSketch<>(null, null, tmpThetaLong, empty_); + } + + else { + //we know: empty == false, count > 0 + final int numHashesOut; + final long thetaLongOut; + if (numHashesIn > qsk_.getNominalEntries()) { + //we need to trim hashes and need a new thetaLong + final long[] tmpHashArr = new long[numHashesIn]; // temporary, order will be destroyed by quick select + it = qsk_.iterator(); + int i = 0; + while (it.next()) { + final long hash = it.getHash(); + if (hash < tmpThetaLong) { tmpHashArr[i++] = hash; } + } + numHashesOut = qsk_.getNominalEntries(); + thetaLongOut = QuickSelect.select(tmpHashArr, 0, numHashesIn - 1, numHashesOut); + } else { + numHashesOut = numHashesIn; + thetaLongOut = tmpThetaLong; + } + //now prepare the output arrays + final long[] hashArr = new long[numHashesOut]; + final S[] summaries = Util.newSummaryArray(qsk_.getSummaryTable(), numHashesOut); + it = qsk_.iterator(); + int i = 0; + while (it.next()) { //select the qualifying hashes from the gadget synchronized with the summaries + final long hash = it.getHash(); + if (hash < thetaLongOut) { + hashArr[i] = hash; + summaries[i] = (S) it.getSummary().copy(); + i++; + } + } + result = new CompactSketch<>(hashArr, summaries, thetaLongOut, empty_); + } + } + if (reset) { reset(); } + return result; + } + + /** + * Resets the internal set to the initial state, which represents an empty set. This is only useful + * after sequences of stateful union operations. 
+ */ + public void reset() { + qsk_.reset(); + unionThetaLong_ = qsk_.getThetaLong(); + empty_ = true; + } +} diff --git a/src/main/java/org/apache/datasketches/tuple2/UpdatableSketch.java b/src/main/java/org/apache/datasketches/tuple2/UpdatableSketch.java new file mode 100644 index 000000000..c3b761f14 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/UpdatableSketch.java @@ -0,0 +1,190 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2; + +import java.lang.foreign.MemorySegment; +import java.nio.ByteBuffer; + +import org.apache.datasketches.hash.MurmurHash3; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * An extension of QuickSelectSketch<S>, which can be updated with many types of keys. + * Summary objects are created using a user-defined SummaryFactory class, + * which should allow very flexible parameterization if needed. + * Keys are presented to a sketch along with values of a user-defined + * update type U. When an entry is inserted into a sketch or a duplicate key is + * presented to a sketch then summary.update(U value) method will be called. So + * any kind of user-defined accumulation is possible. 
Summaries also must know + * how to copy themselves. Also union and intersection of summaries can be + * implemented in a sub-class of SummarySetOperations, which will be used in + * case Union or Intersection of two instances of Tuple Sketch is needed + * @param Type of the value, which is passed to update method of a Summary + * @param Type of the UpdatableSummary<U> + */ +public class UpdatableSketch> extends QuickSelectSketch { + + /** + * This is to create a new instance of an UpdatableSketch. + * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than + * or equal to the given value. + * @param lgResizeFactor log2(resizeFactor) - value from 0 to 3: + *
+   * 0 - no resizing (max size allocated),
+   * 1 - double internal hash table each time it reaches a threshold
+   * 2 - grow four times
+   * 3 - grow eight times (default)
+   * 
+ * @param samplingProbability + * See Sampling Probability + * @param summaryFactory An instance of a SummaryFactory. + */ + public UpdatableSketch(final int nomEntries, final int lgResizeFactor, + final float samplingProbability, final SummaryFactory summaryFactory) { + super(nomEntries, lgResizeFactor, samplingProbability, summaryFactory); + } + + /** + * This is to create an instance of a sketch given a serialized form + * @param srcSeg MemorySegment object with data of a serialized UpdatableSketch + * @param deserializer instance of SummaryDeserializer + * @param summaryFactory instance of SummaryFactory + * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated. + * This capability will be removed in a future release. + * Heapifying a CompactSketch is not deprecated. + */ + @Deprecated + public UpdatableSketch( + final MemorySegment srcSeg, + final SummaryDeserializer deserializer, + final SummaryFactory summaryFactory) { + super(srcSeg, deserializer, summaryFactory); + } + + /** + * Copy Constructor + * @param sketch the sketch to copy + */ + public UpdatableSketch(final UpdatableSketch sketch) { + super(sketch); + } + + /** + * @return a deep copy of this sketch + */ + @Override + public UpdatableSketch copy() { + return new UpdatableSketch<>(this); + } + + /** + * Updates this sketch with a long key and U value. + * The value is passed to update() method of the Summary object associated with the key + * + * @param key The given long key + * @param value The given U value + */ + public void update(final long key, final U value) { + update(new long[] {key}, value); + } + + /** + * Updates this sketch with a double key and U value.
+ * The value is passed to update() method of the Summary object associated with the key + * + * @param key The given double key + * @param value The given U value + */ + public void update(final double key, final U value) { + update(Util.doubleToLongArray(key), value); + } + + /** + * Updates this sketch with a String key and U value. + * The value is passed to update() method of the Summary object associated with the key + * + * @param key The given String key + * @param value The given U value + */ + public void update(final String key, final U value) { + update(Util.stringToByteArray(key), value); + } + + /** + * Updates this sketch with a byte[] key and U value. + * The value is passed to update() method of the Summary object associated with the key + * + * @param key The given byte[] key + * @param value The given U value + */ + public void update(final byte[] key, final U value) { + if ((key == null) || (key.length == 0)) { return; } + insertOrIgnore(MurmurHash3.hash(key, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1, value); + } + + /** + * Updates this sketch with a ByteBuffer and U value + * The value is passed to the update() method of the Summary object associated with the key + * + * @param buffer The given ByteBuffer key + * @param value The given U value + */ + public void update(final ByteBuffer buffer, final U value) { + if (buffer == null || buffer.hasRemaining() == false) { return; } + insertOrIgnore(MurmurHash3.hash(buffer, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1, value); + } + + /** + * Updates this sketch with a int[] key and U value. 
+ * The value is passed to update() method of the Summary object associated with the key + * + * @param key The given int[] key + * @param value The given U value + */ + public void update(final int[] key, final U value) { + if ((key == null) || (key.length == 0)) { return; } + insertOrIgnore(MurmurHash3.hash(key, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1, value); + } + + /** + * Updates this sketch with a long[] key and U value. + * The value is passed to update() method of the Summary object associated with the key + * + * @param key The given long[] key + * @param value The given U value + */ + public void update(final long[] key, final U value) { + if ((key == null) || (key.length == 0)) { return; } + insertOrIgnore(MurmurHash3.hash(key, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1, value); + } + + void insertOrIgnore(final long hash, final U value) { + setEmpty(false); + if (hash >= getThetaLong()) { return; } + int index = findOrInsert(hash); + if (index < 0) { + index = ~index; + insertSummary(index, getSummaryFactory().newSummary()); + } + summaryTable_[index].update(value); + rebuildIfNeeded(); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/UpdatableSketchBuilder.java b/src/main/java/org/apache/datasketches/tuple2/UpdatableSketchBuilder.java new file mode 100644 index 000000000..e0343d302 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/UpdatableSketchBuilder.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2; + +import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * For building a new generic tuple UpdatableSketch + * @param Type of update value + * @param Type of Summary + */ +public class UpdatableSketchBuilder> { + + private int nomEntries_; + private ResizeFactor resizeFactor_; + private float samplingProbability_; + private final SummaryFactory summaryFactory_; + + private static final float DEFAULT_SAMPLING_PROBABILITY = 1; + private static final ResizeFactor DEFAULT_RESIZE_FACTOR = ResizeFactor.X8; + + /** + * Creates an instance of UpdatableSketchBuilder with default parameters + * @param summaryFactory An instance of SummaryFactory. + */ + public UpdatableSketchBuilder(final SummaryFactory summaryFactory) { + nomEntries_ = ThetaUtil.DEFAULT_NOMINAL_ENTRIES; + resizeFactor_ = DEFAULT_RESIZE_FACTOR; + samplingProbability_ = DEFAULT_SAMPLING_PROBABILITY; + summaryFactory_ = summaryFactory; + } + + /** + * This is to set the nominal number of entries. + * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than + * or equal to the given value. + * @return this UpdatableSketchBuilder + */ + public UpdatableSketchBuilder setNominalEntries(final int nomEntries) { + nomEntries_ = 1 << ThetaUtil.checkNomLongs(nomEntries); + return this; + } + + /** + * This is to set the resize factor. 
+ * Value of X1 means that the maximum capacity is allocated from the start. + * Default resize factor is X8. + * @param resizeFactor value of X1, X2, X4 or X8 + * @return this UpdatableSketchBuilder + */ + public UpdatableSketchBuilder setResizeFactor(final ResizeFactor resizeFactor) { + resizeFactor_ = resizeFactor; + return this; + } + + /** + * This is to set sampling probability. + * Default probability is 1. + * @param samplingProbability sampling probability from 0 to 1 + * @return this UpdatableSketchBuilder + */ + public UpdatableSketchBuilder setSamplingProbability(final float samplingProbability) { + if ((samplingProbability < 0) || (samplingProbability > 1f)) { + throw new SketchesArgumentException("sampling probability must be between 0 and 1"); + } + samplingProbability_ = samplingProbability; + return this; + } + + /** + * Returns an UpdatableSketch with the current configuration of this Builder. + * @return an UpdatableSketch + */ + public UpdatableSketch build() { + return new UpdatableSketch<>(nomEntries_, resizeFactor_.lg(), samplingProbability_, + summaryFactory_); + } + + /** + * Resets the Nominal Entries, Resize Factor and Sampling Probability to their default values. + * The assignment of U and S remain the same. + */ + public void reset() { + nomEntries_ = ThetaUtil.DEFAULT_NOMINAL_ENTRIES; + resizeFactor_ = DEFAULT_RESIZE_FACTOR; + samplingProbability_ = DEFAULT_SAMPLING_PROBABILITY; + } +} diff --git a/src/main/java/org/apache/datasketches/tuple2/UpdatableSummary.java b/src/main/java/org/apache/datasketches/tuple2/UpdatableSummary.java new file mode 100644 index 000000000..2ec5df695 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/UpdatableSummary.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2; + +/** + * Interface for updating user-defined Summary + * @param type of update value + */ +public interface UpdatableSummary extends Summary { + + /** + * This is to provide a method of updating summaries. + * This is primarily used internally. + * @param value update value + * @return this + */ + UpdatableSummary update(U value); + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/Util.java b/src/main/java/org/apache/datasketches/tuple2/Util.java new file mode 100644 index 000000000..f2f8227ea --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/Util.java @@ -0,0 +1,172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.datasketches.common.Util.ceilingPowerOf2; +import static org.apache.datasketches.hash.MurmurHash3.hash; +import static org.apache.datasketches.hash.XxHash.hashCharArr; +import static org.apache.datasketches.hash.XxHash.hashString; + +import java.lang.reflect.Array; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * Common utility functions for Tuples + */ +public final class Util { + private static final int PRIME = 0x7A3C_CA71; + + /** + * Converts a double to a long[]. + * @param value the given double value + * @return the long array + */ + public static final long[] doubleToLongArray(final double value) { + final double d = (value == 0.0) ? 0.0 : value; // canonicalize -0.0, 0.0 + final long[] array = { Double.doubleToLongBits(d) }; // canonicalize all NaN & +/- infinity forms + return array; + } + + /** + * Converts a String to a UTF_8 byte array. If the given value is either null or empty this + * method returns null. + * @param value the given String value + * @return the UTF_8 byte array + */ + public static final byte[] stringToByteArray(final String value) { + if ((value == null) || value.isEmpty()) { return null; } + return value.getBytes(UTF_8); + } + + /** + * Computes and checks the 16-bit seed hash from the given long seed. + * The seed hash may not be zero in order to maintain compatibility with older serialized + * versions that did not have this concept. + * @param seed See Update Hash Seed + * @return the seed hash. 
+ */ + public static short computeSeedHash(final long seed) { + final long[] seedArr = {seed}; + final short seedHash = (short)((hash(seedArr, 0L)[0]) & 0xFFFFL); + if (seedHash == 0) { + throw new SketchesArgumentException( + "The given seed: " + seed + " produced a seedHash of zero. " + + "You must choose a different seed."); + } + return seedHash; + } + + /** + * Checks the two given seed hashes. If they are not equal, this method throws an Exception. + * @param seedHashA given seed hash A + * @param seedHashB given seed hash B + */ + public static final void checkSeedHashes(final short seedHashA, final short seedHashB) { + if (seedHashA != seedHashB) { + throw new SketchesArgumentException("Incompatible Seed Hashes. " + seedHashA + ", " + + seedHashB); + } + } + + /** + * Gets the starting capacity of a new sketch given the Nominal Entries and the log Resize Factor. + * @param nomEntries the given Nominal Entries + * @param lgResizeFactor the given log Resize Factor + * @return the starting capacity + */ + public static int getStartingCapacity(final int nomEntries, final int lgResizeFactor) { + return 1 << ThetaUtil.startingSubMultiple( + // target table size is twice the number of nominal entries + Integer.numberOfTrailingZeros(ceilingPowerOf2(nomEntries) * 2), + lgResizeFactor, + ThetaUtil.MIN_LG_ARR_LONGS + ); + } + + /** + * Concatenate array of Strings to a single String. 
+ * @param strArr the given String array + * @return the concatenated String + */ + public static String stringConcat(final String[] strArr) { + final int len = strArr.length; + final StringBuilder sb = new StringBuilder(); + for (int i = 0; i < len; i++) { + sb.append(strArr[i]); + if ((i + 1) < len) { sb.append(','); } + } + return sb.toString(); + } + + /** + * Returns the hash of the given string + * @param s the string to hash + * @return the hash of the given string + */ + public static long stringHash(final String s) { + return hashString(s, 0, s.length(), PRIME); + } + + /** + * Returns the hash of the concatenated strings + * @param strArray array of Strings + * @return the hash of concatenated strings. + */ + public static long stringArrHash(final String[] strArray) { + final String s = stringConcat(strArray); + return hashCharArr(s.toCharArray(), 0, s.length(), PRIME); + } + + /** + * Will copy compact summary arrays as well as hashed summary tables (with nulls). + * @param type of summary + * @param summaryArr the given summary array or table + * @return the copy + */ + @SuppressWarnings("unchecked") + public static S[] copySummaryArray(final S[] summaryArr) { + final int len = summaryArr.length; + final S[] tmpSummaryArr = newSummaryArray(summaryArr, len); + for (int i = 0; i < len; i++) { + final S summary = summaryArr[i]; + if (summary == null) { continue; } + tmpSummaryArr[i] = (S) summary.copy(); + } + return tmpSummaryArr; + } + + /** + * Creates a new Summary Array with the specified length + * @param summaryArr example array, only used to obtain the component type. It has no data. + * @param length the desired length of the returned array. 
+ * @param the summary class type + * @return a new Summary Array with the specified length + */ + @SuppressWarnings("unchecked") + public static S[] newSummaryArray(final S[] summaryArr, final int length) { + final Class summaryType = (Class) summaryArr.getClass().getComponentType(); + final S[] tmpSummaryArr = (S[]) Array.newInstance(summaryType, length); + return tmpSummaryArr; + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSketch.java b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSketch.java new file mode 100644 index 000000000..ee17bdd7b --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSketch.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.adouble; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.tuple2.UpdatableSketch; + +/** + * Extends UpdatableSketch<Double, DoubleSummary> + * @author Lee Rhodes + */ +public class DoubleSketch extends UpdatableSketch { + + /** + * Constructs this sketch with given lgK. + * @param lgK Log_base2 of Nominal Entries. 
+ * See Nominal Entries + * @param mode The DoubleSummary mode to be used + */ + public DoubleSketch(final int lgK, final DoubleSummary.Mode mode) { + this(lgK, ResizeFactor.X8.lg(), 1.0F, mode); //defaults: resize factor X8 via lg(), not the enum ordinal; sampling probability 1.0 + } + + /** + * Creates this sketch with the following parameters: + * @param lgK Log_base2 of Nominal Entries. + * @param lgResizeFactor log2(resizeFactor) - value from 0 to 3: + *
+   * 0 - no resizing (max size allocated),
+   * 1 - double internal hash table each time it reaches a threshold
+   * 2 - grow four times
+   * 3 - grow eight times (default)
+   * 
+ * @param samplingProbability + * See Sampling Probability + * @param mode The DoubleSummary mode to be used + */ + public DoubleSketch(final int lgK, final int lgResizeFactor, final float samplingProbability, + final DoubleSummary.Mode mode) { + super(1 << lgK, lgResizeFactor, samplingProbability, new DoubleSummaryFactory(mode)); + } + + /** + * Constructs this sketch from a MemorySegment image, which must be from a DoubleSketch, and + * usually with data. + * @param seg the given MemorySegment + * @param mode The DoubleSummary mode to be used + * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated. + * This capability will be removed in a future release. + * Heapifying a CompactSketch is not deprecated. + */ + @Deprecated + public DoubleSketch(final MemorySegment seg, final DoubleSummary.Mode mode) { + super(seg, new DoubleSummaryDeserializer(), new DoubleSummaryFactory(mode)); + } + + @Override + public void update(final String key, final Double value) { + super.update(key, value); + } + + @Override + public void update(final long key, final Double value) { + super.update(key, value); + } +} diff --git a/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummary.java b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummary.java new file mode 100644 index 000000000..c27051423 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummary.java @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.adouble; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.ByteArrayUtil; +import org.apache.datasketches.tuple2.DeserializeResult; +import org.apache.datasketches.tuple2.UpdatableSummary; + +/** + * Summary for generic tuple sketches of type Double. + * This summary keeps a double value. On update a predefined operation is performed depending on + * the mode. + * Supported modes: Sum, Min, Max, AlwaysOne. The mode is set at construction and does not change. + */ +public final class DoubleSummary implements UpdatableSummary { + private double value_; + private final Mode mode_; + + /** + * The aggregation modes for this Summary + */ + public enum Mode { + + /** + * The aggregation mode is the summation function. + * + *

New retained value = previous retained value + incoming value

+ */ + Sum, + + /** + * The aggregation mode is the minimum function. + * + *

New retained value = min(previous retained value, incoming value)

+ */ + Min, + + /** + * The aggregation mode is the maximum function. + * + *

New retained value = max(previous retained value, incoming value)

+ */ + Max, + + /** + * The aggregation mode is always one. + * + *

New retained value = 1.0

+ */ + AlwaysOne + } + + /** + * Creates an instance of DoubleSummary with a given starting value and mode + * @param value starting value + * @param mode update mode + */ + private DoubleSummary(final double value, final Mode mode) { + value_ = value; + mode_ = mode; + } + + /** + * Creates an instance of DoubleSummary with a given mode. + * @param mode update mode + */ + public DoubleSummary(final Mode mode) { + mode_ = mode; + switch (mode) { + case Sum: + value_ = 0; + break; + case Min: + value_ = Double.POSITIVE_INFINITY; + break; + case Max: + value_ = Double.NEGATIVE_INFINITY; + break; + case AlwaysOne: + value_ = 1.0; + break; + } + } + + @Override + public DoubleSummary update(final Double value) { + switch (mode_) { + case Sum: + value_ += value; + break; + case Min: + if (value < value_) { value_ = value; } + break; + case Max: + if (value > value_) { value_ = value; } + break; + case AlwaysOne: + value_ = 1.0; + break; + } + return this; + } + + @Override + public DoubleSummary copy() { + return new DoubleSummary(value_, mode_); + } + + /** + * Returns current value of the DoubleSummary + * @return current value of the DoubleSummary + */ + public double getValue() { + return value_; + } + + private static final int SERIALIZED_SIZE_BYTES = 9; + private static final int VALUE_INDEX = 0; + private static final int MODE_BYTE_INDEX = 8; + + @Override + public byte[] toByteArray() { + final byte[] bytes = new byte[SERIALIZED_SIZE_BYTES]; + ByteArrayUtil.putDoubleLE(bytes, VALUE_INDEX, value_); + bytes[MODE_BYTE_INDEX] = (byte) mode_.ordinal(); + return bytes; + } + + /** + * Creates an instance of the DoubleSummary given a serialized representation + * @param seg MemorySegment object with serialized DoubleSummary + * @return DeserializedResult object, which contains a DoubleSummary object and number of bytes + * read from the MemorySegment + */ + public static DeserializeResult fromMemory(final MemorySegment seg) { + return new DeserializeResult<>(new 
DoubleSummary(seg.get(JAVA_DOUBLE_UNALIGNED, VALUE_INDEX), + Mode.values()[seg.get(JAVA_BYTE, MODE_BYTE_INDEX)]), SERIALIZED_SIZE_BYTES); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryDeserializer.java b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryDeserializer.java new file mode 100644 index 000000000..ef9854d78 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryDeserializer.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple2.adouble; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.tuple2.DeserializeResult; +import org.apache.datasketches.tuple2.SummaryDeserializer; + +/** + * Implements SummaryDeserializer<DoubleSummary> + * @author Lee Rhodes + */ +public class DoubleSummaryDeserializer implements SummaryDeserializer { + + @Override + public DeserializeResult heapifySummary(final MemorySegment seg) { + return DoubleSummary.fromMemory(seg); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryFactory.java b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryFactory.java new file mode 100644 index 000000000..18fa33fe1 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryFactory.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.adouble; + +import org.apache.datasketches.tuple2.SummaryFactory; + +/** + * Factory for DoubleSummary. 
+ * + * @author Lee Rhodes + */ +public final class DoubleSummaryFactory implements SummaryFactory { + + private final DoubleSummary.Mode summaryMode_; + + /** + * Creates an instance of DoubleSummaryFactory with a given mode + * @param summaryMode summary mode + */ + public DoubleSummaryFactory(final DoubleSummary.Mode summaryMode) { + summaryMode_ = summaryMode; + } + + @Override + public DoubleSummary newSummary() { + return new DoubleSummary(summaryMode_); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummarySetOperations.java b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummarySetOperations.java new file mode 100644 index 000000000..7bad24567 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummarySetOperations.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.adouble; + +import org.apache.datasketches.tuple2.SummarySetOperations; +import org.apache.datasketches.tuple2.adouble.DoubleSummary.Mode; + +/** + * Methods for defining how unions and intersections of two objects of type DoubleSummary + * are performed. 
+ */ +public final class DoubleSummarySetOperations implements SummarySetOperations { + + private final Mode unionSummaryMode_; + + /** + * Intersection is not well defined or even meaningful between numeric values. + * Nevertheless, this can be defined to be a different type of aggregation for intersecting hashes. + */ + private final Mode intersectionSummaryMode_; + + /** + * Creates an instance with default mode of sum for both union and intersection. + * This exists for backward compatibility. + */ + public DoubleSummarySetOperations() { + unionSummaryMode_ = DoubleSummary.Mode.Sum; + intersectionSummaryMode_ = DoubleSummary.Mode.Sum; + } + + /** + * Creates an instance given a DoubleSummary update mode where the mode is the same for both + * union and intersection. This exists for backward compatibility. + * @param summaryMode DoubleSummary update mode. + */ + public DoubleSummarySetOperations(final Mode summaryMode) { + unionSummaryMode_ = summaryMode; + intersectionSummaryMode_ = summaryMode; + } + + /** + * Creates an instance with two modes. 
+ * @param unionSummaryMode for unions + * @param intersectionSummaryMode for intersections + */ + public DoubleSummarySetOperations(final Mode unionSummaryMode, final Mode intersectionSummaryMode) { + unionSummaryMode_ = unionSummaryMode; + intersectionSummaryMode_ = intersectionSummaryMode; + } + + @Override + public DoubleSummary union(final DoubleSummary a, final DoubleSummary b) { + final DoubleSummary result = new DoubleSummary(unionSummaryMode_); + result.update(a.getValue()); + result.update(b.getValue()); + return result; + } + + @Override + public DoubleSummary intersection(final DoubleSummary a, final DoubleSummary b) { + final DoubleSummary result = new DoubleSummary(intersectionSummaryMode_); + result.update(a.getValue()); + result.update(b.getValue()); + return result; + } +} diff --git a/src/main/java/org/apache/datasketches/tuple2/adouble/package-info.java b/src/main/java/org/apache/datasketches/tuple2/adouble/package-info.java new file mode 100644 index 000000000..c72f3df00 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/adouble/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +/** + * This package is for a generic implementation of the Tuple sketch for single Double value. + */ +package org.apache.datasketches.tuple2.adouble; diff --git a/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSketch.java b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSketch.java new file mode 100644 index 000000000..a344f5ef2 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSketch.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.aninteger; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.tuple2.UpdatableSketch; + +/** + * Extends UpdatableSketch<Integer, IntegerSummary> + * @author Lee Rhodes + */ +public class IntegerSketch extends UpdatableSketch { + + /** + * Constructs this sketch with given lgK. + * @param lgK Log_base2 of Nominal Entries. 
+ * See Nominal Entries + * @param mode The IntegerSummary mode to be used + */ + public IntegerSketch(final int lgK, final IntegerSummary.Mode mode) { + this(lgK, ResizeFactor.X8.ordinal(), 1.0F, mode); + } + + /** + * Creates this sketch with the following parameters: + * @param lgK Log_base2 of Nominal Entries. + * @param lgResizeFactor log2(resizeFactor) - value from 0 to 3: + *
+   * 0 - no resizing (max size allocated),
+   * 1 - double internal hash table each time it reaches a threshold
+   * 2 - grow four times
+   * 3 - grow eight times (default)
+   * 
+ * @param samplingProbability + * See Sampling Probability + * @param mode The IntegerSummary mode to be used + */ + public IntegerSketch(final int lgK, final int lgResizeFactor, final float samplingProbability, + final IntegerSummary.Mode mode) { + super(1 << lgK, lgResizeFactor, samplingProbability, new IntegerSummaryFactory(mode)); + } + + /** + * Constructs this sketch from a MemorySegment image, which must be from an IntegerSketch, and + * usually with data. + * @param seg the given MemorySegment + * @param mode The IntegerSummary mode to be used + * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated. + * This capability will be removed in a future release. + * Heapifying a CompactSketch is not deprecated. + */ + @Deprecated + public IntegerSketch(final MemorySegment seg, final IntegerSummary.Mode mode) { + super(seg, new IntegerSummaryDeserializer(), new IntegerSummaryFactory(mode)); + } + + @Override + public void update(final String key, final Integer value) { + super.update(key, value); + } + + @Override + public void update(final long key, final Integer value) { + super.update(key, value); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummary.java b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummary.java new file mode 100644 index 000000000..c7f25ecd9 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummary.java @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.aninteger; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.ByteArrayUtil; +import org.apache.datasketches.tuple2.DeserializeResult; +import org.apache.datasketches.tuple2.UpdatableSummary; + +/** + * Summary for generic tuple sketches of type Integer. + * This summary keeps an Integer value. On update a predefined operation is performed depending on + * the mode. + * Supported modes: Sum, Min, Max, AlwaysOne. The mode is supplied at construction and never changes. + */ +public class IntegerSummary implements UpdatableSummary { + private int value_; + private final Mode mode_; + + /** + * The aggregation modes for this Summary + */ + public enum Mode { + + /** + * The aggregation mode is the summation function. + * + *

New retained value = previous retained value + incoming value

+ */ + Sum, + + /** + * The aggregation mode is the minimum function. + * + *

New retained value = min(previous retained value, incoming value)

+ */ + Min, + + /** + * The aggregation mode is the maximum function. + * + *

New retained value = max(previous retained value, incoming value)

+ */ + Max, + + /** + * The aggregation mode is always one. + * + *

New retained value = 1

+ */ + AlwaysOne + } + + /** + * Creates an instance of IntegerSummary with a given starting value and mode. + * @param value starting value + * @param mode update mode + */ + private IntegerSummary(final int value, final Mode mode) { + value_ = value; + mode_ = mode; + } + + /** + * Creates an instance of IntegerSummary with a given mode. + * @param mode update mode. This should not be called by a user. + */ + public IntegerSummary(final Mode mode) { + mode_ = mode; + switch (mode) { + case Sum: + value_ = 0; + break; + case Min: + value_ = Integer.MAX_VALUE; + break; + case Max: + value_ = Integer.MIN_VALUE; + break; + case AlwaysOne: + value_ = 1; + break; + } + } + + @Override + public IntegerSummary update(final Integer value) { + switch (mode_) { + case Sum: + value_ += value; + break; + case Min: + if (value < value_) { value_ = value; } + break; + case Max: + if (value > value_) { value_ = value; } + break; + case AlwaysOne: + value_ = 1; + break; + } + return this; + } + + @Override + public IntegerSummary copy() { + return new IntegerSummary(value_, mode_); + } + + /** + * Returns the current value of the IntegerSummary + * @return current value of the IntegerSummary + */ + public int getValue() { + return value_; + } + + private static final int SERIALIZED_SIZE_BYTES = 5; + private static final int VALUE_INDEX = 0; + private static final int MODE_BYTE_INDEX = 4; + + @Override + public byte[] toByteArray() { + final byte[] bytes = new byte[SERIALIZED_SIZE_BYTES]; + ByteArrayUtil.putIntLE(bytes, VALUE_INDEX, value_); + bytes[MODE_BYTE_INDEX] = (byte) mode_.ordinal(); + return bytes; + } + + /** + * Creates an instance of the IntegerSummary given a serialized representation + * @param seg MemorySegment object with serialized IntegerSummary + * @return DeserializedResult object, which contains a IntegerSummary object and number of bytes + * read from the MemorySegment + */ + public static DeserializeResult fromMemory(final MemorySegment seg) { + return new 
DeserializeResult<>(new IntegerSummary(seg.get(JAVA_INT_UNALIGNED, VALUE_INDEX), + Mode.values()[seg.get(JAVA_BYTE, MODE_BYTE_INDEX)]), SERIALIZED_SIZE_BYTES); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryDeserializer.java b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryDeserializer.java new file mode 100644 index 000000000..b981e0db2 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryDeserializer.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple2.aninteger; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.tuple2.DeserializeResult; +import org.apache.datasketches.tuple2.SummaryDeserializer; + +/** + * Implements SummaryDeserializer<IntegerSummary> + * @author Lee Rhodes + */ +public class IntegerSummaryDeserializer implements SummaryDeserializer { + + @Override + public DeserializeResult heapifySummary(final MemorySegment seg) { + return IntegerSummary.fromMemory(seg); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryFactory.java b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryFactory.java new file mode 100644 index 000000000..97bf9df24 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryFactory.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.aninteger; + +import org.apache.datasketches.tuple2.SummaryFactory; + +/** + * Factory for IntegerSummary. 
+ * + * @author Lee Rhodes + */ +public class IntegerSummaryFactory implements SummaryFactory { + + private final IntegerSummary.Mode summaryMode_; + + /** + * Creates an instance of IntegerSummaryFactory with a given mode + * @param summaryMode summary mode + */ + public IntegerSummaryFactory(final IntegerSummary.Mode summaryMode) { + summaryMode_ = summaryMode; + } + + @Override + public IntegerSummary newSummary() { + return new IntegerSummary(summaryMode_); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummarySetOperations.java b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummarySetOperations.java new file mode 100644 index 000000000..5e5555d22 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummarySetOperations.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.aninteger; + +import org.apache.datasketches.tuple2.SummarySetOperations; +import org.apache.datasketches.tuple2.aninteger.IntegerSummary.Mode; + +/** + * Methods for defining how unions and intersections of two objects of type IntegerSummary + * are performed. 
+ * + * @author Lee Rhodes + */ +public class IntegerSummarySetOperations implements SummarySetOperations { + + private final Mode unionSummaryMode_; + + /** + * Intersection is not well defined or even meaningful between numeric values. + * Nevertheless, this can be defined to be a different type of aggregation for intersecting hashes. + */ + private final Mode intersectionSummaryMode_; + + /** + * Creates a new instance with two modes + * @param unionSummaryMode for unions + * @param intersectionSummaryMode for intersections + */ + public IntegerSummarySetOperations(final Mode unionSummaryMode, final Mode intersectionSummaryMode) { + unionSummaryMode_ = unionSummaryMode; + intersectionSummaryMode_ = intersectionSummaryMode; + } + + @Override + public IntegerSummary union(final IntegerSummary a, final IntegerSummary b) { + final IntegerSummary result = new IntegerSummary(unionSummaryMode_); + result.update(a.getValue()); + result.update(b.getValue()); + return result; + } + + @Override + public IntegerSummary intersection(final IntegerSummary a, final IntegerSummary b) { + final IntegerSummary result = new IntegerSummary(intersectionSummaryMode_); + result.update(a.getValue()); + result.update(b.getValue()); + return result; + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/aninteger/package-info.java b/src/main/java/org/apache/datasketches/tuple2/aninteger/package-info.java new file mode 100644 index 000000000..a80924a62 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/aninteger/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/** + * This package is for a generic implementation of the Tuple sketch for single Integer value. + */ +package org.apache.datasketches.tuple2.aninteger; diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotB.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotB.java new file mode 100644 index 000000000..55e96be42 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotB.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple2.arrayofdoubles; + +import java.lang.foreign.MemorySegment; + +/** + * Computes a set difference of two tuple sketches of type ArrayOfDoubles + */ +public abstract class ArrayOfDoublesAnotB { + + ArrayOfDoublesAnotB() {} + + /** + * Perform A-and-not-B set operation on the two given sketches. + * Neither sketch may be null; ArrayOfDoublesAnotBImpl rejects a null argument with a SketchesArgumentException. + * This is not an accumulating update. Calling update() more than once + * without calling getResult() will discard the result of previous update(). + * Both input sketches must have the same numValues. + * + * @param a The incoming sketch for the first argument + * @param b The incoming sketch for the second argument + */ + public abstract void update(ArrayOfDoublesSketch a, ArrayOfDoublesSketch b); + + /** + * Gets the result of this operation in the form of an ArrayOfDoublesCompactSketch + * @return compact sketch representing the result of the operation + */ + public abstract ArrayOfDoublesCompactSketch getResult(); + + /** + * Gets the result of this operation in the form of an ArrayOfDoublesCompactSketch + * @param seg MemorySegment for the result (can be null) + * @return compact sketch representing the result of the operation (off-heap if MemorySegment is + * provided) + */ + public abstract ArrayOfDoublesCompactSketch getResult(MemorySegment seg); + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBImpl.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBImpl.java new file mode 100644 index 000000000..30a18c1e4 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBImpl.java @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.arrayofdoubles; + +import static java.lang.Math.min; +import static org.apache.datasketches.common.Util.exactLog2OfLong; +import static org.apache.datasketches.thetacommon.HashOperations.continueCondition; +import static org.apache.datasketches.thetacommon.HashOperations.convertToHashTable; +import static org.apache.datasketches.thetacommon.HashOperations.count; +import static org.apache.datasketches.thetacommon.HashOperations.hashSearch; + +import java.lang.foreign.MemorySegment; +import java.util.Arrays; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.SuppressFBWarnings; +import org.apache.datasketches.thetacommon.SetOperationCornerCases; +import org.apache.datasketches.thetacommon.SetOperationCornerCases.AnotbAction; +import org.apache.datasketches.thetacommon.SetOperationCornerCases.CornerCase; +import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.tuple2.Util; + +/** + * Computes a set difference, A-AND-NOT-B, of two ArrayOfDoublesSketches. + * + *

This class is stateful: update(skA, skB) computes and stores the result, which getResult() then returns:

+ * + * 

+ * anotb.update(skA, skB); ArrayOfDoublesCompactSketch csk = anotb.getResult();
+ * 
+ * + * @author Lee Rhodes + */ +public class ArrayOfDoublesAnotBImpl extends ArrayOfDoublesAnotB { + private int numValues_; + private short seedHash_; + + private long thetaLong_ = Long.MAX_VALUE; + private boolean empty_ = true; + private long[] keys_; + private double[] values_; + private int count_; + + ArrayOfDoublesAnotBImpl(final int numValues, final long seed) { + numValues_ = numValues; + seedHash_ = Util.computeSeedHash(seed); + } + + @Override + @SuppressFBWarnings(value = "EI_EXPOSE_REP2", justification = "This is OK here") + public void update(final ArrayOfDoublesSketch skA, final ArrayOfDoublesSketch skB) { + if (skA == null || skB == null) { + throw new SketchesArgumentException("Neither argument may be null."); + } + numValues_ = skA.getNumValues(); + seedHash_ = skA.getSeedHash(); + if (numValues_ != skB.getNumValues()) { + throw new SketchesArgumentException("Inputs cannot have different numValues"); + } + if (seedHash_ != skB.getSeedHash()) { + throw new SketchesArgumentException("Inputs cannot have different seedHashes"); + } + + final long thetaLongA = skA.getThetaLong(); + final int countA = skA.getRetainedEntries(); + final boolean emptyA = skA.isEmpty(); + + final long thetaLongB = skB.getThetaLong(); + final int countB = skB.getRetainedEntries(); + final boolean emptyB = skB.isEmpty(); + + final int id = + SetOperationCornerCases.createCornerCaseId(thetaLongA, countA, emptyA, thetaLongB, countB, emptyB); + final CornerCase cCase = CornerCase.caseIdToCornerCase(id); + final AnotbAction anotbAction = cCase.getAnotbAction(); + + final long minThetaLong = min(thetaLongA, thetaLongB); + + switch (anotbAction) { + case EMPTY_1_0_T: { + reset(); + break; + } + case DEGEN_MIN_0_F: { + keys_ = null; + values_ = null; + thetaLong_ = minThetaLong; + empty_ = false; + count_ = 0; + break; + } + case DEGEN_THA_0_F: { + keys_ = null; + values_ = null; + thetaLong_ = thetaLongA; + empty_ = false; + count_ = 0; + break; + } + case TRIM_A: { + final 
DataArrays daA = new DataArrays(skA.getKeys(), skA.getValuesAsOneDimension(), countA); + final DataArrays da = trimDataArrays(daA, minThetaLong, numValues_); + keys_ = da.hashArr; + values_ = da.valuesArr; + thetaLong_ = minThetaLong; + empty_ = skA.isEmpty(); + count_ = da.count; + break; + } + case SKETCH_A: { + final ArrayOfDoublesCompactSketch csk = skA.compact(); + keys_ = csk.getKeys(); + values_ = csk.getValuesAsOneDimension(); + thetaLong_ = csk.thetaLong_; + empty_ = csk.isEmpty(); + count_ = csk.getRetainedEntries(); + break; + } + case FULL_ANOTB: { //both A and B should have valid entries. + final long[] keysA = skA.getKeys(); + final double[] valuesA = skA.getValuesAsOneDimension(); + final DataArrays daR = getResultArrays(minThetaLong, countA, keysA, valuesA, skB); + count_ = daR.count; + keys_ = (count_ == 0) ? null : daR.hashArr; + values_ = (count_ == 0) ? null : daR.valuesArr; + thetaLong_ = minThetaLong; + empty_ = (minThetaLong == Long.MAX_VALUE) && (count_ == 0); + break; + } + //default: not possible + } + } + + @Override + public ArrayOfDoublesCompactSketch getResult() { + return new HeapArrayOfDoublesCompactSketch(keys_, values_, thetaLong_, empty_, numValues_, seedHash_); + } + + @Override + public ArrayOfDoublesCompactSketch getResult(final MemorySegment dstSeg) { + return new DirectArrayOfDoublesCompactSketch(keys_, values_, thetaLong_, empty_, numValues_, seedHash_, dstSeg); + } + + private static DataArrays getResultArrays( + final long minThetaLong, + final int countA, + final long[] hashArrA, + final double[] valuesArrA, + final ArrayOfDoublesSketch skB) { + final int numValues = skB.numValues_; + + //create hashtable of skB + final long[] hashTableB = convertToHashTable(skB.getKeys(), skB.getRetainedEntries(), minThetaLong, + ThetaUtil.REBUILD_THRESHOLD); + + //build temporary arrays of skA + long[] tmpHashArrA = new long[countA]; + double[] tmpValuesArrA = new double[countA * numValues]; + + //search for non matches and build temp 
arrays + final int lgHTBLen = exactLog2OfLong(hashTableB.length); + int nonMatches = 0; + for (int i = 0; i < countA; i++) { + final long hash = hashArrA[i]; + if (continueCondition(minThetaLong, hash)) { continue; } + final int index = hashSearch(hashTableB, lgHTBLen, hash); + if (index == -1) { + tmpHashArrA[nonMatches] = hash; + System.arraycopy(valuesArrA, i * numValues, tmpValuesArrA, nonMatches * numValues, numValues); + nonMatches++; + } + } + tmpHashArrA = Arrays.copyOf(tmpHashArrA, nonMatches); + tmpValuesArrA = Arrays.copyOf(tmpValuesArrA, nonMatches * numValues); + final DataArrays daR = new DataArrays(tmpHashArrA, tmpValuesArrA, nonMatches); + return daR; + } + + private static class DataArrays { + long[] hashArr; + double[] valuesArr; + int count; + + DataArrays(final long[] hashArr, final double[] valuesArr, final int count) { + this.hashArr = hashArr; + this.valuesArr = valuesArr; + this.count = count; + } + } + + private static DataArrays trimDataArrays(final DataArrays da, final long thetaLong, final int numValues) { + final long[] hashArrIn = da.hashArr; + final double[] valuesArrIn = da.valuesArr; + final int count = count(hashArrIn, thetaLong); + final long[] hashArrOut = new long[count]; + final double[] valuesArrOut = new double[count * numValues]; + int haInIdx; + int vaInIdx = 0; + int haOutIdx = 0; + int vaOutIdx = 0; + for (haInIdx = 0; haInIdx < da.count; haInIdx++, vaInIdx += numValues) { //scan every input entry, not just the first 'count' + final long hash = hashArrIn[haInIdx]; + if (continueCondition(thetaLong, hash)) { continue; } + hashArrOut[haOutIdx] = hashArrIn[haInIdx]; + System.arraycopy(valuesArrIn, vaInIdx, valuesArrOut, vaOutIdx, numValues); + haOutIdx++; + vaOutIdx += numValues; + } + return new DataArrays(hashArrOut, valuesArrOut, count); + } + + private void reset() { + empty_ = true; + thetaLong_ = Long.MAX_VALUE; + keys_ = null; + values_ = null; + count_ = 0; + } +} + diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCombiner.java
b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCombiner.java new file mode 100644 index 000000000..2679debea --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCombiner.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.arrayofdoubles; + +/** + * Combines two arrays of double values for use with ArrayOfDoubles tuple sketches + */ +public interface ArrayOfDoublesCombiner { + + /** + * Method of combining two arrays of double values + * @param a Array A. + * @param b Array B. + * @return Result of combining A and B + */ + public double[] combine(double[] a, double[] b); + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketch.java new file mode 100644 index 000000000..35e8cb15d --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketch.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.arrayofdoubles; + +/** + * Top level compact tuple sketch of type ArrayOfDoubles. Compact sketches are never created + * directly. They are created as a result of the compact() method on a QuickSelectSketch + * or the getResult() method of a set operation like Union, Intersection or AnotB. + * Compact sketch consists of a compact list (i.e. no intervening spaces) of hash values, + * corresponding list of double values, and a value for theta. The lists may or may + * not be ordered. A compact sketch is read-only. 
+ */ +public abstract class ArrayOfDoublesCompactSketch extends ArrayOfDoublesSketch { + + static final byte serialVersionUID = 1; + + // Layout of retained entries: + // Long || Start Byte Adr: + // Adr: + // || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 | + // 3 ||-----------------------------------|----------Retained Entries------------| + + static final int EMPTY_SIZE = 16; + static final int RETAINED_ENTRIES_INT = 16; + // 4 bytes of padding for 8 byte alignment + static final int ENTRIES_START = 24; + + ArrayOfDoublesCompactSketch(final int numValues) { + super(numValues); + } + + @Override + public int getCurrentBytes() { + final int count = getRetainedEntries(); + int sizeBytes = EMPTY_SIZE; + if (count > 0) { + sizeBytes = ENTRIES_START + (SIZE_OF_KEY_BYTES * count) + + (SIZE_OF_VALUE_BYTES * numValues_ * count); + } + return sizeBytes; + } + + @Override + public int getMaxBytes() { + return getCurrentBytes(); + } +} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersection.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersection.java new file mode 100644 index 000000000..b2b26a30f --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersection.java @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.arrayofdoubles; + +import static java.lang.Math.min; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.SketchesStateException; +import org.apache.datasketches.tuple2.Util; + +/** + * Computes the intersection of two or more tuple sketches of type ArrayOfDoubles. + * A new instance represents the Universal Set. + * Every update() computes an intersection with the internal set + * and can only reduce the internal set. + */ +public abstract class ArrayOfDoublesIntersection { + //not changed by resetToEmpty() or hardReset() + private final short seedHash_; + private final int numValues_; + //nulled or reset by resetToEmpty + private HashTables hashTables_; + private boolean empty_; + private boolean firstCall_; + private long thetaLong_; + + /** + * Internal constructor, called by HeapArrayOfDoublesIntersection and DirectArrayOfDoublesIntersection + * @param numValues the number of double values in the summary array + * @param seed the hash function update seed. + */ + ArrayOfDoublesIntersection(final int numValues, final long seed) { + seedHash_ = Util.computeSeedHash(seed); + numValues_ = numValues; + hashTables_ = null; + empty_ = false; + thetaLong_ = Long.MAX_VALUE; + firstCall_ = true; + } + + /** + * Performs a stateful intersection of the internal set with the given tupleSketch. + * The given tupleSketch and the internal state must have the same numValues. 
+ * @param tupleSketch Input sketch to intersect with the internal set. + * @param combiner Method of combining two arrays of double values + */ + public void intersect(final ArrayOfDoublesSketch tupleSketch, final ArrayOfDoublesCombiner combiner) { + if (tupleSketch == null) { throw new SketchesArgumentException("Sketch must not be null"); } + Util.checkSeedHashes(seedHash_, tupleSketch.getSeedHash()); + if (tupleSketch.numValues_ != numValues_) { + throw new SketchesArgumentException( + "Input tupleSketch cannot have different numValues from the internal numValues."); + } + + final boolean isFirstCall = firstCall_; + firstCall_ = false; + + //could be first or next call + + final boolean emptyIn = tupleSketch.isEmpty(); + if (empty_ || emptyIn) { //empty rule + //Whatever the current internal state, we make our local empty. + resetToEmpty(); // + return; + } + + final long thetaLongIn = tupleSketch.getThetaLong(); + thetaLong_ = min(thetaLong_, thetaLongIn); //Theta rule + + if (tupleSketch.getRetainedEntries() == 0) { + if (hashTables_ != null) { + hashTables_.clear(); + } + } + // input sketch will have valid entries > 0 + + if (isFirstCall) { + //Copy first sketch data into local instance hashTables_ + hashTables_ = new HashTables(tupleSketch); + } + + //Next Call + else { + assert hashTables_ != null; + if (hashTables_.getNumKeys() == 0) { return; } + //process intersect with current hashTables + hashTables_ = hashTables_.getIntersectHashTables(tupleSketch, thetaLong_, combiner); + } + } + + /** + * Gets the internal set as an on-heap compact sketch. + * @return Result of the intersections so far as a compact sketch. + */ + public ArrayOfDoublesCompactSketch getResult() { + return getResult(null); + } + + /** + * Gets the result of stateful intersections so far. + * @param dstSeg MemorySegment for the compact sketch (can be null). + * @return Result of the intersections so far as a compact sketch. 
+ */ + public ArrayOfDoublesCompactSketch getResult(final MemorySegment dstSeg) { + if (firstCall_) { + throw new SketchesStateException( + "getResult() with no intervening intersections is not a legal result."); + } + long[] hashArrOut = new long[0]; + double[] valuesArrOut = new double[0]; + if (hashTables_ != null && hashTables_.getHashTable() != null) { + final int numKeys = hashTables_.getNumKeys(); + + if (numKeys > 0) { + final int tableSize = hashTables_.getHashTable().length; + + hashArrOut = new long[numKeys]; + valuesArrOut = new double[numKeys * numValues_]; + + // & flatten the hash tables + int cnt = 0; + final long[] hashTable = hashTables_.getHashTable(); + final double[][] valueTable = hashTables_.getValueTable(); + for (int i = 0; i < tableSize; i++) { + final long hash = hashTable[i]; + if (hash == 0 || hash > thetaLong_) { continue; } + hashArrOut[cnt] = hash; + System.arraycopy(valueTable[i], 0, valuesArrOut, cnt * numValues_, numValues_); + cnt++; + } + assert cnt == numKeys; + } + } + + return (dstSeg == null) + ? 
new HeapArrayOfDoublesCompactSketch(hashArrOut, valuesArrOut, + thetaLong_, empty_, numValues_, seedHash_) + : new DirectArrayOfDoublesCompactSketch(hashArrOut, valuesArrOut, + thetaLong_, empty_, numValues_, seedHash_, dstSeg); + } + + /** + * Resets the internal set to the initial state, which represents the Universal Set + */ + public void reset() { + hardReset(); + } + + private void hardReset() { + empty_ = false; + firstCall_ = true; + thetaLong_ = Long.MAX_VALUE; + if (hashTables_ != null) { hashTables_.clear(); } + } + + private void resetToEmpty() { + empty_ = true; + firstCall_ = false; + thetaLong_ = Long.MAX_VALUE; + if (hashTables_ != null) { hashTables_.clear(); } + } + + protected abstract ArrayOfDoublesQuickSelectSketch createSketch(int nomEntries, int numValues, long seed); + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java new file mode 100644 index 000000000..7c29d7141 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.arrayofdoubles; + +import static org.apache.datasketches.common.Util.ceilingPowerOf2; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.thetacommon.QuickSelect; +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * Top level class for hash table based implementations of tuple sketch of type + * ArrayOfDoubles that uses the QuickSelect algorithm. + */ +abstract class ArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesUpdatableSketch { + + static final byte serialVersionUID = 1; + + // Layout of next 16 bytes: + // Long || Start Byte Adr: + // Adr: + // || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 | + // 3 ||-----------P (float)---------------|--------|--lgRF--|--lgArr-|---lgNom---| + // || 31 | 30 | 29 | 28 | 27 | 26 | 25 | 24 | + // 4 ||-----------------------------------|----------Retained Entries------------| + + static final int LG_NOM_ENTRIES_BYTE = 16; + static final int LG_CUR_CAPACITY_BYTE = 17; + static final int LG_RESIZE_FACTOR_BYTE = 18; + // 1 byte of padding for alignment + static final int SAMPLING_P_FLOAT = 20; + static final int RETAINED_ENTRIES_INT = 24; + // 4 bytes of padding for alignment + static final int ENTRIES_START = 32; + + static final int DEFAULT_LG_RESIZE_FACTOR = 3; + + // these can be derived from other things, but are kept here for performance + int rebuildThreshold_; //absolute value relative to current capacity + int lgCurrentCapacity_; + + ArrayOfDoublesQuickSelectSketch(final int numValues, final long seed) { + super(numValues, seed); + } + + abstract void updateValues(int index, double[] values); + + abstract void setNotEmpty(); + + abstract boolean isInSamplingMode(); + + abstract void rebuild(int newCapacity); + + abstract long getKey(int index); + + abstract void 
setValues(int index, double[] values); + + abstract void incrementCount(); + + abstract void setThetaLong(long thetaLong); + + abstract int insertKey(long key); + + abstract int findOrInsertKey(long key); + + abstract double[] find(long key); + + abstract int getSerializedSizeBytes(); + + abstract void serializeInto(MemorySegment seg); + + @Override + public void trim() { + if (getRetainedEntries() > getNominalEntries()) { + setThetaLong(getNewThetaLong()); + rebuild(); + } + } + + @Override + public int getMaxBytes() { + final int nomEntries = getNominalEntries(); + final int numValues = getNumValues(); + return getMaxBytes(nomEntries, numValues); + } + + @Override + public int getCurrentBytes() { + return getSerializedSizeBytes(); + } + + /** + * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than or equal to + * given value. + * @param numValues Number of double values to keep for each key + * @return maximum required storage bytes given nomEntries and numValues + */ + static int getMaxBytes(final int nomEntries, final int numValues) { + return ENTRIES_START + + (SIZE_OF_KEY_BYTES + SIZE_OF_VALUE_BYTES * numValues) * ceilingPowerOf2(nomEntries) * 2; + } + + // non-public methods below + + // this is a special back door insert for merging + // not sufficient by itself without keeping track of theta of another sketch + void merge(final long key, final double[] values) { + setNotEmpty(); + if (key < thetaLong_) { + final int index = findOrInsertKey(key); + if (index < 0) { + incrementCount(); + setValues(~index, values); + } else { + updateValues(index, values); + } + rebuildIfNeeded(); + } + } + + void rebuildIfNeeded() { + if (getRetainedEntries() <= rebuildThreshold_) { return; } + if (getCurrentCapacity() > getNominalEntries()) { + setThetaLong(getNewThetaLong()); + rebuild(); + } else { + rebuild(getCurrentCapacity() * getResizeFactor().getValue()); + } + } + + void rebuild() { + rebuild(getCurrentCapacity()); + } + + 
void insert(final long key, final double[] values) { + final int index = insertKey(key); + setValues(index, values); + incrementCount(); + } + + final void setRebuildThreshold() { + if (getCurrentCapacity() > getNominalEntries()) { + rebuildThreshold_ = (int) (getCurrentCapacity() * ThetaUtil.REBUILD_THRESHOLD); + } else { + rebuildThreshold_ = (int) (getCurrentCapacity() * ThetaUtil.RESIZE_THRESHOLD); + } + } + + @Override + void insertOrIgnore(final long key, final double[] values) { + if (values.length != getNumValues()) { + throw new SketchesArgumentException("input array of values must have " + getNumValues() + + " elements, but has " + values.length); + } + setNotEmpty(); + if ((key == 0) || (key >= thetaLong_)) { return; } + final int index = findOrInsertKey(key); + if (index < 0) { + incrementCount(); + setValues(~index, values); + } else { + updateValues(index, values); + } + rebuildIfNeeded(); + } + + long getNewThetaLong() { + final long[] keys = new long[getRetainedEntries()]; + int i = 0; + for (int j = 0; j < getCurrentCapacity(); j++) { + final long key = getKey(j); + if (key != 0) { keys[i++] = key; } + } + return QuickSelect.select(keys, 0, getRetainedEntries() - 1, getNominalEntries()); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java new file mode 100644 index 000000000..eaf486a15 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.arrayofdoubles; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.thetacommon.ThetaUtil; + +/** + * Builds set operation objects for tuple sketches of type ArrayOfDoubles. + */ +public class ArrayOfDoublesSetOperationBuilder { + + private int nomEntries_; + private int numValues_; + private long seed_; + + /** + * Default Nominal Entries (a.k.a. K) + */ + public static final int DEFAULT_NOMINAL_ENTRIES = 4096; + + /** + * Default number of values + */ + public static final int DEFAULT_NUMBER_OF_VALUES = 1; + + /** + * Creates an instance of the builder with default parameters + */ + public ArrayOfDoublesSetOperationBuilder() { + nomEntries_ = DEFAULT_NOMINAL_ENTRIES; + numValues_ = DEFAULT_NUMBER_OF_VALUES; + seed_ = ThetaUtil.DEFAULT_UPDATE_SEED; + } + + /** + * This is to set the nominal number of entries. + * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than + * or equal to the given value.
+ * @return this builder + */ + public ArrayOfDoublesSetOperationBuilder setNominalEntries(final int nomEntries) { + nomEntries_ = nomEntries; + return this; + } + + /** + * This is to set the number of double values associated with each key + * @param numValues number of double values + * @return this builder + */ + public ArrayOfDoublesSetOperationBuilder setNumberOfValues(final int numValues) { + numValues_ = numValues; + return this; + } + + /** + * Sets the long seed value that is required by the hashing function. + * @param seed See seed + * @return this builder + */ + public ArrayOfDoublesSetOperationBuilder setSeed(final long seed) { + seed_ = seed; + return this; + } + + /** + * Creates an instance of ArrayOfDoublesUnion based on the current configuration of the builder. + * The new instance is allocated on the heap if the memory is not provided. + * @return an instance of ArrayOfDoublesUnion + */ + public ArrayOfDoublesUnion buildUnion() { + return new HeapArrayOfDoublesUnion(nomEntries_, numValues_, seed_); + } + + /** + * Creates an instance of ArrayOfDoublesUnion based on the current configuration of the builder + * and the given destination MemorySegment. + * @param dstSeg destination MemorySegment to be used by the sketch + * @return an instance of ArrayOfDoublesUnion + */ + public ArrayOfDoublesUnion buildUnion(final MemorySegment dstSeg) { + return new DirectArrayOfDoublesUnion(nomEntries_, numValues_, seed_, dstSeg); + } + + /** + * Creates an instance of ArrayOfDoublesIntersection based on the current configuration of the + * builder. + * The new instance is allocated on the heap if the memory is not provided. + * The number of nominal entries is not relevant to this, so it is ignored. 
+ * @return an instance of ArrayOfDoublesIntersection + */ + public ArrayOfDoublesIntersection buildIntersection() { + return new HeapArrayOfDoublesIntersection(numValues_, seed_); + } + + /** + * Creates an instance of ArrayOfDoublesIntersection based on the current configuration of the + * builder. + * The new instance is allocated on the heap if the MemorySegment is not provided. + * The number of nominal entries is not relevant to this, so it is ignored. + * @param dstSeg destination MemorySegment to be used by the sketch + * @return an instance of ArrayOfDoublesIntersection + */ + public ArrayOfDoublesIntersection buildIntersection(final MemorySegment dstSeg) { + return new DirectArrayOfDoublesIntersection(numValues_, seed_, dstSeg); + } + + /** + * Creates an instance of ArrayOfDoublesAnotB based on the current configuration of the builder. + * The memory is not relevant to this, so it is ignored if set. + * The number of nominal entries is not relevant to this, so it is ignored. + * @return an instance of ArrayOfDoublesAnotB + */ + public ArrayOfDoublesAnotB buildAnotB() { + return new ArrayOfDoublesAnotBImpl(numValues_, seed_); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketch.java new file mode 100644 index 000000000..c4163fb1e --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketch.java @@ -0,0 +1,290 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.arrayofdoubles; + +import static org.apache.datasketches.common.Util.LS; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.thetacommon.BinomialBoundsN; +import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.tuple2.SerializerDeserializer; + +/** + * The base class for the tuple sketch of type ArrayOfDoubles, where an array of double values + * is associated with each key. + * A primitive array of doubles is used here, as opposed to a generic Summary object, + * for improved performance. + */ +public abstract class ArrayOfDoublesSketch { + + // The concept of being empty is about representing an empty set. + // So a sketch can be non-empty, and have no entries. + // For example, as a result of a sampling, when some data was presented to the sketch, but no + // entries were retained. 
+ static enum Flags { IS_BIG_ENDIAN, IS_IN_SAMPLING_MODE, IS_EMPTY, HAS_ENTRIES } + + static final int SIZE_OF_KEY_BYTES = Long.BYTES; + static final int SIZE_OF_VALUE_BYTES = Double.BYTES; + + // Common Layout of first 16 bytes and Empty AoDCompactSketch: + // Long || Start Byte Adr: + // Adr: + // || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | + // 0 || Seed Hash | #Dbls | Flags | SkType | FamID | SerVer | Preamble_Longs | + // || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 | + // 1 ||-------------------------Theta Long------------------------------------------------| + + static final int PREAMBLE_LONGS_BYTE = 0; // not used, always 1 + static final int SERIAL_VERSION_BYTE = 1; + static final int FAMILY_ID_BYTE = 2; + static final int SKETCH_TYPE_BYTE = 3; + static final int FLAGS_BYTE = 4; + static final int NUM_VALUES_BYTE = 5; + static final int SEED_HASH_SHORT = 6; + static final int THETA_LONG = 8; + + final int numValues_; + + long thetaLong_; + boolean isEmpty_ = true; + + ArrayOfDoublesSketch(final int numValues) { + numValues_ = numValues; + } + + /** + * Heapify the given MemorySegment as an ArrayOfDoublesSketch + * @param seg the given MemorySegment + * @return an ArrayOfDoublesSketch + */ + public static ArrayOfDoublesSketch heapify(final MemorySegment seg) { + return heapify(seg, ThetaUtil.DEFAULT_UPDATE_SEED); + } + + /** + * Heapify the given MemorySegment and seed as an ArrayOfDoublesSketch + * @param seg the given MemorySegment + * @param seed the given seed + * @return an ArrayOfDoublesSketch + */ + public static ArrayOfDoublesSketch heapify(final MemorySegment seg, final long seed) { + final SerializerDeserializer.SketchType sketchType = SerializerDeserializer.getSketchType(seg); + if (sketchType == SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch) { + return new HeapArrayOfDoublesQuickSelectSketch(seg, seed); + } + return new HeapArrayOfDoublesCompactSketch(seg, seed); + } + + /** + * Wrap the given MemorySegment as an ArrayOfDoublesSketch + * 
@param seg the given MemorySegment + * @return an ArrayOfDoublesSketch + */ + public static ArrayOfDoublesSketch wrap(final MemorySegment seg) { + return wrap(seg, ThetaUtil.DEFAULT_UPDATE_SEED); + } + + /** + * Wrap the given MemorySegment and seed as a ArrayOfDoublesSketch + * @param seg the given MemorySegment + * @param seed the given seed + * @return an ArrayOfDoublesSketch + */ + public static ArrayOfDoublesSketch wrap(final MemorySegment seg, final long seed) { + final SerializerDeserializer.SketchType sketchType = SerializerDeserializer.getSketchType(seg); + if (sketchType == SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch) { + return new DirectArrayOfDoublesQuickSelectSketchR(seg, seed); + } + return new DirectArrayOfDoublesCompactSketch(seg, seed); + } + + /** + * Estimates the cardinality of the set (number of unique values presented to the sketch) + * @return best estimate of the number of unique values + */ + public double getEstimate() { + if (!isEstimationMode()) { return getRetainedEntries(); } + return getRetainedEntries() / getTheta(); + } + + /** + * Gets the approximate upper error bound given the specified number of Standard Deviations. + * This will return getEstimate() if isEmpty() is true. + * + * @param numStdDev + * See Number of Standard Deviations + * @return the upper bound. + */ + public double getUpperBound(final int numStdDev) { + if (!isEstimationMode()) { return getRetainedEntries(); } + return BinomialBoundsN.getUpperBound(getRetainedEntries(), getTheta(), numStdDev, isEmpty_); + } + + /** + * Gets the approximate lower error bound given the specified number of Standard Deviations. + * This will return getEstimate() if isEmpty() is true. + * + * @param numStdDev + * See Number of Standard Deviations + * @return the lower bound. 
+ */ + public double getLowerBound(final int numStdDev) { + if (!isEstimationMode()) { return getRetainedEntries(); } + return BinomialBoundsN.getLowerBound(getRetainedEntries(), getTheta(), numStdDev, isEmpty_); + } + + /** + * Returns true if this sketch's data structure is backed by MemorySegment. + * @return true if this sketch's data structure is backed by MemorySegment. + */ + public abstract boolean hasMemorySegment(); + + /** + * Returns the MemorySegment object if it exists, otherwise null. + * @return the MemorySegment object if it exists, otherwise null. + */ + abstract MemorySegment getMemorySegment(); + + /** + * See Empty + * @return true if empty. + */ + public boolean isEmpty() { + return isEmpty_; + } + + /** + * Returns number of double values associated with each key + * @return number of double values associated with each key + */ + public int getNumValues() { + return numValues_; + } + + /** + * Returns true if the sketch is Estimation Mode (as opposed to Exact Mode). + * This is true if theta < 1.0 AND isEmpty() is false. + * @return true if the sketch is in estimation mode. + */ + public boolean isEstimationMode() { + return ((thetaLong_ < Long.MAX_VALUE) && !isEmpty()); + } + + /** + * Gets the value of theta as a double between zero and one + * @return the value of theta as a double + */ + public double getTheta() { + return getThetaLong() / (double) Long.MAX_VALUE; + } + + /** + * Returns number of retained entries + * @return number of retained entries + */ + public abstract int getRetainedEntries(); + + /** + * Returns the maximum number of bytes for this sketch when serialized. + * @return the maximum number of bytes for this sketch when serialized. + */ + public abstract int getMaxBytes(); + + /** + * For compact sketches this is the same as getMaxBytes(). + * @return the current number of bytes for this sketch when serialized. 
+ */ + public abstract int getCurrentBytes(); + + /** + * Returns serialized representation of the sketch + * @return serialized representation of the sketch + */ + public abstract byte[] toByteArray(); + + /** + * Returns array of arrays of double values in the sketch + * @return array of arrays of double values in the sketch + */ + public abstract double[][] getValues(); + + abstract double[] getValuesAsOneDimension(); + + abstract long[] getKeys(); + + /** + * Returns the value of theta as a long + * @return the value of theta as a long + */ + long getThetaLong() { + return isEmpty() ? Long.MAX_VALUE : thetaLong_; + } + + abstract short getSeedHash(); + + /** + * Returns an iterator over the sketch + * @return an iterator over the sketch + */ + public abstract ArrayOfDoublesSketchIterator iterator(); + + /** + * Returns this sketch in compact form, which is immutable. + * @return this sketch in compact form, which is immutable. + */ + public ArrayOfDoublesCompactSketch compact() { + return compact(null); + } + + /** + * Returns this sketch in compact form, which is immutable. + * @param dstSeg the destination MemorySegment + * @return this sketch in compact form, which is immutable. + */ + public abstract ArrayOfDoublesCompactSketch compact(MemorySegment dstSeg); + + @Override + public String toString() { + final int seedHash = Short.toUnsignedInt(getSeedHash()); + final StringBuilder sb = new StringBuilder(); + sb.append("### ").append(this.getClass().getSimpleName()).append(" SUMMARY: ").append(LS); + sb.append(" Estimate : ").append(getEstimate()).append(LS); + sb.append(" Upper Bound, 95% conf : ").append(getUpperBound(2)).append(LS); + sb.append(" Lower Bound, 95% conf : ").append(getLowerBound(2)).append(LS); + sb.append(" Theta (double) : ").append(getTheta()).append(LS); + sb.append(" Theta (long) : ").append(getThetaLong()).append(LS); + sb.append(" EstMode? : ").append(isEstimationMode()).append(LS); + sb.append(" Empty? 
: ").append(isEmpty()).append(LS); + sb.append(" Retained Entries : ").append(getRetainedEntries()).append(LS); + if (this instanceof ArrayOfDoublesUpdatableSketch) { + final ArrayOfDoublesUpdatableSketch updatable = (ArrayOfDoublesUpdatableSketch) this; + sb.append(" Nominal Entries (k) : ").append(updatable.getNominalEntries()).append(LS); + sb.append(" Current Capacity : ").append(updatable.getCurrentCapacity()).append(LS); + sb.append(" Resize Factor : ").append(updatable.getResizeFactor().getValue()).append(LS); + sb.append(" Sampling Probability (p): ").append(updatable.getSamplingProbability()).append(LS); + } + sb.append(" Seed Hash : ") + .append(Integer.toHexString(seedHash)).append(" | ").append(seedHash).append(LS); + sb.append("### END SKETCH SUMMARY").append(LS); + return sb.toString(); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketchIterator.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketchIterator.java new file mode 100644 index 000000000..71ed63216 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketchIterator.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+/**
+ * Iteration contract for the entries of a tuple sketch of type ArrayOfDoubles.
+ */
+public interface ArrayOfDoublesSketchIterator {
+  /**
+   * Moves to the next entry and reports whether it exists; the two steps are
+   * combined for efficiency. As a consequence the state of the iterator is
+   * undefined until this method has been called for the first time.
+   * @return true if the next element exists
+   */
+  boolean next();
+
+  /**
+   * Gets the key of the current entry, which is a hash of the original key
+   * passed to update(); the original keys are not retained.
+   * Must not be called before the first call to next()
+   * or after next() has returned false.
+   * @return hash key from the current entry
+   */
+  long getKey();
+
+  /**
+   * Gets the array of values of the current entry.
+   * Must not be called before the first call to next()
+   * or after next() has returned false.
+   * @return array of double values for the current entry (may or may not be a copy)
+   */
+  double[] getValues();
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketches.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketches.java
new file mode 100644
index 000000000..36421e14d
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketches.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * Convenience static factories for tuple sketches and unions of type ArrayOfDoubles.
+ */
+public final class ArrayOfDoublesSketches {
+
+  /**
+   * Heapifies the given MemorySegment as an ArrayOfDoublesSketch using the default update seed.
+   * @param srcSeg the given source MemorySegment
+   * @return an ArrayOfDoublesSketch
+   */
+  public static ArrayOfDoublesSketch heapifySketch(final MemorySegment srcSeg) {
+    return ArrayOfDoublesSketch.heapify(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+  }
+
+  /**
+   * Heapifies the given MemorySegment as an ArrayOfDoublesSketch using the given seed.
+   * @param srcSeg the given source MemorySegment
+   * @param seed the given seed
+   * @return an ArrayOfDoublesSketch
+   */
+  public static ArrayOfDoublesSketch heapifySketch(final MemorySegment srcSeg, final long seed) {
+    return ArrayOfDoublesSketch.heapify(srcSeg, seed);
+  }
+
+  /**
+   * Heapifies the given MemorySegment as an ArrayOfDoublesUpdatableSketch using the default seed.
+   * @param srcSeg the given source MemorySegment
+   * @return an ArrayOfDoublesUpdatableSketch
+   */
+  public static ArrayOfDoublesUpdatableSketch heapifyUpdatableSketch(final MemorySegment srcSeg) {
+    return ArrayOfDoublesUpdatableSketch.heapify(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+  }
+
+  /**
+   * Heapifies the given MemorySegment as an ArrayOfDoublesUpdatableSketch using the given seed.
+   * @param srcSeg the given source MemorySegment
+   * @param seed the given seed
+   * @return an ArrayOfDoublesUpdatableSketch
+   */
+  public static ArrayOfDoublesUpdatableSketch heapifyUpdatableSketch(final MemorySegment srcSeg, final long seed) {
+    return ArrayOfDoublesUpdatableSketch.heapify(srcSeg, seed);
+  }
+
+  /**
+   * Wraps the given MemorySegment as an ArrayOfDoublesSketch using the default update seed.
+   * @param srcSeg the given source MemorySegment
+   * @return an ArrayOfDoublesSketch
+   */
+  public static ArrayOfDoublesSketch wrapSketch(final MemorySegment srcSeg) {
+    return ArrayOfDoublesSketch.wrap(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+  }
+
+  /**
+   * Wraps the given MemorySegment as an ArrayOfDoublesSketch using the given seed.
+   * @param srcSeg the given source MemorySegment
+   * @param seed the given seed
+   * @return an ArrayOfDoublesSketch
+   */
+  public static ArrayOfDoublesSketch wrapSketch(final MemorySegment srcSeg, final long seed) {
+    return ArrayOfDoublesSketch.wrap(srcSeg, seed);
+  }
+
+  /**
+   * Wraps the given MemorySegment as an ArrayOfDoublesUpdatableSketch using the default seed.
+   * @param srcSeg the given source MemorySegment
+   * @return an ArrayOfDoublesUpdatableSketch
+   */
+  public static ArrayOfDoublesUpdatableSketch wrapUpdatableSketch(final MemorySegment srcSeg) {
+    return ArrayOfDoublesUpdatableSketch.wrap(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+  }
+
+  /**
+   * Wraps the given MemorySegment as an ArrayOfDoublesUpdatableSketch using the given seed.
+   * @param srcSeg the given source MemorySegment
+   * @param seed the given seed
+   * @return an ArrayOfDoublesUpdatableSketch
+   */
+  public static ArrayOfDoublesUpdatableSketch wrapUpdatableSketch(final MemorySegment srcSeg, final long seed) {
+    return ArrayOfDoublesUpdatableSketch.wrap(srcSeg, seed);
+  }
+
+  /**
+   * Heapifies the given MemorySegment as an ArrayOfDoublesUnion using the default update seed.
+   * @param srcSeg the given source MemorySegment
+   * @return an ArrayOfDoublesUnion
+   */
+  public static ArrayOfDoublesUnion heapifyUnion(final MemorySegment srcSeg) {
+    return ArrayOfDoublesUnion.heapify(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+  }
+
+  /**
+   * Heapifies the given MemorySegment as an ArrayOfDoublesUnion using the given seed.
+   * @param srcSeg the given source MemorySegment
+   * @param seed the given seed
+   * @return an ArrayOfDoublesUnion
+   */
+  public static ArrayOfDoublesUnion heapifyUnion(final MemorySegment srcSeg, final long seed) {
+    return ArrayOfDoublesUnion.heapify(srcSeg, seed);
+  }
+
+  /**
+   * Wraps the given MemorySegment as an ArrayOfDoublesUnion using the default update seed.
+   * @param srcSeg the given source MemorySegment
+   * @return an ArrayOfDoublesUnion
+   */
+  public static ArrayOfDoublesUnion wrapUnion(final MemorySegment srcSeg) {
+    return ArrayOfDoublesUnion.wrap(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+  }
+
+  /**
+   * Wraps the given MemorySegment as an ArrayOfDoublesUnion using the given seed.
+   * @param srcSeg the given source MemorySegment
+   * @param seed the given seed
+   * @return an ArrayOfDoublesUnion
+   */
+  public static ArrayOfDoublesUnion wrapUnion(final MemorySegment srcSeg, final long seed) {
+    return ArrayOfDoublesUnion.wrap(srcSeg, seed);
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnion.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnion.java
new file mode 100644
index 000000000..a097ccf47
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnion.java
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static java.lang.Math.min;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.apache.datasketches.tuple2.SerializerDeserializer;
+import org.apache.datasketches.tuple2.Util;
+
+/**
+ * Common base of the union operations over tuple sketches of type ArrayOfDoubles.
+ */
+public abstract class ArrayOfDoublesUnion {
+
+  static final byte serialVersionUID = 1;
+  //For layout see toByteArray()
+  static final int PREAMBLE_SIZE_BYTES = 16;
+  static final int PREAMBLE_LONGS_BYTE = 0; // not used, always 1
+  static final int SERIAL_VERSION_BYTE = 1;
+  static final int FAMILY_ID_BYTE = 2;
+  static final int SKETCH_TYPE_BYTE = 3;
+  static final int FLAGS_BYTE = 4;
+  static final int NUM_VALUES_BYTE = 5;
+  static final int SEED_HASH_SHORT = 6;
+  static final int THETA_LONG = 8;
+
+  ArrayOfDoublesQuickSelectSketch gadget_;
+  long unionThetaLong_;
+
+  /**
+   * Constructs this Union around the given sketch, which can be on-heap or off-heap.
+   * @param sketch the given sketch; it becomes the gadget and supplies the initial union theta.
+   */
+  ArrayOfDoublesUnion(final ArrayOfDoublesQuickSelectSketch sketch) {
+    gadget_ = sketch;
+    unionThetaLong_ = sketch.getThetaLong();
+  }
+
+  /**
+   * Heapifies the given MemorySegment as an ArrayOfDoublesUnion using the default update seed.
+   * @param srcSeg the given source MemorySegment
+   * @return an ArrayOfDoublesUnion
+   */
+  public static ArrayOfDoublesUnion heapify(final MemorySegment srcSeg) {
+    return heapify(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+  }
+
+  /**
+   * Heapifies the given MemorySegment as an ArrayOfDoublesUnion using the given seed.
+   * @param srcSeg the given source MemorySegment
+   * @param seed the given seed
+   * @return an ArrayOfDoublesUnion
+   */
+  public static ArrayOfDoublesUnion heapify(final MemorySegment srcSeg, final long seed) {
+    return HeapArrayOfDoublesUnion.heapifyUnion(srcSeg, seed);
+  }
+
+  /**
+   * Wraps the given MemorySegment as an ArrayOfDoublesUnion using the default update seed.
+   * @param srcSeg the given source MemorySegment
+   * @return an ArrayOfDoublesUnion
+   */
+  public static ArrayOfDoublesUnion wrap(final MemorySegment srcSeg) {
+    return wrap(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED);
+  }
+
+  /**
+   * Wraps the given MemorySegment as an ArrayOfDoublesUnion using the given seed.
+   * @param srcSeg the given source MemorySegment; whether it is read-only is passed on
+   * @param seed the given seed
+   * @return an ArrayOfDoublesUnion
+   */
+  public static ArrayOfDoublesUnion wrap(final MemorySegment srcSeg, final long seed) {
+    return DirectArrayOfDoublesUnion.wrapUnion(srcSeg, seed, !srcSeg.isReadOnly());
+  }
+
+  /**
+   * Updates the union by adding a set of entries from a given sketch, which can be on-heap or off-heap.
+   * Both the given tupleSketch and the internal state of the Union must have the same numValues.
+   *
+   * <p>Nulls and empty sketches are ignored.</p>
+   *
+   * @param tupleSketch sketch to add to the union
+   */
+  public void union(final ArrayOfDoublesSketch tupleSketch) {
+    if (tupleSketch == null) { return; }
+    Util.checkSeedHashes(gadget_.getSeedHash(), tupleSketch.getSeedHash());
+    if (gadget_.getNumValues() != tupleSketch.getNumValues()) {
+      throw new SketchesArgumentException("Incompatible sketches: number of values mismatch "
+          + gadget_.getNumValues() + " and " + tupleSketch.getNumValues());
+    }
+
+    if (tupleSketch.isEmpty()) { return; }
+    gadget_.setNotEmpty();
+
+    setUnionThetaLong(min(min(unionThetaLong_, tupleSketch.getThetaLong()), gadget_.getThetaLong()));
+
+    if (tupleSketch.getRetainedEntries() == 0) { return; }
+    for (final ArrayOfDoublesSketchIterator it = tupleSketch.iterator(); it.next(); ) {
+      final long key = it.getKey();
+      if (key < unionThetaLong_) {
+        gadget_.merge(key, it.getValues());
+      }
+    }
+    // keep the union theta as low as possible for performance:
+    // follow the gadget whenever its theta has dropped below the union theta
+    final long gadgetThetaLong = gadget_.getThetaLong();
+    if (gadgetThetaLong < unionThetaLong_) { setUnionThetaLong(gadgetThetaLong); }
+  }
+
+  /**
+   * Returns the resulting union in the form of a compact sketch
+   * @param dstSeg MemorySegment for the result (can be null)
+   * @return compact sketch representing the union (off-heap if MemorySegment is provided)
+   */
+  public ArrayOfDoublesCompactSketch getResult(final MemorySegment dstSeg) {
+    final boolean overNominal = gadget_.getRetainedEntries() > gadget_.getNominalEntries();
+    final long resultThetaLong = overNominal
+        ? min(unionThetaLong_, gadget_.getNewThetaLong())
+        : unionThetaLong_;
+    // A null destination selects the on-heap implementation.
+    return (dstSeg == null)
+        ? new HeapArrayOfDoublesCompactSketch(gadget_, resultThetaLong)
+        : new DirectArrayOfDoublesCompactSketch(gadget_, resultThetaLong, dstSeg);
+  }
+
+  /**
+   * Returns the resulting union in the form of a compact sketch
+   * @return on-heap compact sketch representing the union
+   */
+  public ArrayOfDoublesCompactSketch getResult() {
+    return getResult(null);
+  }
+
+  /**
+   * Resets the union to an empty state
+   */
+  public void reset() {
+    gadget_.reset();
+    setUnionThetaLong(gadget_.getThetaLong());
+  }
+
+  // Layout of first 16 bytes:
+  // Long || Start Byte Adr:
+  // Adr:
+  //      ||   7   |   6   |   5    |   4    |   3    |   2   |   1    |       0        |
+  //  0   || Seed Hash=0   | #Dbls=0| Flags=0| SkType | FamID | SerVer | Preamble_Longs |
+  //      ||  15   |  14   |   13   |   12   |   11   |  10   |   9    |       8        |
+  //  1   ||------------------------------Union Theta Long------------------------------|
+  /**
+   * Returns a byte array representation of this object: the 16-byte preamble, then the gadget
+   * @return a byte array representation of this object
+   */
+  public byte[] toByteArray() {
+    final byte[] bytes = new byte[PREAMBLE_SIZE_BYTES + gadget_.getSerializedSizeBytes()];
+    final MemorySegment dst = MemorySegment.ofArray(bytes);
+    final int sketchType = SerializerDeserializer.SketchType.ArrayOfDoublesUnion.ordinal();
+    dst.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1); // unused, always 1
+    dst.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID);
+    dst.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID());
+    dst.set(JAVA_BYTE, SKETCH_TYPE_BYTE, (byte) sketchType);
+    //byte 4-7 automatically zero
+    dst.set(JAVA_LONG_UNALIGNED, THETA_LONG, unionThetaLong_);
+    gadget_.serializeInto(dst.asSlice(PREAMBLE_SIZE_BYTES));
+    return bytes;
+  }
+
+  /**
+   * Returns maximum required storage bytes given nomEntries and numValues
+   * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than or equal to
+   * given value.
+   * @param numValues Number of double values to keep for each key
+   * @return maximum required storage bytes given nomEntries and numValues
+   */
+  public static int getMaxBytes(final int nomEntries, final int numValues) {
+    return PREAMBLE_SIZE_BYTES + ArrayOfDoublesQuickSelectSketch.getMaxBytes(nomEntries, numValues);
+  }
+
+  void setUnionThetaLong(final long thetaLong) {
+    unionThetaLong_ = thetaLong;
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java
new file mode 100644
index 000000000..c61e8944d
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java
@@ -0,0 +1,229 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import java.lang.foreign.MemorySegment;
+import java.nio.ByteBuffer;
+
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.hash.MurmurHash3;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.apache.datasketches.tuple2.Util;
+
+/**
+ * Top-level class of the updatable tuple sketches of type ArrayOfDoubles.
+ */
+public abstract class ArrayOfDoublesUpdatableSketch extends ArrayOfDoublesSketch {
+
+  final long seed_;
+
+  ArrayOfDoublesUpdatableSketch(final int numValues, final long seed) {
+    super(numValues);
+    seed_ = seed;
+  }
+
+  /**
+   * Heapifies the given MemorySegment as an ArrayOfDoublesUpdatableSketch using the default seed.
+   * @param seg the given MemorySegment
+   * @return an ArrayOfDoublesUpdatableSketch
+   */
+  public static ArrayOfDoublesUpdatableSketch heapify(final MemorySegment seg) {
+    return heapify(seg, ThetaUtil.DEFAULT_UPDATE_SEED);
+  }
+
+  /**
+   * Heapifies the given MemorySegment as an ArrayOfDoublesUpdatableSketch using the given seed.
+   * @param seg the given MemorySegment
+   * @param seed the given seed
+   * @return an ArrayOfDoublesUpdatableSketch
+   */
+  public static ArrayOfDoublesUpdatableSketch heapify(final MemorySegment seg, final long seed) {
+    return new HeapArrayOfDoublesQuickSelectSketch(seg, seed);
+  }
+
+  /**
+   * Wraps the given MemorySegment as an ArrayOfDoublesUpdatableSketch using the default seed.
+   * @param seg the given MemorySegment
+   * @return an ArrayOfDoublesUpdatableSketch
+   */
+  public static ArrayOfDoublesUpdatableSketch wrap(final MemorySegment seg) {
+    return wrap(seg, ThetaUtil.DEFAULT_UPDATE_SEED);
+  }
+
+  /**
+   * Wraps the given MemorySegment as an ArrayOfDoublesUpdatableSketch using the given seed.
+   * @param seg the given MemorySegment
+   * @param seed the given seed
+   * @return an ArrayOfDoublesUpdatableSketch
+   */
+  public static ArrayOfDoublesUpdatableSketch wrap(final MemorySegment seg, final long seed) {
+    return new DirectArrayOfDoublesQuickSelectSketch(seg, seed);
+  }
+
+  /**
+   * Updates this sketch with a long key and double values.
+   * The values will be stored or added to the ones associated with the key.
+   * The key is wrapped in a one-element long[] and passed to the long[] overload.
+   * @param key The given long key
+   * @param values The given values
+   */
+  public void update(final long key, final double[] values) {
+    update(new long[] { key }, values);
+  }
+
+  /**
+   * Updates this sketch with a double key and double values.
+   * The values will be stored or added to the ones associated with the key.
+   * The key is converted by Util.doubleToLongArray and passed to the long[] overload.
+   * @param key The given double key
+   * @param values The given values
+   */
+  public void update(final double key, final double[] values) {
+    update(Util.doubleToLongArray(key), values);
+  }
+
+  /**
+   * Updates this sketch with a String key and double values.
+   * The values will be stored or added to the ones associated with the key.
+   * The key is converted by Util.stringToByteArray and passed to the byte[] overload.
+   * @param key The given String key
+   * @param values The given values
+   */
+  public void update(final String key, final double[] values) {
+    update(Util.stringToByteArray(key), values);
+  }
+
+  /**
+   * Updates this sketch with a byte[] key and double values.
+   * The values will be stored or added to the ones associated with the key.
+   * A null or zero-length key is ignored.
+   * @param key The given byte[] key
+   * @param values The given values
+   */
+  public void update(final byte[] key, final double[] values) {
+    final boolean hasKey = (key != null) && (key.length > 0);
+    if (hasKey) { insertOrIgnore(MurmurHash3.hash(key, seed_)[0] >>> 1, values); }
+  }
+
+  /**
+   * Updates this sketch with a ByteBuffer key and double values.
+   * The values will be stored or added to the ones associated with the key.
+   * A null key or a buffer with nothing remaining is ignored.
+   * @param key The given ByteBuffer key
+   * @param values The given values
+   */
+  public void update(final ByteBuffer key, final double[] values) {
+    final boolean hasKey = (key != null) && key.hasRemaining();
+    if (hasKey) { insertOrIgnore(MurmurHash3.hash(key, seed_)[0] >>> 1, values); }
+  }
+
+  /**
+   * Updates this sketch with a int[] key and double values.
+   * The values will be stored or added to the ones associated with the key.
+   * A null or zero-length key is ignored.
+   * @param key The given int[] key
+   * @param values The given values
+   */
+  public void update(final int[] key, final double[] values) {
+    final boolean hasKey = (key != null) && (key.length > 0);
+    if (hasKey) { insertOrIgnore(MurmurHash3.hash(key, seed_)[0] >>> 1, values); }
+  }
+
+  /**
+   * Updates this sketch with a long[] key and double values.
+   * The values will be stored or added to the ones associated with the key.
+   * A null or zero-length key is ignored.
+   * @param key The given long[] key
+   * @param values The given values
+   */
+  public void update(final long[] key, final double[] values) {
+    final boolean hasKey = (key != null) && (key.length > 0);
+    if (hasKey) { insertOrIgnore(MurmurHash3.hash(key, seed_)[0] >>> 1, values); }
+  }
+
+  /**
+   * Gets the configured nominal number of entries
+   * @return nominal number of entries
+   */
+  public abstract int getNominalEntries();
+
+  /**
+   * Gets the configured resize factor
+   * @return resize factor
+   */
+  public abstract ResizeFactor getResizeFactor();
+
+  /**
+   * Gets the configured sampling probability
+   * @return sampling probability
+   */
+  public abstract float getSamplingProbability();
+
+  /**
+   * Rebuilds reducing the actual number of entries to the nominal number of entries if needed
+   */
+  public abstract void trim();
+
+  /**
+   * Resets this sketch to an empty state.
+   */
+  public abstract void reset();
+
+  /**
+   * Gets an on-heap compact representation of the sketch
+   * @return compact sketch
+   */
+  @Override
+  public ArrayOfDoublesCompactSketch compact() {
+    return compact(null);
+  }
+
+  /**
+   * Gets a compact representation of the sketch, off-heap when a MemorySegment is given
+   * @param dstSeg MemorySegment for the compact sketch (can be null)
+   * @return compact sketch (off-heap if MemorySegment is provided)
+   */
+  @Override
+  public ArrayOfDoublesCompactSketch compact(final MemorySegment dstSeg) {
+    // A null destination selects the on-heap implementation.
+    return (dstSeg == null)
+        ? new HeapArrayOfDoublesCompactSketch(this)
+        : new DirectArrayOfDoublesCompactSketch(this, dstSeg);
+  }
+
+  abstract int getCurrentCapacity();
+
+  long getSeed() {
+    return seed_;
+  }
+
+  @Override
+  short getSeedHash() {
+    return Util.computeSeedHash(seed_);
+  }
+
+  /**
+   * Insert if key is less than thetaLong and not a duplicate, otherwise ignore.
+   * @param key the hash value of the input value
+   * @param values array of values to update the summary
+   */
+  abstract void insertOrIgnore(long key, double[] values);
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java
new file mode 100644
index 000000000..a6fa5e118
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+
+/**
+ * Builder for a new ArrayOfDoublesUpdatableSketch; every setter returns this builder.
+ */
+public class ArrayOfDoublesUpdatableSketchBuilder {
+
+  private int nomEntries_;
+  private ResizeFactor resizeFactor_;
+  private int numValues_;
+  private float samplingProbability_;
+  private long seed_;
+
+  private static final int DEFAULT_NUMBER_OF_VALUES = 1;
+  private static final float DEFAULT_SAMPLING_PROBABILITY = 1;
+  private static final ResizeFactor DEFAULT_RESIZE_FACTOR = ResizeFactor.X8;
+
+  /**
+   * Creates an instance of builder with default parameters
+   */
+  public ArrayOfDoublesUpdatableSketchBuilder() {
+    nomEntries_ = ThetaUtil.DEFAULT_NOMINAL_ENTRIES;
+    resizeFactor_ = DEFAULT_RESIZE_FACTOR;
+    numValues_ = DEFAULT_NUMBER_OF_VALUES;
+    samplingProbability_ = DEFAULT_SAMPLING_PROBABILITY;
+    seed_ = ThetaUtil.DEFAULT_UPDATE_SEED;
+  }
+
+  /**
+   * This is to set the nominal number of entries.
+   * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than
+   * or equal to given value.
+   * @return this builder
+   */
+  public ArrayOfDoublesUpdatableSketchBuilder setNominalEntries(final int nomEntries) {
+    nomEntries_ = 1 << ThetaUtil.checkNomLongs(nomEntries);
+    return this;
+  }
+
+  /**
+   * This is to set the resize factor.
+   * Value of X1 means that the maximum capacity is allocated from the start.
+   * Default resize factor is X8.
+   * @param resizeFactor value of X1, X2, X4 or X8
+   * @return this UpdatableSketchBuilder
+   */
+  public ArrayOfDoublesUpdatableSketchBuilder setResizeFactor(final ResizeFactor resizeFactor) {
+    resizeFactor_ = resizeFactor;
+    return this;
+  }
+
+  /**
+   * This is to set sampling probability. Default probability is 1.
+   * @param samplingProbability sampling probability from 0 to 1; NaN is rejected
+   * @return this builder
+   * @throws SketchesArgumentException if the value is NaN or outside the interval [0, 1]
+   */
+  public ArrayOfDoublesUpdatableSketchBuilder
+        setSamplingProbability(final float samplingProbability) {
+    if (!((samplingProbability >= 0f) && (samplingProbability <= 1f))) { // NaN fails both tests
+      throw new SketchesArgumentException("sampling probability must be between 0 and 1");
+    }
+    samplingProbability_ = samplingProbability;
+    return this;
+  }
+
+  /**
+   * This is to set the number of double values associated with each key
+   * @param numValues number of double values
+   * @return this builder
+   */
+  public ArrayOfDoublesUpdatableSketchBuilder setNumberOfValues(final int numValues) {
+    numValues_ = numValues;
+    return this;
+  }
+
+  /**
+   * Sets the long seed value that is required by the hashing function.
+   * @param seed See seed
+   * @return this builder
+   */
+  public ArrayOfDoublesUpdatableSketchBuilder setSeed(final long seed) {
+    seed_ = seed;
+    return this;
+  }
+
+  /**
+   * Returns an on-heap ArrayOfDoublesUpdatableSketch with the current configuration of this Builder.
+   * @return an ArrayOfDoublesUpdatableSketch
+   */
+  public ArrayOfDoublesUpdatableSketch build() {
+    return new HeapArrayOfDoublesQuickSelectSketch(nomEntries_, resizeFactor_.lg(),
+        samplingProbability_, numValues_, seed_);
+  }
+
+  /**
+   * Returns an ArrayOfDoublesUpdatableSketch with the current configuration of this Builder.
+   * @param dstSeg instance of MemorySegment to be used by the sketch
+   * @return an ArrayOfDoublesUpdatableSketch
+   */
+  public ArrayOfDoublesUpdatableSketch build(final MemorySegment dstSeg) {
+    return new DirectArrayOfDoublesQuickSelectSketch(nomEntries_, resizeFactor_.lg(),
+        samplingProbability_, numValues_, seed_, dstSeg);
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java
new file mode 100644
index 000000000..727c9dccf
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java
@@ -0,0 +1,288 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +package org.apache.datasketches.tuple2.arrayofdoubles; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; + +import java.lang.foreign.MemorySegment; +import java.nio.ByteOrder; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.tuple2.SerializerDeserializer; +import org.apache.datasketches.tuple2.Util; + +/** + * Direct Compact Sketch of type ArrayOfDoubles. + * + *

This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for + * the JVM to perform garbage collection.

+ */ +final class DirectArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketch { + + // this value exists only on heap, never serialized + private MemorySegment seg_; + + /** + * Converts the given UpdatableArrayOfDoublesSketch to this compact form. + * @param sketch the given UpdatableArrayOfDoublesSketch + * @param dstSeg the given destination MemorySegment. + */ + DirectArrayOfDoublesCompactSketch(final ArrayOfDoublesUpdatableSketch sketch, + final MemorySegment dstSeg) { + this(sketch, sketch.getThetaLong(), dstSeg); + } + + /** + * Converts the given UpdatableArrayOfDoublesSketch to this compact form + * trimming if necessary according to given theta + * @param sketch the given UpdatableArrayOfDoublesSketch + * @param thetaLong new value of thetaLong + * @param dstSeg the given destination MemorySegment. + */ + DirectArrayOfDoublesCompactSketch(final ArrayOfDoublesUpdatableSketch sketch, + final long thetaLong, final MemorySegment dstSeg) { + super(sketch.getNumValues()); + checkIfEnoughMemory(dstSeg, sketch.getRetainedEntries(), sketch.getNumValues()); + seg_ = dstSeg; + dstSeg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1); + dstSeg.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID); + dstSeg.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); + dstSeg.set(JAVA_BYTE, SKETCH_TYPE_BYTE, (byte) + SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch.ordinal()); + final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN); + isEmpty_ = sketch.isEmpty(); + final int count = sketch.getRetainedEntries(); + dstSeg.set(JAVA_BYTE, FLAGS_BYTE, (byte) ( + (isBigEndian ? 1 << Flags.IS_BIG_ENDIAN.ordinal() : 0) + | (isEmpty_ ? 1 << Flags.IS_EMPTY.ordinal() : 0) + | (count > 0 ? 
1 << Flags.HAS_ENTRIES.ordinal() : 0) + )); + dstSeg.set(JAVA_BYTE, NUM_VALUES_BYTE, (byte) numValues_); + dstSeg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, Util.computeSeedHash(sketch.getSeed())); + thetaLong_ = Math.min(sketch.getThetaLong(), thetaLong); + dstSeg.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); + if (count > 0) { + int keyOffset = ENTRIES_START; + int valuesOffset = keyOffset + (SIZE_OF_KEY_BYTES * sketch.getRetainedEntries()); + final ArrayOfDoublesSketchIterator it = sketch.iterator(); + int actualCount = 0; + while (it.next()) { + if (it.getKey() < thetaLong_) { + dstSeg.set(JAVA_LONG_UNALIGNED, keyOffset, it.getKey()); + MemorySegment.copy(it.getValues(), 0, dstSeg, JAVA_DOUBLE_UNALIGNED, valuesOffset, numValues_); + keyOffset += SIZE_OF_KEY_BYTES; + valuesOffset += SIZE_OF_VALUE_BYTES * numValues_; + actualCount++; + } + } + dstSeg.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, actualCount); + } + } + + /* + * Creates an instance from components + */ + DirectArrayOfDoublesCompactSketch(final long[] keys, final double[] values, final long thetaLong, + final boolean isEmpty, final int numValues, final short seedHash, final MemorySegment dstSeg) { + super(numValues); + checkIfEnoughMemory(dstSeg, values.length, numValues); + seg_ = dstSeg; + dstSeg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1); + dstSeg.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID); + dstSeg.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); + dstSeg.set(JAVA_BYTE, SKETCH_TYPE_BYTE, (byte) + SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch.ordinal()); + final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN); + isEmpty_ = isEmpty; + final int count = keys.length; + dstSeg.set(JAVA_BYTE, FLAGS_BYTE, (byte) ( + (isBigEndian ? 1 << Flags.IS_BIG_ENDIAN.ordinal() : 0) + | (isEmpty_ ? 1 << Flags.IS_EMPTY.ordinal() : 0) + | (count > 0 ? 
1 << Flags.HAS_ENTRIES.ordinal() : 0) + )); + dstSeg.set(JAVA_BYTE, NUM_VALUES_BYTE, (byte) numValues_); + dstSeg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, seedHash); + thetaLong_ = thetaLong; + dstSeg.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); + if (count > 0) { + dstSeg.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, count); + MemorySegment.copy(keys, 0, dstSeg, JAVA_LONG_UNALIGNED, ENTRIES_START, count); + MemorySegment.copy(values, 0, dstSeg, JAVA_DOUBLE_UNALIGNED, ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * count), values.length); + } + } + + /** + * Wraps the given MemorySegment. + * @param seg the given MemorySegment + */ + DirectArrayOfDoublesCompactSketch(final MemorySegment seg) { + super(seg.get(JAVA_BYTE, NUM_VALUES_BYTE)); + seg_ = seg; + SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE), + seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE)); + SerializerDeserializer.validateType(seg_.get(JAVA_BYTE, SKETCH_TYPE_BYTE), + SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch); + final byte version = seg_.get(JAVA_BYTE, SERIAL_VERSION_BYTE); + if (version != serialVersionUID) { + throw new SketchesArgumentException("Serial version mismatch. Expected: " + serialVersionUID + + ", actual: " + version); + } + final boolean isBigEndian = + (seg.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_BIG_ENDIAN.ordinal())) != 0; + if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { + throw new SketchesArgumentException("Byte order mismatch"); + } + + isEmpty_ = (seg_.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; + thetaLong_ = seg_.get(JAVA_LONG_UNALIGNED, THETA_LONG); + } + + /** + * Wraps the given MemorySegment. + * @param seg the given MemorySegment. 
+ * @param seed See seed + */ + DirectArrayOfDoublesCompactSketch(final MemorySegment seg, final long seed) { + super(seg.get(JAVA_BYTE, NUM_VALUES_BYTE)); + seg_ = seg; + SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE), + seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE)); + SerializerDeserializer.validateType(seg_.get(JAVA_BYTE, SKETCH_TYPE_BYTE), + SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch); + final byte version = seg_.get(JAVA_BYTE, SERIAL_VERSION_BYTE); + if (version != serialVersionUID) { + throw new SketchesArgumentException("Serial version mismatch. Expected: " + serialVersionUID + + ", actual: " + version); + } + final boolean isBigEndian = + (seg.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_BIG_ENDIAN.ordinal())) != 0; + if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { + throw new SketchesArgumentException("Byte order mismatch"); + } + Util.checkSeedHashes(seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT), Util.computeSeedHash(seed)); + isEmpty_ = (seg_.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; + thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, THETA_LONG); + } + + @Override + public ArrayOfDoublesCompactSketch compact(final MemorySegment dstSeg) { + if (dstSeg == null) { + return new + HeapArrayOfDoublesCompactSketch(getKeys(), getValuesAsOneDimension(), thetaLong_, isEmpty_, numValues_, + getSeedHash()); + } else { + MemorySegment.copy(seg_, 0, dstSeg, 0, seg_.byteSize()); + return new DirectArrayOfDoublesCompactSketch(dstSeg); + } + } + + @Override + public int getRetainedEntries() { + final boolean hasEntries = + (seg_.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.HAS_ENTRIES.ordinal())) != 0; + return (hasEntries ? 
seg_.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT) : 0); + } + + @Override + //converts compact MemorySegment array of double[] to compact double[][] + public double[][] getValues() { + final int count = getRetainedEntries(); + final double[][] values = new double[count][]; + if (count > 0) { + int valuesOffset = ENTRIES_START + (SIZE_OF_KEY_BYTES * count); + for (int i = 0; i < count; i++) { + final double[] array = new double[numValues_]; + MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, valuesOffset, array, 0, numValues_); + values[i] = array; + valuesOffset += SIZE_OF_VALUE_BYTES * numValues_; + } + } + return values; + } + + @Override + //converts compact MemorySegment array of double[] to compact double[] + double[] getValuesAsOneDimension() { + final int count = getRetainedEntries(); + final int numDoubles = count * numValues_; + final double[] values = new double[numDoubles]; + if (count > 0) { + final int valuesOffset = ENTRIES_START + (SIZE_OF_KEY_BYTES * count); + MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, valuesOffset, values, 0, numDoubles); + } + return values; + } + + @Override + //converts compact Memory array of long[] to compact long[] + long[] getKeys() { + final int count = getRetainedEntries(); + final long[] keys = new long[count]; + if (count > 0) { + for (int i = 0; i < count; i++) { + MemorySegment.copy(seg_, JAVA_LONG_UNALIGNED, ENTRIES_START, keys, 0, count); + } + } + return keys; + } + + @Override + public byte[] toByteArray() { + final int sizeBytes = getCurrentBytes(); + final byte[] byteArray = new byte[sizeBytes]; + final MemorySegment seg = MemorySegment.ofArray(byteArray); + MemorySegment.copy(seg_, 0, seg, 0, sizeBytes); + return byteArray; + } + + @Override + public ArrayOfDoublesSketchIterator iterator() { + return new DirectArrayOfDoublesSketchIterator( + seg_, ENTRIES_START, getRetainedEntries(), numValues_); + } + + @Override + short getSeedHash() { + return seg_.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT); + } + + 
@Override + public boolean hasMemorySegment() { return true; } + + @Override + MemorySegment getMemorySegment() { return seg_; } + + private static void checkIfEnoughMemory(final MemorySegment seg, final int numEntries, + final int numValues) { + final int sizeNeeded = + ENTRIES_START + ((SIZE_OF_KEY_BYTES + (SIZE_OF_VALUE_BYTES * numValues)) * numEntries); + if (sizeNeeded > seg.byteSize()) { + throw new SketchesArgumentException("Not enough memory: need " + sizeNeeded + + " bytes, got " + seg.byteSize() + " bytes"); + } + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesIntersection.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesIntersection.java new file mode 100644 index 000000000..c5771046e --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesIntersection.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.arrayofdoubles; + +import java.lang.foreign.MemorySegment; + +/** + * Direct Intersection operation for tuple sketches of type ArrayOfDoubles. + * + *

This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for + * the JVM to perform garbage collection.

+ */ +final class DirectArrayOfDoublesIntersection extends ArrayOfDoublesIntersection { + + private MemorySegment seg_; + + /** + * Creates an instance of a DirectArrayOfDoublesIntersection with a custom update seed + * @param numValues number of double values associated with each key + * @param seed See seed + * @param dstSeg the destination MemorySegment + */ + DirectArrayOfDoublesIntersection(final int numValues, final long seed, final MemorySegment dstSeg) { + super(numValues, seed); + seg_ = dstSeg; + } + + @Override + protected ArrayOfDoublesQuickSelectSketch createSketch(final int nomEntries, final int numValues, + final long seed) { + return new DirectArrayOfDoublesQuickSelectSketch(nomEntries, 0, 1f, numValues, seed, seg_); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java new file mode 100644 index 000000000..249723323 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java @@ -0,0 +1,433 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple2.arrayofdoubles; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; +import static org.apache.datasketches.common.Util.clear; +import static org.apache.datasketches.common.Util.clearBits; +import static org.apache.datasketches.common.Util.setBits; + +import java.lang.foreign.MemorySegment; +import java.nio.ByteOrder; +import java.util.Arrays; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.thetacommon.HashOperations; +import org.apache.datasketches.tuple2.SerializerDeserializer; +import org.apache.datasketches.tuple2.Util; + +/** + * Direct QuickSelect tuple sketch of type ArrayOfDoubles. + * + *

This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for + * the JVM to perform garbage collection.

+ */ +class DirectArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelectSketch { + + // these values exist only on heap, never serialized + private MemorySegment seg_; + // these can be derived from the seg_ contents, but are kept here for performance + private int keysOffset_; + private int valuesOffset_; + + /** + * Construct a new sketch using the given MemorySegment as its backing store. + * + * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than + * given value. + * @param lgResizeFactor log2(resize factor) - value from 0 to 3: + * 0 - no resizing (max size allocated), + * 1 - double internal hash table each time it reaches a threshold + * 2 - grow four times + * 3 - grow eight times (default) + * @param samplingProbability + * See Sampling Probability + * @param numValues Number of double values to keep for each key. + * @param seed See seed + * @param dstSeg the destination MemorySegment. + */ + DirectArrayOfDoublesQuickSelectSketch( + final int nomEntries, + final int lgResizeFactor, + final float samplingProbability, + final int numValues, + final long seed, + final MemorySegment dstSeg) { + this(checkMemory(nomEntries, lgResizeFactor, numValues, dstSeg), + //SpotBugs CT_CONSTRUCTOR_THROW is false positive. 
+ //this construction scheme is compliant with SEI CERT Oracle Coding Standard for Java / OBJ11-J + nomEntries, + lgResizeFactor, + samplingProbability, + numValues, + seed, + dstSeg); + } + + private DirectArrayOfDoublesQuickSelectSketch( + final boolean secure, //required part of Finalizer Attack prevention + final int nomEntries, + final int lgResizeFactor, + final float samplingProbability, + final int numValues, + final long seed, + final MemorySegment dstSeg) { + super(numValues, seed); + seg_ = dstSeg; + final int startingCapacity = Util.getStartingCapacity(nomEntries, lgResizeFactor); + seg_.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1); + seg_.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID); + seg_.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); + seg_.set(JAVA_BYTE, SKETCH_TYPE_BYTE, (byte) + SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch.ordinal()); + final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN); + seg_.set(JAVA_BYTE, FLAGS_BYTE, (byte) ( + (isBigEndian ? 1 << Flags.IS_BIG_ENDIAN.ordinal() : 0) + | (samplingProbability < 1f ? 
1 << Flags.IS_IN_SAMPLING_MODE.ordinal() : 0) + | (1 << Flags.IS_EMPTY.ordinal()) + )); + seg_.set(JAVA_BYTE, NUM_VALUES_BYTE, (byte) numValues); + seg_.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, Util.computeSeedHash(seed)); + thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability); + seg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); + seg_.set(JAVA_BYTE, LG_NOM_ENTRIES_BYTE, (byte) Integer.numberOfTrailingZeros(nomEntries)); + seg_.set(JAVA_BYTE, LG_CUR_CAPACITY_BYTE, (byte) Integer.numberOfTrailingZeros(startingCapacity)); + seg_.set(JAVA_BYTE, LG_RESIZE_FACTOR_BYTE, (byte) lgResizeFactor); + seg_.set(JAVA_FLOAT_UNALIGNED, SAMPLING_P_FLOAT, samplingProbability); + seg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, 0); + keysOffset_ = ENTRIES_START; + valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * startingCapacity); + clear(seg_, keysOffset_, (long) SIZE_OF_KEY_BYTES * startingCapacity); //clear keys only + lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingCapacity); + setRebuildThreshold(); + } + + private static final boolean checkMemory( + final int nomEntries, + final int lgResizeFactor, + final int numValues, + final MemorySegment dstSeg) { + final int startingCapacity = Util.getStartingCapacity(nomEntries, lgResizeFactor); + checkIfEnoughMemory(dstSeg, startingCapacity, numValues); + return true; + } + + /** + * Wraps the given MemorySegment. + * @param seg the given MemorySegment + * @param seed update seed + */ + DirectArrayOfDoublesQuickSelectSketch( + final MemorySegment seg, + final long seed) { + this(checkSerVer_Endianness(seg), seg, seed); + //SpotBugs CT_CONSTRUCTOR_THROW is false positive. 
+ //this construction scheme is compliant with SEI CERT Oracle Coding Standard for Java / OBJ11-J + } + + private DirectArrayOfDoublesQuickSelectSketch( + final boolean secure, //required part of Finalizer Attack prevention + final MemorySegment seg, + final long seed) { + super(seg.get(JAVA_BYTE, NUM_VALUES_BYTE), seed); + seg_ = seg; + SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE), + seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE)); + SerializerDeserializer.validateType(seg_.get(JAVA_BYTE, SKETCH_TYPE_BYTE), + SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch); + + Util.checkSeedHashes(seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT), Util.computeSeedHash(seed)); + keysOffset_ = ENTRIES_START; + valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * getCurrentCapacity()); + // to do: make parent take care of its own parts + lgCurrentCapacity_ = Integer.numberOfTrailingZeros(getCurrentCapacity()); + thetaLong_ = seg_.get(JAVA_LONG_UNALIGNED, THETA_LONG); + isEmpty_ = (seg_.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; + setRebuildThreshold(); + } + + private static final boolean checkSerVer_Endianness(final MemorySegment seg) { + final byte version = seg.get(JAVA_BYTE, SERIAL_VERSION_BYTE); + if (version != serialVersionUID) { + throw new SketchesArgumentException("Serial version mismatch. 
Expected: " + serialVersionUID + + ", actual: " + version); + } + final boolean isBigEndian = + (seg.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_BIG_ENDIAN.ordinal())) != 0; + if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { + throw new SketchesArgumentException("Byte order mismatch"); + } + return true; + } + + @Override + //converts Memory hashTable of double[] to compacted double[][] + public double[][] getValues() { + final int count = getRetainedEntries(); + final double[][] values = new double[count][]; + if (count > 0) { + long keyOffset = keysOffset_; + long valuesOffset = valuesOffset_; + int cnt = 0; + for (int j = 0; j < getCurrentCapacity(); j++) { + if (seg_.get(JAVA_LONG_UNALIGNED, keyOffset) != 0) { + final double[] array = new double[numValues_]; + MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, valuesOffset, array, 0, numValues_); + values[cnt++] = array; + } + keyOffset += SIZE_OF_KEY_BYTES; + valuesOffset += (long)SIZE_OF_VALUE_BYTES * numValues_; + } + } + return values; + } + + @Override + //converts heap hashTable of double[] to compacted double[] + double[] getValuesAsOneDimension() { + final int count = getRetainedEntries(); + final double[] values = new double[count * numValues_]; + final int cap = getCurrentCapacity(); + if (count > 0) { + long keyOffsetBytes = keysOffset_; + long valuesOffsetBytes = valuesOffset_; + int cnt = 0; + for (int j = 0; j < cap; j++) { + if (seg_.get(JAVA_LONG_UNALIGNED, keyOffsetBytes) != 0) { + MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, valuesOffsetBytes, values, cnt++ * numValues_, numValues_); + } + keyOffsetBytes += SIZE_OF_KEY_BYTES; + valuesOffsetBytes += (long)SIZE_OF_VALUE_BYTES * numValues_; + } + assert cnt == count; + } + return values; + } + + @Override + //converts heap hashTable of long[] to compacted long[] + long[] getKeys() { + final int count = getRetainedEntries(); + final long[] keys = new long[count]; + final int cap = getCurrentCapacity(); + if (count > 0) { + 
long keyOffsetBytes = keysOffset_; + int cnt = 0; + for (int j = 0; j < cap; j++) { + final long key; + if ((key = seg_.get(JAVA_LONG_UNALIGNED, keyOffsetBytes)) != 0) { + keys[cnt++] = key; + } + keyOffsetBytes += SIZE_OF_KEY_BYTES; + } + assert cnt == count; + } + return keys; + } + + @Override + public int getRetainedEntries() { + return seg_.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT); + } + + @Override + public int getNominalEntries() { + return 1 << seg_.get(JAVA_BYTE, LG_NOM_ENTRIES_BYTE); + } + + @Override + public ResizeFactor getResizeFactor() { + return ResizeFactor.getRF(seg_.get(JAVA_BYTE, LG_RESIZE_FACTOR_BYTE)); + } + + @Override + public float getSamplingProbability() { + return seg_.get(JAVA_FLOAT_UNALIGNED, SAMPLING_P_FLOAT); + } + + @Override + public byte[] toByteArray() { + final int sizeBytes = getSerializedSizeBytes(); + final byte[] byteArray = new byte[sizeBytes]; + final MemorySegment seg = MemorySegment.ofArray(byteArray); + serializeInto(seg); + return byteArray; + } + + @Override + public ArrayOfDoublesSketchIterator iterator() { + return new DirectArrayOfDoublesSketchIterator(seg_, keysOffset_, getCurrentCapacity(), numValues_); + } + + @Override + public boolean hasMemorySegment() { return true; } + + @Override + MemorySegment getMemorySegment() { return seg_; } + + @Override + int getSerializedSizeBytes() { + return valuesOffset_ + (SIZE_OF_VALUE_BYTES * numValues_ * getCurrentCapacity()); + } + + @Override + void serializeInto(final MemorySegment seg) { + MemorySegment.copy(seg_, 0, seg, 0, seg.byteSize()); + } + + @Override + public void reset() { + if (!isEmpty_) { + isEmpty_ = true; + setBits(seg_, FLAGS_BYTE, (byte) (1 << Flags.IS_EMPTY.ordinal())); + } + final int lgResizeFactor = seg_.get(JAVA_BYTE, LG_RESIZE_FACTOR_BYTE); + final float samplingProbability = seg_.get(JAVA_FLOAT_UNALIGNED, SAMPLING_P_FLOAT); + final int startingCapacity = Util.getStartingCapacity(getNominalEntries(), lgResizeFactor); + thetaLong_ = (long) 
(Long.MAX_VALUE * (double) samplingProbability); + seg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); + seg_.set(JAVA_BYTE, LG_CUR_CAPACITY_BYTE, (byte) Integer.numberOfTrailingZeros(startingCapacity)); + seg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, 0); + keysOffset_ = ENTRIES_START; + valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * startingCapacity); + clear(seg_, keysOffset_, (long) SIZE_OF_KEY_BYTES * startingCapacity); //clear keys only + lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingCapacity); + setRebuildThreshold(); + } + + @Override + protected long getKey(final int index) { + return seg_.get(JAVA_LONG_UNALIGNED, keysOffset_ + ((long) SIZE_OF_KEY_BYTES * index)); + } + + @Override + protected void incrementCount() { + final int count = seg_.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT); + if (count == 0) { + setBits(seg_, FLAGS_BYTE, (byte) (1 << Flags.HAS_ENTRIES.ordinal())); + } + seg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, count + 1); + } + + @Override + protected final int getCurrentCapacity() { + return 1 << seg_.get(JAVA_BYTE, LG_CUR_CAPACITY_BYTE); + } + + @Override + protected void setThetaLong(final long thetaLong) { + thetaLong_ = thetaLong; + seg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); + } + + @Override + protected void setValues(final int index, final double[] values) { + long offset = valuesOffset_ + ((long) SIZE_OF_VALUE_BYTES * numValues_ * index); + for (int i = 0; i < numValues_; i++) { + seg_.set(JAVA_DOUBLE_UNALIGNED, offset, values[i]); + offset += SIZE_OF_VALUE_BYTES; + } + } + + @Override + protected void updateValues(final int index, final double[] values) { + long offset = valuesOffset_ + ((long) SIZE_OF_VALUE_BYTES * numValues_ * index); + for (int i = 0; i < numValues_; i++) { + seg_.set(JAVA_DOUBLE_UNALIGNED, offset, seg_.get(JAVA_DOUBLE_UNALIGNED, offset) + values[i]); + offset += SIZE_OF_VALUE_BYTES; + } + } + + @Override + protected void setNotEmpty() { + if (isEmpty_) { + isEmpty_ = 
false; + clearBits(seg_, FLAGS_BYTE, (byte) (1 << Flags.IS_EMPTY.ordinal())); + + } + } + + @Override + protected boolean isInSamplingMode() { + return (seg_.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_IN_SAMPLING_MODE.ordinal())) != 0; + } + + // rebuild in the same memory + @Override + protected void rebuild(final int newCapacity) { + final int numValues = getNumValues(); + checkIfEnoughMemory(seg_, newCapacity, numValues); + final int currCapacity = getCurrentCapacity(); + final long[] keys = new long[currCapacity]; + final double[] values = new double[currCapacity * numValues]; + MemorySegment.copy(seg_, JAVA_LONG_UNALIGNED, keysOffset_, keys, 0, currCapacity); + MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, valuesOffset_, values, 0, currCapacity * numValues); + + clear(seg_, keysOffset_, ((long) SIZE_OF_KEY_BYTES * newCapacity) + ((long) SIZE_OF_VALUE_BYTES * newCapacity * numValues)); + seg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, 0); + seg_.set(JAVA_BYTE, LG_CUR_CAPACITY_BYTE, (byte)Integer.numberOfTrailingZeros(newCapacity)); + valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * newCapacity); + lgCurrentCapacity_ = Integer.numberOfTrailingZeros(newCapacity); + for (int i = 0; i < keys.length; i++) { + if ((keys[i] != 0) && (keys[i] < thetaLong_)) { + insert(keys[i], Arrays.copyOfRange(values, i * numValues, (i + 1) * numValues)); + } + } + setRebuildThreshold(); + } + + @Override + protected int insertKey(final long key) { + return HashOperations.hashInsertOnlyMemory(seg_, lgCurrentCapacity_, key, ENTRIES_START); + } + + @Override + protected int findOrInsertKey(final long key) { + return HashOperations.hashSearchOrInsertMemory(seg_, lgCurrentCapacity_, key, ENTRIES_START); + } + + @Override + protected double[] find(final long key) { + final int index = HashOperations.hashSearchMemory(seg_, lgCurrentCapacity_, key, ENTRIES_START); + if (index == -1) { return null; } + final double[] array = new double[numValues_]; + MemorySegment.copy(seg_, 
JAVA_DOUBLE_UNALIGNED, valuesOffset_ + + ((long) SIZE_OF_VALUE_BYTES * numValues_ * index), array, 0, numValues_); + return array; + } + + private static void checkIfEnoughMemory(final MemorySegment seg, final int numEntries, final int numValues) { + final int sizeNeeded = + ENTRIES_START + ((SIZE_OF_KEY_BYTES + (SIZE_OF_VALUE_BYTES * numValues)) * numEntries); + if (sizeNeeded > seg.byteSize()) { + throw new SketchesArgumentException("Not enough memory: need " + + sizeNeeded + " bytes, got " + seg.byteSize() + " bytes"); + } + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchR.java new file mode 100644 index 000000000..7d2af2ba9 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchR.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple2.arrayofdoubles; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.SketchesReadOnlyException; + +final class DirectArrayOfDoublesQuickSelectSketchR extends DirectArrayOfDoublesQuickSelectSketch { + + DirectArrayOfDoublesQuickSelectSketchR(final MemorySegment seg, final long seed) { + super(seg, seed); + } + + @Override + void insertOrIgnore(final long key, final double[] values) { + throw new SketchesReadOnlyException(); + } + + @Override + public void trim() { + throw new SketchesReadOnlyException(); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java new file mode 100644 index 000000000..63b421f4d --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple2.arrayofdoubles; + +import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; + +import java.lang.foreign.MemorySegment; + +/** + * Iterator over the off-heap, Direct tuple sketch of type ArrayOfDoubles (compact or hash table). + * + *

This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which, if managed properly, will greatly reduce the need for + * the JVM to perform garbage collection.

+ */ +final class DirectArrayOfDoublesSketchIterator implements ArrayOfDoublesSketchIterator { + + private MemorySegment seg_; + private int offset_; + private int numEntries_; + private int numValues_; + private int i_; + private static final int SIZE_OF_KEY_BYTES = 8; + private static final int SIZE_OF_VALUE_BYTES = 8; + + DirectArrayOfDoublesSketchIterator(final MemorySegment seg, final int offset, final int numEntries, + final int numValues) { + seg_ = seg; + offset_ = offset; + numEntries_ = numEntries; + numValues_ = numValues; + i_ = -1; + } + + @Override + public boolean next() { + i_++; + while (i_ < numEntries_) { + final long off = offset_ + ((long) SIZE_OF_KEY_BYTES * i_); + if (seg_.get(JAVA_LONG_UNALIGNED, off) != 0) { return true; } + i_++; + } + return false; + } + + @Override + public long getKey() { + final long off = offset_ + ((long) SIZE_OF_KEY_BYTES * i_); + return seg_.get(JAVA_LONG_UNALIGNED, off); + } + + @Override + public double[] getValues() { + long off; + if (numValues_ == 1) { + off = offset_ + ((long) SIZE_OF_KEY_BYTES * numEntries_) + ((long) SIZE_OF_VALUE_BYTES * i_); + return new double[] { seg_.get(JAVA_DOUBLE_UNALIGNED, off) }; + } + final double[] array = new double[numValues_]; + off = offset_ + ((long) SIZE_OF_KEY_BYTES * numEntries_) + ((long) SIZE_OF_VALUE_BYTES * i_ * numValues_); + MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, off, array, 0, numValues_); + return array; + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnion.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnion.java new file mode 100644 index 000000000..e546d4756 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnion.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.arrayofdoubles; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.tuple2.SerializerDeserializer; + +/** + * Direct Union operation for tuple sketches of type ArrayOfDoubles. + * + *

This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which, if managed properly, will greatly reduce the need for + * the JVM to perform garbage collection.

+ */ +class DirectArrayOfDoublesUnion extends ArrayOfDoublesUnion { + + final MemorySegment seg_; + + /** + * Creates an instance of DirectArrayOfDoublesUnion + * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than given value. + * @param numValues Number of double values to keep for each key. + * @param seed See seed + * @param dstSeg the destination MemorySegment + */ + DirectArrayOfDoublesUnion(final int nomEntries, final int numValues, final long seed, + final MemorySegment dstSeg) { + super(new DirectArrayOfDoublesQuickSelectSketch(nomEntries, 3, 1f, numValues, seed, + dstSeg.asSlice(PREAMBLE_SIZE_BYTES, dstSeg.byteSize() - PREAMBLE_SIZE_BYTES))); + seg_ = dstSeg; + seg_.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1); // unused, always 1 + seg_.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID); + seg_.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); + seg_.set(JAVA_BYTE, SKETCH_TYPE_BYTE, (byte) SerializerDeserializer.SketchType.ArrayOfDoublesUnion.ordinal()); + seg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, gadget_.getThetaLong()); + } + + //Called from wrapUnion below and extended by DirectArrayOfDoublesUnionR + DirectArrayOfDoublesUnion(final ArrayOfDoublesQuickSelectSketch gadget, final MemorySegment seg) { + super(gadget); + seg_ = seg; + unionThetaLong_ = seg.get(JAVA_LONG_UNALIGNED, THETA_LONG); + } + + @Override + void setUnionThetaLong(final long thetaLong) { + super.setUnionThetaLong(thetaLong); + seg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong); + } + + static ArrayOfDoublesUnion wrapUnion(final MemorySegment seg, final long seed, final boolean isWritable) { + final byte version = seg.get(JAVA_BYTE, ArrayOfDoublesUnion.SERIAL_VERSION_BYTE); + if (version != ArrayOfDoublesUnion.serialVersionUID) { + throw new SketchesArgumentException("Serial version mismatch. 
Expected: " + + serialVersionUID + ", actual: " + version); + } + SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE), seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE)); + SerializerDeserializer.validateType(seg.get(JAVA_BYTE, SKETCH_TYPE_BYTE), + SerializerDeserializer.SketchType.ArrayOfDoublesUnion); + + if (isWritable) { + final MemorySegment sketchSeg = seg.asSlice(PREAMBLE_SIZE_BYTES, seg.byteSize() - PREAMBLE_SIZE_BYTES); + return new DirectArrayOfDoublesUnion(new DirectArrayOfDoublesQuickSelectSketch(sketchSeg, seed), seg); + } + final MemorySegment sketchSeg = seg.asSlice(PREAMBLE_SIZE_BYTES, seg.byteSize() - PREAMBLE_SIZE_BYTES); + return new DirectArrayOfDoublesUnionR(new DirectArrayOfDoublesQuickSelectSketchR(sketchSeg, seed), seg); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnionR.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnionR.java new file mode 100644 index 000000000..51568fd87 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnionR.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple2.arrayofdoubles; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.SketchesReadOnlyException; + +final class DirectArrayOfDoublesUnionR extends DirectArrayOfDoublesUnion { + + /** + * Wraps the given Memory. + * @param gadget the ArrayOfDoublesQuickSelectSketch + * @param seg the destination MemorySegment + */ + DirectArrayOfDoublesUnionR(final ArrayOfDoublesQuickSelectSketch gadget, final MemorySegment seg) { + super(gadget, seg); + } + + @Override + public void union(final ArrayOfDoublesSketch tupleSketch) { + throw new SketchesReadOnlyException(); + } + + @Override + public void reset() { + throw new SketchesReadOnlyException(); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HashTables.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HashTables.java new file mode 100644 index 000000000..4baa685d6 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HashTables.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple2.arrayofdoubles; + +import static java.lang.Math.ceil; +import static java.lang.Math.max; +import static java.lang.Math.min; +import static org.apache.datasketches.common.Util.ceilingPowerOf2; +import static org.apache.datasketches.thetacommon.HashOperations.hashInsertOnly; +import static org.apache.datasketches.thetacommon.HashOperations.hashSearch; + +import org.apache.datasketches.thetacommon.ThetaUtil; + +class HashTables { + private long[] hashTable = null; + private double[][] valueTable = null; + private int numValues = 0; + private int lgTableSize = 0; + private int numKeys = 0; + + //Construct from sketch + HashTables(final ArrayOfDoublesSketch sketchIn) { + numKeys = sketchIn.getRetainedEntries(); + numValues = sketchIn.getNumValues(); + + lgTableSize = getLgTableSize(numKeys); + final int tableSize = 1 << lgTableSize; + hashTable = new long[tableSize]; + valueTable = new double[tableSize][]; + final ArrayOfDoublesSketchIterator it = sketchIn.iterator(); + + while (it.next()) { + final long hash = it.getKey(); + final int index = hashInsertOnly(hashTable, lgTableSize, hash); + valueTable[index] = new double[numValues]; + System.arraycopy(it.getValues(), 0, valueTable[index], 0, numValues); + } + } + + //Construct: Load the hash and value tables from packed hash and value arrays + private HashTables(final long[] hashArr, final double[][] valuesArr, final int numKeys, final int numValues) { + this.numValues = numValues; + this.numKeys = numKeys; + lgTableSize = getLgTableSize(numKeys); + + final int tableSize = 1 << lgTableSize; + hashTable = new long[tableSize]; + valueTable = new double[tableSize][]; + + for (int i = 0; i < numKeys; i++) { + final long hash = hashArr[i]; + final int index = hashInsertOnly(hashTable, lgTableSize, hash); + valueTable[index] = new double[numValues]; + System.arraycopy(valuesArr[i], 0, valueTable[index], 0, numValues); + } + } + + HashTables getIntersectHashTables( + final 
ArrayOfDoublesSketch nextTupleSketch, + final long thetaLong, + final ArrayOfDoublesCombiner combiner) { + //Match nextSketch data with local instance data, filtering by theta + final int maxMatchSize = min(numKeys, nextTupleSketch.getRetainedEntries()); + assert numValues == nextTupleSketch.numValues_; + final long[] matchHashArr = new long[maxMatchSize]; + final double[][] matchValuesArr = new double[maxMatchSize][]; + + //Copy the intersecting items from local hashTables_ + // sequentially into local packed matchHashArr_ and matchValuesArr + int matchCount = 0; + final ArrayOfDoublesSketchIterator it = nextTupleSketch.iterator(); + while (it.next()) { + final long hash = it.getKey(); + if (hash >= thetaLong) { continue; } + final int index = hashSearch(hashTable, lgTableSize, hash); + if (index < 0) { continue; } + matchHashArr[matchCount] = hash; + matchValuesArr[matchCount] = combiner.combine(valueTable[index], it.getValues()); + matchCount++; + } + return new HashTables(matchHashArr, matchValuesArr, matchCount, numValues); + } + + int getNumKeys() { + return numKeys; + } + + int getNumValues() { + return numValues; + } + + long[] getHashTable() { + return hashTable; + } + + double[][] getValueTable() { + return valueTable; + } + + void clear() { + hashTable = null; + valueTable = null; + numValues = 0; + lgTableSize = 0; + numKeys = 0; + } + + static int getLgTableSize(final int numKeys) { + final int tableSize = max(ceilingPowerOf2((int) ceil(numKeys / 0.75)), 1 << ThetaUtil.MIN_LG_NOM_LONGS); + return Integer.numberOfTrailingZeros(tableSize); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java new file mode 100644 index 000000000..dc84da82a --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java @@ -0,0 +1,233 @@ +/* + * Licensed to 
the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED;
+
+import java.lang.foreign.MemorySegment;
+import java.nio.ByteOrder;
+import java.util.Arrays;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.apache.datasketches.tuple2.SerializerDeserializer;
+import org.apache.datasketches.tuple2.Util;
+
+/**
+ * The on-heap implementation of tuple Compact Sketch of type ArrayOfDoubles.
+ *
+ * <p>Keys and values are held in two parallel arrays: entry <i>i</i> owns {@code keys_[i]} and the
+ * {@code numValues_} doubles starting at {@code values_[i * numValues_]}. Both arrays may be
+ * {@code null} when no entries are retained.</p>
+ */
+final class HeapArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketch {
+
+  private final short seedHash_;
+  private long[] keys_;     //may be null when there are no retained entries
+  private double[] values_; //may be null when there are no retained entries
+
+  /**
+   * Converts the given UpdatableArrayOfDoublesSketch to this compact form.
+   * @param sketch the given UpdatableArrayOfDoublesSketch
+   */
+  HeapArrayOfDoublesCompactSketch(final ArrayOfDoublesUpdatableSketch sketch) {
+    this(sketch, sketch.getThetaLong());
+  }
+
+  /**
+   * Converts the given UpdatableArrayOfDoublesSketch to this compact form
+   * trimming if necessary according to given thetaLong.
+   * The resulting theta is the smaller of the sketch's own theta and the given thetaLong.
+   * @param sketch the given UpdatableArrayOfDoublesSketch
+   * @param thetaLong new value of thetaLong
+   */
+  HeapArrayOfDoublesCompactSketch(final ArrayOfDoublesUpdatableSketch sketch, final long thetaLong) {
+    super(sketch.getNumValues());
+    isEmpty_ = sketch.isEmpty();
+    thetaLong_ = Math.min(sketch.getThetaLong(), thetaLong);
+    seedHash_ = Util.computeSeedHash(sketch.getSeed());
+    final int count = sketch.getRetainedEntries();
+    if (count > 0) {
+      keys_ = new long[count];
+      values_ = new double[count * numValues_];
+      final ArrayOfDoublesSketchIterator it = sketch.iterator();
+      int i = 0;
+      while (it.next()) {
+        final long key = it.getKey();
+        if (key < thetaLong_) { //keep only the entries below the (possibly reduced) theta
+          keys_[i] = key;
+          System.arraycopy(it.getValues(), 0, values_, i * numValues_, numValues_);
+          i++;
+        }
+      }
+      // trim if necessary
+      if (i < count) {
+        if (i == 0) {
+          keys_ = null;
+          values_ = null;
+        } else {
+          keys_ = Arrays.copyOf(keys_, i);
+          values_ = Arrays.copyOf(values_, i * numValues_);
+        }
+      }
+    }
+  }
+
+  /*
+   * Creates an instance from components.
+   * The given arrays are stored as is (not copied) and may be null when there are no entries.
+   */
+  HeapArrayOfDoublesCompactSketch(final long[] keys, final double[] values, final long thetaLong,
+      final boolean isEmpty, final int numValues, final short seedHash) {
+    super(numValues);
+    keys_ = keys;
+    values_ = values;
+    thetaLong_ = thetaLong;
+    isEmpty_ = isEmpty;
+    seedHash_ = seedHash;
+  }
+
+  /**
+   * This is to create an instance given a serialized form, using the default update seed.
+   * @param seg the source MemorySegment
+   */
+  HeapArrayOfDoublesCompactSketch(final MemorySegment seg) {
+    this(seg, ThetaUtil.DEFAULT_UPDATE_SEED);
+  }
+
+  /**
+   * This is to create an instance given a serialized form
+   * @param seg the source MemorySegment
+   * @param seed See seed
+   * @throws SketchesArgumentException if the serial version or the byte order of the image
+   * does not match this implementation and platform.
+   */
+  HeapArrayOfDoublesCompactSketch(final MemorySegment seg, final long seed) {
+    super(seg.get(JAVA_BYTE, NUM_VALUES_BYTE));
+    seedHash_ = seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT);
+    SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE),
+        seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE));
+    SerializerDeserializer.validateType(seg.get(JAVA_BYTE, SKETCH_TYPE_BYTE),
+        SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch);
+    final byte version = seg.get(JAVA_BYTE, SERIAL_VERSION_BYTE);
+    if (version != serialVersionUID) {
+      throw new SketchesArgumentException(
+          "Serial version mismatch. Expected: " + serialVersionUID + ", actual: " + version);
+    }
+    final byte flags = seg.get(JAVA_BYTE, FLAGS_BYTE); //read once, tested three times below
+    final boolean isBigEndian = (flags & (1 << Flags.IS_BIG_ENDIAN.ordinal())) != 0;
+    if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) {
+      throw new SketchesArgumentException("Byte order mismatch");
+    }
+    Util.checkSeedHashes(seedHash_, Util.computeSeedHash(seed));
+    isEmpty_ = (flags & (1 << Flags.IS_EMPTY.ordinal())) != 0;
+    thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, THETA_LONG);
+    final boolean hasEntries = (flags & (1 << Flags.HAS_ENTRIES.ordinal())) != 0;
+    if (hasEntries) {
+      final int count = seg.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT);
+      keys_ = new long[count];
+      values_ = new double[count * numValues_];
+      //all keys are stored first, immediately followed by all values
+      MemorySegment.copy(seg, JAVA_LONG_UNALIGNED, ENTRIES_START, keys_, 0, count);
+      MemorySegment.copy(seg, JAVA_DOUBLE_UNALIGNED, ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * count),
+          values_, 0, values_.length);
+    }
+  }
+
+  /**
+   * Returns a compact copy of this sketch.
+   * @param dstSeg if null, an independent on-heap copy is returned; otherwise the serialized
+   * image of this sketch is written at offset 0 of dstSeg and a sketch backed by dstSeg is returned.
+   * @return the compact copy
+   */
+  @Override
+  public ArrayOfDoublesCompactSketch compact(final MemorySegment dstSeg) {
+    if (dstSeg == null) {
+      //keys_ and values_ are null when nothing is retained, so clone only what exists
+      final long[] keysCopy = (keys_ == null) ? null : keys_.clone();
+      final double[] valuesCopy = (values_ == null) ? null : values_.clone();
+      return new HeapArrayOfDoublesCompactSketch(
+          keysCopy, valuesCopy, thetaLong_, isEmpty_, numValues_, seedHash_);
+    }
+    final byte[] byteArr = toByteArray();
+    MemorySegment.copy(byteArr, 0, dstSeg, JAVA_BYTE, 0, byteArr.length);
+    return new DirectArrayOfDoublesCompactSketch(dstSeg);
+  }
+
+  @Override
+  public int getRetainedEntries() {
+    return keys_ == null ? 0 : keys_.length;
+  }
+
+  /**
+   * Serializes this sketch: a fixed preamble followed, only when there are retained entries,
+   * by the entry count, all keys and then all values.
+   * @return the serialized image
+   */
+  @Override
+  public byte[] toByteArray() {
+    final int count = getRetainedEntries();
+    final int sizeBytes = getCurrentBytes();
+    final byte[] bytes = new byte[sizeBytes];
+    final MemorySegment seg = MemorySegment.ofArray(bytes);
+    seg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1);
+    seg.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID);
+    seg.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID());
+    seg.set(JAVA_BYTE, SKETCH_TYPE_BYTE,
+        (byte) SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch.ordinal());
+    final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN);
+    seg.set(JAVA_BYTE, FLAGS_BYTE, (byte) (
+      ((isBigEndian ? 1 : 0) << Flags.IS_BIG_ENDIAN.ordinal())
+      | ((isEmpty() ? 1 : 0) << Flags.IS_EMPTY.ordinal())
+      | ((count > 0 ? 1 : 0) << Flags.HAS_ENTRIES.ordinal())
+    ));
+    seg.set(JAVA_BYTE, NUM_VALUES_BYTE, (byte) numValues_);
+    seg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, seedHash_);
+    seg.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_);
+    if (count > 0) {
+      seg.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, count);
+      MemorySegment.copy(keys_, 0, seg, JAVA_LONG_UNALIGNED, ENTRIES_START, count);
+      MemorySegment.copy(values_, 0, seg, JAVA_DOUBLE_UNALIGNED,
+          ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * count), values_.length);
+    }
+    return bytes;
+  }
+
+  @Override
+  //converts compact heap array of double[] to compact double[][]
+  public double[][] getValues() {
+    final int count = getRetainedEntries();
+    final double[][] values = new double[count][];
+    for (int j = 0; j < count; j++) {
+      values[j] = Arrays.copyOfRange(values_, j * numValues_, (j + 1) * numValues_);
+    }
+    return values;
+  }
+
+  /**
+   * @return a copy of all retained values as one flat array, zero length if nothing is retained.
+   */
+  @Override
+  double[] getValuesAsOneDimension() {
+    return (values_ == null) ? new double[0] : values_.clone();
+  }
+
+  /**
+   * @return a copy of all retained keys, zero length if nothing is retained.
+   */
+  @Override
+  long[] getKeys() {
+    return (keys_ == null) ? new long[0] : keys_.clone();
+  }
+
+  @Override
+  public ArrayOfDoublesSketchIterator iterator() {
+    return new HeapArrayOfDoublesSketchIterator(keys_, values_, numValues_);
+  }
+
+  @Override
+  short getSeedHash() {
+    return seedHash_;
+  }
+
+  @Override
+  public boolean hasMemorySegment() { return false; }
+
+  @Override
+  MemorySegment getMemorySegment() { return null; }
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesIntersection.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesIntersection.java
new file mode 100644
index 000000000..dc0383567
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesIntersection.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+/**
+ * On-heap implementation of intersection set operation for tuple sketches of type
+ * ArrayOfDoubles.
+ */
+final class HeapArrayOfDoublesIntersection extends ArrayOfDoublesIntersection {
+
+  /**
+   * Constructs a HeapArrayOfDoublesIntersection that uses the given update seed.
+   * @param numValues how many double values are associated with each key
+   * @param seed See seed
+   */
+  HeapArrayOfDoublesIntersection(final int numValues, final long seed) {
+    super(numValues, seed);
+  }
+
+  /**
+   * Supplies the on-heap QuickSelect sketch used by this intersection.
+   * @param nomEntries nominal number of entries of the new sketch
+   * @param numValues number of double values kept for each key
+   * @param seed See seed
+   * @return a new, empty HeapArrayOfDoublesQuickSelectSketch
+   */
+  @Override
+  protected ArrayOfDoublesQuickSelectSketch createSketch(final int nomEntries, final int numValues, final long seed) {
+    final int lgResizeFactor = 0;
+    final float samplingProbability = 1f;
+    return new HeapArrayOfDoublesQuickSelectSketch(
+        nomEntries, lgResizeFactor, samplingProbability, numValues, seed);
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java
new file mode 100644
index 000000000..9e33f4e87
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java
@@ -0,0 +1,363 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED;
+import static org.apache.datasketches.common.Util.ceilingPowerOf2;
+import static org.apache.datasketches.common.Util.exactLog2OfLong;
+
+import java.lang.foreign.MemorySegment;
+import java.nio.ByteOrder;
+import java.util.Arrays;
+
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.HashOperations;
+import org.apache.datasketches.tuple2.SerializerDeserializer;
+import org.apache.datasketches.tuple2.Util;
+
+/**
+ * The on-heap implementation of the tuple QuickSelect sketch of type ArrayOfDoubles.
+ *
+ * <p>Entries live in an open hash table made of two parallel arrays: slot <i>i</i> owns
+ * {@code keys_[i]} and the {@code numValues_} doubles starting at {@code values_[i * numValues_]}.
+ * A key of zero marks an unused slot.</p>
+ */
+
+final class HeapArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelectSketch {
+
+  private final int lgNomEntries_;
+  private final int lgResizeFactor_;
+  private final float samplingProbability_;
+
+  private int count_;       //number of occupied slots
+  private long[] keys_;     //hash table of keys, 0 means empty slot
+  private double[] values_; //numValues_ doubles per slot, parallel to keys_
+
+  /**
+   * This is to create an instance of a QuickSelectSketch with custom resize factor and sampling
+   * probability
+   * @param nomEntries Nominal number of entries. Forced to the smallest power of 2 greater than
+   * or equal to the given value.
+   * @param lgResizeFactor log2(resize factor) - value from 0 to 3:
+   * 0 - no resizing (max size allocated),
+   * 1 - double internal hash table each time it reaches a threshold
+   * 2 - grow four times
+   * 3 - grow eight times (default)
+   * @param samplingProbability
+   * See Sampling Probability
+   * @param numValues number of double values to keep for each key
+   * @param seed See seed
+   */
+  HeapArrayOfDoublesQuickSelectSketch(final int nomEntries, final int lgResizeFactor,
+      final float samplingProbability, final int numValues, final long seed) {
+    super(numValues, seed);
+    lgNomEntries_ = exactLog2OfLong(ceilingPowerOf2(nomEntries));
+    lgResizeFactor_ = lgResizeFactor;
+    samplingProbability_ = samplingProbability;
+    //initial theta is the sampling probability scaled to the range of a positive long
+    thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability);
+    final int startingCapacity = Util.getStartingCapacity(nomEntries, lgResizeFactor);
+    keys_ = new long[startingCapacity];
+    values_ = new double[startingCapacity * numValues];
+    lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingCapacity);
+    setRebuildThreshold();
+  }
+
+  /**
+   * This is to create an instance given a serialized form
+   * @param seg the source MemorySegment
+   * @param seed See seed
+   * @throws SketchesArgumentException if the serial version or the byte order of the image
+   * does not match this implementation and platform.
+   */
+  HeapArrayOfDoublesQuickSelectSketch(final MemorySegment seg, final long seed) {
+    super(seg.get(JAVA_BYTE, NUM_VALUES_BYTE), seed);
+    SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE),
+        seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE));
+    SerializerDeserializer.validateType(seg.get(JAVA_BYTE, SKETCH_TYPE_BYTE),
+        SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch);
+    final byte version = seg.get(JAVA_BYTE, SERIAL_VERSION_BYTE);
+    if (version != serialVersionUID) {
+      throw new SketchesArgumentException("Serial version mismatch. Expected: "
+          + serialVersionUID + ", actual: " + version);
+    }
+    final byte flags = seg.get(JAVA_BYTE, FLAGS_BYTE);
+    final boolean isBigEndian = (flags & (1 << Flags.IS_BIG_ENDIAN.ordinal())) > 0;
+    if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) {
+      throw new SketchesArgumentException("Byte order mismatch");
+    }
+    Util.checkSeedHashes(seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT), Util.computeSeedHash(seed));
+    isEmpty_ = (flags & (1 << Flags.IS_EMPTY.ordinal())) > 0;
+    lgNomEntries_ = seg.get(JAVA_BYTE, LG_NOM_ENTRIES_BYTE);
+    thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, THETA_LONG);
+    final int currentCapacity = 1 << seg.get(JAVA_BYTE, LG_CUR_CAPACITY_BYTE);
+    lgResizeFactor_ = seg.get(JAVA_BYTE, LG_RESIZE_FACTOR_BYTE);
+    samplingProbability_ = seg.get(JAVA_FLOAT_UNALIGNED, SAMPLING_P_FLOAT);
+    keys_ = new long[currentCapacity];
+    values_ = new double[currentCapacity * numValues_];
+    final boolean hasEntries = (flags & (1 << Flags.HAS_ENTRIES.ordinal())) > 0;
+    count_ = hasEntries ? seg.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT) : 0;
+    if (count_ > 0) {
+      //the whole hash table (including empty slots) is stored: all keys, then all values
+      MemorySegment.copy(seg, JAVA_LONG_UNALIGNED, ENTRIES_START, keys_, 0, currentCapacity);
+      final long off = ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * currentCapacity);
+      MemorySegment.copy(seg, JAVA_DOUBLE_UNALIGNED, off, values_, 0, currentCapacity * numValues_);
+
+    }
+    setRebuildThreshold();
+    lgCurrentCapacity_ = Integer.numberOfTrailingZeros(currentCapacity);
+  }
+
+  @Override
+  //converts heap hashTable of double[] to compacted double[][]
+  //empty slots (key == 0) are skipped; each row is a fresh copy
+  public double[][] getValues() {
+    final int numVal = numValues_;
+    final int count = getRetainedEntries();
+    final double[][] values = new double[count][];
+    if (count > 0) {
+      int cnt = 0;
+      for (int j = 0; j < keys_.length; j++) {
+        if (keys_[j] == 0) { continue; }
+        values[cnt++] = Arrays.copyOfRange(values_, j * numVal, (j + 1) * numVal);
+      }
+      assert cnt == count;
+    }
+    return values;
+  }
+
+  @Override
+  //converts heap hashTable of double[] to compacted double[]
+  //empty slots (key == 0) are skipped; result holds count * numValues_ doubles
+  double[] getValuesAsOneDimension() {
+    final int numVal = numValues_;
+    final int count = getRetainedEntries();
+    final double[] values = new double[count * numVal];
+    if (count > 0) {
+      int cnt = 0;
+      for (int j = 0; j < keys_.length; j++) {
+        if (keys_[j] == 0) { continue; }
+        System.arraycopy(values_, j * numVal, values, cnt++ * numVal, numVal);
+      }
+      assert cnt == count;
+    }
+    return values;
+  }
+
+  @Override
+  //converts heap hashTable of long[] to compacted long[]
+  //empty slots (key == 0) are skipped; order follows the hash table slot order
+  long[] getKeys() {
+    final int count = getRetainedEntries();
+    final long[] keysArr = new long[count];
+    if (count > 0) {
+      int cnt = 0;
+      for (int j = 0; j < keys_.length; j++) {
+        if (keys_[j] == 0) { continue; }
+        keysArr[cnt++] = keys_[j];
+      }
+      assert cnt == count;
+    }
+    return keysArr;
+  }
+
+  @Override
+  public int getRetainedEntries() {
+    return count_;
+  }
+
+  @Override
+  public int getNominalEntries() {
+    return 1 << lgNomEntries_;
+  }
+
+  @Override
+  public float getSamplingProbability() {
+    return samplingProbability_;
+  }
+
+  @Override
+  public ResizeFactor getResizeFactor() {
+    return ResizeFactor.getRF(lgResizeFactor_);
+  }
+
+  /**
+   * Serializes this sketch into a new byte array of getSerializedSizeBytes() bytes.
+   * @return the serialized image
+   */
+  @Override
+  public byte[] toByteArray() {
+    final byte[] byteArray = new byte[getSerializedSizeBytes()];
+    final MemorySegment seg = MemorySegment.ofArray(byteArray);
+    serializeInto(seg);
+    return byteArray;
+  }
+
+  @Override
+  public ArrayOfDoublesSketchIterator iterator() {
+    return new HeapArrayOfDoublesSketchIterator(keys_, values_, numValues_);
+  }
+
+  //size of the preamble plus the full hash table (one key and numValues_ values per slot),
+  //computed from the current capacity rather than from the number of retained entries
+  @Override
+  int getSerializedSizeBytes() {
+    return ENTRIES_START + ((SIZE_OF_KEY_BYTES + (SIZE_OF_VALUE_BYTES * numValues_)) * getCurrentCapacity());
+  }
+
+  // X/Y: X = Byte index for just AoDQuickSelectSketch
+  //      Y = Byte index when combined with Union Preamble
+  // Long || Start Byte Adr:
+  // Adr:
+  //      First 16 bytes are preamble from AoDUnion
+  //      ||    7/23   |    6/22   |    5/21   |    4/20   |    3/19   |    2/18   |    1/17   |      0/16      |
+  //  0/2 ||       Seed Hash       |   #Dbls   |   Flags   |  SkType2  |   FamID   |   SerVer  | Preamble_Longs |
+  //      ||   15/31   |   14/30   |   13/29   |   12/28   |   11/27   |   10/26   |    9/25   |      8/24      |
+  //  1/3 ||------------------------------Theta Long----------------------------------------------|
+  //      ||   23/39   |   22/38   |   21/37   |   20/36   |   19/35   |   18/34   |   17/33   |     16/32      |
+  //  2/4 ||              Sampling P Float                 |           |    LgRF   |lgCapLongs |  LgNomEntries  |
+  //      ||   31/47   |   30/46   |   29/45   |   28/44   |   27/43   |   26/42   |   25/41   |     24/40      |
+  //  3/5 ||                                               |            Retained Entries Int                    |
+  //      ||                                                                                   |     32/48      |
+  //  4/6 ||                              Keys Array longs * keys[] Length                                      |
+  //      ||                           Values Array doubles * values[] Length                                   |
+
+  //writes the preamble and, only when count_ > 0, the entire key and value tables into seg
+  @Override
+  void serializeInto(final MemorySegment seg) {
+    seg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1);
+    seg.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID);
+    seg.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID());
+    seg.set(JAVA_BYTE, SKETCH_TYPE_BYTE,
+        (byte) SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch.ordinal());
+    final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN);
+    seg.set(JAVA_BYTE, FLAGS_BYTE, (byte)(
+      (isBigEndian ? 1 << Flags.IS_BIG_ENDIAN.ordinal() : 0)
+      | (isInSamplingMode() ? 1 << Flags.IS_IN_SAMPLING_MODE.ordinal() : 0)
+      | (isEmpty_ ? 1 << Flags.IS_EMPTY.ordinal() : 0)
+      | (count_ > 0 ? 1 << Flags.HAS_ENTRIES.ordinal() : 0)
+    ));
+    seg.set(JAVA_BYTE, NUM_VALUES_BYTE, (byte) numValues_);
+    seg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, Util.computeSeedHash(seed_));
+    seg.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_);
+    seg.set(JAVA_BYTE, LG_NOM_ENTRIES_BYTE, (byte) lgNomEntries_);
+    seg.set(JAVA_BYTE, LG_CUR_CAPACITY_BYTE, (byte) Integer.numberOfTrailingZeros(keys_.length));
+    seg.set(JAVA_BYTE, LG_RESIZE_FACTOR_BYTE, (byte) lgResizeFactor_);
+    seg.set(JAVA_FLOAT_UNALIGNED, SAMPLING_P_FLOAT, samplingProbability_);
+    seg.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, count_);
+    if (count_ > 0) {
+      MemorySegment.copy(keys_, 0, seg, JAVA_LONG_UNALIGNED, ENTRIES_START, keys_.length);
+      final long off = ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * keys_.length);
+      MemorySegment.copy(values_, 0, seg, JAVA_DOUBLE_UNALIGNED, off, values_.length);
+    }
+  }
+
+  @Override
+  public boolean hasMemorySegment() { return false; }
+
+  @Override
+  MemorySegment getMemorySegment() { return null; }
+
+  //returns the sketch to its initial empty state: theta is recomputed from the sampling
+  //probability and fresh tables of the starting capacity replace the current ones
+  @Override
+  public void reset() {
+    isEmpty_ = true;
+    count_ = 0;
+    thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability_);
+    final int startingCapacity = Util.getStartingCapacity(1 << lgNomEntries_, lgResizeFactor_);
+    keys_ = new long[startingCapacity];
+    values_ = new double[startingCapacity * numValues_];
+    lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingCapacity);
+    setRebuildThreshold();
+  }
+
+  @Override
+  protected long getKey(final int index) {
+    return keys_[index];
+  }
+
+  @Override
+  protected void incrementCount() {
+    count_++;
+  }
+
+  //overwrites the values of the slot at the given index with the first numValues_ given values
+  @Override
+  protected void setValues(final int index, final double[] values) {
+    if (numValues_ == 1) {
+      values_[index] = values[0];
+    } else {
+      System.arraycopy(values, 0, values_, index * numValues_, numValues_);
+    }
+  }
+
+  //adds the given values, element by element, to the values of the slot at the given index
+  @Override
+  protected void updateValues(final int index, final double[] values) {
+    if (numValues_ == 1) {
+      values_[index] += values[0];
+    } else {
+      final int offset = index * numValues_;
+      for (int i = 0; i < numValues_; i++) {
+        values_[offset + i] += values[i];
+      }
+    }
+  }
+
+  @Override
+  protected void setNotEmpty() {
+    isEmpty_ = false;
+  }
+
+  @Override
+  protected boolean isInSamplingMode() {
+    return samplingProbability_ < 1f;
+  }
+
+  @Override
+  protected void setThetaLong(final long thetaLong) {
+    thetaLong_ = thetaLong;
+  }
+
+  @Override
+  protected int getCurrentCapacity() {
+    return keys_.length;
+  }
+
+  //allocates tables of the new capacity and re-inserts, via insert(...), every old entry
+  //whose key is non-zero and below the current theta; count_ restarts from zero
+  @Override
+  protected void rebuild(final int newCapacity) {
+    final long[] oldKeys = keys_;
+    final double[] oldValues = values_;
+    keys_ = new long[newCapacity];
+    values_ = new double[newCapacity * numValues_];
+    count_ = 0;
+    lgCurrentCapacity_ = Integer.numberOfTrailingZeros(newCapacity);
+    for (int i = 0; i < oldKeys.length; i++) {
+      if ((oldKeys[i] != 0) && (oldKeys[i] < thetaLong_)) {
+        insert(oldKeys[i], Arrays.copyOfRange(oldValues, i * numValues_, (i + 1) * numValues_));
+      }
+    }
+    setRebuildThreshold();
+  }
+
+  @Override
+  protected int insertKey(final long key) {
+    return HashOperations.hashInsertOnly(keys_, lgCurrentCapacity_, key);
+  }
+
+  @Override
+  protected int findOrInsertKey(final long key) {
+    return HashOperations.hashSearchOrInsert(keys_, lgCurrentCapacity_, key);
+  }
+
+  //returns a copy of the values stored for the key, or null when hashSearch reports -1
+  @Override
+  protected double[] find(final long key) {
+    final int index = HashOperations.hashSearch(keys_, lgCurrentCapacity_, key);
+    if (index == -1) { return null; }
+    return Arrays.copyOfRange(values_, index * numValues_, (index + 1) * numValues_);
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesSketchIterator.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesSketchIterator.java
new file
mode 100644
index 000000000..7d77978e8
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesSketchIterator.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import java.util.Arrays;
+
+/**
+ * Iterator over the on-heap ArrayOfDoublesSketch (compact or hash table).
+ *
+ * <p>The iterator reads the arrays it was given directly (they are not copied) and skips every
+ * slot whose key is zero. {@link #next()} must have returned true before {@link #getKey()} or
+ * {@link #getValues()} is called.</p>
+ */
+final class HeapArrayOfDoublesSketchIterator implements ArrayOfDoublesSketchIterator {
+
+  private final long[] keys_;     //may be null, in which case the iteration is empty
+  private final double[] values_; //numValues_ doubles per key slot
+  private final int numValues_;
+  private int i_;                 //index of the current slot, -1 before the first next()
+
+  /**
+   * @param keys the keys to iterate over, may be null
+   * @param values the values, numValues doubles for each slot of keys
+   * @param numValues number of double values associated with each key
+   */
+  HeapArrayOfDoublesSketchIterator(final long[] keys, final double[] values, final int numValues) {
+    keys_ = keys;
+    values_ = values;
+    numValues_ = numValues;
+    i_ = -1;
+  }
+
+  /**
+   * Advances to the next slot holding a non-zero key.
+   * @return true if such a slot was found, false when the keys are exhausted or null
+   */
+  @Override
+  public boolean next() {
+    if (keys_ == null) { return false; }
+    i_++;
+    while (i_ < keys_.length) {
+      if (keys_[i_] != 0) { return true; }
+      i_++;
+    }
+    return false;
+  }
+
+  /**
+   * @return the key of the current slot
+   */
+  @Override
+  public long getKey() {
+    return keys_[i_];
+  }
+
+  /**
+   * @return a new array holding a copy of the values of the current slot
+   */
+  @Override
+  public double[] getValues() {
+    if (numValues_ == 1) {
+      return new double[] { values_[i_] };
+    }
+    final int offset = i_ * numValues_;
+    return Arrays.copyOfRange(values_, offset, offset + numValues_);
+  }
+
+}
diff --git
a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesUnion.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesUnion.java
new file mode 100644
index 000000000..6603aad95
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesUnion.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.tuple2.SerializerDeserializer;
+
+/**
+ * On-heap Union set operation for tuple sketches of type ArrayOfDoubles.
+ */
+final class HeapArrayOfDoublesUnion extends ArrayOfDoublesUnion {
+
+  /**
+   * Builds a union around a new on-heap QuickSelect sketch created with the given seed,
+   * a log2 resize factor of 3 and a sampling probability of 1.
+   * @param nomEntries Nominal number of entries, passed to the internal sketch.
+   * @param numValues Number of double values to keep for each key.
+   * @param seed See seed
+   */
+  HeapArrayOfDoublesUnion(final int nomEntries, final int numValues, final long seed) {
+    super(new HeapArrayOfDoublesQuickSelectSketch(nomEntries, 3, 1f, numValues, seed));
+  }
+
+  /**
+   * Builds a union around an existing internal sketch.
+   * @param gadget the sketch that holds the state of the union
+   * @param unionThetaLong the theta of the union
+   */
+  HeapArrayOfDoublesUnion(final ArrayOfDoublesQuickSelectSketch gadget, final long unionThetaLong) {
+    super(gadget);
+    unionThetaLong_ = unionThetaLong;
+  }
+
+  /**
+   * Rebuilds a union on the Java heap from its serialized form using a custom seed.
+   * The image is a union preamble followed by the image of the internal QuickSelect sketch.
+   * @param seg the source MemorySegment
+   * @param seed See seed
+   * @return a ArrayOfDoublesUnion on the Java heap
+   */
+  static ArrayOfDoublesUnion heapifyUnion(final MemorySegment seg, final long seed) {
+    final byte serVer = seg.get(JAVA_BYTE, SERIAL_VERSION_BYTE);
+    if (serVer != serialVersionUID) {
+      throw new SketchesArgumentException("Serial version mismatch. Expected: "
+          + serialVersionUID + ", actual: " + serVer);
+    }
+    final byte familyId = seg.get(JAVA_BYTE, FAMILY_ID_BYTE);
+    final byte preambleLongs = seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE);
+    SerializerDeserializer.validateFamily(familyId, preambleLongs);
+    final byte sketchType = seg.get(JAVA_BYTE, SKETCH_TYPE_BYTE);
+    SerializerDeserializer.validateType(sketchType, SerializerDeserializer.SketchType.ArrayOfDoublesUnion);
+
+    //everything after the union preamble is the image of the internal sketch
+    final long gadgetBytes = seg.byteSize() - PREAMBLE_SIZE_BYTES;
+    final MemorySegment gadgetSeg = seg.asSlice(PREAMBLE_SIZE_BYTES, gadgetBytes);
+    final ArrayOfDoublesQuickSelectSketch gadget = new HeapArrayOfDoublesQuickSelectSketch(gadgetSeg, seed);
+    final long unionThetaLong = seg.get(JAVA_LONG_UNALIGNED, THETA_LONG);
+    return new HeapArrayOfDoublesUnion(gadget, unionThetaLong);
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/package-info.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/package-info.java
new file mode 100644
index 000000000..5044b0e3e
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/package-info.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/** + * This package is for a concrete implementation of the Tuple sketch for an array of double values. + */ + +package org.apache.datasketches.tuple2.arrayofdoubles; diff --git a/src/main/java/org/apache/datasketches/tuple2/package-info.java b/src/main/java/org/apache/datasketches/tuple2/package-info.java new file mode 100644 index 000000000..1cb15c83e --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/package-info.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+/**
+ * The tuple package contains a number of sketches based on the same
+ * fundamental algorithms of the Theta Sketch Framework and extend these
+ * concepts for whole new families of sketches.
+ */
+package org.apache.datasketches.tuple2;
diff --git a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSketch.java b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSketch.java
new file mode 100644
index 000000000..636c90d12
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSketch.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.strings;
+
+import static org.apache.datasketches.tuple2.Util.stringArrHash;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.tuple2.UpdatableSketch;
+
+/**
+ * Extends UpdatableSketch&lt;String[], ArrayOfStringsSummary&gt;
+ * @author Lee Rhodes
+ */
+public class ArrayOfStringsSketch extends UpdatableSketch<String[], ArrayOfStringsSummary> {
+
+  /**
+   * Constructs new sketch with default K = 4096 (lgK = 12), default ResizeFactor=X8,
+   * and default p = 1.0.
+   */
+  public ArrayOfStringsSketch() {
+    this(12);
+  }
+
+  /**
+   * Constructs new sketch with default ResizeFactor=X8, default p = 1.0 and given lgK.
+   * @param lgK Log_base2 of Nominal Entries.
+   * See Nominal Entries
+   */
+  public ArrayOfStringsSketch(final int lgK) {
+    this(lgK, ResizeFactor.X8, 1.0F);
+  }
+
+  /**
+   * Constructs new sketch with given ResizeFactor, p and lgK.
+   * @param lgK Log_base2 of Nominal Entries.
+   * See Nominal Entries
+   * @param rf ResizeFactor
+   * See Resize Factor
+   * @param p sampling probability
+   * See Sampling Probability
+   */
+  public ArrayOfStringsSketch(final int lgK, final ResizeFactor rf, final float p) {
+    super(1 << lgK, rf.lg(), p, new ArrayOfStringsSummaryFactory());
+  }
+
+  /**
+   * Constructs this sketch from a MemorySegment image, which must be from an
+   * ArrayOfStringsSketch, and usually with data.
+   * @param seg the given MemorySegment
+   * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated.
+   * This capability will be removed in a future release.
+   * Heapifying a CompactSketch is not deprecated.
+   */
+  @Deprecated
+  public ArrayOfStringsSketch(final MemorySegment seg) {
+    super(seg, new ArrayOfStringsSummaryDeserializer(), new ArrayOfStringsSummaryFactory());
+  }
+
+  /**
+   * Copy Constructor
+   * @param sketch the sketch to copy
+   */
+  public ArrayOfStringsSketch(final ArrayOfStringsSketch sketch) {
+    super(sketch);
+  }
+
+  /**
+   * @return a deep copy of this sketch
+   */
+  @Override
+  public ArrayOfStringsSketch copy() {
+    return new ArrayOfStringsSketch(this);
+  }
+
+  /**
+   * Updates the sketch with String arrays for both key and value.
+   * The key array is reduced to a single hash with stringArrHash before the update.
+   * @param strArrKey the given String array key
+   * @param strArr the given String array value
+   */
+  public void update(final String[] strArrKey, final String[] strArr) {
+    super.update(stringArrHash(strArrKey), strArr);
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummary.java b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummary.java
new file mode 100644
index 000000000..66eed2a8f
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummary.java
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +package org.apache.datasketches.tuple2.strings; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.datasketches.tuple2.Util.stringArrHash; +import static org.apache.datasketches.tuple2.Util.stringConcat; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.tuple2.UpdatableSummary; + +/** + * Implements UpdatableSummary<String[]> + * @author Lee Rhodes + */ +public final class ArrayOfStringsSummary implements UpdatableSummary { + + private String[] stringArr = null; + + ArrayOfStringsSummary() { //required for ArrayOfStringsSummaryFactory + stringArr = null; + } + + //Used by copy() and in test + ArrayOfStringsSummary(final String[] stringArr) { + this.stringArr = stringArr.clone(); + checkNumNodes(stringArr.length); + } + + //used by fromMemory and in test + /** + * This reads a MemorySegment that has a layout similar to the C struct: + * {@snippet : + * typedef struct { + * int totBytes; + * byte nodes; //number of Nodes. + * Node[nodes] = { Node[0], Node[1], ... } + * } + * } + * Where a Node has a layout similar to the C struct: + * {@snippet : + * typedef struct { + * int numBytes; + * byte[] byteArray; //UTF-8 byte array. Not null terminated. 
+ * } + * } + * @param seg the MemorySegment containing the Summary data + */ + ArrayOfStringsSummary(final MemorySegment seg) { + int pos = 0; + final int totBytes = seg.get(JAVA_INT_UNALIGNED, pos); pos += Integer.BYTES; + checkInBytes(seg, totBytes); + final int nodes = seg.get(JAVA_BYTE, pos); pos += Byte.BYTES; + checkNumNodes(nodes); + final String[] stringArr = new String[nodes]; + for (int i = 0; i < nodes; i++) { + final int len = seg.get(JAVA_INT_UNALIGNED, pos); pos += Integer.BYTES; + final byte[] byteArr = new byte[len]; + MemorySegment.copy(seg, JAVA_BYTE, pos, byteArr, 0, len); pos += len; + stringArr[i] = new String(byteArr, UTF_8); + } + assert pos == totBytes; + this.stringArr = stringArr; + } + + @Override + public ArrayOfStringsSummary copy() { + final ArrayOfStringsSummary nodes = new ArrayOfStringsSummary(stringArr); + return nodes; + } + + @Override + public byte[] toByteArray() { + final ComputeBytes cb = new ComputeBytes(stringArr); + final int totBytes = cb.totBytes_; + final byte[] out = new byte[totBytes]; + final MemorySegment wseg = MemorySegment.ofArray(out); + int pos = 0; + wseg.set(JAVA_INT_UNALIGNED, pos, totBytes); pos += Integer.BYTES; + final int numNodes = cb.numNodes_; + wseg.set(JAVA_BYTE, pos, (byte)numNodes); pos += Byte.BYTES; + for (int i = 0; i < numNodes; i++) { + final int nodeLen = cb.nodeLengthsArr_[i]; + wseg.set(JAVA_INT_UNALIGNED, pos, nodeLen); pos += Integer.BYTES; + MemorySegment.copy(cb.nodeBytesArr_[i], 0, wseg, JAVA_BYTE, pos, nodeLen); pos += nodeLen; + } + assert pos == totBytes; + return out; + } + + //From UpdatableSummary + + @Override + public ArrayOfStringsSummary update(final String[] value) { + if (stringArr == null) { + stringArr = value.clone(); + } + return this; + } + + //From Object + + @Override + public int hashCode() { + return (int) stringArrHash(stringArr); + } + + @Override + public boolean equals(final Object summary) { + if (summary == null || !(summary instanceof 
ArrayOfStringsSummary)) { + return false; + } + final String thatStr = stringConcat(((ArrayOfStringsSummary) summary).stringArr); + final String thisStr = stringConcat(stringArr); + return thisStr.equals(thatStr); + } + + /** + * Returns the nodes array for this summary. + * @return the nodes array for this summary. + */ + public String[] getValue() { + return stringArr.clone(); + } + + //also used in test + static void checkNumNodes(final int numNodes) { + if (numNodes > 127 || numNodes < 0) { + throw new SketchesArgumentException("Number of nodes cannot exceed 127 or be negative."); + } + } + + //also used in test + static void checkInBytes(final MemorySegment seg, final int totBytes) { + if (seg.byteSize() < totBytes) { + throw new SketchesArgumentException("Incoming Memory has insufficient capacity."); + } + } + + /** + * Computes total bytes and number of nodes from the given string array. + */ + private static class ComputeBytes { + final byte numNodes_; + final int[] nodeLengthsArr_; + final byte[][] nodeBytesArr_; + final int totBytes_; + + ComputeBytes(final String[] stringArr) { + numNodes_ = (byte) stringArr.length; + checkNumNodes(numNodes_); + nodeLengthsArr_ = new int[numNodes_]; + nodeBytesArr_ = new byte[numNodes_][]; + int sumNodeBytes = 0; + for (int i = 0; i < numNodes_; i++) { + nodeBytesArr_[i] = stringArr[i].getBytes(UTF_8); + nodeLengthsArr_[i] = nodeBytesArr_[i].length; + sumNodeBytes += nodeLengthsArr_[i]; + } + totBytes_ = sumNodeBytes + (numNodes_ + 1) * Integer.BYTES + 1; + } + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryDeserializer.java b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryDeserializer.java new file mode 100644 index 000000000..f48f6c95e --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryDeserializer.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.strings; + +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.tuple2.DeserializeResult; +import org.apache.datasketches.tuple2.SummaryDeserializer; + +/** + * Implements SummaryDeserializer<ArrayOfStringsSummary> + * @author Lee Rhodes + */ +public class ArrayOfStringsSummaryDeserializer implements SummaryDeserializer { + + @Override + public DeserializeResult heapifySummary(final MemorySegment seg) { + return ArrayOfStringsSummaryDeserializer.fromMemory(seg); + } + + /** + * Also used in test. 
+ * @param seg the given MemorySegment + * @return the DeserializeResult + */ + static DeserializeResult fromMemory(final MemorySegment seg) { + final ArrayOfStringsSummary nsum = new ArrayOfStringsSummary(seg); + final int totBytes = seg.get(JAVA_INT_UNALIGNED, 0); + return new DeserializeResult<>(nsum, totBytes); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryFactory.java b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryFactory.java new file mode 100644 index 000000000..b0fb5a539 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryFactory.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple2.strings; + +import org.apache.datasketches.tuple2.SummaryFactory; + +/** + * Implements SummaryFactory<ArrayOfStringsSummary> + * @author Lee Rhodes + */ +public class ArrayOfStringsSummaryFactory implements SummaryFactory { + + @Override + public ArrayOfStringsSummary newSummary() { + return new ArrayOfStringsSummary(); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummarySetOperations.java b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummarySetOperations.java new file mode 100644 index 000000000..07225f45c --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummarySetOperations.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple2.strings; + +import org.apache.datasketches.tuple2.SummarySetOperations; + +/** + * Implements SummarySetOperations<ArrayOfStringsSummary> + * @author Lee Rhodes + */ +public class ArrayOfStringsSummarySetOperations implements SummarySetOperations { + + @Override + public ArrayOfStringsSummary union(final ArrayOfStringsSummary a, final ArrayOfStringsSummary b) { + return a.copy(); + } + + @Override + public ArrayOfStringsSummary intersection(final ArrayOfStringsSummary a, final ArrayOfStringsSummary b) { + return a.copy(); + } + +} diff --git a/src/main/java/org/apache/datasketches/tuple2/strings/package-info.java b/src/main/java/org/apache/datasketches/tuple2/strings/package-info.java new file mode 100644 index 000000000..25a2be3e6 --- /dev/null +++ b/src/main/java/org/apache/datasketches/tuple2/strings/package-info.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/** + * This package is for a generic implementation of the Tuple sketch for single String value. 
+ */ + +package org.apache.datasketches.tuple2.strings; diff --git a/src/test/java/org/apache/datasketches/tuple2/CompactSketchWithDoubleSummaryTest.java b/src/test/java/org/apache/datasketches/tuple2/CompactSketchWithDoubleSummaryTest.java new file mode 100644 index 000000000..fdaf1de26 --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/CompactSketchWithDoubleSummaryTest.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple2; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.tuple2.adouble.DoubleSummary; +import org.apache.datasketches.tuple2.adouble.DoubleSummary.Mode; +import org.apache.datasketches.tuple2.adouble.DoubleSummaryDeserializer; +import org.apache.datasketches.tuple2.adouble.DoubleSummaryFactory; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class CompactSketchWithDoubleSummaryTest { + private final DoubleSummary.Mode mode = Mode.Sum; + + @Test + public void emptyFromNonPublicConstructorNullArray() { + CompactSketch sketch = + new CompactSketch<>(null, null, Long.MAX_VALUE, true); + Assert.assertTrue(sketch.isEmpty()); + Assert.assertFalse(sketch.isEstimationMode()); + Assert.assertEquals(sketch.getEstimate(), 0.0); + Assert.assertEquals(sketch.getLowerBound(1), 0.0); + Assert.assertEquals(sketch.getUpperBound(1), 0.0); + Assert.assertEquals(sketch.getRetainedEntries(), 0); + Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); + Assert.assertEquals(sketch.getTheta(), 1.0); + TupleSketchIterator it = sketch.iterator(); + Assert.assertNotNull(it); + Assert.assertFalse(it.next()); + sketch.toString(); + } + + @Test + public void emptyFromNonPublicConstructor() { + long[] keys = new long[0]; + DoubleSummary[] summaries = + (DoubleSummary[]) java.lang.reflect.Array.newInstance(DoubleSummary.class, 0); + CompactSketch sketch = + new CompactSketch<>(keys, summaries, Long.MAX_VALUE, true); + Assert.assertTrue(sketch.isEmpty()); + Assert.assertFalse(sketch.isEstimationMode()); + Assert.assertEquals(sketch.getEstimate(), 0.0); + Assert.assertEquals(sketch.getLowerBound(1), 0.0); + Assert.assertEquals(sketch.getUpperBound(1), 0.0); + Assert.assertEquals(sketch.getRetainedEntries(), 0); + Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); + Assert.assertEquals(sketch.getTheta(), 1.0); + 
TupleSketchIterator it = sketch.iterator(); + Assert.assertNotNull(it); + Assert.assertFalse(it.next()); + } + + @Test + public void emptyFromQuickSelectSketch() { + UpdatableSketch us = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); + CompactSketch sketch = us.compact(); + Assert.assertTrue(sketch.isEmpty()); + Assert.assertFalse(sketch.isEstimationMode()); + Assert.assertEquals(sketch.getEstimate(), 0.0); + Assert.assertEquals(sketch.getLowerBound(1), 0.0); + Assert.assertEquals(sketch.getUpperBound(1), 0.0); + Assert.assertEquals(sketch.getRetainedEntries(), 0); + Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); + Assert.assertEquals(sketch.getTheta(), 1.0); + TupleSketchIterator it = sketch.iterator(); + Assert.assertNotNull(it); + Assert.assertFalse(it.next()); + } + + @Test + public void exactModeFromQuickSelectSketch() { + UpdatableSketch us = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); + us.update(1, 1.0); + us.update(2, 1.0); + us.update(3, 1.0); + us.update(1, 1.0); + us.update(2, 1.0); + us.update(3, 1.0); + CompactSketch sketch = us.compact(); + Assert.assertFalse(sketch.isEmpty()); + Assert.assertFalse(sketch.isEstimationMode()); + Assert.assertEquals(sketch.getEstimate(), 3.0); + Assert.assertEquals(sketch.getLowerBound(1), 3.0); + Assert.assertEquals(sketch.getUpperBound(1), 3.0); + Assert.assertEquals(sketch.getRetainedEntries(), 3); + Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); + Assert.assertEquals(sketch.getTheta(), 1.0); + TupleSketchIterator it = sketch.iterator(); + int count = 0; + while (it.next()) { + Assert.assertEquals(it.getSummary().getValue(), 2.0); + count++; + } + Assert.assertEquals(count, 3); + } + + @Test + public void serializeDeserializeSmallExact() { + UpdatableSketch us = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); + us.update("a", 1.0); + us.update("b", 1.0); + us.update("c", 1.0); + CompactSketch sketch1 = 
us.compact(); + Sketch sketch2 = + Sketches.heapifySketch(MemorySegment.ofArray(sketch1.toByteArray()), + new DoubleSummaryDeserializer()); + Assert.assertFalse(sketch2.isEmpty()); + Assert.assertFalse(sketch2.isEstimationMode()); + Assert.assertEquals(sketch2.getEstimate(), 3.0); + Assert.assertEquals(sketch2.getLowerBound(1), 3.0); + Assert.assertEquals(sketch2.getUpperBound(1), 3.0); + Assert.assertEquals(sketch2.getRetainedEntries(), 3); + Assert.assertEquals(sketch2.getThetaLong(), Long.MAX_VALUE); + Assert.assertEquals(sketch2.getTheta(), 1.0); + TupleSketchIterator it = sketch2.iterator(); + int count = 0; + while (it.next()) { + Assert.assertEquals(it.getSummary().getValue(), 1.0); + count++; + } + Assert.assertEquals(count, 3); + } + + @Test + public void serializeDeserializeEstimation() throws Exception { + UpdatableSketch us = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); + for (int i = 0; i < 8192; i++) { + us.update(i, 1.0); + } + us.trim(); + CompactSketch sketch1 = us.compact(); + byte[] bytes = sketch1.toByteArray(); + + // for binary testing + //TestUtil.writeBytesToFile(bytes, "CompactSketchWithDoubleSummary4K.sk"); + + Sketch sketch2 = + Sketches.heapifySketch(MemorySegment.ofArray(bytes), new DoubleSummaryDeserializer()); + Assert.assertFalse(sketch2.isEmpty()); + Assert.assertTrue(sketch2.isEstimationMode()); + Assert.assertEquals(sketch2.getEstimate(), sketch1.getEstimate()); + Assert.assertEquals(sketch2.getThetaLong(), sketch1.getThetaLong()); + TupleSketchIterator it = sketch2.iterator(); + int count = 0; + while (it.next()) { + Assert.assertEquals(it.getSummary().getValue(), 1.0); + count++; + } + Assert.assertEquals(count, 4096); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void deserializeWrongType() { + UpdatableSketch us = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); + for (int i = 0; i < 8192; i++) { + us.update(i, 1.0); + } + CompactSketch sketch1 = 
us.compact(); + Sketches.heapifyUpdatableSketch(MemorySegment.ofArray(sketch1.toByteArray()), + new DoubleSummaryDeserializer(), + new DoubleSummaryFactory(mode)); + } + +} diff --git a/src/test/java/org/apache/datasketches/tuple2/IntegerSummary.java b/src/test/java/org/apache/datasketches/tuple2/IntegerSummary.java new file mode 100644 index 000000000..6c4fa49ec --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/IntegerSummary.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2; + +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.ByteArrayUtil; + +/** + * Summary for generic tuple sketches of type Integer. + * This summary keeps an Integer value. + */ +public class IntegerSummary implements UpdatableSummary { + private int value_; + + /** + * Creates an instance of IntegerSummary with a given starting value. 
+ * @param value starting value + */ + public IntegerSummary(final int value) { + value_ = value; + } + + @Override + public IntegerSummary update(final Integer value) { + value_ += value; + return this; + } + + @Override + public IntegerSummary copy() { + return new IntegerSummary(value_); + } + + /** + * @return current value of the IntegerSummary + */ + public int getValue() { + return value_; + } + + private static final int SERIALIZED_SIZE_BYTES = 4; + private static final int VALUE_INDEX = 0; + + @Override + public byte[] toByteArray() { + final byte[] bytes = new byte[SERIALIZED_SIZE_BYTES]; + ByteArrayUtil.putIntLE(bytes, VALUE_INDEX, value_); + return bytes; + } + + /** + * Creates an instance of the IntegerSummary given a serialized representation + * @param seg MemorySegment object with serialized IntegerSummary + * @return DeserializedResult object, which contains a IntegerSummary object and number of bytes + * read from the MemorySegment + */ + public static DeserializeResult fromMemory(final MemorySegment seg) { + return new DeserializeResult<>(new IntegerSummary(seg.get(JAVA_INT_UNALIGNED, VALUE_INDEX)), SERIALIZED_SIZE_BYTES); + } + +} diff --git a/src/test/java/org/apache/datasketches/tuple2/IntegerSummaryDeserializer.java b/src/test/java/org/apache/datasketches/tuple2/IntegerSummaryDeserializer.java new file mode 100644 index 000000000..877fd1d65 --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/IntegerSummaryDeserializer.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2; + +import java.lang.foreign.MemorySegment; + +public class IntegerSummaryDeserializer implements SummaryDeserializer { + + @Override + public DeserializeResult heapifySummary(final MemorySegment seg) { + return IntegerSummary.fromMemory(seg); + } + +} diff --git a/src/test/java/org/apache/datasketches/tuple2/IntegerSummaryFactory.java b/src/test/java/org/apache/datasketches/tuple2/IntegerSummaryFactory.java new file mode 100644 index 000000000..6edf88608 --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/IntegerSummaryFactory.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2; + +/** + * Factory for IntegerSummary. 
+ */ +public class IntegerSummaryFactory implements SummaryFactory { + + @Override + public IntegerSummary newSummary() { + return new IntegerSummary(0); + } + +} diff --git a/src/test/java/org/apache/datasketches/tuple2/JaccardSimilarityTest.java b/src/test/java/org/apache/datasketches/tuple2/JaccardSimilarityTest.java new file mode 100644 index 000000000..a4dab704e --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/JaccardSimilarityTest.java @@ -0,0 +1,457 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple2; + +import org.apache.datasketches.tuple2.adouble.DoubleSummary; +import org.apache.datasketches.tuple2.adouble.DoubleSummaryFactory; +import org.apache.datasketches.tuple2.adouble.DoubleSummarySetOperations; +import org.testng.annotations.Test; + +import org.apache.datasketches.theta2.UpdateSketch; +import org.apache.datasketches.theta2.UpdateSketchBuilder; +import static org.apache.datasketches.tuple2.JaccardSimilarity.dissimilarityTest; +import static org.apache.datasketches.tuple2.JaccardSimilarity.exactlyEqual; +import static org.apache.datasketches.tuple2.JaccardSimilarity.jaccard; +import static org.apache.datasketches.tuple2.JaccardSimilarity.similarityTest; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +/** + * @author Lee Rhodes + * @author David Cromberge + */ +public class JaccardSimilarityTest { + private final DoubleSummary.Mode umode = DoubleSummary.Mode.Sum; + private final DoubleSummarySetOperations dsso = new DoubleSummarySetOperations(); + private final DoubleSummaryFactory factory = new DoubleSummaryFactory(umode); + private final UpdateSketchBuilder thetaBldr = UpdateSketch.builder(); + private final UpdatableSketchBuilder tupleBldr = new UpdatableSketchBuilder<>(factory); + private final Double constSummary = 1.0; + + @Test + public void checkNullsEmpties1() { // tuple, tuple + int minK = 1 << 12; + double threshold = 0.95; + println("Check nulls & empties, minK: " + minK + "\t Th: " + threshold); + //check both null + double[] jResults = jaccard(null, null, dsso); + boolean state = jResults[1] > threshold; + println("null \t null:\t" + state + "\t" + jaccardString(jResults)); + assertFalse(state); + + state = exactlyEqual(null, null, dsso); + assertFalse(state); + + final UpdatableSketch measured = tupleBldr.setNominalEntries(minK).build(); + final UpdatableSketch expected = tupleBldr.setNominalEntries(minK).build(); + + //check both empty + jResults 
= jaccard(measured, expected, dsso); + state = jResults[1] > threshold; + println("empty\tempty:\t" + state + "\t" + jaccardString(jResults)); + assertTrue(state); + + state = exactlyEqual(measured, expected, dsso); + assertTrue(state); + + state = exactlyEqual(measured, measured, dsso); + assertTrue(state); + + //adjust one + expected.update(1, constSummary); + jResults = jaccard(measured, expected, dsso); + state = jResults[1] > threshold; + println("empty\t 1:\t" + state + "\t" + jaccardString(jResults)); + assertFalse(state); + + state = exactlyEqual(measured, expected, dsso); + assertFalse(state); + + println(""); + } + + @Test + public void checkNullsEmpties2() { // tuple, theta + int minK = 1 << 12; + double threshold = 0.95; + println("Check nulls & empties, minK: " + minK + "\t Th: " + threshold); + //check both null + double[] jResults = jaccard(null, null, factory.newSummary(), dsso); + boolean state = jResults[1] > threshold; + println("null \t null:\t" + state + "\t" + jaccardString(jResults)); + assertFalse(state); + + state = exactlyEqual(null, null, factory.newSummary(), dsso); + assertFalse(state); + + final UpdatableSketch measured = tupleBldr.setNominalEntries(minK).build(); + final UpdateSketch expected = thetaBldr.setNominalEntries(minK).build(); + + //check both empty + jResults = jaccard(measured, expected, factory.newSummary(), dsso); + state = jResults[1] > threshold; + println("empty\tempty:\t" + state + "\t" + jaccardString(jResults)); + assertTrue(state); + + state = exactlyEqual(measured, expected, factory.newSummary(), dsso); + assertTrue(state); + + state = exactlyEqual(measured, measured, dsso); + assertTrue(state); + + //adjust one + expected.update(1); + jResults = jaccard(measured, expected, factory.newSummary(), dsso); + state = jResults[1] > threshold; + println("empty\t 1:\t" + state + "\t" + jaccardString(jResults)); + assertFalse(state); + + state = exactlyEqual(measured, expected, factory.newSummary(), dsso); + 
assertFalse(state); + + println(""); + } + + @Test + public void checkExactMode1() { // tuple, tuple + int k = 1 << 12; + int u = k; + double threshold = 0.9999; + println("Exact Mode, minK: " + k + "\t Th: " + threshold); + + final UpdatableSketch measured = tupleBldr.setNominalEntries(k).build(); + final UpdatableSketch expected = tupleBldr.setNominalEntries(k).build(); + + for (int i = 0; i < (u-1); i++) { //one short + measured.update(i, constSummary); + expected.update(i, constSummary); + } + + double[] jResults = jaccard(measured, expected, dsso); + boolean state = jResults[1] > threshold; + println(state + "\t" + jaccardString(jResults)); + assertTrue(state); + + state = exactlyEqual(measured, expected, dsso); + assertTrue(state); + + measured.update(u-1, constSummary); //now exactly k entries + expected.update(u, constSummary); //now exactly k entries but differs by one + jResults = jaccard(measured, expected, dsso); + state = jResults[1] > threshold; + println(state + "\t" + jaccardString(jResults)); + assertFalse(state); + + state = exactlyEqual(measured, expected, dsso); + assertFalse(state); + + println(""); + } + + @Test + public void checkExactMode2() { // tuple, theta + int k = 1 << 12; + int u = k; + double threshold = 0.9999; + println("Exact Mode, minK: " + k + "\t Th: " + threshold); + + final UpdatableSketch measured = tupleBldr.setNominalEntries(k).build(); + final UpdateSketch expected = thetaBldr.setNominalEntries(k).build(); + + for (int i = 0; i < (u-1); i++) { //one short + measured.update(i, constSummary); + expected.update(i); + } + + double[] jResults = jaccard(measured, expected, factory.newSummary(), dsso); + boolean state = jResults[1] > threshold; + println(state + "\t" + jaccardString(jResults)); + assertTrue(state); + + state = exactlyEqual(measured, expected, factory.newSummary(), dsso); + assertTrue(state); + + measured.update(u-1, constSummary); //now exactly k entries + expected.update(u); //now exactly k entries but differs 
by one + jResults = jaccard(measured, expected, factory.newSummary(), dsso); + state = jResults[1] > threshold; + println(state + "\t" + jaccardString(jResults)); + assertFalse(state); + + state = exactlyEqual(measured, expected, factory.newSummary(), dsso); + assertFalse(state); + + println(""); + } + + @Test + public void checkEstMode1() { // tuple, tuple + int k = 1 << 12; + int u = 1 << 20; + double threshold = 0.9999; + println("Estimation Mode, minK: " + k + "\t Th: " + threshold); + + final UpdatableSketch measured = tupleBldr.setNominalEntries(k).build(); + final UpdatableSketch expected = tupleBldr.setNominalEntries(k).build(); + + for (int i = 0; i < u; i++) { + measured.update(i, constSummary); + expected.update(i, constSummary); + } + + double[] jResults = jaccard(measured, expected, dsso); + boolean state = jResults[1] > threshold; + println(state + "\t" + jaccardString(jResults)); + assertTrue(state); + + state = exactlyEqual(measured, expected, dsso); + assertTrue(state); + + for (int i = u; i < (u + 50); i++) { //empirically determined + measured.update(i, constSummary); + } + + jResults = jaccard(measured, expected, dsso); + state = jResults[1] >= threshold; + println(state + "\t" + jaccardString(jResults)); + assertFalse(state); + + state = exactlyEqual(measured, expected, dsso); + assertFalse(state); + + println(""); + } + + @Test + public void checkEstMode2() { // tuple, theta + int k = 1 << 12; + int u = 1 << 20; + double threshold = 0.9999; + println("Estimation Mode, minK: " + k + "\t Th: " + threshold); + + final UpdatableSketch measured = tupleBldr.setNominalEntries(k).build(); + final UpdateSketch expected = thetaBldr.setNominalEntries(k).build(); + + for (int i = 0; i < u; i++) { + measured.update(i, constSummary); + expected.update(i); + } + + double[] jResults = jaccard(measured, expected, factory.newSummary(), dsso); + boolean state = jResults[1] > threshold; + println(state + "\t" + jaccardString(jResults)); + assertTrue(state); + + 
state = exactlyEqual(measured, expected, factory.newSummary(), dsso); + assertTrue(state); + + for (int i = u; i < (u + 50); i++) { //empirically determined + measured.update(i, constSummary); + } + + jResults = jaccard(measured, expected, factory.newSummary(), dsso); + state = jResults[1] >= threshold; + println(state + "\t" + jaccardString(jResults)); + assertFalse(state); + + state = exactlyEqual(measured, expected, factory.newSummary(), dsso); + assertFalse(state); + + println(""); + } + + /** + * Enable printing on this test and you will see that the distribution is pretty tight, + * about +/- 0.7%, which is pretty good since the accuracy of the underlying sketch is about + * +/- 1.56%. + */ + @Test + public void checkSimilarity1() { // tuple, tuple + int minK = 1 << 12; + int u1 = 1 << 20; + int u2 = (int) (u1 * 0.95); + double threshold = 0.943; + println("Estimation Mode, minK: " + minK + "\t Th: " + threshold); + + final UpdatableSketch measured = tupleBldr.setNominalEntries(minK).build(); + final UpdatableSketch expected = tupleBldr.setNominalEntries(minK).build(); + + for (int i = 0; i < u1; i++) { + expected.update(i, constSummary); + } + + for (int i = 0; i < u2; i++) { + measured.update(i, constSummary); + } + + double[] jResults = jaccard(measured, expected, dsso); + boolean state = similarityTest(measured, expected, dsso, threshold); + println(state + "\t" + jaccardString(jResults)); + assertTrue(state); + //check identity case + state = similarityTest(measured, measured, dsso, threshold); + assertTrue(state); + } + + /** + * Enable printing on this test and you will see that the distribution is pretty tight, + * about +/- 0.7%, which is pretty good since the accuracy of the underlying sketch is about + * +/- 1.56%. 
+ */ + @Test + public void checkSimilarity2() { // tuple, theta + int minK = 1 << 12; + int u1 = 1 << 20; + int u2 = (int) (u1 * 0.95); + double threshold = 0.943; + println("Estimation Mode, minK: " + minK + "\t Th: " + threshold); + + final UpdatableSketch measured = tupleBldr.setNominalEntries(minK).build(); + final UpdateSketch expected = thetaBldr.setNominalEntries(minK).build(); + + for (int i = 0; i < u1; i++) { + expected.update(i); + } + + for (int i = 0; i < u2; i++) { + measured.update(i, constSummary); + } + + double[] jResults = jaccard(measured, expected, factory.newSummary(), dsso); + boolean state = similarityTest(measured, expected, factory.newSummary(), dsso, threshold); + println(state + "\t" + jaccardString(jResults)); + assertTrue(state); + //check identity case + state = similarityTest(measured, measured, dsso, threshold); + assertTrue(state); + } + + /** + * Enable printing on this test and you will see that the distribution is much looser, + * about +/- 14%. This is due to the fact that intersections lose accuracy as the ratio of + * intersection to the union becomes a small number. 
+ */ + @Test + public void checkDissimilarity1() { // tuple, tuple + int minK = 1 << 12; + int u1 = 1 << 20; + int u2 = (int) (u1 * 0.05); //measured receives only the first 5% of expected's keys + double threshold = 0.061; + println("Estimation Mode, minK: " + minK + "\t Th: " + threshold); + + final UpdatableSketch measured = tupleBldr.setNominalEntries(minK).build(); + final UpdatableSketch expected = tupleBldr.setNominalEntries(minK).build(); + + for (int i = 0; i < u1; i++) { + expected.update(i, constSummary); + } + + for (int i = 0; i < u2; i++) { + measured.update(i, constSummary); + } + + double[] jResults = jaccard(measured, expected, dsso); + boolean state = dissimilarityTest(measured, expected, dsso, threshold); + println(state + "\t" + jaccardString(jResults)); + assertTrue(state); + } + + /** + * Enable printing on this test and you will see that the distribution is much looser, + * about +/- 14%. This is due to the fact that intersections lose accuracy as the ratio of + * intersection to the union becomes a small number. 
+ */ + @Test + public void checkDissimilarity2() { // tuple, theta + int minK = 1 << 12; + int u1 = 1 << 20; + int u2 = (int) (u1 * 0.05); //measured receives only the first 5% of expected's keys + double threshold = 0.061; + println("Estimation Mode, minK: " + minK + "\t Th: " + threshold); + + final UpdatableSketch measured = tupleBldr.setNominalEntries(minK).build(); + final UpdateSketch expected = thetaBldr.setNominalEntries(minK).build(); + + for (int i = 0; i < u1; i++) { + expected.update(i); + } + + for (int i = 0; i < u2; i++) { + measured.update(i, constSummary); + } + + double[] jResults = jaccard(measured, expected, factory.newSummary(), dsso); + boolean state = dissimilarityTest(measured, expected, factory.newSummary(), dsso, threshold); + println(state + "\t" + jaccardString(jResults)); + assertTrue(state); + } + + private static String jaccardString(double[] jResults) { + double lb = jResults[0]; + double est = jResults[1]; + double ub = jResults[2]; + return lb + "\t" + est + "\t" + ub + "\t" + ((lb/est) - 1.0) + "\t" + ((ub/est) - 1.0); + } + + @Test + public void checkMinK1() { // tuple, tuple + final UpdatableSketch skA = tupleBldr.build(); //4096 + final UpdatableSketch skB = tupleBldr.build(); //4096 + skA.update(1, constSummary); + skB.update(1, constSummary); + double[] result = jaccard(skA, skB, dsso); + println(result[0] + ", " + result[1] + ", " + result[2]); + for (int i = 1; i < 4096; i++) { + skA.update(i, constSummary); + skB.update(i, constSummary); + } + result = jaccard(skA, skB, dsso); + println(result[0] + ", " + result[1] + ", " + result[2]); + } + + @Test + public void checkMinK2() { // tuple, theta + final UpdatableSketch skA = tupleBldr.build(); //4096 + final UpdateSketch skB = UpdateSketch.builder().build(); //4096 + skA.update(1, constSummary); + skB.update(1); + double[] result = jaccard(skA, skB, factory.newSummary(), dsso); + println(result[0] + ", " + result[1] + ", " + result[2]); + for (int i = 1; i < 4096; i++) { + skA.update(i, constSummary); + 
skB.update(i); + } + result = jaccard(skA, skB, factory.newSummary(), dsso); + println(result[0] + ", " + result[1] + ", " + result[2]); + } + + @Test + public void printlnTest() { + println("PRINTING: "+this.getClass().getName()); + } + + /** + * @param o value to print + */ + static void println(Object o) { + //System.out.println(o.toString()); //disable here + } + +} diff --git a/src/test/java/org/apache/datasketches/tuple2/MiscTest.java b/src/test/java/org/apache/datasketches/tuple2/MiscTest.java new file mode 100644 index 000000000..fb353d6a9 --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/MiscTest.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple2; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import org.apache.datasketches.thetacommon.SetOperationCornerCases.CornerCase; +import org.apache.datasketches.tuple2.adouble.DoubleSummary; +import org.apache.datasketches.tuple2.adouble.DoubleSummary.Mode; +import org.apache.datasketches.tuple2.adouble.DoubleSummaryFactory; +import org.testng.annotations.Test; + +/** + * @author Lee Rhodes + */ +public class MiscTest { + + @Test + public void checkUpdatableSketchBuilderReset() { + final DoubleSummary.Mode mode = Mode.Sum; + final UpdatableSketchBuilder bldr = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)); + bldr.reset(); + final UpdatableSketch sk = bldr.build(); + assertTrue(sk.isEmpty()); + } + + @Test + public void checkStringToByteArray() { + Util.stringToByteArray(""); + } + + @Test + public void checkDoubleToLongArray() { + final long[] v = Util.doubleToLongArray(-0.0); + assertEquals(v[0], 0); + } + + //@Test + public void checkById() { + final int[] ids = {0,1,2, 5, 6 }; + final int len = ids.length; + for (int i = 0; i < len; i++) { + for (int j = 0; j < len; j++) { + final int id = ids[i] << 3 | ids[j]; + final CornerCase cCase = CornerCase.caseIdToCornerCase(id); + final String interResStr = cCase.getIntersectAction().getActionDescription(); + final String anotbResStr = cCase.getAnotbAction().getActionDescription(); + println(Integer.toOctalString(id) + "\t" + cCase + "\t" + cCase.getCaseDescription() + + "\t" + interResStr + "\t" + anotbResStr); + } + } + } + + @Test + public void checkCopyCtor() { + final DoubleSummary.Mode mode = Mode.Sum; + final UpdatableSketchBuilder bldr = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)); + bldr.reset(); + final UpdatableSketch sk = bldr.build(); + sk.update(1.0, 1.0); + assertEquals(sk.getRetainedEntries(), 1); + final UpdatableSketch sk2 = sk.copy(); + 
assertEquals(sk2.getRetainedEntries(), 1); + } + + + /** + * + * @param o object to print + */ + private static void println(final Object o) { + //System.out.println(o.toString()); //disable here + } + +} diff --git a/src/test/java/org/apache/datasketches/tuple2/ReadOnlyMemoryTest.java b/src/test/java/org/apache/datasketches/tuple2/ReadOnlyMemoryTest.java new file mode 100644 index 000000000..eb45779c6 --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/ReadOnlyMemoryTest.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple2; + +import org.apache.datasketches.common.SketchesReadOnlyException; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSetOperationBuilder; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUnion; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class ReadOnlyMemoryTest { + + @Test + public void wrapAndTryUpdatingSketch() { + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + sketch1.update(1, new double[] {1}); + final ArrayOfDoublesUpdatableSketch sketch2 = (ArrayOfDoublesUpdatableSketch) + ArrayOfDoublesSketches.wrapSketch(Memory.wrap(sketch1.toByteArray())); + Assert.assertEquals(sketch2.getEstimate(), 1.0); + sketch2.toByteArray(); + boolean thrown = false; + try { + sketch2.update(2, new double[] {1}); + } catch (final SketchesReadOnlyException e) { + thrown = true; + } + try { + sketch2.trim(); + } catch (final SketchesReadOnlyException e) { + thrown = true; + } + Assert.assertTrue(thrown); + } + + @Test + public void heapifyAndUpdateSketch() { + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + sketch1.update(1, new double[] {1}); + // downcasting is not recommended, for testing only + final ArrayOfDoublesUpdatableSketch sketch2 = (ArrayOfDoublesUpdatableSketch) + ArrayOfDoublesSketches.heapifySketch(Memory.wrap(sketch1.toByteArray())); + sketch2.update(2, new double[] {1}); + Assert.assertEquals(sketch2.getEstimate(), 2.0); + } + + @Test + public void 
wrapAndTryUpdatingUnionEstimationMode() { + final int numUniques = 10000; + int key = 0; + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + for (int i = 0; i < numUniques; i++) { + sketch1.update(key++, new double[] {1}); + } + final ArrayOfDoublesUnion union1 = new ArrayOfDoublesSetOperationBuilder().buildUnion(); + union1.union(sketch1); + final ArrayOfDoublesUnion union2 = ArrayOfDoublesSketches.wrapUnion(Memory.wrap(union1.toByteArray())); + final ArrayOfDoublesSketch resultSketch = union2.getResult(); + Assert.assertTrue(resultSketch.isEstimationMode()); + Assert.assertEquals(resultSketch.getEstimate(), numUniques, numUniques * 0.04); + + // make sure union update actually needs to modify the union + final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + for (int i = 0; i < numUniques; i++) { + sketch2.update(key++, new double[] {1}); + } + + boolean thrown = false; + try { + union2.union(sketch2); + } catch (final SketchesReadOnlyException e) { + thrown = true; + } + Assert.assertTrue(thrown); + } + + @Test + public void heapifyAndUpdateUnion() { + final int numUniques = 10000; + int key = 0; + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + for (int i = 0; i < numUniques; i++) { + sketch1.update(key++, new double[] {1}); + } + final ArrayOfDoublesUnion union1 = new ArrayOfDoublesSetOperationBuilder().buildUnion(); + union1.union(sketch1); + final ArrayOfDoublesUnion union2 = ArrayOfDoublesSketches.heapifyUnion(Memory.wrap(union1.toByteArray())); + final ArrayOfDoublesSketch resultSketch = union2.getResult(); + Assert.assertTrue(resultSketch.isEstimationMode()); + Assert.assertEquals(resultSketch.getEstimate(), numUniques, numUniques * 0.04); + + // make sure union update actually needs to modify the union + final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + for 
(int i = 0; i < numUniques; i++) { + sketch2.update(key++, new double[] {1}); + } + union2.union(sketch2); + } + +} diff --git a/src/test/java/org/apache/datasketches/tuple2/SerializerDeserializerTest.java b/src/test/java/org/apache/datasketches/tuple2/SerializerDeserializerTest.java new file mode 100644 index 000000000..65311b17c --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/SerializerDeserializerTest.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple2; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.SketchesArgumentException; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class SerializerDeserializerTest { + + @Test + public void validSketchType() { + byte[] bytes = new byte[4]; + bytes[SerializerDeserializer.TYPE_BYTE_OFFSET] = (byte) SerializerDeserializer.SketchType.CompactSketch.ordinal(); + Assert.assertEquals(SerializerDeserializer.getSketchType(MemorySegment.ofArray(bytes)), SerializerDeserializer.SketchType.CompactSketch); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void invalidSketchType() { + byte[] bytes = new byte[4]; + bytes[SerializerDeserializer.TYPE_BYTE_OFFSET] = 33; + SerializerDeserializer.getSketchType(MemorySegment.ofArray(bytes)); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void validateFamilyNotTuple() { + SerializerDeserializer.validateFamily((byte) 1, (byte) 0); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void validateFamilyWrongPreambleLength() { + SerializerDeserializer.validateFamily((byte) Family.TUPLE.getID(), (byte) 0); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkBadSeedHash() { + org.apache.datasketches.tuple.Util.computeSeedHash(50541); + } +} diff --git a/src/test/java/org/apache/datasketches/tuple2/TupleCrossLanguageTest.java b/src/test/java/org/apache/datasketches/tuple2/TupleCrossLanguageTest.java new file mode 100644 index 000000000..196c82510 --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/TupleCrossLanguageTest.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2; + +import static org.apache.datasketches.common.TestUtil.CHECK_CPP_FILES; +import static org.apache.datasketches.common.TestUtil.CHECK_CPP_HISTORICAL_FILES; +import static org.apache.datasketches.common.TestUtil.GENERATE_JAVA_FILES; +import static org.apache.datasketches.common.TestUtil.cppPath; +import static org.apache.datasketches.common.TestUtil.javaPath; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import java.lang.foreign.MemorySegment; +import java.io.IOException; +import java.nio.file.Files; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.TestUtil; +import org.apache.datasketches.tuple2.adouble.DoubleSummary; +import org.apache.datasketches.tuple2.adouble.DoubleSummaryDeserializer; +import org.apache.datasketches.tuple2.arrayofdoubles.ArrayOfDoublesUnion; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class TupleCrossLanguageTest { + + @Test(groups = {CHECK_CPP_HISTORICAL_FILES}) + public void serialVersion1Compatibility() { + final byte[] byteArr = TestUtil.getResourceBytes("CompactSketchWithDoubleSummary4K_serialVersion1.sk"); + Sketch sketch = Sketches.heapifySketch(MemorySegment.ofArray(byteArr), new DoubleSummaryDeserializer()); + Assert.assertTrue(sketch.isEstimationMode()); + 
Assert.assertEquals(sketch.getEstimate(), 8192, 8192 * 0.99); + Assert.assertEquals(sketch.getRetainedEntries(), 4096); + int count = 0; + TupleSketchIterator it = sketch.iterator(); + while (it.next()) { + Assert.assertEquals(it.getSummary().getValue(), 1.0); + count++; + } + Assert.assertEquals(count, 4096); + } + + @Test(groups = {CHECK_CPP_HISTORICAL_FILES}) + public void version2Compatibility() { + final byte[] byteArr = TestUtil.getResourceBytes("TupleWithTestIntegerSummary4kTrimmedSerVer2.sk"); + Sketch sketch1 = Sketches.heapifySketch(MemorySegment.ofArray(byteArr), new IntegerSummaryDeserializer()); + + // construct the same way + final int lgK = 12; + final int K = 1 << lgK; + final UpdatableSketchBuilder builder = + new UpdatableSketchBuilder<>(new IntegerSummaryFactory()); + final UpdatableSketch updatableSketch = builder.build(); + for (int i = 0; i < 2 * K; i++) { + updatableSketch.update(i, 1); + } + updatableSketch.trim(); + Sketch sketch2 = updatableSketch.compact(); + + Assert.assertEquals(sketch1.getRetainedEntries(), sketch2.getRetainedEntries()); + Assert.assertEquals(sketch1.getThetaLong(), sketch2.getThetaLong()); + Assert.assertEquals(sketch1.isEmpty(), sketch2.isEmpty()); + Assert.assertEquals(sketch1.isEstimationMode(), sketch2.isEstimationMode()); + } + + @Test(groups = {CHECK_CPP_FILES}) + public void deserializeFromCppIntegerSummary() throws IOException { + final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000}; + for (int n: nArr) { + final byte[] bytes = Files.readAllBytes(cppPath.resolve("tuple_int_n" + n + "_cpp.sk")); + final Sketch sketch = + Sketches.heapifySketch(MemorySegment.ofArray(bytes), new IntegerSummaryDeserializer()); + assertTrue(n == 0 ? sketch.isEmpty() : !sketch.isEmpty()); + assertTrue(n > 1000 ? 
sketch.isEstimationMode() : !sketch.isEstimationMode()); + assertEquals(sketch.getEstimate(), n, n * 0.03); + final TupleSketchIterator it = sketch.iterator(); + while (it.next()) { + assertTrue(it.getHash() < sketch.getThetaLong()); + assertTrue(it.getSummary().getValue() < n); + } + } + } + + @Test(groups = {GENERATE_JAVA_FILES}) + public void generateForCppIntegerSummary() throws IOException { + final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000}; + for (int n: nArr) { + final UpdatableSketch sk = + new UpdatableSketchBuilder<>(new IntegerSummaryFactory()).build(); + for (int i = 0; i < n; i++) { + sk.update(i, i); + } + Files.write(javaPath.resolve("tuple_int_n" + n + "_java.sk"), sk.compact().toByteArray()); //opens, writes and closes the file + } + } + + @Test(expectedExceptions = SketchesArgumentException.class, groups = {CHECK_CPP_HISTORICAL_FILES}) + public void noSupportHeapifyV0_9_1() throws Exception { + final byte[] byteArr = TestUtil.getResourceBytes("ArrayOfDoublesUnion_v0.9.1.sk"); + ArrayOfDoublesUnion.heapify(MemorySegment.ofArray(byteArr)); + } + + @Test(expectedExceptions = SketchesArgumentException.class, groups = {CHECK_CPP_HISTORICAL_FILES}) + public void noSupportWrapV0_9_1() throws Exception { + final byte[] byteArr = TestUtil.getResourceBytes("ArrayOfDoublesUnion_v0.9.1.sk"); + ArrayOfDoublesUnion.wrap(MemorySegment.ofArray(byteArr)); + } + +} diff --git a/src/test/java/org/apache/datasketches/tuple2/TupleExamples2Test.java b/src/test/java/org/apache/datasketches/tuple2/TupleExamples2Test.java new file mode 100644 index 000000000..8cc8b7975 --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/TupleExamples2Test.java @@ -0,0 +1,287 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2; + + import static org.testng.Assert.assertEquals; + +import org.apache.datasketches.theta2.UpdateSketch; +import org.apache.datasketches.theta2.UpdateSketchBuilder; +import org.apache.datasketches.tuple2.adouble.DoubleSummary; +import org.apache.datasketches.tuple2.adouble.DoubleSummary.Mode; +import org.apache.datasketches.tuple2.adouble.DoubleSummaryFactory; +import org.apache.datasketches.tuple2.adouble.DoubleSummarySetOperations; +import org.testng.annotations.Test; + + /** + * Tests for Version 2.0.0 + * @author Lee Rhodes + */ + public class TupleExamples2Test { + private final DoubleSummary.Mode umode = Mode.Sum; + private final DoubleSummary.Mode imode = Mode.AlwaysOne; + private final DoubleSummarySetOperations dsso0 = new DoubleSummarySetOperations(); + private final DoubleSummarySetOperations dsso1 = new DoubleSummarySetOperations(umode); + private final DoubleSummarySetOperations dsso2 = new DoubleSummarySetOperations(umode, imode); + private final DoubleSummaryFactory ufactory = new DoubleSummaryFactory(umode); + private final DoubleSummaryFactory ifactory = new DoubleSummaryFactory(imode); + private final UpdateSketchBuilder thetaBldr = UpdateSketch.builder(); + private final UpdatableSketchBuilder tupleBldr = + new UpdatableSketchBuilder<>(ufactory); + + + @Test + public void example1() { // stateful: tuple, theta, use 
dsso2 + //Load source sketches + final UpdatableSketch tupleSk = tupleBldr.build(); + final UpdateSketch thetaSk = thetaBldr.build(); + for (int i = 1; i <= 12; i++) { + tupleSk.update(i, 1.0); + thetaSk.update(i + 3); + } + + //Union + final Union union = new Union<>(dsso2); + union.union(tupleSk); + union.union(thetaSk, ufactory.newSummary().update(1.0)); + final CompactSketch ucsk = union.getResult(); + int entries = ucsk.getRetainedEntries(); + println("Union Stateful: tuple, theta: " + entries); + final TupleSketchIterator uiter = ucsk.iterator(); + int counter = 1; + int twos = 0; + int ones = 0; + while (uiter.next()) { + final int i = (int)uiter.getSummary().getValue(); + println(counter++ + ", " + i); //9 entries = 2, 6 entries = 1 + if (i == 1) { ones++; } + if (i == 2) { twos++; } + } + assertEquals(ones, 6); + assertEquals(twos, 9); + + //Intersection + final Intersection inter = new Intersection<>(dsso2); + inter.intersect(tupleSk); + inter.intersect(thetaSk, ifactory.newSummary().update(1.0)); + final CompactSketch icsk = inter.getResult(); + entries = icsk.getRetainedEntries(); + println("Intersection Stateful: tuple, theta: " + entries); + final TupleSketchIterator iiter = icsk.iterator(); + counter = 1; + while (iiter.next()) { + final int i = (int)iiter.getSummary().getValue(); + println(counter++ + ", " + i); //9 entries = 1 + assertEquals(i, 1); + } + } + + @Test + public void example2() { //stateless: tuple1, tuple2, use dsso2 + //Load source sketches + final UpdatableSketch tupleSk1 = tupleBldr.build(); + final UpdatableSketch tupleSk2 = tupleBldr.build(); + + for (int i = 1; i <= 12; i++) { + tupleSk1.update(i, 1.0); + tupleSk2.update(i + 3, 1.0); + } + + //Union + final Union union = new Union<>(dsso2); + final CompactSketch ucsk = union.union(tupleSk1, tupleSk2); + int entries = ucsk.getRetainedEntries(); + println("Union: " + entries); + final TupleSketchIterator uiter = ucsk.iterator(); + int counter = 1; + int twos = 0; + int ones = 0; + 
while (uiter.next()) { + final int i = (int)uiter.getSummary().getValue(); + println(counter++ + ", " + i); //9 entries = 2, 6 entries = 1 + if (i == 1) { ones++; } + if (i == 2) { twos++; } + } + assertEquals(ones, 6); + assertEquals(twos, 9); + + //Intersection + final Intersection inter = new Intersection<>(dsso2); + final CompactSketch icsk = inter.intersect(tupleSk1, tupleSk2); + entries = icsk.getRetainedEntries(); + println("Intersection: " + entries); + final TupleSketchIterator iiter = icsk.iterator(); + counter = 1; + while (iiter.next()) { + final int i = (int)iiter.getSummary().getValue(); + println(counter++ + ", " + i); //9 entries = 1 + assertEquals(i, 1); + } + } + + @Test + public void example3() { //stateless: tuple, theta, use dsso2 + //Load source sketches + final UpdatableSketch tupleSk = tupleBldr.build(); + final UpdateSketch thetaSk = thetaBldr.build(); + for (int i = 1; i <= 12; i++) { + tupleSk.update(i, 1.0); + thetaSk.update(i + 3); + } + + //Union + final Union union = new Union<>(dsso2); + final CompactSketch ucsk = + union.union(tupleSk, thetaSk, ufactory.newSummary().update(1.0)); + int entries = ucsk.getRetainedEntries(); + println("Union: " + entries); + final TupleSketchIterator uiter = ucsk.iterator(); + int counter = 1; + int twos = 0; + int ones = 0; + while (uiter.next()) { + final int i = (int)uiter.getSummary().getValue(); + println(counter++ + ", " + i); //9 entries = 2, 6 entries = 1 + if (i == 1) { ones++; } + if (i == 2) { twos++; } + } + assertEquals(ones, 6); + assertEquals(twos, 9); + + //Intersection + final Intersection inter = new Intersection<>(dsso2); + final CompactSketch icsk = + inter.intersect(tupleSk, thetaSk, ufactory.newSummary().update(1.0)); + entries = icsk.getRetainedEntries(); + println("Intersection: " + entries); + final TupleSketchIterator iiter = icsk.iterator(); + counter = 1; + while (iiter.next()) { + final int i = (int)iiter.getSummary().getValue(); + println(counter++ + ", " + i); //9 
entries = 1 + assertEquals(i, 1); + } + } + + @Test + public void example4() { //stateful: tuple, theta, Mode=sum for both, use dsso0 + //Load source sketches + final UpdatableSketch tupleSk = tupleBldr.build(); + final UpdateSketch thetaSk = thetaBldr.build(); + for (int i = 1; i <= 12; i++) { + tupleSk.update(i, 1.0); + thetaSk.update(i + 3); + } + + //Union + final Union union = new Union<>(dsso0); + union.union(tupleSk); + union.union(thetaSk, ufactory.newSummary().update(1.0)); + final CompactSketch ucsk = union.getResult(); + int entries = ucsk.getRetainedEntries(); + println("Union Stateful: tuple, theta: " + entries); + final TupleSketchIterator uiter = ucsk.iterator(); + int counter = 1; + int twos = 0; + int ones = 0; + while (uiter.next()) { + final int i = (int)uiter.getSummary().getValue(); + println(counter++ + ", " + i); //9 entries = 2, 6 entries = 1 + if (i == 1) { ones++; } + if (i == 2) { twos++; } + } + assertEquals(ones, 6); + assertEquals(twos, 9); + + //Intersection + final Intersection inter = new Intersection<>(dsso0); + inter.intersect(tupleSk); + inter.intersect(thetaSk, ifactory.newSummary().update(1.0)); + final CompactSketch icsk = inter.getResult(); + entries = icsk.getRetainedEntries(); + println("Intersection Stateful: tuple, theta: " + entries); + final TupleSketchIterator iiter = icsk.iterator(); + counter = 1; + while (iiter.next()) { + final int i = (int)iiter.getSummary().getValue(); + println(counter++ + ", " + i); //9 entries = 2 + assertEquals(i, 2); + } + } + + @Test + public void example5() { //stateful, tuple, theta, Mode=sum for both, use dsso1 + //Load source sketches + final UpdatableSketch tupleSk = tupleBldr.build(); + final UpdateSketch thetaSk = thetaBldr.build(); + for (int i = 1; i <= 12; i++) { + tupleSk.update(i, 1.0); + thetaSk.update(i + 3); + } + + //Union + final Union union = new Union<>(dsso1); + union.union(tupleSk); + union.union(thetaSk, ufactory.newSummary().update(1.0)); + final CompactSketch ucsk = 
union.getResult(); + int entries = ucsk.getRetainedEntries(); + println("Union Stateful: tuple, theta: " + entries); + final TupleSketchIterator uiter = ucsk.iterator(); + int counter = 1; + int twos = 0; + int ones = 0; + while (uiter.next()) { + final int i = (int)uiter.getSummary().getValue(); + println(counter++ + ", " + i); //9 entries = 2, 6 entries = 1 + if (i == 1) { ones++; } + if (i == 2) { twos++; } + } + assertEquals(ones, 6); + assertEquals(twos, 9); + + //Intersection + final Intersection inter = new Intersection<>(dsso1); + inter.intersect(tupleSk); + inter.intersect(thetaSk, ifactory.newSummary().update(1.0)); + final CompactSketch icsk = inter.getResult(); + entries = icsk.getRetainedEntries(); + println("Intersection Stateful: tuple, theta: " + entries); + final TupleSketchIterator iiter = icsk.iterator(); + counter = 1; + while (iiter.next()) { + final int i = (int)iiter.getSummary().getValue(); + println(counter++ + ", " + i); //9 entries = 2 + assertEquals(i, 2); + } + } + + + @Test + public void printlnTest() { + println("PRINTING: "+this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(final String s) { + //System.out.println(s); //enable/disable here + } + +} diff --git a/src/test/java/org/apache/datasketches/tuple2/TupleExamplesTest.java b/src/test/java/org/apache/datasketches/tuple2/TupleExamplesTest.java new file mode 100644 index 000000000..a1409ac37 --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/TupleExamplesTest.java @@ -0,0 +1,191 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2; + +import static org.testng.Assert.assertEquals; + +import org.apache.datasketches.theta2.UpdateSketch; +import org.apache.datasketches.theta2.UpdateSketchBuilder; +import org.apache.datasketches.tuple2.aninteger.IntegerSummary; +import org.apache.datasketches.tuple2.aninteger.IntegerSummary.Mode; +import org.apache.datasketches.tuple2.aninteger.IntegerSummaryFactory; +import org.apache.datasketches.tuple2.aninteger.IntegerSummarySetOperations; +import org.testng.annotations.Test; + +/** + * Tests for Version 2.0.0 + * @author Lee Rhodes + */ +public class TupleExamplesTest { + private final IntegerSummary.Mode umode = Mode.Sum; + private final IntegerSummary.Mode imode = Mode.AlwaysOne; + private final IntegerSummarySetOperations isso = new IntegerSummarySetOperations(umode, imode); + private final IntegerSummaryFactory ufactory = new IntegerSummaryFactory(umode); + private final IntegerSummaryFactory ifactory = new IntegerSummaryFactory(imode); + private final UpdateSketchBuilder thetaBldr = UpdateSketch.builder(); + private final UpdatableSketchBuilder tupleBldr = + new UpdatableSketchBuilder<>(ufactory); + + + @Test + public void example1() { + //Load source sketches + final UpdatableSketch tupleSk = tupleBldr.build(); + final UpdateSketch thetaSk = thetaBldr.build(); + for (int i = 1; i <= 12; i++) { + tupleSk.update(i, 1); + thetaSk.update(i + 3); + } + + //Union stateful: tuple, theta + final Union union = new Union<>(isso); + union.union(tupleSk); + union.union(thetaSk, 
ufactory.newSummary().update(1)); + final CompactSketch ucsk = union.getResult(); + int entries = ucsk.getRetainedEntries(); + println("Union Stateful: tuple, theta: " + entries); + final TupleSketchIterator uiter = ucsk.iterator(); + int counter = 1; + int twos = 0; + int ones = 0; + while (uiter.next()) { + final int i = uiter.getSummary().getValue(); + println(counter++ + ", " + i); //9 entries = 2, 6 entries = 1 + if (i == 1) { ones++; } + if (i == 2) { twos++; } + } + assertEquals(ones, 6); + assertEquals(twos, 9); + + //Intersection stateful: tuple, theta + final Intersection inter = new Intersection<>(isso); + inter.intersect(tupleSk); + inter.intersect(thetaSk, ifactory.newSummary().update(1)); + final CompactSketch icsk = inter.getResult(); + entries = icsk.getRetainedEntries(); + println("Intersection Stateful: tuple, theta: " + entries); + final TupleSketchIterator iiter = icsk.iterator(); + counter = 1; + while (iiter.next()) { + final int i = iiter.getSummary().getValue(); + println(counter++ + ", " + i); //9 entries = 1 + assertEquals(i, 1); + } + } + + @Test + public void example2() { + //Load source sketches + final UpdatableSketch tupleSk1 = tupleBldr.build(); + final UpdatableSketch tupleSk2 = tupleBldr.build(); + + for (int i = 1; i <= 12; i++) { + tupleSk1.update(i, 1); + tupleSk2.update(i + 3, 1); + } + + //Union, stateless: tuple1, tuple2 + final Union union = new Union<>(isso); + final CompactSketch ucsk = union.union(tupleSk1, tupleSk2); + int entries = ucsk.getRetainedEntries(); + println("Union: " + entries); + final TupleSketchIterator uiter = ucsk.iterator(); + int counter = 1; + int twos = 0; + int ones = 0; + while (uiter.next()) { + final int i = uiter.getSummary().getValue(); + println(counter++ + ", " + i); //9 entries = 2, 6 entries = 1 + if (i == 1) { ones++; } + if (i == 2) { twos++; } + } + assertEquals(ones, 6); + assertEquals(twos, 9); + + //Intersection stateless: tuple1, tuple2 + final Intersection inter = new 
Intersection<>(isso); + final CompactSketch icsk = inter.intersect(tupleSk1, tupleSk2); + entries = icsk.getRetainedEntries(); + println("Intersection: " + entries); + final TupleSketchIterator iiter = icsk.iterator(); + counter = 1; + while (iiter.next()) { + final int i = iiter.getSummary().getValue(); + println(counter++ + ", " + i); //9 entries = 2 + assertEquals(i, 1); + } + } + + @Test + public void example3() { + //Load source sketches + final UpdatableSketch tupleSk = tupleBldr.build(); + final UpdateSketch thetaSk = thetaBldr.build(); + for (int i = 1; i <= 12; i++) { + tupleSk.update(i, 1); + thetaSk.update(i + 3); + } + + //Union, stateless: tuple1, tuple2 + final Union union = new Union<>(isso); + final CompactSketch ucsk = + union.union(tupleSk, thetaSk, ufactory.newSummary().update(1)); + int entries = ucsk.getRetainedEntries(); + println("Union: " + entries); + final TupleSketchIterator uiter = ucsk.iterator(); + int counter = 1; + int twos = 0; + int ones = 0; + while (uiter.next()) { + final int i = uiter.getSummary().getValue(); + println(counter++ + ", " + i); //9 entries = 2, 6 entries = 1 + if (i == 1) { ones++; } + if (i == 2) { twos++; } + } + assertEquals(ones, 6); + assertEquals(twos, 9); + + //Intersection stateless: tuple1, tuple2 + final Intersection inter = new Intersection<>(isso); + final CompactSketch icsk = + inter.intersect(tupleSk, thetaSk, ufactory.newSummary().update(1)); + entries = icsk.getRetainedEntries(); + println("Intersection: " + entries); + final TupleSketchIterator iiter = icsk.iterator(); + counter = 1; + while (iiter.next()) { + final int i = iiter.getSummary().getValue(); + println(counter++ + ", " + i); //9 entries = 2 + assertEquals(i, 1); + } + } + + @Test + public void printlnTest() { + println("PRINTING: "+this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(final String s) { + //System.out.println(s); //enable/disable here + } +} diff --git 
a/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleAnotBTest.java b/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleAnotBTest.java new file mode 100644 index 000000000..8951c528c --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleAnotBTest.java @@ -0,0 +1,299 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple2.adouble; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.theta2.UpdateSketch; +import org.apache.datasketches.theta2.UpdateSketchBuilder; +import org.apache.datasketches.tuple2.AnotB; +import org.apache.datasketches.tuple2.CompactSketch; +import org.apache.datasketches.tuple2.Sketch; +import org.apache.datasketches.tuple2.TupleSketchIterator; +import org.apache.datasketches.tuple2.UpdatableSketch; +import org.apache.datasketches.tuple2.UpdatableSketchBuilder; +import org.apache.datasketches.tuple2.adouble.DoubleSummary.Mode; +import org.testng.Assert; +import org.testng.annotations.Test; + +/** + * @author Lee Rhodes + */ +public class AdoubleAnotBTest { + private static final DoubleSummary.Mode mode = Mode.Sum; + private final Results results = new Results(); + + private static void threeMethodsWithTheta( + final AnotB aNotB, + final Sketch skA, + final Sketch skB, + final org.apache.datasketches.theta2.Sketch skThetaB, + final Results results) + { + CompactSketch result; + + //Stateful, A = Tuple, B = Tuple + if (skA != null) { + try { + aNotB.setA(skA); + aNotB.notB(skB); + result = aNotB.getResult(true); + results.check(result); + } + catch (final SketchesArgumentException e) { } + } + + //Stateless A = Tuple, B = Tuple + if (skA == null || skB == null) { + try { + result = AnotB.aNotB(skA, skB); + fail(); + } + catch (final SketchesArgumentException e) { } + } else { + result = AnotB.aNotB(skA, skB); + results.check(result); + } + + //Stateless A = Tuple, B = Theta + if (skA == null || skThetaB == null) { + try { result = AnotB.aNotB(skA, skThetaB); fail(); } + catch (final SketchesArgumentException e) { } + } else { + result = AnotB.aNotB(skA, skThetaB); + results.check(result); + } + + //Stateful A = Tuple, B = Tuple + if (skA == 
null) { + try { aNotB.setA(skA); fail(); } + catch (final SketchesArgumentException e) { } + } else { + aNotB.setA(skA); + aNotB.notB(skB); + result = aNotB.getResult(true); + results.check(result); + } + + //Stateful A = Tuple, B = Theta + if (skA == null) { + try { aNotB.setA(skA); fail(); } + catch (final SketchesArgumentException e) { } + } else { + aNotB.setA(skA); + aNotB.notB(skThetaB); + result = aNotB.getResult(false); + results.check(result); + result = aNotB.getResult(true); + results.check(result); + } + } + + private static class Results { + private int retEnt = 0; + private boolean empty = true; + private double expect = 0.0; + private double tol = 0.0; + private double sum = 0.0; + + Results() {} + + Results set(final int retEnt, final boolean empty, + final double expect, final double tol, final double sum) { + this.retEnt = retEnt; //retained Entries + this.empty = empty; + this.expect = expect; //expected estimate + this.tol = tol; //tolerance + this.sum = sum; + return this; + } + + void check(final CompactSketch result) { + assertEquals(result.getRetainedEntries(), retEnt); + assertEquals(result.isEmpty(), empty); + if (result.getTheta() < 1.0) { + final double est = result.getEstimate(); + assertEquals(est, expect, expect * tol); + assertTrue(result.getUpperBound(1) > est); + assertTrue(result.getLowerBound(1) <= est); + } else { + assertEquals(result.getEstimate(), expect, 0.0); + assertEquals(result.getUpperBound(1), expect, 0.0); + assertEquals(result.getLowerBound(1), expect, 0.0); + } + final TupleSketchIterator it = result.iterator(); + while (it.next()) { + Assert.assertEquals(it.getSummary().getValue(), sum); + } + } + } //End class Results + + private static UpdatableSketch buildUpdatableTuple() { + return new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); + } + + private static UpdateSketch buildUpdateTheta() { + return new UpdateSketchBuilder().build(); + } + + /*****************************************/ + + @Test 
+ public void aNotBNullEmptyCombinations() { + final AnotB aNotB = new AnotB<>(); + // calling getResult() before calling update() should yield an empty set + final CompactSketch result = aNotB.getResult(true); + results.set(0, true, 0.0, 0.0, 0.0).check(result); + + final UpdatableSketch sketch = buildUpdatableTuple(); + final UpdateSketch skTheta = buildUpdateTheta(); + + threeMethodsWithTheta(aNotB, null, null, null, results); + threeMethodsWithTheta(aNotB, sketch, null, null, results); + threeMethodsWithTheta(aNotB, null, sketch, null, results); + threeMethodsWithTheta(aNotB, sketch, sketch, null, results); + threeMethodsWithTheta(aNotB, null, null, skTheta, results); + threeMethodsWithTheta(aNotB, sketch, null, skTheta, results); + threeMethodsWithTheta(aNotB, null, sketch, skTheta, results); + threeMethodsWithTheta(aNotB, sketch, sketch, skTheta, results); + } + + @Test + public void aNotBCheckDoubleSetAs() { + final UpdatableSketch skA = buildUpdatableTuple(); + skA.update(1, 1.0); + skA.update(2, 1.0); + final UpdatableSketch skA2 = buildUpdatableTuple(); + final AnotB aNotB = new AnotB<>(); + aNotB.setA(skA); + assertEquals(aNotB.getResult(false).isEmpty(), false); + aNotB.setA(skA2); + assertEquals(aNotB.getResult(false).isEmpty(), true); + } + + @Test + public void aNotBEmptyExact() { + final UpdatableSketch sketchA = buildUpdatableTuple(); + final UpdatableSketch sketchB = buildUpdatableTuple(); + sketchB.update(1, 1.0); + sketchB.update(2, 1.0); + final UpdateSketch skThetaB = buildUpdateTheta(); + skThetaB.update(1); + skThetaB.update(2); + + final AnotB aNotB = new AnotB<>(); + results.set(0, true, 0.0, 0.0, 0.0); + threeMethodsWithTheta(aNotB, sketchA, sketchB, skThetaB, results); + } + + @Test + public void aNotBExactEmpty() { + final UpdatableSketch sketchA = buildUpdatableTuple(); + sketchA.update(1, 1.0); + sketchA.update(2, 1.0); + final UpdatableSketch sketchB = buildUpdatableTuple(); + final UpdateSketch skThetaB = buildUpdateTheta(); + + 
final AnotB aNotB = new AnotB<>(); + results.set(2, false, 2.0, 0.0, 1.0); + threeMethodsWithTheta(aNotB, sketchA, sketchB, skThetaB, results); + + // same thing, but compact sketches + threeMethodsWithTheta(aNotB, sketchA.compact(), sketchB.compact(), skThetaB.compact(), results); + } + + @Test + public void aNotBExactOverlap() { + final UpdatableSketch sketchA = buildUpdatableTuple(); + sketchA.update(1, 1.0); + sketchA.update(1, 1.0); + sketchA.update(2, 1.0); + sketchA.update(2, 1.0); + + final UpdatableSketch sketchB = buildUpdatableTuple(); + sketchB.update(2, 1.0); + sketchB.update(2, 1.0); + sketchB.update(3, 1.0); + sketchB.update(3, 1.0); + + final UpdateSketch skThetaB = buildUpdateTheta(); + skThetaB.update(2); + skThetaB.update(3); + + final AnotB aNotB = new AnotB<>(); + results.set(1, false, 1.0, 0.0, 2.0); + threeMethodsWithTheta(aNotB, sketchA, sketchB, skThetaB, results); + } + + @Test + public void aNotBEstimationOverlap() { + final UpdatableSketch sketchA = buildUpdatableTuple(); + for (int i = 0; i < 8192; i++) { + sketchA.update(i, 1.0); + } + + final UpdatableSketch sketchB = buildUpdatableTuple(); + for (int i = 0; i < 4096; i++) { + sketchB.update(i, 1.0); + } + + final UpdateSketch skThetaB = buildUpdateTheta(); + for (int i = 0; i < 4096; i++) { + skThetaB.update(i); + } + + final AnotB aNotB = new AnotB<>(); + results.set(2123, false, 4096.0, 0.03, 1.0); + threeMethodsWithTheta(aNotB, sketchA, sketchB, skThetaB, results); + + // same thing, but compact sketches + threeMethodsWithTheta(aNotB, sketchA.compact(), sketchB.compact(), skThetaB.compact(), results); + } + + @Test + public void aNotBEstimationOverlapLargeB() { + final UpdatableSketch sketchA = buildUpdatableTuple(); + for (int i = 0; i < 10_000; i++) { + sketchA.update(i, 1.0); + } + + final UpdatableSketch sketchB = buildUpdatableTuple(); + for (int i = 0; i < 100_000; i++) { + sketchB.update(i + 8000, 1.0); + } + + final UpdateSketch skThetaB = buildUpdateTheta(); + for (int i 
= 0; i < 100_000; i++) { + skThetaB.update(i + 8000); + } + + final int expected = 8_000; + final AnotB aNotB = new AnotB<>(); + results.set(376, false, expected, 0.1, 1.0); + threeMethodsWithTheta(aNotB, sketchA, sketchB, skThetaB, results); + + // same thing, but compact sketches + threeMethodsWithTheta(aNotB, sketchA.compact(), sketchB.compact(), skThetaB.compact(), results); + } + +} diff --git a/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleIntersectionTest.java b/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleIntersectionTest.java new file mode 100644 index 000000000..1a85334fc --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleIntersectionTest.java @@ -0,0 +1,305 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple2.adouble; + +import static org.testng.Assert.fail; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.SketchesStateException; +import org.apache.datasketches.theta2.UpdateSketch; +import org.apache.datasketches.theta2.UpdateSketchBuilder; +import org.apache.datasketches.tuple2.CompactSketch; +import org.apache.datasketches.tuple2.Intersection; +import org.apache.datasketches.tuple2.Sketch; +import org.apache.datasketches.tuple2.TupleSketchIterator; +import org.apache.datasketches.tuple2.Sketches; +import org.apache.datasketches.tuple2.UpdatableSketch; +import org.apache.datasketches.tuple2.UpdatableSketchBuilder; +import org.apache.datasketches.tuple2.adouble.DoubleSummary.Mode; +import org.testng.Assert; +import org.testng.annotations.Test; + +/** + * @author Lee Rhodes + */ +public class AdoubleIntersectionTest { + private final DoubleSummary.Mode mode = Mode.Sum; + + @Test + public void intersectionNotEmptyNoEntries() { + final UpdatableSketch sketch1 = + new UpdatableSketchBuilder<> + (new DoubleSummaryFactory(mode)).setSamplingProbability(0.01f).build(); + sketch1.update("a", 1.0); // this happens to get rejected because of sampling with low probability + final Intersection intersection = + new Intersection<>(new DoubleSummarySetOperations(mode, mode)); + intersection.intersect(sketch1); + final CompactSketch result = intersection.getResult(); + Assert.assertEquals(result.getRetainedEntries(), 0); + Assert.assertFalse(result.isEmpty()); + Assert.assertEquals(result.getEstimate(), 0.0); + Assert.assertEquals(result.getLowerBound(1), 0.0, 0.0001); + Assert.assertTrue(result.getUpperBound(1) > 0); + } + + @Test + public void intersectionExactWithEmpty() { + final UpdatableSketch sketch1 = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); + sketch1.update(1, 1.0); + sketch1.update(2, 1.0); + sketch1.update(3, 1.0); + + final Sketch sketch2 = 
Sketches.createEmptySketch(); + + final Intersection intersection = + new Intersection<>(new DoubleSummarySetOperations(mode, mode)); + intersection.intersect(sketch1); + intersection.intersect(sketch2); + final CompactSketch result = intersection.getResult(); + Assert.assertEquals(result.getRetainedEntries(), 0); + Assert.assertTrue(result.isEmpty()); + Assert.assertEquals(result.getEstimate(), 0.0); + Assert.assertEquals(result.getLowerBound(1), 0.0); + Assert.assertEquals(result.getUpperBound(1), 0.0); + } + + @Test + public void intersectionExactMode() { + UpdatableSketch sketch1 = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); + sketch1.update(1, 1.0); + sketch1.update(1, 1.0); + sketch1.update(2, 1.0); + sketch1.update(2, 1.0); + + final UpdatableSketch sketch2 = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); + sketch2.update(2, 1.0); + sketch2.update(2, 1.0); + sketch2.update(3, 1.0); + sketch2.update(3, 1.0); + + final Intersection intersection = + new Intersection<>(new DoubleSummarySetOperations(mode, mode)); + intersection.intersect(sketch1); + intersection.intersect(sketch2); + final CompactSketch result = intersection.getResult(); + Assert.assertEquals(result.getRetainedEntries(), 1); + Assert.assertFalse(result.isEmpty()); + Assert.assertEquals(result.getEstimate(), 1.0); + Assert.assertEquals(result.getLowerBound(1), 1.0); + Assert.assertEquals(result.getUpperBound(1), 1.0); + final TupleSketchIterator it = result.iterator(); + Assert.assertTrue(it.next()); + Assert.assertTrue(it.getHash() > 0); + Assert.assertEquals(it.getSummary().getValue(), 4.0); + Assert.assertFalse(it.next()); + + intersection.reset(); + sketch1 = null; + try { intersection.intersect(sketch1); fail();} + catch (final SketchesArgumentException e) { } + +} + + @Test + public void intersectionDisjointEstimationMode() { + int key = 0; + final UpdatableSketch sketch1 = + new UpdatableSketchBuilder<>(new 
DoubleSummaryFactory(mode)).build(); + for (int i = 0; i < 8192; i++) { + sketch1.update(key++, 1.0); + } + + final UpdatableSketch sketch2 = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); + for (int i = 0; i < 8192; i++) { + sketch2.update(key++, 1.0); + } + + final Intersection intersection = + new Intersection<>(new DoubleSummarySetOperations(mode, mode)); + intersection.intersect(sketch1); + intersection.intersect(sketch2); + CompactSketch result = intersection.getResult(); + Assert.assertEquals(result.getRetainedEntries(), 0); + Assert.assertFalse(result.isEmpty()); + Assert.assertEquals(result.getEstimate(), 0.0); + Assert.assertEquals(result.getLowerBound(1), 0.0); + Assert.assertTrue(result.getUpperBound(1) > 0); + + // an intersection with no entries must survive more updates + intersection.intersect(sketch1); + result = intersection.getResult(); + Assert.assertEquals(result.getRetainedEntries(), 0); + Assert.assertFalse(result.isEmpty()); + Assert.assertEquals(result.getEstimate(), 0.0); + Assert.assertEquals(result.getLowerBound(1), 0.0); + Assert.assertTrue(result.getUpperBound(1) > 0); + } + + @Test + public void intersectionEstimationMode() { + int key = 0; + final UpdatableSketch sketch1 = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); + for (int i = 0; i < 8192; i++) { + sketch1.update(key++, 1.0); + } + + key -= 4096; // overlap half of the entries + final UpdatableSketch sketch2 = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); + for (int i = 0; i < 8192; i++) { + sketch2.update(key++, 1.0); + } + + final Intersection intersection = + new Intersection<>(new DoubleSummarySetOperations(mode, mode)); + intersection.intersect(sketch1); + intersection.intersect(sketch2); + final CompactSketch result = intersection.getResult(); + Assert.assertFalse(result.isEmpty()); + // crude estimate of RSE(95%) = 2 / sqrt(result.getRetainedEntries()) + Assert.assertEquals(result.getEstimate(), 
4096.0, 4096 * 0.03); + Assert.assertTrue(result.getLowerBound(1) <= result.getEstimate()); + Assert.assertTrue(result.getUpperBound(1) > result.getEstimate()); + final TupleSketchIterator it = result.iterator(); + while (it.next()) { + Assert.assertEquals(it.getSummary().getValue(), 2.0); + } + } + + @Test + public void checkExactIntersectionWithTheta() { + final UpdateSketch thSkNull = null; + final UpdateSketch thSkEmpty = new UpdateSketchBuilder().build(); + final UpdateSketch thSk10 = new UpdateSketchBuilder().build(); + final UpdateSketch thSk15 = new UpdateSketchBuilder().build(); + for (int i = 0; i < 10; i++) { thSk10.update(i); } + for (int i = 0; i < 10; i++) { thSk15.update(i + 5); } //overlap = 5 + + DoubleSummary dsum = new DoubleSummaryFactory(mode).newSummary(); + final Intersection intersection = + new Intersection<>(new DoubleSummarySetOperations(mode, mode)); + CompactSketch result; + + try { intersection.getResult(); fail(); } + catch (final SketchesStateException e ) { } //OK. 
+ + try { intersection.intersect(thSkNull, dsum); fail(); } + catch (final SketchesArgumentException e) { } //OK + + intersection.intersect(thSkEmpty, dsum); + result = intersection.getResult(); + Assert.assertTrue(result.isEmpty()); //Empty after empty first call + intersection.reset(); + + intersection.intersect(thSk10, dsum); + result = intersection.getResult(); + Assert.assertEquals(result.getEstimate(), 10.0); //Returns valid first call + intersection.reset(); + + intersection.intersect(thSk10, dsum); // Valid first call + intersection.intersect(thSkEmpty, dsum); + result = intersection.getResult(); + Assert.assertTrue(result.isEmpty()); //Returns Empty after empty second call + intersection.reset(); + + intersection.intersect(thSk10, dsum); + intersection.intersect(thSk15, dsum); + result = intersection.getResult(); + Assert.assertEquals(result.getEstimate(), 5.0); //Returns intersection + intersection.reset(); + + dsum = null; + try { intersection.intersect(thSk10, dsum); fail(); } + catch (final SketchesArgumentException e) { } + } + + @Test + public void checkExactIntersectionWithThetaDisjoint() { + final UpdateSketch thSkA = new UpdateSketchBuilder().setLogNominalEntries(10).build(); + final UpdateSketch thSkB = new UpdateSketchBuilder().setLogNominalEntries(10).build(); + int key = 0; + for (int i = 0; i < 32; i++) { thSkA.update(key++); } + for (int i = 0; i < 32; i++) { thSkB.update(key++); } + + final DoubleSummary dsum = new DoubleSummaryFactory(mode).newSummary(); + final Intersection intersection = + new Intersection<>(new DoubleSummarySetOperations(mode, mode)); + CompactSketch result; + + intersection.intersect(thSkA, dsum); + intersection.intersect(thSkB, dsum); + result = intersection.getResult(); + Assert.assertEquals(result.getRetainedEntries(), 0); + + // an intersection with no entries must survive more updates + intersection.intersect(thSkA, dsum); + result = intersection.getResult(); + Assert.assertEquals(result.getRetainedEntries(), 0); 
+ intersection.reset(); + } + + @Test + public void checkEstimatingIntersectionWithThetaOverlapping() { + final UpdateSketch thSkA = new UpdateSketchBuilder().setLogNominalEntries(4).build(); + final UpdateSketch thSkB = new UpdateSketchBuilder().setLogNominalEntries(10).build(); + for (int i = 0; i < 64; i++) { thSkA.update(i); } //dense mode, low theta + for (int i = 32; i < 96; i++) { thSkB.update(i); } //exact overlapping + + final DoubleSummary dsum = new DoubleSummaryFactory(mode).newSummary(); + final Intersection intersection = + new Intersection<>(new DoubleSummarySetOperations(mode, mode)); + CompactSketch result; + + intersection.intersect(thSkA, dsum); + intersection.intersect(thSkB, dsum); + result = intersection.getResult(); + Assert.assertEquals(result.getRetainedEntries(), 14); + + thSkB.reset(); + for (int i = 100; i < 164; i++) { thSkB.update(i); } //exact, disjoint + intersection.intersect(thSkB, dsum); //remove existing entries + result = intersection.getResult(); + Assert.assertEquals(result.getRetainedEntries(), 0); + intersection.intersect(thSkB, dsum); + result = intersection.getResult(); + Assert.assertEquals(result.getRetainedEntries(), 0); + } + + @Test + public void intersectionEmpty() { + final UpdatableSketch sketch = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); + final Intersection intersection = + new Intersection<>(new DoubleSummarySetOperations(mode, mode)); + intersection.intersect(sketch); + final CompactSketch result = intersection.getResult(); + Assert.assertEquals(result.getRetainedEntries(), 0); + Assert.assertTrue(result.isEmpty()); + Assert.assertEquals(result.getEstimate(), 0.0); + Assert.assertEquals(result.getLowerBound(1), 0.0); + Assert.assertEquals(result.getUpperBound(1), 0.0); + } + +} diff --git a/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleTest.java b/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleTest.java new file mode 100644 index 000000000..d3f747f1f --- 
/dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleTest.java @@ -0,0 +1,421 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.adouble; + +import static org.testng.Assert.assertEquals; + +import java.lang.foreign.MemorySegment; +import java.nio.ByteBuffer; + +import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.tuple2.Sketch; +import org.apache.datasketches.tuple2.Sketches; +import org.apache.datasketches.tuple2.TupleSketchIterator; +import org.apache.datasketches.tuple2.UpdatableSketch; +import org.apache.datasketches.tuple2.UpdatableSketchBuilder; +import org.apache.datasketches.tuple2.adouble.DoubleSummary.Mode; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class AdoubleTest { + private final DoubleSummary.Mode mode = Mode.Sum; + + @Test + public void isEmpty() { + final int lgK = 12; + final DoubleSketch sketch = new DoubleSketch(lgK, mode); + Assert.assertTrue(sketch.isEmpty()); + Assert.assertFalse(sketch.isEstimationMode()); + Assert.assertEquals(sketch.getEstimate(), 0.0); + 
Assert.assertEquals(sketch.getUpperBound(1), 0.0); + Assert.assertEquals(sketch.getLowerBound(1), 0.0); + Assert.assertEquals(sketch.getTheta(), 1.0); + Assert.assertEquals(sketch.getTheta(), 1.0); + Assert.assertNotNull(sketch.toString()); + final TupleSketchIterator it = sketch.iterator(); + Assert.assertNotNull(it); + Assert.assertFalse(it.next()); + } + + @Test + public void checkLowK() { + final UpdatableSketchBuilder bldr = new UpdatableSketchBuilder<>( + new DoubleSummaryFactory(Mode.Sum)); + bldr.setNominalEntries(16); + final UpdatableSketch sk = bldr.build(); + assertEquals(sk.getLgK(), 4); + } + + @SuppressWarnings("deprecation") + @Test + public void serDeTest() { + final int lgK = 12; + final int K = 1 << lgK; + final DoubleSketch a1Sk = new DoubleSketch(lgK, Mode.AlwaysOne); + final int m = 2 * K; + for (int key = 0; key < m; key++) { + a1Sk.update(key, 1.0); + } + final double est1 = a1Sk.getEstimate(); + final MemorySegment seg = MemorySegment.ofArray(a1Sk.toByteArray()); + final DoubleSketch a1Sk2 = new DoubleSketch(seg, Mode.AlwaysOne); + final double est2 = a1Sk2.getEstimate(); + assertEquals(est1, est2); + } + + @Test + public void checkStringKey() { + final int lgK = 12; + final int K = 1 << lgK; + final DoubleSketch a1Sk1 = new DoubleSketch(lgK, Mode.AlwaysOne); + final int m = K / 2; + for (int key = 0; key < m; key++) { + a1Sk1.update(Integer.toHexString(key), 1.0); + } + assertEquals(a1Sk1.getEstimate(), K / 2.0); + } + + + @Test + public void isEmptyWithSampling() { + final float samplingProbability = 0.1f; + final UpdatableSketch sketch = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)) + .setSamplingProbability(samplingProbability).build(); + Assert.assertTrue(sketch.isEmpty()); + Assert.assertFalse(sketch.isEstimationMode()); + Assert.assertEquals(sketch.getEstimate(), 0.0); + Assert.assertEquals(sketch.getUpperBound(1), 0.0); + Assert.assertEquals(sketch.getLowerBound(1), 0.0); + Assert.assertEquals(sketch.getThetaLong(), 
Long.MAX_VALUE); + Assert.assertEquals(sketch.getTheta(), 1.0); + } + + @Test + public void sampling() { + final float samplingProbability = 0.001f; + final UpdatableSketch sketch = + new UpdatableSketchBuilder<>( + new DoubleSummaryFactory(mode)).setSamplingProbability(samplingProbability).build(); + sketch.update("a", 1.0); + Assert.assertFalse(sketch.isEmpty()); + Assert.assertTrue(sketch.isEstimationMode()); + Assert.assertEquals(sketch.getEstimate(), 0.0); + Assert.assertTrue(sketch.getUpperBound(1) > 0.0); + Assert.assertEquals(sketch.getLowerBound(1), 0.0, 0.0000001); + Assert.assertEquals((float)sketch.getTheta(), samplingProbability); + Assert.assertEquals((float)sketch.getTheta(), samplingProbability); + } + + @Test + public void exactMode() { + final UpdatableSketch sketch = + new UpdatableSketchBuilder<>( + new DoubleSummaryFactory(mode)).build(); + Assert.assertTrue(sketch.isEmpty()); + Assert.assertEquals(sketch.getEstimate(), 0.0); + for (int i = 1; i <= 4096; i++) { + sketch.update(i, 1.0); + } + Assert.assertFalse(sketch.isEmpty()); + Assert.assertFalse(sketch.isEstimationMode()); + Assert.assertEquals(sketch.getEstimate(), 4096.0); + Assert.assertEquals(sketch.getUpperBound(1), 4096.0); + Assert.assertEquals(sketch.getLowerBound(1), 4096.0); + Assert.assertEquals(sketch.getTheta(), 1.0); + Assert.assertEquals(sketch.getTheta(), 1.0); + + int count = 0; + final TupleSketchIterator it = sketch.iterator(); + while (it.next()) { + Assert.assertEquals(it.getSummary().getValue(), 1.0); + count++; + } + Assert.assertEquals(count, 4096); + + sketch.reset(); + Assert.assertTrue(sketch.isEmpty()); + Assert.assertFalse(sketch.isEstimationMode()); + Assert.assertEquals(sketch.getEstimate(), 0.0); + Assert.assertEquals(sketch.getUpperBound(1), 0.0); + Assert.assertEquals(sketch.getLowerBound(1), 0.0); + Assert.assertEquals(sketch.getTheta(), 1.0); + Assert.assertEquals(sketch.getTheta(), 1.0); + } + + @Test + // The moment of going into the estimation mode is, 
to some extent, an implementation detail + // Here we assume that presenting as many unique values as twice the nominal + // size of the sketch will result in estimation mode + public void estimationMode() { + final UpdatableSketch sketch = + new UpdatableSketchBuilder<>( + new DoubleSummaryFactory(mode)).build(); + Assert.assertEquals(sketch.getEstimate(), 0.0); + for (int i = 1; i <= 8192; i++) { + sketch.update(i, 1.0); + } + Assert.assertTrue(sketch.isEstimationMode()); + Assert.assertEquals(sketch.getEstimate(), 8192, 8192 * 0.01); + Assert.assertTrue(sketch.getEstimate() >= sketch.getLowerBound(1)); + Assert.assertTrue(sketch.getEstimate() < sketch.getUpperBound(1)); + + int count = 0; + final TupleSketchIterator it = sketch.iterator(); + while (it.next()) { + Assert.assertEquals(it.getSummary().getValue(), 1.0); + count++; + } + Assert.assertTrue(count >= 4096); + + sketch.reset(); + Assert.assertTrue(sketch.isEmpty()); + Assert.assertFalse(sketch.isEstimationMode()); + Assert.assertEquals(sketch.getEstimate(), 0.0); + Assert.assertEquals(sketch.getUpperBound(1), 0.0); + Assert.assertEquals(sketch.getLowerBound(1), 0.0); + Assert.assertEquals(sketch.getTheta(), 1.0); + Assert.assertEquals(sketch.getTheta(), 1.0); +} + + /** With sampling probability 0.5 and ResizeFactor.X1, feeds 16384 distinct keys and checks that the sketch is in estimation mode with an estimate within 1% of 16384 that lies between getLowerBound(1) and getUpperBound(1). */ @Test + public void estimationModeWithSamplingNoResizing() { + final UpdatableSketch sketch = + new UpdatableSketchBuilder<>( + new DoubleSummaryFactory(mode)) + .setSamplingProbability(0.5f) + .setResizeFactor(ResizeFactor.X1).build(); + for (int i = 0; i < 16384; i++) { + sketch.update(i, 1.0); + } + Assert.assertTrue(sketch.isEstimationMode()); + Assert.assertEquals(sketch.getEstimate(), 16384, 16384 * 0.01); + Assert.assertTrue(sketch.getEstimate() >= sketch.getLowerBound(1)); + Assert.assertTrue(sketch.getEstimate() < sketch.getUpperBound(1)); + } + + /** Updates one sketch through the update overloads for long, double, byte[], ByteBuffer, int[], long[] and String keys; eight updates are made and an estimate of 7.0 is expected because one ByteBuffer wraps the same bytes as the preceding byte[] key. */ @Test + public void updatesOfAllKeyTypes() { + final UpdatableSketch sketch = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); + sketch.update(1L, 1.0); + 
sketch.update(2.0, 1.0); + final byte[] bytes = { 3, 3 }; + sketch.update(bytes, 1.0); + sketch.update(ByteBuffer.wrap(bytes), 1.0); // same as previous + sketch.update(ByteBuffer.wrap(bytes, 0, 1), 1.0); // slice of previous + final int[] ints = { 4 }; + sketch.update(ints, 1.0); + final long[] longs = { 5L }; + sketch.update(longs, 1.0); + sketch.update("a", 1.0); + Assert.assertEquals(sketch.getEstimate(), 7.0); + } + + /** Updates key 1 with 1.0, 0.7 and 0.8 and checks after each update that exactly one entry is retained and that its summary value is 1.0, 1.7 and 2.5 respectively (the values accumulate). */ @Test + public void doubleSummaryDefaultSumMode() { + final UpdatableSketch sketch = + new UpdatableSketchBuilder<>( + new DoubleSummaryFactory(mode)).build(); + { + sketch.update(1, 1.0); + Assert.assertEquals(sketch.getRetainedEntries(), 1); + final TupleSketchIterator it = sketch.iterator(); + Assert.assertTrue(it.next()); + Assert.assertEquals(it.getSummary().getValue(), 1.0); + Assert.assertFalse(it.next()); + } + { + sketch.update(1, 0.7); + Assert.assertEquals(sketch.getRetainedEntries(), 1); + final TupleSketchIterator it = sketch.iterator(); + Assert.assertTrue(it.next()); + Assert.assertEquals(it.getSummary().getValue(), 1.7); + Assert.assertFalse(it.next()); + } + { + sketch.update(1, 0.8); + Assert.assertEquals(sketch.getRetainedEntries(), 1); + final TupleSketchIterator it = sketch.iterator(); + Assert.assertTrue(it.next()); + Assert.assertEquals(it.getSummary().getValue(), 2.5); + Assert.assertFalse(it.next()); + } + } + + /** With DoubleSummary.Mode.Min, updates key 1 with 1.0, 0.7 and 0.8 and checks that the single retained summary value is 1.0, then 0.7, then still 0.7. */ @Test + public void doubleSummaryMinMode() { + final UpdatableSketch sketch = + new UpdatableSketchBuilder<>( + new DoubleSummaryFactory(DoubleSummary.Mode.Min)).build(); + { + sketch.update(1, 1.0); + Assert.assertEquals(sketch.getRetainedEntries(), 1); + final TupleSketchIterator it = sketch.iterator(); + Assert.assertTrue(it.next()); + Assert.assertEquals(it.getSummary().getValue(), 1.0); + Assert.assertFalse(it.next()); + } + { + sketch.update(1, 0.7); + Assert.assertEquals(sketch.getRetainedEntries(), 1); + final TupleSketchIterator it = sketch.iterator(); + Assert.assertTrue(it.next()); + 
Assert.assertEquals(it.getSummary().getValue(), 0.7); + Assert.assertFalse(it.next()); + } + { + sketch.update(1, 0.8); + Assert.assertEquals(sketch.getRetainedEntries(), 1); + final TupleSketchIterator it = sketch.iterator(); + Assert.assertTrue(it.next()); + Assert.assertEquals(it.getSummary().getValue(), 0.7); + Assert.assertFalse(it.next()); + } + } + /** With DoubleSummary.Mode.Max, updates key 1 with 1.0, 0.7 and 2.0 and checks that the single retained summary value is 1.0, then still 1.0, then 2.0. */ @Test + + public void doubleSummaryMaxMode() { + final UpdatableSketch sketch = + new UpdatableSketchBuilder<>( + new DoubleSummaryFactory(DoubleSummary.Mode.Max)).build(); + { + sketch.update(1, 1.0); + Assert.assertEquals(sketch.getRetainedEntries(), 1); + final TupleSketchIterator it = sketch.iterator(); + Assert.assertTrue(it.next()); + Assert.assertEquals(it.getSummary().getValue(), 1.0); + Assert.assertFalse(it.next()); + } + { + sketch.update(1, 0.7); + Assert.assertEquals(sketch.getRetainedEntries(), 1); + final TupleSketchIterator it = sketch.iterator(); + Assert.assertTrue(it.next()); + Assert.assertEquals(it.getSummary().getValue(), 1.0); + Assert.assertFalse(it.next()); + } + { + sketch.update(1, 2.0); + Assert.assertEquals(sketch.getRetainedEntries(), 1); + final TupleSketchIterator it = sketch.iterator(); + Assert.assertTrue(it.next()); + Assert.assertEquals(it.getSummary().getValue(), 2.0); + Assert.assertFalse(it.next()); + } + } + + /** Serializes a one-entry sketch with toByteArray(), heapifies it back as an updatable sketch, and checks that the entry and its summary value survive and that later updates only raise the estimate for a new key. */ @SuppressWarnings("deprecation") + @Test + public void serializeDeserializeExact() throws Exception { + final UpdatableSketch sketch1 = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); + sketch1.update(1, 1.0); + + final UpdatableSketch sketch2 = Sketches.heapifyUpdatableSketch( + MemorySegment.ofArray(sketch1.toByteArray()), + new DoubleSummaryDeserializer(), new DoubleSummaryFactory(mode)); + + Assert.assertEquals(sketch2.getEstimate(), 1.0); + final TupleSketchIterator it = sketch2.iterator(); + Assert.assertTrue(it.next()); + Assert.assertEquals(it.getSummary().getValue(), 1.0); + Assert.assertFalse(it.next()); + + // the same key, so still 
one unique + sketch2.update(1, 1.0); + Assert.assertEquals(sketch2.getEstimate(), 1.0); + + sketch2.update(2, 1.0); + Assert.assertEquals(sketch2.getEstimate(), 2.0); + } + + /** Updates keys 0..8191 ten times each with ResizeFactor.X1, trims, round-trips the bytes through heapifySketch, and checks that theta is preserved and that all 4096 retained summaries equal 10.0. NOTE(review): the estimate tolerance is 8192 * 0.99, far looser than the 1% used by neighbouring tests; confirm that it is intended. */ @SuppressWarnings("deprecation") + @Test + public void serializeDeserializeEstimationNoResizing() throws Exception { + final UpdatableSketch sketch1 = + new UpdatableSketchBuilder<>( + new DoubleSummaryFactory(mode)).setResizeFactor(ResizeFactor.X1).build(); + for (int j = 0; j < 10; j++) { + for (int i = 0; i < 8192; i++) { + sketch1.update(i, 1.0); + } + } + sketch1.trim(); + final byte[] bytes = sketch1.toByteArray(); + + //for binary testing + //TestUtil.writeBytesToFile(bytes, "UpdatableSketchWithDoubleSummary4K.sk"); + + final Sketch sketch2 = + Sketches.heapifySketch(MemorySegment.ofArray(bytes), new DoubleSummaryDeserializer()); + Assert.assertTrue(sketch2.isEstimationMode()); + Assert.assertEquals(sketch2.getEstimate(), 8192, 8192 * 0.99); + Assert.assertEquals(sketch1.getTheta(), sketch2.getTheta()); + final TupleSketchIterator it = sketch2.iterator(); + int count = 0; + while (it.next()) { + Assert.assertEquals(it.getSummary().getValue(), 10.0); + count++; + } + Assert.assertEquals(count, 4096); + } + + /** Round-trips a sketch built with 16384 nominal entries and sampling probability 0.5 after 16384 distinct updates, and checks the heapified copy is in estimation mode with an estimate within 1% of the input count, about half of the keys retained, and the same theta. */ @SuppressWarnings("deprecation") + @Test + public void serializeDeserializeSampling() throws Exception { + final int sketchSize = 16384; + final int numberOfUniques = sketchSize; + final UpdatableSketch sketch1 = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)) + .setNominalEntries(sketchSize).setSamplingProbability(0.5f).build(); + for (int i = 0; i < numberOfUniques; i++) { + sketch1.update(i, 1.0); + } + final Sketch sketch2 = Sketches.heapifySketch( + MemorySegment.ofArray(sketch1.toByteArray()), new DoubleSummaryDeserializer()); + Assert.assertTrue(sketch2.isEstimationMode()); + Assert.assertEquals(sketch2.getEstimate() / numberOfUniques, 1.0, 0.01); + Assert.assertEquals(sketch2.getRetainedEntries() / (double) numberOfUniques, 0.5, 0.01); + 
Assert.assertEquals(sketch1.getTheta(), sketch2.getTheta()); + } + + /** Smoke test: updates a sketch with empty byte[], int[] and long[] keys. It contains no assertions, so it only verifies that these calls complete without throwing. */ @Test + public void checkUpdatableSketch() { + final DoubleSummaryFactory dsumFact = new DoubleSummaryFactory(mode); + //DoubleSummary dsum = dsumFact.newSummary(); + final UpdatableSketchBuilder bldr = new UpdatableSketchBuilder<>(dsumFact); + final UpdatableSketch usk = bldr.build(); + final byte[] byteArr = new byte[0]; + usk.update(byteArr, 0.0); + final int[] intArr = new int[0]; + usk.update(intArr, 1.0); + final long[] longArr = new long[0]; + usk.update(longArr, 2.0); + } + + /** Configuring a builder with a sampling probability of 2 and building is expected to throw SketchesArgumentException. */ @Test(expectedExceptions = SketchesArgumentException.class) + public void invalidSamplingProbability() { + new UpdatableSketchBuilder<> + (new DoubleSummaryFactory(mode)).setSamplingProbability(2f).build(); + } + +} diff --git a/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleUnionTest.java b/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleUnionTest.java new file mode 100644 index 000000000..cfbd999af --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleUnionTest.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple2.adouble; + +import static org.testng.Assert.fail; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.theta2.UpdateSketch; +import org.apache.datasketches.theta2.UpdateSketchBuilder; +import org.apache.datasketches.tuple2.CompactSketch; +import org.apache.datasketches.tuple2.TupleSketchIterator; +import org.apache.datasketches.tuple2.Union; +import org.apache.datasketches.tuple2.UpdatableSketch; +import org.apache.datasketches.tuple2.UpdatableSketchBuilder; +import org.apache.datasketches.tuple2.adouble.DoubleSummary.Mode; +import org.testng.Assert; +import org.testng.annotations.Test; + +/** + * @author Lee Rhodes + */ +public class AdoubleUnionTest { + private final DoubleSummary.Mode mode = Mode.Sum; + + /** Unions a sketch built with sampling probability 0.01 that retained no entries and checks that the result has zero retained entries, is not empty, is in estimation mode, and has an estimate of 0.0. */ @Test + public void unionEmptySampling() { + final UpdatableSketch sketch = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).setSamplingProbability(0.01f).build(); + sketch.update(1, 1.0); + Assert.assertEquals(sketch.getRetainedEntries(), 0); // not retained due to low sampling probability + + final Union union = new Union<>(new DoubleSummarySetOperations(mode, mode)); + union.union(sketch); + final CompactSketch result = union.getResult(); + Assert.assertEquals(result.getRetainedEntries(), 0); + Assert.assertFalse(result.isEmpty()); + Assert.assertTrue(result.isEstimationMode()); + Assert.assertEquals(result.getEstimate(), 0.0); + } + + /** Unions two small sketches over keys 1..3 (key 2 appears in both) and checks the estimate is 3.0 with every summary summed to 3.0, then checks that union.reset() yields an empty result with theta 1.0. */ @Test + public void unionExactMode() { + final UpdatableSketch sketch1 = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); + sketch1.update(1, 1.0); + sketch1.update(1, 1.0); + sketch1.update(1, 1.0); + sketch1.update(2, 1.0); + + final UpdatableSketch sketch2 = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); + sketch2.update(2, 1.0); + sketch2.update(2, 1.0); + sketch2.update(3, 1.0); + sketch2.update(3, 1.0); + sketch2.update(3, 1.0); + + final Union union = new Union<>(new 
DoubleSummarySetOperations(mode, mode)); + union.union(sketch1); + union.union(sketch2); + CompactSketch result = union.getResult(); + Assert.assertEquals(result.getEstimate(), 3.0); + + final TupleSketchIterator it = result.iterator(); + Assert.assertTrue(it.next()); + Assert.assertEquals(it.getSummary().getValue(), 3.0); + Assert.assertTrue(it.next()); + Assert.assertEquals(it.getSummary().getValue(), 3.0); + Assert.assertTrue(it.next()); + Assert.assertEquals(it.getSummary().getValue(), 3.0); + Assert.assertFalse(it.next()); + + union.reset(); + result = union.getResult(); + Assert.assertEquals(result.getRetainedEntries(), 0); + Assert.assertTrue(result.isEmpty()); + Assert.assertFalse(result.isEstimationMode()); + Assert.assertEquals(result.getEstimate(), 0.0); + Assert.assertEquals(result.getLowerBound(1), 0.0); + Assert.assertEquals(result.getUpperBound(1), 0.0); + Assert.assertEquals(result.getTheta(), 1.0); + } + + /** Unions two sketches of 8192 keys each whose key ranges overlap on 4096 keys (12288 distinct keys in total) and checks the estimate is within 1% of 12288 and lies between getLowerBound(1) and getUpperBound(1). */ @Test + public void unionEstimationMode() { + int key = 0; + final UpdatableSketch sketch1 = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); + for (int i = 0; i < 8192; i++) { + sketch1.update(key++, 1.0); + } + + key -= 4096; // overlap half of the entries + final UpdatableSketch sketch2 = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); + for (int i = 0; i < 8192; i++) { + sketch2.update(key++, 1.0); + } + + final Union union = new Union<>(4096, new DoubleSummarySetOperations(mode, mode)); + union.union(sketch1); + union.union(sketch2); + final CompactSketch result = union.getResult(); + Assert.assertEquals(result.getEstimate(), 12288.0, 12288 * 0.01); + Assert.assertTrue(result.getLowerBound(1) <= result.getEstimate()); + Assert.assertTrue(result.getUpperBound(1) > result.getEstimate()); + } + + /** Unions a 1000-key sketch with a 20000-key sketch built with sampling probability 0.2, the key ranges overlapping on 500 keys (20500 distinct keys in total), and checks the estimate is within 1% of 20500. */ @Test + public void unionMixedMode() { + int key = 0; + final UpdatableSketch sketch1 = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); + for (int i = 0; i < 1000; i++) { + 
sketch1.update(key++, 1.0); + //System.out.println("theta1=" + sketch1.getTheta() + " " + sketch1.getThetaLong()); + } + + key -= 500; // overlap half of the entries + final UpdatableSketch sketch2 = + new UpdatableSketchBuilder<> + (new DoubleSummaryFactory(mode)).setSamplingProbability(0.2f).build(); + for (int i = 0; i < 20000; i++) { + sketch2.update(key++, 1.0); + //System.out.println("theta2=" + sketch2.getTheta() + " " + sketch2.getThetaLong()); + } + + final Union union = new Union<>(4096, new DoubleSummarySetOperations(mode, mode)); + union.union(sketch1); + union.union(sketch2); + final CompactSketch result = union.getResult(); + Assert.assertEquals(result.getEstimate(), 20500.0, 20500 * 0.01); + Assert.assertTrue(result.getLowerBound(1) <= result.getEstimate()); + Assert.assertTrue(result.getUpperBound(1) > result.getEstimate()); + } + + /** Exercises union(UpdateSketch, DoubleSummary): a call with both arguments null, and a call with only the summary null, must each throw SketchesArgumentException; with a ten-entry theta sketch and a fresh summary the union estimate must be 10.0. */ @Test + public void checkUnionUpdateWithTheta() { + final Union union = new Union<>(new DoubleSummarySetOperations(mode, mode)); + UpdateSketch usk = null; + DoubleSummary dsum = null; + + try { union.union(usk, dsum); fail(); } + catch (final SketchesArgumentException e) { } + + usk = new UpdateSketchBuilder().build(); + try { union.union(usk, dsum); fail(); } + catch (final SketchesArgumentException e) { } + + dsum = new DoubleSummaryFactory(mode).newSummary(); + for (int i = 0; i < 10; i++) { usk.update(i); } + union.union(usk, dsum); + Assert.assertEquals(union.getResult().getEstimate(), 10.0); + } + +} diff --git a/src/test/java/org/apache/datasketches/tuple2/adouble/FilterTest.java b/src/test/java/org/apache/datasketches/tuple2/adouble/FilterTest.java new file mode 100644 index 000000000..b69ef74d3 --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/adouble/FilterTest.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.adouble; + +import java.util.Random; + +import org.apache.datasketches.tuple2.Filter; +import org.apache.datasketches.tuple2.Sketch; +import org.apache.datasketches.tuple2.Sketches; +import org.apache.datasketches.tuple2.UpdatableSketch; +import org.apache.datasketches.tuple2.UpdatableSketchBuilder; +import org.apache.datasketches.tuple2.adouble.DoubleSummary.Mode; +import org.testng.Assert; +import org.testng.annotations.Test; + +/** Tests of the tuple2 Filter applied to sketches with DoubleSummary values; keys come from a Random seeded with 1 so runs are repeatable. */ public class FilterTest { + private static final int numberOfElements = 100; + private static final Random random = new Random(1);//deterministic for this class + private final DoubleSummary.Mode mode = Mode.Sum; + + /** Filtering an empty sketch with an accept-all predicate must yield an empty sketch with the same thetaLong and zero estimate and bounds. */ @Test + public void emptySketch() { + final Sketch sketch = Sketches.createEmptySketch(); + + final Filter filter = new Filter<>(o -> true); + + final Sketch filteredSketch = filter.filter(sketch); + + Assert.assertEquals(filteredSketch.getEstimate(), 0.0); + Assert.assertEquals(filteredSketch.getThetaLong(), sketch.getThetaLong()); + Assert.assertTrue(filteredSketch.isEmpty()); + Assert.assertEquals(filteredSketch.getLowerBound(1), 0.0); + Assert.assertEquals(filteredSketch.getUpperBound(1), 0.0); + } + + /** Filtering a null sketch must yield an empty sketch with thetaLong equal to Long.MAX_VALUE and zero estimate and bounds. */ @Test + public void nullSketch() { + final Filter filter = new Filter<>(o -> true); 
+ + final Sketch filteredSketch = filter.filter(null); + + Assert.assertEquals(filteredSketch.getEstimate(), 0.0); + Assert.assertEquals(filteredSketch.getThetaLong(), Long.MAX_VALUE); + Assert.assertTrue(filteredSketch.isEmpty()); + Assert.assertEquals(filteredSketch.getLowerBound(1), 0.0); + Assert.assertEquals(filteredSketch.getUpperBound(1), 0.0); + } + + /** An accept-all filter applied to a filled sketch must preserve its estimate, thetaLong and both bounds, and the result must not be empty. */ @Test + public void filledSketchShouldBehaveTheSame() { + final UpdatableSketch sketch = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); + + fillSketch(sketch, numberOfElements, 0.0); + + final Filter filter = new Filter<>(o -> true); + + final Sketch filteredSketch = filter.filter(sketch); + + Assert.assertEquals(filteredSketch.getEstimate(), sketch.getEstimate()); + Assert.assertEquals(filteredSketch.getThetaLong(), sketch.getThetaLong()); + Assert.assertFalse(filteredSketch.isEmpty()); + Assert.assertEquals(filteredSketch.getLowerBound(1), sketch.getLowerBound(1)); + Assert.assertEquals(filteredSketch.getUpperBound(1), sketch.getUpperBound(1)); + } + + /** Inserts numberOfElements entries with value 0.0 and twice as many with value 1.0, filters on summary value below 0.5, and expects an estimate of exactly numberOfElements with thetaLong unchanged. */ @Test + public void filledSketchShouldFilterOutElements() { + final UpdatableSketch sketch = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); + + fillSketch(sketch, numberOfElements, 0.0); + fillSketch(sketch, 2 * numberOfElements, 1.0); + + final Filter filter = new Filter<>(o -> o.getValue() < 0.5); + + final Sketch filteredSketch = filter.filter(sketch); + + Assert.assertEquals(filteredSketch.getEstimate(), numberOfElements); + Assert.assertEquals(filteredSketch.getThetaLong(), sketch.getThetaLong()); + Assert.assertFalse(filteredSketch.isEmpty()); + Assert.assertTrue(filteredSketch.getLowerBound(1) <= filteredSketch.getEstimate()); + Assert.assertTrue(filteredSketch.getUpperBound(1) >= filteredSketch.getEstimate()); + } + + /** Inserts 10000 entries with value 0.0 and 20000 with value 1.0, filters on summary value below 0.5, and expects an estimate within 5% of 10000 with thetaLong unchanged. */ @Test + public void filteringInEstimationMode() { + final UpdatableSketch sketch = + new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); + + final int n = 10000; + 
fillSketch(sketch, n, 0.0); + fillSketch(sketch, 2 * n, 1.0); + + final Filter filter = new Filter<>(o -> o.getValue() < 0.5); + + final Sketch filteredSketch = filter.filter(sketch); + + Assert.assertEquals(filteredSketch.getEstimate(), n, n * 0.05); + Assert.assertEquals(filteredSketch.getThetaLong(), sketch.getThetaLong()); + Assert.assertFalse(filteredSketch.isEmpty()); + Assert.assertTrue(filteredSketch.getLowerBound(1) <= filteredSketch.getEstimate()); + Assert.assertTrue(filteredSketch.getUpperBound(1) >= filteredSketch.getEstimate()); + } + + /** A sketch that is not empty yet retains no entries (one update at sampling probability 0.0001) must pass through an accept-all filter with its non-empty flag, estimate, thetaLong and bounds unchanged. */ @Test + public void nonEmptySketchWithNoEntries() { + final UpdatableSketch sketch = + new UpdatableSketchBuilder<>( + new DoubleSummaryFactory(mode)).setSamplingProbability(0.0001f).build(); + sketch.update(0, 0.0); + + Assert.assertFalse(sketch.isEmpty()); + Assert.assertEquals(sketch.getRetainedEntries(), 0); + + final Filter filter = new Filter<>(o -> true); + + final Sketch filteredSketch = filter.filter(sketch); + + Assert.assertFalse(filteredSketch.isEmpty()); + Assert.assertEquals(filteredSketch.getEstimate(), sketch.getEstimate()); + Assert.assertEquals(filteredSketch.getThetaLong(), sketch.getThetaLong()); + Assert.assertEquals(filteredSketch.getLowerBound(1), sketch.getLowerBound(1)); + Assert.assertEquals(filteredSketch.getUpperBound(1), sketch.getUpperBound(1)); + } + + /** Updates the sketch numberOfElements times, each time with the next long drawn from the shared seeded Random as key and sketchValue as the summary value. */ private static void fillSketch(final UpdatableSketch sketch, + final int numberOfElements, final Double sketchValue) { + + + for (int cont = 0; cont < numberOfElements; cont++) { + sketch.update(random.nextLong(), sketchValue); + } + } +} diff --git a/src/test/java/org/apache/datasketches/tuple2/aninteger/CornerCaseTupleSetOperationsTest.java b/src/test/java/org/apache/datasketches/tuple2/aninteger/CornerCaseTupleSetOperationsTest.java new file mode 100644 index 000000000..fabb9d03d --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/aninteger/CornerCaseTupleSetOperationsTest.java @@ -0,0 +1,630 @@ +/* + * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.aninteger; + +import static org.apache.datasketches.hash.MurmurHash3.hash; +import static org.testng.Assert.assertTrue; + +import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.theta2.UpdateSketch; +import org.apache.datasketches.theta2.UpdateSketchBuilder; +import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.tuple2.AnotB; +import org.apache.datasketches.tuple2.CompactSketch; +import org.apache.datasketches.tuple2.Intersection; +import org.apache.datasketches.tuple2.Union; +import org.testng.annotations.Test; + +/** Corner-case tests for tuple2 set operations; the table below orders the hashes of the test keys against the theta values for p = 0.5 and p = 0.1. */ public class CornerCaseTupleSetOperationsTest { + + /* Hash Values + * 9223372036854775807 Theta = 1.0 + * + * 6730918654704304314 hash(3L)[0] >>> 1 GT_MIDP_V + * 4611686018427387904 Theta for p = 0.5f = MIDP + * 2206043092153046979 hash(2L)[0] >>> 1 LT_MIDP_V + * 1498732507761423037 hash(5L)[0] >>> 1 LTLT_MIDP_V + * + * 1206007004353599230 hash(6L)[0] >>> 1 GT_LOWP_V + * 922337217429372928 Theta for p = 0.1f = LOWP + * 593872385995628096 hash(4L)[0] >>> 1 LT_LOWP_V + * 405753591161026837 hash(1L)[0] >>> 1 LTLT_LOWP_V + */ + + private static final long GT_MIDP_V = 3L; + 
private static final float MIDP_FLT = 0.5f; + + private static final long GT_LOWP_V = 6L; + private static final float LOWP_FLT = 0.1f; + private static final long LT_LOWP_V = 4L; + + + private IntegerSummary.Mode mode = IntegerSummary.Mode.Min; + private IntegerSummary integerSummary = new IntegerSummary(mode); + private IntegerSummarySetOperations setOperations = new IntegerSummarySetOperations(mode, mode); + + private enum SkType { + EMPTY, // { 1.0, 0, T} Bin: 101 Oct: 05 + EXACT, // { 1.0, >0, F} Bin: 110 Oct: 06, specify only value + ESTIMATION, // {<1.0, >0, F} Bin: 010 Oct: 02, specify only value + DEGENERATE // {<1.0, 0, F} Bin: 000 Oct: 0, specify p, value + } + + //================================= + + @Test + public void emptyEmpty() { + IntegerSketch tupleA = getTupleSketch(SkType.EMPTY, 0, 0); + IntegerSketch tupleB = getTupleSketch(SkType.EMPTY, 0, 0); + UpdateSketch thetaB = getThetaSketch(SkType.EMPTY, 0, 0); + final double expectedIntersectTheta = 1.0; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = true; + final double expectedAnotbTheta = 1.0; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = true; + final double expectedUnionTheta = 1.0; + final int expectedUnionCount = 0; + final boolean expectedUnionEmpty = true; + + checks(tupleA, tupleB, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void emptyExact() { + IntegerSketch tupleA = getTupleSketch(SkType.EMPTY, 0, 0); + IntegerSketch tupleB = getTupleSketch(SkType.EXACT, 0, GT_MIDP_V); + UpdateSketch thetaB = getThetaSketch(SkType.EXACT, 0, GT_MIDP_V); + final double expectedIntersectTheta = 1.0; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = true; + final double expectedAnotbTheta = 1.0; + final int expectedAnotbCount = 0; + 
final boolean expectedAnotbEmpty = true; + final double expectedUnionTheta = 1.0; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(tupleA, tupleB, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void EmptyDegenerate() { + IntegerSketch tupleA = getTupleSketch(SkType.EMPTY, 0, 0); + IntegerSketch tupleB = getTupleSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_V); + UpdateSketch thetaB = getThetaSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_V); + final double expectedIntersectTheta = 1.0; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = true; + final double expectedAnotbTheta = 1.0; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = true; + final double expectedUnionTheta = LOWP_FLT; + final int expectedUnionCount = 0; + final boolean expectedUnionEmpty = false; + + checks(tupleA, tupleB, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void emptyEstimation() { + IntegerSketch tupleA = getTupleSketch(SkType.EMPTY, 0, 0); + IntegerSketch tupleB = getTupleSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_V); + UpdateSketch thetaB = getThetaSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_V); + final double expectedIntersectTheta = 1.0; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = true; + final double expectedAnotbTheta = 1.0; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = true; + final double expectedUnionTheta = LOWP_FLT; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(tupleA, tupleB, thetaB, + expectedIntersectTheta, 
expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + //================================= + + @Test + public void exactEmpty() { + IntegerSketch tupleA = getTupleSketch(SkType.EXACT, 0, GT_MIDP_V); + IntegerSketch tupleB = getTupleSketch(SkType.EMPTY, 0, 0); + UpdateSketch thetaB = getThetaSketch(SkType.EMPTY, 0, 0); + final double expectedIntersectTheta = 1.0; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = true; + final double expectedAnotbTheta = 1.0; + final int expectedAnotbCount = 1; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = 1.0; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(tupleA, tupleB, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void exactExact() { + IntegerSketch tupleA = getTupleSketch(SkType.EXACT, 0, GT_MIDP_V); + IntegerSketch tupleB = getTupleSketch(SkType.EXACT, 0, GT_MIDP_V); + UpdateSketch thetaB = getThetaSketch(SkType.EXACT, 0, GT_MIDP_V); + final double expectedIntersectTheta = 1.0; + final int expectedIntersectCount = 1; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = 1.0; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = true; + final double expectedUnionTheta = 1.0; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(tupleA, tupleB, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void exactDegenerate() { + IntegerSketch tupleA = 
getTupleSketch(SkType.EXACT, 0, LT_LOWP_V); + IntegerSketch tupleB = getTupleSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_V); //entries = 0 + UpdateSketch thetaB = getThetaSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_V); + final double expectedIntersectTheta = LOWP_FLT; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = LOWP_FLT; + final int expectedAnotbCount = 1; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP_FLT; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(tupleA, tupleB, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void exactEstimation() { + IntegerSketch tupleA = getTupleSketch(SkType.EXACT, 0, LT_LOWP_V); + IntegerSketch tupleB = getTupleSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_V); + UpdateSketch thetaB = getThetaSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_V); + final double expectedIntersectTheta = LOWP_FLT; + final int expectedIntersectCount = 1; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = LOWP_FLT; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP_FLT; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(tupleA, tupleB, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + //================================= + + @Test + public void estimationEmpty() { + IntegerSketch tupleA = getTupleSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_V); + IntegerSketch tupleB = getTupleSketch(SkType.EMPTY, 0, 0); + UpdateSketch thetaB = 
getThetaSketch(SkType.EMPTY, 0, 0); + final double expectedIntersectTheta = 1.0; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = true; + final double expectedAnotbTheta = LOWP_FLT; + final int expectedAnotbCount = 1; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP_FLT; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(tupleA, tupleB, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void estimationExact() { + IntegerSketch tupleA = getTupleSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_V); + IntegerSketch tupleB = getTupleSketch(SkType.EXACT, 0, LT_LOWP_V); + UpdateSketch thetaB = getThetaSketch(SkType.EXACT, 0, LT_LOWP_V); + final double expectedIntersectTheta = LOWP_FLT; + final int expectedIntersectCount = 1; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = LOWP_FLT; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP_FLT; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(tupleA, tupleB, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void estimationDegenerate() { + IntegerSketch tupleA = getTupleSketch(SkType.ESTIMATION, MIDP_FLT, LT_LOWP_V); + IntegerSketch tupleB = getTupleSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_V); + UpdateSketch thetaB = getThetaSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_V); + final double expectedIntersectTheta = LOWP_FLT; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = false; + final double 
expectedAnotbTheta = LOWP_FLT; + final int expectedAnotbCount = 1; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP_FLT; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(tupleA, tupleB, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void estimationEstimation() { + IntegerSketch tupleA = getTupleSketch(SkType.ESTIMATION, MIDP_FLT, LT_LOWP_V); + IntegerSketch tupleB = getTupleSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_V); + UpdateSketch thetaB = getThetaSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_V); + final double expectedIntersectTheta = LOWP_FLT; + final int expectedIntersectCount = 1; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = LOWP_FLT; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP_FLT; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(tupleA, tupleB, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + //================================= + + @Test + public void degenerateEmpty() { + IntegerSketch tupleA = getTupleSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_V); //entries = 0 + IntegerSketch tupleB = getTupleSketch(SkType.EMPTY, 0, 0); + UpdateSketch thetaB = getThetaSketch(SkType.EMPTY, 0, 0); + final double expectedIntersectTheta = 1.0; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = true; + final double expectedAnotbTheta = LOWP_FLT; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP_FLT; + 
final int expectedUnionCount = 0; + final boolean expectedUnionEmpty = false; + + checks(tupleA, tupleB, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void degenerateExact() { + IntegerSketch tupleA = getTupleSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_V); //entries = 0 + IntegerSketch tupleB = getTupleSketch(SkType.EXACT, 0, LT_LOWP_V); + UpdateSketch thetaB = getThetaSketch(SkType.EXACT, 0, LT_LOWP_V); + final double expectedIntersectTheta = LOWP_FLT; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = LOWP_FLT; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP_FLT; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(tupleA, tupleB, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void degenerateDegenerate() { + IntegerSketch tupleA = getTupleSketch(SkType.DEGENERATE, MIDP_FLT, GT_MIDP_V); //entries = 0 + IntegerSketch tupleB = getTupleSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_V); + UpdateSketch thetaB = getThetaSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_V); + final double expectedIntersectTheta = LOWP_FLT; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = LOWP_FLT; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP_FLT; + final int expectedUnionCount = 0; + final boolean expectedUnionEmpty = false; + + checks(tupleA, tupleB, thetaB, + expectedIntersectTheta, expectedIntersectCount, 
expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void degenerateEstimation() { + IntegerSketch tupleA = getTupleSketch(SkType.DEGENERATE, MIDP_FLT, GT_MIDP_V); //entries = 0 + IntegerSketch tupleB = getTupleSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_V); + UpdateSketch thetaB = getThetaSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_V); + final double expectedIntersectTheta = LOWP_FLT; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = LOWP_FLT; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP_FLT; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(tupleA, tupleB, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + //================================= + //================================= + + private void checks( + IntegerSketch tupleA, + IntegerSketch tupleB, + UpdateSketch thetaB, + double expectedIntersectTheta, + int expectedIntersectCount, + boolean expectedIntersectEmpty, + double expectedAnotbTheta, + int expectedAnotbCount, + boolean expectedAnotbEmpty, + double expectedUnionTheta, + int expectedUnionCount, + boolean expectedUnionEmpty) { + CompactSketch csk; + Intersection inter = new Intersection<>(setOperations); + AnotB anotb = new AnotB<>(); + Union union = new Union<>(16, setOperations); + + //Intersection Stateless Tuple, Tuple Updatable + csk = inter.intersect(tupleA, tupleB); + checkResult("Intersect Stateless Tuple, Tuple", csk, expectedIntersectTheta, expectedIntersectCount, + expectedIntersectEmpty); + //Intersection Stateless Tuple, Tuple Compact + csk = inter.intersect(tupleA.compact(), 
tupleB.compact()); + checkResult("Intersect Stateless Tuple, Tuple", csk, expectedIntersectTheta, expectedIntersectCount, + expectedIntersectEmpty); + //Intersection Stateless Tuple, Theta Updatable + csk = inter.intersect(tupleA, thetaB, integerSummary); //Tuple, Theta + checkResult("Intersect Stateless Tuple, Theta", csk, expectedIntersectTheta, expectedIntersectCount, + expectedIntersectEmpty); + //Intersection Stateless Tuple, Theta Compact + csk = inter.intersect(tupleA.compact(), thetaB.compact(), integerSummary); + checkResult("Intersect Stateless Tuple, Theta", csk, expectedIntersectTheta, expectedIntersectCount, + expectedIntersectEmpty); + + //AnotB Stateless Tuple, Tuple Updatable + csk = AnotB.aNotB(tupleA, tupleB); + checkResult("AnotB Stateless Tuple, Tuple", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); + //AnotB Stateless Tuple, Tuple Compact + csk = AnotB.aNotB(tupleA.compact(), tupleB.compact()); + checkResult("AnotB Stateless Tuple, Tuple", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); + //AnotB Stateless Tuple, Theta Updatable + csk = AnotB.aNotB(tupleA, thetaB); + checkResult("AnotB Stateless Tuple, Theta", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); + //AnotB Stateless Tuple, Theta Compact + csk = AnotB.aNotB(tupleA.compact(), thetaB.compact()); + checkResult("AnotB Stateless Tuple, Theta", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); + + //AnotB Stateful Tuple, Tuple Updatable + anotb.setA(tupleA); + anotb.notB(tupleB); + csk = anotb.getResult(true); + checkResult("AnotB Stateful Tuple, Tuple", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); + //AnotB Stateful Tuple, Tuple Compact + anotb.setA(tupleA.compact()); + anotb.notB(tupleB.compact()); + csk = anotb.getResult(true); + checkResult("AnotB Stateful Tuple, Tuple", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); + //AnotB Stateful Tuple, Theta Updatable + 
anotb.setA(tupleA); + anotb.notB(thetaB); + csk = anotb.getResult(true); + checkResult("AnotB Stateful Tuple, Theta", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); + //AnotB Stateful Tuple, Theta Compact + anotb.setA(tupleA.compact()); + anotb.notB(thetaB.compact()); + csk = anotb.getResult(true); + checkResult("AnotB Stateful Tuple, Theta", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); + + //Union Stateless Tuple, Tuple Updatable + csk = union.union(tupleA, tupleB); + checkResult("Union Stateless Tuple, Tuple", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + //Union Stateless Tuple, Tuple Compact + csk = union.union(tupleA.compact(), tupleB.compact()); + checkResult("Union Stateless Tuple, Tuple", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + //Union Stateless Tuple, Theta Updatable + csk = union.union(tupleA, thetaB, integerSummary); + checkResult("Union Stateless Tuple, Theta", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + //Union Stateless Tuple, Theta Compact + csk = union.union(tupleA.compact(), thetaB.compact(), integerSummary); + checkResult("Union Stateless Tuple, Theta", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + + //Union Stateful Tuple, Tuple Updatable + union.union(tupleA); + union.union(tupleB); + csk = union.getResult(true); + checkResult("Union Stateful Tuple, Tuple", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + //Union Stateful Tuple, Tuple Compact + union.union(tupleA.compact()); + union.union(tupleB.compact()); + csk = union.getResult(true); + checkResult("Union Stateful Tuple, Tuple", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + //Union Stateful Tuple, Theta Updatable + union.union(tupleA); + union.union(thetaB, integerSummary); + csk = union.getResult(true); + checkResult("Union Stateful Tuple, Theta", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); +
//Union Stateful Tuple, Theta Compact + union.union(tupleA.compact()); + union.union(thetaB.compact(), integerSummary); + csk = union.getResult(true); + checkResult("Union Stateful Tuple, Theta", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + + } + + private static void checkResult( + String comment, + CompactSketch csk, + double expectedTheta, + int expectedEntries, + boolean expectedEmpty) { + double actualTheta = csk.getTheta(); + int actualEntries = csk.getRetainedEntries(); + boolean actualEmpty = csk.isEmpty(); + + boolean thetaOk = actualTheta == expectedTheta; + boolean entriesOk = actualEntries == expectedEntries; + boolean emptyOk = actualEmpty == expectedEmpty; + if (!thetaOk || !entriesOk || !emptyOk) { + StringBuilder sb = new StringBuilder(); + sb.append(comment + ": "); + if (!thetaOk) { sb.append("Theta: expected " + expectedTheta + ", got " + actualTheta + "; "); } + if (!entriesOk) { sb.append("Entries: expected " + expectedEntries + ", got " + actualEntries + "; "); } + if (!emptyOk) { sb.append("Empty: expected " + expectedEmpty + ", got " + actualEmpty + "."); } + throw new IllegalArgumentException(sb.toString()); + } + } + + private static IntegerSketch getTupleSketch( + SkType skType, + float p, + long updateKey) { + + IntegerSketch sk; + switch(skType) { + case EMPTY: { // { 1.0, 0, T} p and value are not used + sk = new IntegerSketch(4, 2, 1.0f, IntegerSummary.Mode.Min); + break; + } + case EXACT: { // { 1.0, >0, F} p is not used + sk = new IntegerSketch(4, 2, 1.0f, IntegerSummary.Mode.Min); + sk.update(updateKey, 1); + break; + } + case ESTIMATION: { // {<1.0, >0, F} + checkValidUpdate(p, updateKey); + sk = new IntegerSketch(4, 2, p, IntegerSummary.Mode.Min); + sk.update(updateKey, 1); + break; + } + case DEGENERATE: { // {<1.0, 0, F} + checkInvalidUpdate(p, updateKey); + sk = new IntegerSketch(4, 2, p, IntegerSummary.Mode.Min); + sk.update(updateKey, 1); // > theta + break; + } + + default: { return null; } // should
not happen + } + return sk; + } + + //NOTE: p and value arguments are used for every case + private static UpdateSketch getThetaSketch( + SkType skType, + float p, + long updateKey) { + UpdateSketchBuilder bldr = new UpdateSketchBuilder(); + bldr.setLogNominalEntries(4); + bldr.setResizeFactor(ResizeFactor.X4); + + UpdateSketch sk; + switch(skType) { + case EMPTY: { // { 1.0, 0, T} p and value are not used + sk = bldr.build(); + break; + } + case EXACT: { // { 1.0, >0, F} p is not used + sk = bldr.build(); + sk.update(updateKey); + break; + } + case ESTIMATION: { // {<1.0, >0, F} + checkValidUpdate(p, updateKey); + bldr.setP(p); + sk = bldr.build(); + sk.update(updateKey); + break; + } + case DEGENERATE: { // {<1.0, 0, F} + checkInvalidUpdate(p, updateKey); + bldr.setP(p); + sk = bldr.build(); + sk.update(updateKey); + break; + } + + default: { return null; } // should not happen + } + return sk; + } + + private static void checkValidUpdate(float p, long updateKey) { + assertTrue( getLongHash(updateKey) < (long) (p * Long.MAX_VALUE)); + } + + private static void checkInvalidUpdate(float p, long updateKey) { + assertTrue( getLongHash(updateKey) > (long) (p * Long.MAX_VALUE)); + } + + static long getLongHash(long v) { + return (hash(v, ThetaUtil.DEFAULT_UPDATE_SEED)[0]) >>> 1; + } +} diff --git a/src/test/java/org/apache/datasketches/tuple2/aninteger/EngagementTest.java b/src/test/java/org/apache/datasketches/tuple2/aninteger/EngagementTest.java new file mode 100644 index 000000000..30ba32a1f --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/aninteger/EngagementTest.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.aninteger; + +import static java.lang.Math.exp; +import static java.lang.Math.log; +import static java.lang.Math.round; +import static org.apache.datasketches.tuple2.aninteger.IntegerSummary.Mode.AlwaysOne; +import static org.apache.datasketches.tuple2.aninteger.IntegerSummary.Mode.Sum; + +import org.apache.datasketches.tuple2.CompactSketch; +import org.apache.datasketches.tuple2.TupleSketchIterator; +import org.apache.datasketches.tuple2.Union; +import org.testng.annotations.Test; + +/** + * @author Lee Rhodes + */ +public class EngagementTest { + public static final int numStdDev = 2; + + @Test + public void computeEngagementHistogram() { + final int lgK = 8; //Using a larger sketch >= 9 will produce exact results for this little example + final int K = 1 << lgK; + final int days = 30; + int v = 0; + final IntegerSketch[] skArr = new IntegerSketch[days]; + for (int i = 0; i < days; i++) { + skArr[i] = new IntegerSketch(lgK, AlwaysOne); + } + for (int i = 0; i <= days; i++) { //31 generating indices for symmetry + final int numIds = numIDs(days, i); + final int numDays = numDays(days, i); + final int myV = v++; + for (int d = 0; d < numDays; d++) { + for (int id = 0; id < numIds; id++) { + skArr[d].update(myV + id, 1); + } + } + v += numIds; + } + unionOps(K, Sum, skArr); + } + + private static int numIDs(final int totalDays, final int 
index) { + final double d = totalDays; + final double i = index; + return (int)round(exp(i * log(d) / d)); + } + + private static int numDays(final int totalDays, final int index) { + final double d = totalDays; + final double i = index; + return (int)round(exp((d - i) * log(d) / d)); + } + + private static void unionOps(final int K, final IntegerSummary.Mode mode, final IntegerSketch ... sketches) { + final IntegerSummarySetOperations setOps = new IntegerSummarySetOperations(mode, mode); + final Union union = new Union<>(K, setOps); + final int len = sketches.length; + + for (final IntegerSketch isk : sketches) { + union.union(isk); + } + final CompactSketch result = union.getResult(); + final TupleSketchIterator itr = result.iterator(); + + final int[] numDaysArr = new int[len + 1]; //zero index is ignored + + while (itr.next()) { + //For each unique visitor from the result sketch, get the # days visited + final int numDaysVisited = itr.getSummary().getValue(); + //increment the number of visitors that visited numDays + numDaysArr[numDaysVisited]++; //values range from 1 to 30 + } + + println("\nEngagement Histogram:"); + println("Number of Unique Visitors by Number of Days Visited"); + printf("%12s%12s%12s%12s\n","Days Visited", "Estimate", "LB", "UB"); + int sumVisits = 0; + final double theta = result.getTheta(); + for (int i = 0; i < numDaysArr.length; i++) { + final int visitorsAtDaysVisited = numDaysArr[i]; + if (visitorsAtDaysVisited == 0) { continue; } + sumVisits += visitorsAtDaysVisited * i; + + final double estVisitorsAtDaysVisited = visitorsAtDaysVisited / theta; + final double lbVisitorsAtDaysVisited = result.getLowerBound(numStdDev, visitorsAtDaysVisited); + final double ubVisitorsAtDaysVisited = result.getUpperBound(numStdDev, visitorsAtDaysVisited); + + printf("%12d%12.0f%12.0f%12.0f\n", + i, estVisitorsAtDaysVisited, lbVisitorsAtDaysVisited, ubVisitorsAtDaysVisited); + } + + //The estimate and bounds of the total number of visitors comes directly 
from the sketch. + final double visitors = result.getEstimate(); + final double lbVisitors = result.getLowerBound(numStdDev); + final double ubVisitors = result.getUpperBound(numStdDev); + printf("\n%12s%12s%12s%12s\n","Totals", "Estimate", "LB", "UB"); + printf("%12s%12.0f%12.0f%12.0f\n", "Visitors", visitors, lbVisitors, ubVisitors); + + //The total number of visits, however, is a scaled metric and takes advantage of the fact that + //the retained entries in the sketch are a uniform random sample of all unique visitors, and + //the rest of the unique users will likely behave in the same way. + final double estVisits = sumVisits / theta; + final double lbVisits = estVisits * lbVisitors / visitors; + final double ubVisits = estVisits * ubVisitors / visitors; + printf("%12s%12.0f%12.0f%12.0f\n\n", "Visits", estVisits, lbVisits, ubVisits); + } + + /** + * @param o object to print + */ + private static void println(final Object o) { + printf("%s\n", o.toString()); + } + + /** + * @param fmt format + * @param args arguments + */ + private static void printf(final String fmt, final Object ... args) { + //System.out.printf(fmt, args); //Enable/Disable printing here + } +} diff --git a/src/test/java/org/apache/datasketches/tuple2/aninteger/IntegerSketchTest.java b/src/test/java/org/apache/datasketches/tuple2/aninteger/IntegerSketchTest.java new file mode 100644 index 000000000..676d82556 --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/aninteger/IntegerSketchTest.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.aninteger; + +import static org.testng.Assert.assertEquals; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.tuple2.AnotB; +import org.apache.datasketches.tuple2.CompactSketch; +import org.apache.datasketches.tuple2.Intersection; +import org.testng.annotations.Test; + +/** + * @author Lee Rhodes + */ +public class IntegerSketchTest { + + @SuppressWarnings("deprecation") + @Test + public void serDeTest() { + final int lgK = 12; + final int K = 1 << lgK; + final IntegerSummary.Mode a1Mode = IntegerSummary.Mode.AlwaysOne; + final IntegerSketch a1Sk = new IntegerSketch(lgK, a1Mode); + final int m = 2 * K; + for (int i = 0; i < m; i++) { + a1Sk.update(i, 1); + } + final double est1 = a1Sk.getEstimate(); + final MemorySegment seg = MemorySegment.ofArray(a1Sk.toByteArray()); + final IntegerSketch a1Sk2 = new IntegerSketch(seg, a1Mode); + final double est2 = a1Sk2.getEstimate(); + assertEquals(est1, est2); + } + + @Test + public void intersectTest() { + final int lgK = 12; + final int K = 1 << lgK; + final IntegerSummary.Mode a1Mode = IntegerSummary.Mode.AlwaysOne; + final IntegerSketch a1Sk1 = new IntegerSketch(lgK, a1Mode); + final IntegerSketch a1Sk2 = new IntegerSketch(lgK, a1Mode); + final int m = 2 * K; + for (int i = 0; i < m; i++) { + a1Sk1.update(i, 1); + a1Sk2.update(i + m/2, 1); + } + final Intersection inter = + new Intersection<>(new IntegerSummarySetOperations(a1Mode, a1Mode)); + inter.intersect(a1Sk1); + inter.intersect(a1Sk2); + final CompactSketch csk = 
inter.getResult(); + assertEquals(csk.getEstimate(), K * 1.0, K * .03); + } + + @Test + public void aNotBTest() { + final int lgK = 4; + final int u = 5; + final IntegerSummary.Mode a1Mode = IntegerSummary.Mode.AlwaysOne; + final IntegerSketch a1Sk1 = new IntegerSketch(lgK, a1Mode); + final IntegerSketch a1Sk2 = null;//new IntegerSketch(lgK, a1Mode); + final AnotB anotb = new AnotB<>(); + for (int i = 0; i < u; i++) { + a1Sk1.update(i, 1); + } + anotb.setA(a1Sk1); + anotb.notB(a1Sk2); + final CompactSketch cSk = anotb.getResult(true); + assertEquals((int)cSk.getEstimate(), u); + } + + @Test + public void checkMinMaxMode() { + final int lgK = 12; + final int K = 1 << lgK; + final IntegerSummary.Mode minMode = IntegerSummary.Mode.Min; + final IntegerSummary.Mode maxMode = IntegerSummary.Mode.Max; + final IntegerSketch a1Sk1 = new IntegerSketch(lgK, minMode); + final IntegerSketch a1Sk2 = new IntegerSketch(lgK, maxMode); + final int m = K / 2; + for (int key = 0; key < m; key++) { + a1Sk1.update(key, 1); + a1Sk1.update(key, 0); + a1Sk1.update(key, 2); + a1Sk2.update(key + m/2, 1); + a1Sk2.update(key + m/2, 0); + a1Sk2.update(key + m/2, 2); + } + final double est1 = a1Sk1.getEstimate(); + final double est2 = a1Sk2.getEstimate(); + assertEquals(est1, est2); + } + + @Test + public void checkStringKey() { + final int lgK = 12; + final int K = 1 << lgK; + final IntegerSummary.Mode a1Mode = IntegerSummary.Mode.AlwaysOne; + final IntegerSketch a1Sk1 = new IntegerSketch(lgK, a1Mode); + final int m = K / 2; + for (int key = 0; key < m; key++) { + a1Sk1.update(Integer.toHexString(key), 1); + } + assertEquals(a1Sk1.getEstimate(), K / 2.0); + } + + /** + * @param o object to print + */ + static void println(final Object o) { + //System.out.println(o.toString()); //Disable + } + + /** + * @param fmt format + * @param args arguments + */ + static void printf(final String fmt, final Object ... 
args) { + //System.out.printf(fmt, args); //Disable + } +} diff --git a/src/test/java/org/apache/datasketches/tuple2/aninteger/MikhailsBugTupleTest.java b/src/test/java/org/apache/datasketches/tuple2/aninteger/MikhailsBugTupleTest.java new file mode 100644 index 000000000..edf6c6691 --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/aninteger/MikhailsBugTupleTest.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.aninteger; + +import org.apache.datasketches.tuple2.AnotB; +import org.apache.datasketches.tuple2.CompactSketch; +import org.apache.datasketches.tuple2.Intersection; +import org.testng.annotations.Test; + +/** + * Issue #368, from Mikhail Lavrinovich 12 OCT 2021 + * The failure was AnotB(estimating {<1.0,1,F}, Intersect(estimating{<1.0,1,F}, newDegenerative{<1.0,0,T}, + * Which should be equal to AnotB(estimating{<1.0,1,F}, new{1.0,0,T} = estimating{<1.0, 1, F}. The AnotB + * threw a null pointer exception because it was not properly handling sketches with zero entries. 
+ */ +public class MikhailsBugTupleTest { + + @Test + public void mikhailsBug() { + IntegerSketch x = new IntegerSketch(12, 2, 0.1f, IntegerSummary.Mode.Min); + IntegerSketch y = new IntegerSketch(12, 2, 0.1f, IntegerSummary.Mode.Min); + x.update(1L, 1); + IntegerSummarySetOperations setOperations = + new IntegerSummarySetOperations(IntegerSummary.Mode.Min, IntegerSummary.Mode.Min); + Intersection intersection = new Intersection<>(setOperations); + CompactSketch intersect = intersection.intersect(x, y); + AnotB.aNotB(x, intersect); // NPE was here + } + + //@Test + public void withTuple() { + IntegerSketch x = new IntegerSketch(12, 2, 0.1f, IntegerSummary.Mode.Min); + IntegerSketch y = new IntegerSketch(12, 2, 0.1f, IntegerSummary.Mode.Min); + x.update(1L, 1); + println("Tuple x: Estimating {<1.0,1,F}"); + println(x.toString()); + println("Tuple y: NewDegenerative {<1.0,0,T}"); + println(y.toString()); + IntegerSummarySetOperations setOperations = + new IntegerSummarySetOperations(IntegerSummary.Mode.Min, IntegerSummary.Mode.Min); + Intersection intersection = new Intersection<>(setOperations); + CompactSketch intersect = intersection.intersect(x, y); + println("Tuple Intersect(Estimating, NewDegen) = new {1.0, 0, T}"); + println(intersect.toString()); + CompactSketch csk = AnotB.aNotB(x, intersect); + println("Tuple AnotB(Estimating, New) = estimating {<1.0, 1, F}"); + println(csk.toString()); + } + + /** + * Println an object + * @param o object to print + */ + private static void println(Object o) { + //System.out.println(o.toString()); //disable here + } +} \ No newline at end of file diff --git a/src/test/java/org/apache/datasketches/tuple2/aninteger/ParameterLeakageTest.java b/src/test/java/org/apache/datasketches/tuple2/aninteger/ParameterLeakageTest.java new file mode 100644 index 000000000..12d0e52f6 --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/aninteger/ParameterLeakageTest.java @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.aninteger; + +import static org.apache.datasketches.tuple2.aninteger.IntegerSummary.Mode.Min; +import static org.apache.datasketches.tuple2.aninteger.IntegerSummary.Mode.Sum; + +import org.apache.datasketches.tuple2.AnotB; +import org.apache.datasketches.tuple2.CompactSketch; +import org.apache.datasketches.tuple2.Intersection; +//import org.apache.datasketches.tuple2.UpdatableSketch; +import org.apache.datasketches.tuple2.Sketch; +import org.apache.datasketches.tuple2.TupleSketchIterator; +import org.apache.datasketches.tuple2.Union; +import org.testng.annotations.Test; + +/** + * These tests check to make sure that summary objects, which are mutable and created + * as needed internally within a tuple sketch, never leak into the result sketch. 
+ * + * @author Lee Rhodes + * + */ +public class ParameterLeakageTest { + IntegerSummarySetOperations setOps = new IntegerSummarySetOperations(Sum, Min); + + @Test + public void checkUnion() { + IntegerSketch sk1 = new IntegerSketch(4, Sum); + sk1.update(1, 1); + IntegerSummary sk1sum = captureSummaries(sk1)[0]; + + IntegerSketch sk2 = new IntegerSketch(4, Sum); + sk2.update(2, 1); + IntegerSummary sk2sum = captureSummaries(sk2)[0]; + + + Union union = new Union<>(setOps); + + CompactSketch csk = union.union(sk1, sk2); + IntegerSummary[] summaries = captureSummaries(csk); + println("Union Count: " + summaries.length); + + for (IntegerSummary isum : summaries) { + if ((isum == sk1sum) || (isum == sk2sum)) { + throw new IllegalArgumentException("Parameter Leakage"); + } + } + } + + @Test + public void checkIntersectStateless() { + IntegerSketch sk1 = new IntegerSketch(4, Sum); + sk1.update(1, 1); + IntegerSummary sk1sum = captureSummaries(sk1)[0]; + + IntegerSketch sk2 = new IntegerSketch(4, Sum); + sk2.update(1, 1); + IntegerSummary sk2sum = captureSummaries(sk2)[0]; + + Intersection intersect = new Intersection<>(setOps); + + CompactSketch csk = intersect.intersect(sk1, sk2); + IntegerSummary[] summaries = captureSummaries(csk); + println("Intersect Stateless Count: " + summaries.length); + + for (IntegerSummary isum : summaries) { + if ((isum == sk1sum) || (isum == sk2sum)) { + throw new IllegalArgumentException("Parameter Leakage"); + } + } + } + + @Test + public void checkIntersectStateful() { + IntegerSketch sk1 = new IntegerSketch(4, Sum); + sk1.update(1, 1); + IntegerSummary sk1sum = captureSummaries(sk1)[0]; + + IntegerSketch sk2 = new IntegerSketch(4, Sum); + sk2.update(1, 1); + IntegerSummary sk2sum = captureSummaries(sk2)[0]; + + Intersection intersect = new Intersection<>(setOps); + + intersect.intersect(sk1); + intersect.intersect(sk2); + CompactSketch csk = intersect.getResult(); + + IntegerSummary[] summaries = captureSummaries(csk); + 
println("Intersect Stateful Count: " + summaries.length); + + for (IntegerSummary isum : summaries) { + if ((isum == sk1sum) || (isum == sk2sum)) { + throw new IllegalArgumentException("Parameter Leakage"); + } + } + } + + @Test + public void checkAnotbStateless() { + IntegerSketch sk1 = new IntegerSketch(4, Sum); + sk1.update(1, 1); + CompactSketch csk1 = sk1.compact(); + IntegerSummary sk1sum = captureSummaries(csk1)[0]; + + IntegerSketch sk2 = new IntegerSketch(4, Sum); //EMPTY + + CompactSketch csk = AnotB.aNotB(csk1, sk2); + IntegerSummary[] summaries = captureSummaries(csk); + println("AnotB Stateless Count: " + summaries.length); + + for (IntegerSummary isum : summaries) { + if (isum == sk1sum) { + throw new IllegalArgumentException("Parameter Leakage"); + } + } + } + + @Test + public void checkAnotbStateful() { + IntegerSketch sk1 = new IntegerSketch(4, Sum); + sk1.update(1, 1); + CompactSketch csk1 = sk1.compact(); + IntegerSummary sk1sum = captureSummaries(csk1)[0]; + + IntegerSketch sk2 = new IntegerSketch(4, Sum); //EMPTY + + AnotB anotb = new AnotB<>(); + + anotb.setA(csk1); + anotb.notB(sk2); + + CompactSketch csk = anotb.getResult(true); + IntegerSummary[] summaries = captureSummaries(csk); + println("AnotB Stateful Count: " + summaries.length); + + for (IntegerSummary isum : summaries) { + if (isum == sk1sum) { + throw new IllegalArgumentException("Parameter Leakage"); + } + } + } + + private static IntegerSummary[] captureSummaries(Sketch sk) { + int entries = sk.getRetainedEntries(); + IntegerSummary[] intSumArr = new IntegerSummary[entries]; + int cnt = 0; + TupleSketchIterator it = sk.iterator(); + while (it.next()) { + intSumArr[cnt] = it.getSummary(); + cnt++; + } + return intSumArr; + } + + /** + * @param o Object to print + */ + static void println(Object o) { + //System.out.println(o.toString()); //disable + } +} diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/AodSketchCrossLanguageTest.java 
b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/AodSketchCrossLanguageTest.java new file mode 100644 index 000000000..067375ec7 --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/AodSketchCrossLanguageTest.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.arrayofdoubles; + +import static org.apache.datasketches.common.TestUtil.CHECK_CPP_FILES; +import static org.apache.datasketches.common.TestUtil.GENERATE_JAVA_FILES; +import static org.apache.datasketches.common.TestUtil.cppPath; +import static org.apache.datasketches.common.TestUtil.javaPath; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import java.lang.foreign.MemorySegment; +import java.io.IOException; +import java.nio.file.Files; + +import org.testng.annotations.Test; + +/** + * Serialize binary sketches to be tested by C++ code. + * Test deserialization of binary sketches serialized by C++ code. 
+ */ +public class AodSketchCrossLanguageTest { + + @Test(groups = {GENERATE_JAVA_FILES}) + public void generateBinariesForCompatibilityTestingOneValue() throws IOException { + final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000}; + for (int n: nArr) { + final ArrayOfDoublesUpdatableSketch sk = new ArrayOfDoublesUpdatableSketchBuilder().build(); + for (int i = 0; i < n; i++) { + sk.update(i, new double[] {i}); + } + Files.newOutputStream(javaPath.resolve("aod_1_n" + n + "_java.sk")).write(sk.compact().toByteArray()); + } + } + + @Test(groups = {GENERATE_JAVA_FILES}) + public void generateBinariesForCompatibilityTestingThreeValues() throws IOException { + final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000}; + for (int n: nArr) { + final ArrayOfDoublesUpdatableSketch sk = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(3).build(); + for (int i = 0; i < n; i++) { + sk.update(i, new double[] {i, i, i}); + } + Files.newOutputStream(javaPath.resolve("aod_3_n" + n + "_java.sk")).write(sk.compact().toByteArray()); + } + } + + @Test(groups = {GENERATE_JAVA_FILES}) + public void generateBinariesForCompatibilityTestingNonEmptyNoEntries() throws IOException { + final ArrayOfDoublesUpdatableSketch sk = + new ArrayOfDoublesUpdatableSketchBuilder().setSamplingProbability(0.01f).build(); + sk.update(1, new double[] {1}); + assertFalse(sk.isEmpty()); + assertEquals(sk.getRetainedEntries(), 0); + Files.newOutputStream(javaPath.resolve("aod_1_non_empty_no_entries_java.sk")).write(sk.compact().toByteArray()); + } + + @Test(groups = {CHECK_CPP_FILES}) + public void deserializeFromCppOneValue() throws IOException { + final int[] nArr = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + for (int n: nArr) { + final byte[] bytes = Files.readAllBytes(cppPath.resolve("aod_1_n" + n + "_cpp.sk")); + final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketch.wrap(MemorySegment.ofArray(bytes)); + assertTrue(n == 0 ? 
sketch.isEmpty() : !sketch.isEmpty()); + assertEquals(sketch.getEstimate(), n, n * 0.03); + assertEquals(sketch.getNumValues(), 1); + final ArrayOfDoublesSketchIterator it = sketch.iterator(); + while (it.next()) { + assertTrue(it.getKey() < sketch.getThetaLong()); + } + } + } + + @Test(groups = {CHECK_CPP_FILES}) + public void deserializeFromCppThreeValues() throws IOException { + final int[] nArr = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; + for (int n: nArr) { + final byte[] bytes = Files.readAllBytes(cppPath.resolve("aod_3_n" + n + "_cpp.sk")); + final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketch.wrap(MemorySegment.ofArray(bytes)); + assertTrue(n == 0 ? sketch.isEmpty() : !sketch.isEmpty()); + assertEquals(sketch.getEstimate(), n, n * 0.03); + assertEquals(sketch.getNumValues(), 3); + final ArrayOfDoublesSketchIterator it = sketch.iterator(); + while (it.next()) { + assertTrue(it.getKey() < sketch.getThetaLong()); + assertEquals(it.getValues()[0], it.getValues()[1]); + assertEquals(it.getValues()[0], it.getValues()[2]); + } + } + } + + @Test(groups = {CHECK_CPP_FILES}) + public void deserializeFromCppOneValueNonEmptyNoEntries() throws IOException { + final byte[] bytes = Files.readAllBytes(cppPath.resolve("aod_1_non_empty_no_entries_cpp.sk")); + final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketch.wrap(MemorySegment.ofArray(bytes)); + assertFalse(sketch.isEmpty()); + assertEquals(sketch.getRetainedEntries(), 0); + } + +} diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBTest.java new file mode 100644 index 000000000..dde5f28b9 --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBTest.java @@ -0,0 +1,323 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.arrayofdoubles; + +import static org.testng.Assert.fail; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.common.SketchesArgumentException; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class ArrayOfDoublesAnotBTest { + + @Test + public void nullOrEmptyInput() { + // calling getResult() before calling update() should yield an empty set + ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); + ArrayOfDoublesCompactSketch result = aNotB.getResult(); + Assert.assertTrue(result.isEmpty()); + Assert.assertEquals(result.getRetainedEntries(), 0); + Assert.assertEquals(result.getEstimate(), 0.0); + Assert.assertEquals(result.getLowerBound(1), 0.0); + Assert.assertEquals(result.getUpperBound(1), 0.0); + + ArrayOfDoublesSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build(); + try { + aNotB.update(sketch, null); + fail(); + } catch (SketchesArgumentException e) {} + + try { + aNotB.update(null, sketch); + fail(); + } catch (SketchesArgumentException e) {} + + aNotB.update(sketch, sketch); + result = aNotB.getResult(); + Assert.assertTrue(result.isEmpty()); + 
Assert.assertEquals(result.getRetainedEntries(), 0); + Assert.assertEquals(result.getEstimate(), 0.0); + Assert.assertEquals(result.getLowerBound(1), 0.0); + Assert.assertEquals(result.getUpperBound(1), 0.0); + } + + @Test + public void emptyA() { + ArrayOfDoublesUpdatableSketch sketchB = new ArrayOfDoublesUpdatableSketchBuilder().build(); + sketchB.update(1, new double[] {1.0}); + sketchB.update(2, new double[] {1.0}); + sketchB.update(3, new double[] {1.0}); + sketchB.update(4, new double[] {1.0}); + sketchB.update(5, new double[] {1.0}); + ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); + + ArrayOfDoublesSketch sketchA = new ArrayOfDoublesUpdatableSketchBuilder().build(); + aNotB.update(sketchA, sketchB); + ArrayOfDoublesCompactSketch result = aNotB.getResult(); + Assert.assertTrue(result.isEmpty()); + Assert.assertEquals(result.getRetainedEntries(), 0); + Assert.assertEquals(result.getEstimate(), 0.0); + Assert.assertEquals(result.getLowerBound(1), 0.0); + Assert.assertEquals(result.getUpperBound(1), 0.0); + } + + @Test + public void emptyB() { + ArrayOfDoublesUpdatableSketch sketchA = new ArrayOfDoublesUpdatableSketchBuilder().build(); + sketchA.update(1, new double[] {1.0}); + sketchA.update(2, new double[] {1.0}); + sketchA.update(3, new double[] {1.0}); + sketchA.update(4, new double[] {1.0}); + sketchA.update(5, new double[] {1.0}); + ArrayOfDoublesSketch sketchB = new ArrayOfDoublesUpdatableSketchBuilder().build(); + + ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); + aNotB.update(sketchA, sketchB); + ArrayOfDoublesCompactSketch result = aNotB.getResult(); + Assert.assertFalse(result.isEmpty()); + Assert.assertEquals(result.getRetainedEntries(), 5); + Assert.assertEquals(result.getEstimate(), 5.0); + Assert.assertEquals(result.getLowerBound(1), 5.0); + Assert.assertEquals(result.getUpperBound(1), 5.0); + + ArrayOfDoublesSketchIterator it = result.iterator(); + while (it.next()) { + 
Assert.assertEquals(it.getValues(), new double[] {1}); + } + } + + @Test + public void aSameAsB() { + ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build(); + sketch.update(1, new double[] {1.0}); + sketch.update(2, new double[] {1.0}); + sketch.update(3, new double[] {1.0}); + sketch.update(4, new double[] {1.0}); + sketch.update(5, new double[] {1.0}); + ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); + aNotB.update(sketch, sketch); + ArrayOfDoublesCompactSketch result = aNotB.getResult(); + Assert.assertTrue(result.isEmpty()); + Assert.assertEquals(result.getRetainedEntries(), 0); + Assert.assertEquals(result.getEstimate(), 0.0); + Assert.assertEquals(result.getLowerBound(1), 0.0); + Assert.assertEquals(result.getUpperBound(1), 0.0); + } + + @Test + public void exactMode() { + ArrayOfDoublesUpdatableSketch sketchA = new ArrayOfDoublesUpdatableSketchBuilder().build(); + sketchA.update(1, new double[] {1}); + sketchA.update(2, new double[] {1}); + sketchA.update(3, new double[] {1}); + sketchA.update(4, new double[] {1}); + sketchA.update(5, new double[] {1}); + + ArrayOfDoublesUpdatableSketch sketchB = new ArrayOfDoublesUpdatableSketchBuilder().build(); + sketchB.update(3, new double[] {1}); + sketchB.update(4, new double[] {1}); + sketchB.update(5, new double[] {1}); + sketchB.update(6, new double[] {1}); + sketchB.update(7, new double[] {1}); + + ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); + aNotB.update(sketchA, sketchB); + ArrayOfDoublesCompactSketch result = aNotB.getResult(); + Assert.assertFalse(result.isEmpty()); + Assert.assertEquals(result.getRetainedEntries(), 2); + Assert.assertEquals(result.getEstimate(), 2.0); + Assert.assertEquals(result.getLowerBound(1), 2.0); + Assert.assertEquals(result.getUpperBound(1), 2.0); + ArrayOfDoublesSketchIterator it = result.iterator(); + while (it.next()) { + Assert.assertEquals(it.getValues(), new double[] 
{1}); + } + } + + @Test + public void exactModeTwoDoubles() { + ArrayOfDoublesUpdatableSketchBuilder bldr = new ArrayOfDoublesUpdatableSketchBuilder(); + bldr.setNominalEntries(16); + bldr.setNumberOfValues(2); + bldr.setResizeFactor(ResizeFactor.X1); + + double[] valuesArr1 = {1.0, 2.0}; + double[] valuesArr2 = {2.0, 4.0}; + ArrayOfDoublesUpdatableSketch sketch1 = bldr.build(); + sketch1.update("a", valuesArr1); + sketch1.update("b", valuesArr2); + sketch1.update("c", valuesArr1); + sketch1.update("d", valuesArr1); + ArrayOfDoublesUpdatableSketch sketch2 = bldr.build(); + sketch2.update("c", valuesArr2); + sketch2.update("d", valuesArr2); + ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); + aNotB.update(sketch1, sketch2); + ArrayOfDoublesCompactSketch result = aNotB.getResult(); + Assert.assertFalse(result.isEmpty()); + Assert.assertEquals(result.getRetainedEntries(), 2); + double[] resultArr = new double[] {2.0,4.0,1.0,2.0}; //order specific to this test + Assert.assertEquals(result.getValuesAsOneDimension(), resultArr); + } + + @Test + public void exactModeCustomSeed() { + long seed = 1234567890; + ArrayOfDoublesUpdatableSketch sketchA = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed).build(); + sketchA.update(1, new double[] {1}); + sketchA.update(2, new double[] {1}); + sketchA.update(3, new double[] {1}); + sketchA.update(4, new double[] {1}); + sketchA.update(5, new double[] {1}); + + ArrayOfDoublesUpdatableSketch sketchB = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed).build(); + sketchB.update(3, new double[] {1}); + sketchB.update(4, new double[] {1}); + sketchB.update(5, new double[] {1}); + sketchB.update(6, new double[] {1}); + sketchB.update(7, new double[] {1}); + + ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().setSeed(seed).buildAnotB(); + aNotB.update(sketchA, sketchB); + ArrayOfDoublesCompactSketch result = aNotB.getResult(); + Assert.assertFalse(result.isEmpty()); + 
Assert.assertEquals(result.getRetainedEntries(), 2); + Assert.assertEquals(result.getEstimate(), 2.0); + Assert.assertEquals(result.getLowerBound(1), 2.0); + Assert.assertEquals(result.getUpperBound(1), 2.0); + ArrayOfDoublesSketchIterator it = result.iterator(); + while (it.next()) { + Assert.assertEquals(it.getValues(), new double[] {1}); + } + } + + @Test + public void estimationMode() { + int key = 0; + ArrayOfDoublesUpdatableSketch sketchA = new ArrayOfDoublesUpdatableSketchBuilder().build(); + for (int i = 0; i < 8192; i++) { + sketchA.update(key++, new double[] {1}); + } + + key -= 4096; // overlap half of the entries + ArrayOfDoublesUpdatableSketch sketchB = new ArrayOfDoublesUpdatableSketchBuilder().build(); + for (int i = 0; i < 8192; i++) { + sketchB.update(key++, new double[] {1}); + } + + ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); + aNotB.update(sketchA, sketchB); + ArrayOfDoublesCompactSketch result = aNotB.getResult(); + Assert.assertFalse(result.isEmpty()); + Assert.assertEquals(result.getEstimate(), 4096.0, 4096 * 0.03); // crude estimate of RSE(95%) = 2 / sqrt(result.getRetainedEntries()) + Assert.assertTrue(result.getLowerBound(1) <= result.getEstimate()); + Assert.assertTrue(result.getUpperBound(1) > result.getEstimate()); + ArrayOfDoublesSketchIterator it = result.iterator(); + while (it.next()) { + Assert.assertEquals(it.getValues(), new double[] {1}); + } + + // same operation, but compact sketches and off-heap result + aNotB.update(sketchA.compact(), sketchB.compact()); + result = aNotB.getResult(MemorySegment.ofArray(new byte[1000000])); + Assert.assertFalse(result.isEmpty()); + Assert.assertEquals(result.getEstimate(), 4096.0, 4096 * 0.03); // crude estimate of RSE(95%) = 2 / sqrt(result.getRetainedEntries()) + Assert.assertTrue(result.getLowerBound(1) <= result.getEstimate()); + Assert.assertTrue(result.getUpperBound(1) > result.getEstimate()); + it = result.iterator(); + while (it.next()) { + 
Assert.assertEquals(it.getValues(), new double[] {1}); + } + } + + @Test + public void estimationModeLargeB() { + int key = 0; + ArrayOfDoublesUpdatableSketch sketchA = new ArrayOfDoublesUpdatableSketchBuilder().build(); + for (int i = 0; i < 10000; i++) { + sketchA.update(key++, new double[] {1}); + } + + key -= 2000; // overlap + ArrayOfDoublesUpdatableSketch sketchB = new ArrayOfDoublesUpdatableSketchBuilder().build(); + for (int i = 0; i < 100000; i++) { + sketchB.update(key++, new double[] {1}); + } + + final int expected = 10000 - 2000; + ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); + aNotB.update(sketchA, sketchB); + ArrayOfDoublesCompactSketch result = aNotB.getResult(); + Assert.assertFalse(result.isEmpty()); + Assert.assertEquals(result.getEstimate(), expected, expected * 0.1); // crude estimate of RSE(95%) = 2 / sqrt(result.getRetainedEntries()) + Assert.assertTrue(result.getLowerBound(1) <= result.getEstimate()); + Assert.assertTrue(result.getUpperBound(1) > result.getEstimate()); + ArrayOfDoublesSketchIterator it = result.iterator(); + while (it.next()) { + Assert.assertEquals(it.getValues(), new double[] {1}); + } + + // same operation, but compact sketches and off-heap result + aNotB.update(sketchA.compact(), sketchB.compact()); + result = aNotB.getResult(MemorySegment.ofArray(new byte[1000000])); + Assert.assertFalse(result.isEmpty()); + Assert.assertEquals(result.getEstimate(), expected, expected * 0.1); // crude estimate of RSE(95%) = 2 / sqrt(result.getRetainedEntries()) + Assert.assertTrue(result.getLowerBound(1) <= result.getEstimate()); + Assert.assertTrue(result.getUpperBound(1) > result.getEstimate()); + it = result.iterator(); + while (it.next()) { + Assert.assertEquals(it.getValues(), new double[] {1}); + } + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void incompatibleSeedA() { + ArrayOfDoublesSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(1).build(); 
+ ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); + aNotB.update(sketch, null); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void incompatibleSeedB() { + ArrayOfDoublesSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(1).build(); + ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); + aNotB.update(null, sketch); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void incompatibleSeeds() { + ArrayOfDoublesSketch sketchA = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(1).build(); + ArrayOfDoublesSketch sketchB = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(2).build(); + ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().setSeed(3).buildAnotB(); + aNotB.update(sketchA, sketchB); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void incompatibleNumValues() { + ArrayOfDoublesSketch sketchA = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(1).build(); + ArrayOfDoublesSketch sketchB = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build(); + ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().setSeed(3).buildAnotB(); + aNotB.update(sketchA, sketchB); + } + +} diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java new file mode 100644 index 000000000..a8d072a2e --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.arrayofdoubles; + +import static org.testng.Assert.assertEquals; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.thetacommon.ThetaUtil; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class ArrayOfDoublesCompactSketchTest { + + @Test + public void heapToDirectExactTwoDoubles() { + ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build(); + sketch1.update("a", new double[] {1, 2}); + sketch1.update("b", new double[] {1, 2}); + sketch1.update("c", new double[] {1, 2}); + sketch1.update("d", new double[] {1, 2}); + sketch1.update("a", new double[] {1, 2}); + sketch1.update("b", new double[] {1, 2}); + sketch1.update("c", new double[] {1, 2}); + sketch1.update("d", new double[] {1, 2}); + ArrayOfDoublesCompactSketch csk = sketch1.compact(); + MemorySegment seg = MemorySegment.ofArray(csk.toByteArray()); + ArrayOfDoublesSketch sketch2 = new DirectArrayOfDoublesCompactSketch(seg); + Assert.assertFalse(sketch2.isEmpty()); + Assert.assertFalse(sketch2.isEstimationMode()); + Assert.assertEquals(sketch2.getEstimate(), 4.0); + Assert.assertEquals(sketch2.getUpperBound(1), 4.0); + Assert.assertEquals(sketch2.getLowerBound(1), 4.0); + Assert.assertEquals(sketch2.getThetaLong(), Long.MAX_VALUE); + Assert.assertEquals(sketch2.getTheta(), 1.0); + 
double[][] values = sketch2.getValues(); + Assert.assertEquals(values.length, 4); + for (double[] array: values) { + Assert.assertEquals(array.length, 2); + Assert.assertEquals(array[0], 2.0); + Assert.assertEquals(array[1], 4.0); + } + } + + @Test + public void directToHeapExactTwoDoubles() { + ArrayOfDoublesUpdatableSketch sketch1 = + new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build(MemorySegment.ofArray(new byte[1000000])); + sketch1.update("a", new double[] {1, 2}); + sketch1.update("b", new double[] {1, 2}); + sketch1.update("c", new double[] {1, 2}); + sketch1.update("d", new double[] {1, 2}); + sketch1.update("a", new double[] {1, 2}); + sketch1.update("b", new double[] {1, 2}); + sketch1.update("c", new double[] {1, 2}); + sketch1.update("d", new double[] {1, 2}); + ArrayOfDoublesSketch sketch2 = + new HeapArrayOfDoublesCompactSketch( + MemorySegment.ofArray(sketch1.compact(MemorySegment.ofArray(new byte[1000000])).toByteArray())); + Assert.assertFalse(sketch2.isEmpty()); + Assert.assertFalse(sketch2.isEstimationMode()); + Assert.assertEquals(sketch2.getEstimate(), 4.0); + Assert.assertEquals(sketch2.getUpperBound(1), 4.0); + Assert.assertEquals(sketch2.getLowerBound(1), 4.0); + Assert.assertEquals(sketch2.getThetaLong(), Long.MAX_VALUE); + Assert.assertEquals(sketch2.getTheta(), 1.0); + double[][] values = sketch2.getValues(); + Assert.assertEquals(values.length, 4); + for (double[] array: values) { + Assert.assertEquals(array.length, 2); + Assert.assertEquals(array[0], 2.0); + Assert.assertEquals(array[1], 4.0); + } + } + + @SuppressWarnings("unused") + @Test + public void checkGetValuesAndKeysMethods() { + ArrayOfDoublesUpdatableSketchBuilder bldr = new ArrayOfDoublesUpdatableSketchBuilder(); + bldr.setNominalEntries(16).setNumberOfValues(2); + + HeapArrayOfDoublesQuickSelectSketch hqssk = (HeapArrayOfDoublesQuickSelectSketch) bldr.build(); + hqssk.update("a", new double[] {1, 2}); + hqssk.update("b", new double[] {3, 4}); + 
hqssk.update("c", new double[] {5, 6}); + hqssk.update("d", new double[] {7, 8}); + final double[][] values = hqssk.getValues(); + final double[] values1d = hqssk.getValuesAsOneDimension(); + final long[] keys = hqssk.getKeys(); + + HeapArrayOfDoublesCompactSketch hcsk = (HeapArrayOfDoublesCompactSketch)hqssk.compact(); + final double[][] values2 = hcsk.getValues(); + final double[] values1d2 = hcsk.getValuesAsOneDimension(); + final long[] keys2 = hcsk.getKeys(); + assertEquals(values2, values); + assertEquals(values1d2, values1d); + assertEquals(keys2, keys); + + MemorySegment hqsskSeg = MemorySegment.ofArray(hqssk.toByteArray()); + + DirectArrayOfDoublesQuickSelectSketchR dqssk = + (DirectArrayOfDoublesQuickSelectSketchR)ArrayOfDoublesSketch.wrap(hqsskSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + final double[][] values3 = dqssk.getValues(); + final double[] values1d3 = dqssk.getValuesAsOneDimension(); + final long[] keys3 = dqssk.getKeys(); + assertEquals(values3, values); + assertEquals(values1d3, values1d); + assertEquals(keys3, keys); + + MemorySegment hcskSeg = MemorySegment.ofArray(hcsk.toByteArray()); + + DirectArrayOfDoublesCompactSketch dcsk2 = + (DirectArrayOfDoublesCompactSketch)ArrayOfDoublesSketch.wrap(hcskSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + final double[][] values4 = dqssk.getValues(); + final double[] values1d4 = dqssk.getValuesAsOneDimension(); + final long[] keys4 = dqssk.getKeys(); + assertEquals(values4, values); + assertEquals(values1d4, values1d); + assertEquals(keys4, keys); + } + +} diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersectionTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersectionTest.java new file mode 100644 index 000000000..94329f935 --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersectionTest.java @@ -0,0 +1,311 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.arrayofdoubles; + +import static org.testng.Assert.fail; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class ArrayOfDoublesIntersectionTest { + + private static ArrayOfDoublesCombiner combiner = new ArrayOfDoublesCombiner() { + + @Override + public double[] combine(final double[] a, final double[] b) { + for (int i = 0; i < a.length; i++) { + a[i] += b[i]; + } + return a; + } + }; + + @Test + public void nullInput() { + final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().buildIntersection(); + try { + intersection.intersect(null, null); + fail(); + } catch (SketchesArgumentException e) {} + + } + + @Test + public void empty() { + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().buildIntersection(); + intersection.intersect(sketch1, null); + final ArrayOfDoublesCompactSketch result = intersection.getResult(); + Assert.assertTrue(result.isEmpty()); + 
Assert.assertEquals(result.getRetainedEntries(), 0); + Assert.assertEquals(result.getEstimate(), 0.0); + Assert.assertEquals(result.getLowerBound(1), 0.0); + Assert.assertEquals(result.getUpperBound(1), 0.0); + Assert.assertEquals(result.getValues().length, 0); + } + + @Test + public void degenerateWithExact() { + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setSamplingProbability(0.01f).build(); + sketch1.update("a", new double[] {1}); // this happens to get rejected because of sampling with low probability + final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + sketch2.update(1, new double[] {1}); + + final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().buildIntersection(); + intersection.intersect(sketch1, null); + intersection.intersect(sketch2, null); + final ArrayOfDoublesCompactSketch result = intersection.getResult(); + Assert.assertFalse(result.isEmpty()); //Degenerate + Assert.assertEquals(result.getRetainedEntries(), 0); + Assert.assertEquals(result.getEstimate(), 0.0); + Assert.assertEquals(result.getLowerBound(1), 0.0); + Assert.assertEquals(result.getUpperBound(1), 184.0); + Assert.assertEquals(result.getValues().length, 0); + } + + @Test + public void heapExactWithEmpty() { + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + sketch1.update(1, new double[] {1}); + sketch1.update(2, new double[] {1}); + sketch1.update(3, new double[] {1}); + + final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + + final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().buildIntersection(); + intersection.intersect(sketch1, null); + intersection.intersect(sketch2, null); + final ArrayOfDoublesCompactSketch result = intersection.getResult(); + Assert.assertTrue(result.isEmpty()); + 
Assert.assertEquals(result.getRetainedEntries(), 0); + Assert.assertEquals(result.getEstimate(), 0.0); + Assert.assertEquals(result.getLowerBound(1), 0.0); + Assert.assertEquals(result.getUpperBound(1), 0.0); + } + + @Test + public void directExactWithEmpty() { + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder() + .build(MemorySegment.ofArray(new byte[1000000])); + sketch1.update(1, new double[] {1}); + sketch1.update(2, new double[] {1}); + sketch1.update(3, new double[] {1}); + + final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder() + .build(MemorySegment.ofArray(new byte[1000000])); + + final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder(). + buildIntersection(MemorySegment.ofArray(new byte[1000000])); + intersection.intersect(sketch1, null); + intersection.intersect(sketch2, null); + final ArrayOfDoublesCompactSketch result = intersection.getResult(MemorySegment.ofArray(new byte[1000000])); + Assert.assertTrue(result.isEmpty()); + Assert.assertEquals(result.getRetainedEntries(), 0); + Assert.assertEquals(result.getEstimate(), 0.0); + Assert.assertEquals(result.getLowerBound(1), 0.0); + Assert.assertEquals(result.getUpperBound(1), 0.0); + } + + @Test + public void heapExactMode() { + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + sketch1.update(1, new double[] {1}); + sketch1.update(1, new double[] {1}); + sketch1.update(2, new double[] {1}); + sketch1.update(2, new double[] {1}); + + final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + sketch2.update(2, new double[] {1}); + sketch2.update(2, new double[] {1}); + sketch2.update(3, new double[] {1}); + sketch2.update(3, new double[] {1}); + + final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().buildIntersection(); + intersection.intersect(sketch1, combiner); + 
intersection.intersect(sketch2, combiner); + ArrayOfDoublesCompactSketch result = intersection.getResult(); + Assert.assertFalse(result.isEmpty()); + Assert.assertEquals(result.getRetainedEntries(), 1); + Assert.assertEquals(result.getEstimate(), 1.0); + Assert.assertEquals(result.getLowerBound(1), 1.0); + Assert.assertEquals(result.getUpperBound(1), 1.0); + final double[][] values = result.getValues(); + for (int i = 0; i < values.length; i++) { + Assert.assertEquals(values[i][0], 4.0); + } + + intersection.reset(); + try { + intersection.intersect(null, null); + fail(); + } catch (SketchesArgumentException e) { } + } + + @Test + public void heapDisjointEstimationMode() { + int key = 0; + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + for (int i = 0; i < 8192; i++) { + sketch1.update(key++, new double[] {1.0}); + } + + final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + for (int i = 0; i < 8192; i++) { + sketch2.update(key++, new double[] {1.0}); + } + + final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().buildIntersection(); + intersection.intersect(sketch1, combiner); + intersection.intersect(sketch2, combiner); + final ArrayOfDoublesCompactSketch result = intersection.getResult(); + Assert.assertFalse(result.isEmpty()); //Degenerate case + Assert.assertEquals(result.getRetainedEntries(), 0); + Assert.assertEquals(result.getEstimate(), 0.0); + Assert.assertEquals(result.getLowerBound(1), 0.0); + Assert.assertEquals(result.getUpperBound(1), 3.0); + Assert.assertEquals(result.getValues().length, 0); + Assert.assertTrue(result.thetaLong_ < Long.MAX_VALUE); + } + + @Test + public void directDisjointEstimationMode() { + int key = 0; + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder(). 
+ build(MemorySegment.ofArray(new byte[1000000])); + for (int i = 0; i < 8192; i++) { + sketch1.update(key++, new double[] {1.0}); + } + + final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder(). + build(MemorySegment.ofArray(new byte[1000000])); + for (int i = 0; i < 8192; i++) { + sketch2.update(key++, new double[] {1.0}); + } + + final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder(). + buildIntersection(MemorySegment.ofArray(new byte[1000000])); + intersection.intersect(sketch1, combiner); + intersection.intersect(sketch2, combiner); + final ArrayOfDoublesCompactSketch result = intersection.getResult(MemorySegment.ofArray(new byte[1000000])); + Assert.assertFalse(result.isEmpty()); + Assert.assertEquals(result.getRetainedEntries(), 0); + Assert.assertEquals(result.getEstimate(), 0.0); + Assert.assertEquals(result.getLowerBound(1), 0.0); + Assert.assertEquals(result.getUpperBound(1), 3.0); + Assert.assertEquals(result.getValues().length, 0); + } + + @Test + public void heapEstimationMode() { + int key = 0; + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + for (int i = 0; i < 8192; i++) { + sketch1.update(key++, new double[] {1.0}); + } + + key -= 4096; // overlap half of the entries + final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + for (int i = 0; i < 8192; i++) { + sketch2.update(key++, new double[] {1.0}); + } + + final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().buildIntersection(); + intersection.intersect(sketch1, combiner); + intersection.intersect(sketch2, combiner); + final ArrayOfDoublesCompactSketch result = intersection.getResult(); + Assert.assertFalse(result.isEmpty()); + Assert.assertEquals(result.getEstimate(), 4096.0, 4096 * 0.03); // crude estimate of RSE(95%) = 2 / sqrt(result.getRetainedEntries()) + 
Assert.assertTrue(result.getLowerBound(1) <= result.getEstimate()); + Assert.assertTrue(result.getUpperBound(1) > result.getEstimate()); + final double[][] values = result.getValues(); + for (int i = 0; i < values.length; i++) { + Assert.assertEquals(values[i][0], 2.0); + } + } + + @Test + public void directEstimationMode() { + int key = 0; + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); + for (int i = 0; i < 8192; i++) { + sketch1.update(key++, new double[] {1.0}); + } + + key -= 4096; // overlap half of the entries + final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); + for (int i = 0; i < 8192; i++) { + sketch2.update(key++, new double[] {1.0}); + } + + final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().buildIntersection(MemorySegment.ofArray(new byte[1000000])); + intersection.intersect(sketch1, combiner); + intersection.intersect(sketch2, combiner); + final ArrayOfDoublesCompactSketch result = intersection.getResult(MemorySegment.ofArray(new byte[1000000])); + Assert.assertFalse(result.isEmpty()); + Assert.assertEquals(result.getEstimate(), 4096.0, 4096 * 0.03); // crude estimate of RSE(95%) = 2 / sqrt(result.getRetainedEntries()) + Assert.assertTrue(result.getLowerBound(1) <= result.getEstimate()); + Assert.assertTrue(result.getUpperBound(1) > result.getEstimate()); + final double[][] values = result.getValues(); + for (int i = 0; i < values.length; i++) { + Assert.assertEquals(values[i][0], 2.0); + } + } + + @Test + public void heapExactModeCustomSeed() { + final long seed = 1234567890; + + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed).build(); + sketch1.update(1, new double[] {1}); + sketch1.update(1, new double[] {1}); + sketch1.update(2, new double[] {1}); + sketch1.update(2, new double[] 
{1}); + + final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed).build(); + sketch2.update(2, new double[] {1}); + sketch2.update(2, new double[] {1}); + sketch2.update(3, new double[] {1}); + sketch2.update(3, new double[] {1}); + + final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().setSeed(seed).buildIntersection(); + intersection.intersect(sketch1, combiner); + intersection.intersect(sketch2, combiner); + final ArrayOfDoublesCompactSketch result = intersection.getResult(); + Assert.assertFalse(result.isEmpty()); + Assert.assertEquals(result.getRetainedEntries(), 1); + Assert.assertEquals(result.getEstimate(), 1.0); + Assert.assertEquals(result.getLowerBound(1), 1.0); + Assert.assertEquals(result.getUpperBound(1), 1.0); + final double[][] values = result.getValues(); + for (int i = 0; i < values.length; i++) { + Assert.assertEquals(values[i][0], 4.0); + } + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void incompatibleSeeds() { + final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(1).build(); + final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().setSeed(2).buildIntersection(); + intersection.intersect(sketch, combiner); + } +} diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketchTest.java new file mode 100644 index 000000000..4992aa4c5 --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketchTest.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.arrayofdoubles; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class ArrayOfDoublesQuickSelectSketchTest { + + @Test(expectedExceptions = SketchesArgumentException.class) + public void invalidSamplingProbability() { + new ArrayOfDoublesUpdatableSketchBuilder().setSamplingProbability(2f); + } + + @Test + public void heapToDirectExactTwoDoubles() { + double[] valuesArr = {1.0, 2.0}; + ArrayOfDoublesUpdatableSketch sketch1 = + new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build(); + sketch1.update("a", valuesArr); + sketch1.update("b", valuesArr); + sketch1.update("c", valuesArr); + sketch1.update("d", valuesArr); + sketch1.update("a", valuesArr); + noopUpdates(sketch1, valuesArr); + ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.wrap(MemorySegment.ofArray(sketch1.toByteArray())); + sketch2.update("b", valuesArr); + sketch2.update("c", valuesArr); + sketch2.update("d", valuesArr); + Assert.assertFalse(sketch2.isEmpty()); + Assert.assertFalse(sketch2.isEstimationMode()); + Assert.assertEquals(sketch2.getEstimate(), 4.0); + Assert.assertEquals(sketch2.getUpperBound(1), 4.0); + Assert.assertEquals(sketch2.getLowerBound(1), 4.0); + 
Assert.assertEquals(sketch2.getThetaLong(), Long.MAX_VALUE); + Assert.assertEquals(sketch2.getTheta(), 1.0); + double[][] values = sketch2.getValues(); + Assert.assertEquals(values.length, 4); + for (double[] array: values) { + Assert.assertEquals(array.length, 2); + Assert.assertEquals(array[0], 2.0); + Assert.assertEquals(array[1], 4.0); + } + } + + @Test + public void heapToDirectWithSeed() { + long seed = 1; + double[] values = {1.0}; + + ArrayOfDoublesUpdatableSketch sketch1 = + new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed).build(); + sketch1.update("a", values); + sketch1.update("b", values); + sketch1.update("c", values); + + ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.wrap(MemorySegment.ofArray(sketch1.toByteArray()), seed); + sketch2.update("b", values); + sketch2.update("c", values); + sketch2.update("d", values); + + Assert.assertEquals(sketch2.getEstimate(), 4.0); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInsertExceptions() { + ArrayOfDoublesUpdatableSketch sketch1 = + new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build(); + sketch1.update("a", new double[] {1.0}); + } + + @Test + public void directToHeapExactTwoDoubles() { + double[] valuesArr = {1.0, 2.0}; + ArrayOfDoublesUpdatableSketch sketch1 = + new ArrayOfDoublesUpdatableSketchBuilder(). 
+ setNumberOfValues(2).build(MemorySegment.ofArray(new byte[1000000])); + sketch1.update("a", valuesArr); + sketch1.update("b", valuesArr); + sketch1.update("c", valuesArr); + sketch1.update("d", valuesArr); + sketch1.update("a", valuesArr); + noopUpdates(sketch1, valuesArr); + ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.heapify(MemorySegment.ofArray(sketch1.toByteArray())); + sketch2.update("b", valuesArr); + sketch2.update("c", valuesArr); + sketch2.update("d", valuesArr); + Assert.assertFalse(sketch2.isEmpty()); + Assert.assertFalse(sketch2.isEstimationMode()); + Assert.assertEquals(sketch2.getEstimate(), 4.0); + Assert.assertEquals(sketch2.getUpperBound(1), 4.0); + Assert.assertEquals(sketch2.getLowerBound(1), 4.0); + Assert.assertEquals(sketch2.getThetaLong(), Long.MAX_VALUE); + Assert.assertEquals(sketch2.getTheta(), 1.0); + double[][] values = sketch2.getValues(); + Assert.assertEquals(values.length, 4); + for (double[] array: values) { + Assert.assertEquals(array.length, 2); + Assert.assertEquals(array[0], 2.0); + Assert.assertEquals(array[1], 4.0); + } + } + + @Test + public void directToHeapWithSeed() { + long seed = 1; + double[] values = {1.0}; + + ArrayOfDoublesUpdatableSketch sketch1 = + new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed).build( + MemorySegment.ofArray(new byte[1000000])); + sketch1.update("a", values); + sketch1.update("b", values); + sketch1.update("c", values); + + ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.heapify(MemorySegment.ofArray(sketch1.toByteArray()), seed); + sketch2.update("b", values); + sketch2.update("c", values); + sketch2.update("d", values); + + Assert.assertEquals(sketch2.getEstimate(), 4.0); + } + + @Test + public void maxBytes() { + Assert.assertEquals(ArrayOfDoublesQuickSelectSketch.getMaxBytes(1024, 2), 49184); + } + + private static void noopUpdates(ArrayOfDoublesUpdatableSketch sketch, double[] valuesArr) { + byte[] byteArr = null; + 
sketch.update(byteArr, valuesArr); + byteArr = new byte[0]; + sketch.update(byteArr, valuesArr); + int[] intArr = null; + sketch.update(intArr, valuesArr); + intArr = new int[0]; + sketch.update(intArr, valuesArr); + long[] longArr = null; + sketch.update(longArr, valuesArr); + longArr = new long[0]; + sketch.update(longArr, valuesArr); + } + +} diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnionTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnionTest.java new file mode 100644 index 000000000..ace9e02c0 --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnionTest.java @@ -0,0 +1,513 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple2.arrayofdoubles; + +import java.lang.foreign.MemorySegment; +import java.util.Arrays; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.testng.Assert; +import org.testng.annotations.Test; + +/** Exercises ArrayOfDoublesUnion built on the heap and over a MemorySegment: exact and estimation modes, serialize/heapify/wrap round trips, and rejection of mismatched seeds or value counts. */ public class ArrayOfDoublesUnionTest { + + @Test + public void heapExactMode() { + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + sketch1.update(1, new double[] {1.0}); + sketch1.update(1, new double[] {1.0}); + sketch1.update(1, new double[] {1.0}); + sketch1.update(2, new double[] {1.0}); + + final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + sketch2.update(2, new double[] {1.0}); + sketch2.update(2, new double[] {1.0}); + sketch2.update(3, new double[] {1.0}); + sketch2.update(3, new double[] {1.0}); + sketch2.update(3, new double[] {1.0}); + + final ArrayOfDoublesUnion union = new ArrayOfDoublesSetOperationBuilder().buildUnion(); + union.union(sketch1); + union.union(sketch2); + final int maxBytes = ArrayOfDoublesUnion.getMaxBytes( + ArrayOfDoublesSetOperationBuilder.DEFAULT_NOMINAL_ENTRIES, + ArrayOfDoublesSetOperationBuilder.DEFAULT_NUMBER_OF_VALUES); + Assert.assertEquals(maxBytes, 131120); // 48 bytes preamble + 2 * nominal entries * (key size + value size) + ArrayOfDoublesCompactSketch result = union.getResult(); + Assert.assertEquals(result.getEstimate(), 3.0); + double[][] values = result.getValues(); + Assert.assertEquals(values[0][0], 3.0); + Assert.assertEquals(values[1][0], 3.0); + Assert.assertEquals(values[2][0], 3.0); + + final MemorySegment wseg = MemorySegment.ofArray(union.toByteArray()); + final ArrayOfDoublesUnion wrappedUnion = ArrayOfDoublesSketches.wrapUnion(wseg); + result = wrappedUnion.getResult(); + Assert.assertEquals(result.getEstimate(), 3.0); + values = result.getValues(); + Assert.assertEquals(values[0][0], 3.0); + Assert.assertEquals(values[1][0], 3.0); + 
Assert.assertEquals(values[2][0], 3.0); + + + union.reset(); + result = union.getResult(); + Assert.assertTrue(result.isEmpty()); + Assert.assertFalse(result.isEstimationMode()); + Assert.assertEquals(result.getEstimate(), 0.0); + Assert.assertEquals(result.getUpperBound(1), 0.0); + Assert.assertEquals(result.getLowerBound(1), 0.0); + Assert.assertEquals(result.getTheta(), 1.0); + } + + @Test + public void heapEstimationMode() { + int key = 0; + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + for (int i = 0; i < 8192; i++) { + sketch1.update(key++, new double[] {1.0}); + } + + key -= 4096; // overlap half of the entries + final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + for (int i = 0; i < 8192; i++) { + sketch2.update(key++, new double[] {1.0}); + } + + final ArrayOfDoublesUnion union = new ArrayOfDoublesSetOperationBuilder().buildUnion(); + union.union(sketch1); + union.union(sketch2); + ArrayOfDoublesCompactSketch result = union.getResult(); + Assert.assertFalse(result.isEmpty()); + Assert.assertTrue(result.isEstimationMode()); + Assert.assertEquals(result.getEstimate(), 12288.0, 12288 * 0.01); + + union.reset(); + result = union.getResult(); + Assert.assertTrue(result.isEmpty()); + Assert.assertFalse(result.isEstimationMode()); + Assert.assertEquals(result.getEstimate(), 0.0); + Assert.assertEquals(result.getUpperBound(1), 0.0); + Assert.assertEquals(result.getLowerBound(1), 0.0); + Assert.assertEquals(result.getTheta(), 1.0); + } + + @Test + public void heapEstimationModeFullOverlapTwoValuesAndDownsizing() { + int key = 0; + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build(); + for (int i = 0; i < 8192; i++) { + sketch1.update(key++, new double[] {1.0, 2.0}); + } + + key = 0; // full overlap + final ArrayOfDoublesUpdatableSketch sketch2 = new 
ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build(); + for (int i = 0; i < 8192; i++) { + sketch2.update(key++, new double[] {1.0, 2.0}); + } + + final ArrayOfDoublesUnion union = new ArrayOfDoublesSetOperationBuilder().setNumberOfValues(2).setNominalEntries(1024).buildUnion(); + union.union(sketch1); + union.union(sketch2); + final ArrayOfDoublesCompactSketch result = union.getResult(); + Assert.assertFalse(result.isEmpty()); + Assert.assertTrue(result.isEstimationMode()); + Assert.assertEquals(result.getEstimate(), 8192.0, 8192 * 0.01); + Assert.assertEquals(result.getRetainedEntries(), 1024); // union was downsampled + + final ArrayOfDoublesSketchIterator it = result.iterator(); + /* every key was updated with {1.0, 2.0} in both input sketches; the test expects each retained entry to come back as {2, 4} */ final double[] expected = {2, 4}; + while (it.next()) { + Assert.assertEquals(it.getValues(), expected, Arrays.toString(it.getValues()) + " != " + Arrays.toString(expected)); + } + } + + @Test + public void heapMixedMode() { + int key = 0; + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + for (int i = 0; i < 1000; i++) { + sketch1.update(key++, new double[] {1.0}); + } + + key -= 500; // overlap half of the entries + final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().setSamplingProbability(0.2f).build(); + for (int i = 0; i < 20000; i++) { + sketch2.update(key++, new double[] {1.0}); + } + + final ArrayOfDoublesUnion union = new ArrayOfDoublesSetOperationBuilder().buildUnion(); + union.union(sketch1); + union.union(sketch2); + final ArrayOfDoublesCompactSketch result = union.getResult(); + Assert.assertFalse(result.isEmpty()); + Assert.assertTrue(result.isEstimationMode()); + Assert.assertEquals(result.getEstimate(), 20500.0, 20500 * 0.01); + } + + @Test + public void heapSerializeDeserialize() { + int key = 0; + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + for (int i = 0; i < 8192; i++) { + sketch1.update(key++, new double[] 
{1.0}); + } + + key -= 4096; // overlap half of the entries + final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + for (int i = 0; i < 8192; i++) { + sketch2.update(key++, new double[] {1.0}); + } + + final ArrayOfDoublesUnion union1 = new ArrayOfDoublesSetOperationBuilder().buildUnion(); + union1.union(sketch1); + union1.union(sketch2); + + final ArrayOfDoublesUnion union2 = ArrayOfDoublesUnion.heapify(MemorySegment.ofArray(union1.toByteArray())); + ArrayOfDoublesCompactSketch result = union2.getResult(); + Assert.assertEquals(result.getEstimate(), 12288.0, 12288 * 0.01); + + union2.reset(); + result = union2.getResult(); + Assert.assertTrue(result.isEmpty()); + Assert.assertFalse(result.isEstimationMode()); + Assert.assertEquals(result.getEstimate(), 0.0); + Assert.assertEquals(result.getUpperBound(1), 0.0); + Assert.assertEquals(result.getLowerBound(1), 0.0); + Assert.assertEquals(result.getTheta(), 1.0); + final double[][] values = result.getValues(); + /* NOTE(review): result was re-read after union2.reset() and asserted empty above, so values is presumably zero-length and this loop never checks anything - confirm intent */ for (int i = 0; i < values.length; i++) { + Assert.assertEquals(values[i][0], 2.0); + } + } + + @Test + public void heapSerializeDeserializeWithSeed() { + final long seed = 1; + int key = 0; + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed).build(); + for (int i = 0; i < 8192; i++) { + sketch1.update(key++, new double[] {1.0}); + } + + key -= 4096; // overlap half of the entries + final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed).build(); + for (int i = 0; i < 8192; i++) { + sketch2.update(key++, new double[] {1.0}); + } + + final ArrayOfDoublesUnion union1 = new ArrayOfDoublesSetOperationBuilder().setSeed(seed).buildUnion(); + union1.union(sketch1); + union1.union(sketch2); + + final ArrayOfDoublesUnion union2 = ArrayOfDoublesUnion.heapify(MemorySegment.ofArray(union1.toByteArray()), seed); + final ArrayOfDoublesCompactSketch result = union2.getResult(); + 
Assert.assertEquals(result.getEstimate(), 12288.0, 12288 * 0.01); + } + + @Test + public void directSerializeDeserialize() { + int key = 0; + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build( + MemorySegment.ofArray(new byte[1000000])); + for (int i = 0; i < 8192; i++) { + sketch1.update(key++, new double[] {1.0}); + } + + key -= 4096; // overlap half of the entries + final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build( + MemorySegment.ofArray(new byte[1000000])); + for (int i = 0; i < 8192; i++) { + sketch2.update(key++, new double[] {1.0}); + } + + final ArrayOfDoublesUnion union1 = new ArrayOfDoublesSetOperationBuilder().buildUnion( + MemorySegment.ofArray(new byte[1000000])); + union1.union(sketch1); + union1.union(sketch2); + + final ArrayOfDoublesUnion union2 = ArrayOfDoublesUnion.wrap(MemorySegment.ofArray(union1.toByteArray())); + ArrayOfDoublesCompactSketch result = union2.getResult(MemorySegment.ofArray(new byte[1000000])); + Assert.assertEquals(result.getEstimate(), 12288.0, 12288 * 0.01); + + union2.reset(); + result = union2.getResult(); + Assert.assertTrue(result.isEmpty()); + Assert.assertFalse(result.isEstimationMode()); + Assert.assertEquals(result.getEstimate(), 0.0); + Assert.assertEquals(result.getUpperBound(1), 0.0); + Assert.assertEquals(result.getLowerBound(1), 0.0); + Assert.assertEquals(result.getTheta(), 1.0); + final double[][] values = result.getValues(); + /* NOTE(review): result was re-read after union2.reset() and asserted empty above, so values is presumably zero-length and this loop never checks anything - confirm intent */ for (int i = 0; i < values.length; i++) { + Assert.assertEquals(values[i][0], 2.0); + } + } + + @Test + public void directSerializeDeserializeWithSeed() { + final long seed = 1; + int key = 0; + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed) + .build(MemorySegment.ofArray(new byte[1000000])); + for (int i = 0; i < 8192; i++) { + sketch1.update(key++, new double[] {1.0}); + } + + key -= 4096; // overlap half of the entries + final 
ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed) + .build(MemorySegment.ofArray(new byte[1000000])); + for (int i = 0; i < 8192; i++) { + sketch2.update(key++, new double[] {1.0}); + } + + final ArrayOfDoublesUnion union1 = new ArrayOfDoublesSetOperationBuilder().setSeed(seed) + .buildUnion(MemorySegment.ofArray(new byte[1000000])); + union1.union(sketch1); + union1.union(sketch2); + + final ArrayOfDoublesUnion union2 = ArrayOfDoublesUnion.wrap(MemorySegment.ofArray(union1.toByteArray()), seed); + final ArrayOfDoublesCompactSketch result = union2.getResult(MemorySegment.ofArray(new byte[1000000])); + Assert.assertEquals(result.getEstimate(), 12288.0, 12288 * 0.01); + } + + @Test + public void directExactMode() { + final ArrayOfDoublesUpdatableSketch sketch1 = + new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); + sketch1.update(1, new double[] {1.0}); + sketch1.update(1, new double[] {1.0}); + sketch1.update(1, new double[] {1.0}); + sketch1.update(2, new double[] {1.0}); + + final ArrayOfDoublesUpdatableSketch sketch2 = + new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); + sketch2.update(2, new double[] {1.0}); + sketch2.update(2, new double[] {1.0}); + sketch2.update(3, new double[] {1.0}); + sketch2.update(3, new double[] {1.0}); + sketch2.update(3, new double[] {1.0}); + + final ArrayOfDoublesUnion union = + new ArrayOfDoublesSetOperationBuilder().buildUnion(MemorySegment.ofArray(new byte[1000000])); + union.union(sketch1); + union.union(sketch2); + ArrayOfDoublesCompactSketch result = union.getResult(MemorySegment.ofArray(new byte[1000000])); + Assert.assertEquals(result.getEstimate(), 3.0); + final double[][] values = result.getValues(); + Assert.assertEquals(values[0][0], 3.0); + Assert.assertEquals(values[1][0], 3.0); + Assert.assertEquals(values[2][0], 3.0); + + union.reset(); + result = union.getResult(); + 
Assert.assertTrue(result.isEmpty()); + Assert.assertFalse(result.isEstimationMode()); + Assert.assertEquals(result.getEstimate(), 0.0); + Assert.assertEquals(result.getUpperBound(1), 0.0); + Assert.assertEquals(result.getLowerBound(1), 0.0); + Assert.assertEquals(result.getTheta(), 1.0); + } + + @Test + public void directEstimationMode() { + int key = 0; + final ArrayOfDoublesUpdatableSketch sketch1 = + new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); + for (int i = 0; i < 8192; i++) { + sketch1.update(key++, new double[] {1.0}); + } + + key -= 4096; // overlap half of the entries + final ArrayOfDoublesUpdatableSketch sketch2 = + new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); + for (int i = 0; i < 8192; i++) { + sketch2.update(key++, new double[] {1.0}); + } + + final ArrayOfDoublesUnion union = + new ArrayOfDoublesSetOperationBuilder().buildUnion(MemorySegment.ofArray(new byte[1000000])); + union.union(sketch1); + union.union(sketch2); + ArrayOfDoublesCompactSketch result = union.getResult(MemorySegment.ofArray(new byte[1000000])); + Assert.assertEquals(result.getEstimate(), 12288.0, 12288 * 0.01); + + union.reset(); + result = union.getResult(); + Assert.assertTrue(result.isEmpty()); + Assert.assertFalse(result.isEstimationMode()); + Assert.assertEquals(result.getEstimate(), 0.0); + Assert.assertEquals(result.getUpperBound(1), 0.0); + Assert.assertEquals(result.getLowerBound(1), 0.0); + Assert.assertEquals(result.getTheta(), 1.0); + } + + @Test + public void heapToDirect() { + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + sketch1.update(1, new double[] {1.0}); + sketch1.update(1, new double[] {1.0}); + sketch1.update(1, new double[] {1.0}); + sketch1.update(2, new double[] {1.0}); + + final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + sketch2.update(2, new double[] {1.0}); + 
sketch2.update(2, new double[] {1.0}); + sketch2.update(3, new double[] {1.0}); + sketch2.update(3, new double[] {1.0}); + sketch2.update(3, new double[] {1.0}); + + final ArrayOfDoublesUnion heapUnion = new ArrayOfDoublesSetOperationBuilder().buildUnion(); + heapUnion.union(sketch1); + + final ArrayOfDoublesUnion directUnion = + ArrayOfDoublesUnion.wrap(MemorySegment.ofArray(heapUnion.toByteArray())); + /* NOTE(review): the trailing remark says this call throws, yet the test declares no expectedExceptions and goes on to assert on the result - confirm which is intended */ directUnion.union(sketch2); //throws + + final ArrayOfDoublesCompactSketch result = directUnion.getResult(MemorySegment.ofArray(new byte[1000000])); + Assert.assertFalse(result.isEmpty()); + Assert.assertEquals(result.getEstimate(), 3.0); + final double[][] values = result.getValues(); + Assert.assertEquals(values.length, 3); + Assert.assertEquals(values[0][0], 3.0); + Assert.assertEquals(values[1][0], 3.0); + Assert.assertEquals(values[2][0], 3.0); + } + + @Test + public void directToHeap() { + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + sketch1.update(1, new double[] {1.0}); + sketch1.update(1, new double[] {1.0}); + sketch1.update(1, new double[] {1.0}); + sketch1.update(2, new double[] {1.0}); + + final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + sketch2.update(2, new double[] {1.0}); + sketch2.update(2, new double[] {1.0}); + sketch2.update(3, new double[] {1.0}); + sketch2.update(3, new double[] {1.0}); + sketch2.update(3, new double[] {1.0}); + + final ArrayOfDoublesUnion directUnion = + new ArrayOfDoublesSetOperationBuilder().buildUnion(MemorySegment.ofArray(new byte[1000000])); + directUnion.union(sketch1); + + final ArrayOfDoublesUnion heapUnion = ArrayOfDoublesUnion.heapify(MemorySegment.ofArray(directUnion.toByteArray())); + heapUnion.union(sketch2); + + final ArrayOfDoublesCompactSketch result = heapUnion.getResult(); + Assert.assertFalse(result.isEmpty()); + Assert.assertEquals(result.getEstimate(), 3.0); + final double[][] values = 
result.getValues(); + Assert.assertEquals(values.length, 3); + Assert.assertEquals(values[0][0], 3.0); + Assert.assertEquals(values[1][0], 3.0); + Assert.assertEquals(values[2][0], 3.0); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void incompatibleSeeds() { + final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(1).build(); + final ArrayOfDoublesUnion union = new ArrayOfDoublesSetOperationBuilder().setSeed(2).buildUnion(); + union.union(sketch); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void incompatibleInputSketchFewerValues() { + final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build(); + final ArrayOfDoublesUnion union = new ArrayOfDoublesSetOperationBuilder().setNumberOfValues(2).buildUnion(); + union.union(sketch); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void incompatibleInputSketchMoreValues() { + final ArrayOfDoublesUpdatableSketch sketch = + new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build(); + final ArrayOfDoublesUnion union = new ArrayOfDoublesSetOperationBuilder().buildUnion(); + union.union(sketch); + } + + @Test + public void directDruidUsageOneSketch() { + final MemorySegment seg = MemorySegment.ofArray(new byte[1_000_000]); + new ArrayOfDoublesSetOperationBuilder().buildUnion(seg); // just set up MemorySegment to wrap later + + final int n = 100_000; // estimation mode + final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build(); + for (int i = 0; i < n; i++) { + sketch.update(i, new double[] {1.0}); + } + sketch.trim(); // pretend this is a result from a union + + // as Druid wraps MemorySegment + MemorySegment seg2 = MemorySegment.ofArray(new byte[1_000_000]); + ArrayOfDoublesCompactSketch dcsk = sketch.compact(seg2); + ArrayOfDoublesUnion union = ArrayOfDoublesSketches.wrapUnion(seg); //empty union + 
union.union(dcsk); + //ArrayOfDoublesSketches.wrapUnion(seg).union(sketch.compact(MemorySegment.ofArray(new byte[1_000_000]))); + + final ArrayOfDoublesSketch result = ArrayOfDoublesUnion.wrap(seg).getResult(); + /* NOTE(review): the expected-vs-found remark trailing the next assertion looks like a leftover from an earlier failing run - confirm whether it still applies */ Assert.assertEquals(result.getEstimate(), sketch.getEstimate());//expected [98045.91060164096] but found [4096.0] + Assert.assertEquals(result.isEstimationMode(), sketch.isEstimationMode()); + } + + @Test + public void directDruidUsageTwoSketches() { + final MemorySegment seg = MemorySegment.ofArray(new byte[1000000]); + new ArrayOfDoublesSetOperationBuilder().buildUnion(seg); // just set up MemorySegment to wrap later + + int key = 0; + + final int n1 = 100000; // estimation mode + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + for (int i = 0; i < n1; i++) { + sketch1.update(key++, new double[] {1.0}); + } + // as Druid wraps MemorySegment + ArrayOfDoublesSketches.wrapUnion(seg).union(sketch1.compact(MemorySegment.ofArray(new byte[1000000]))); + + final int n2 = 1000000; // estimation mode + final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + for (int i = 0; i < n2; i++) { + sketch2.update(key++, new double[] {1.0}); + } + // as Druid wraps MemorySegment + ArrayOfDoublesSketches.wrapUnion(seg).union(sketch2.compact(MemorySegment.ofArray(new byte[1000000]))); + + // build one sketch that must be the same as union + key = 0; // reset to have the same keys + final int n = n1 + n2; + final ArrayOfDoublesUpdatableSketch expected = new ArrayOfDoublesUpdatableSketchBuilder().build(); + for (int i = 0; i < n; i++) { + expected.update(key++, new double[] {1.0}); + } + expected.trim(); // union result is trimmed, so we need to trim this sketch for valid comparison + + final ArrayOfDoublesSketch result = ArrayOfDoublesUnion.wrap(seg).getResult(); + Assert.assertEquals(result.getEstimate(), expected.getEstimate()); + Assert.assertEquals(result.isEstimationMode(), 
expected.isEstimationMode()); + Assert.assertEquals(result.getUpperBound(1), expected.getUpperBound(1)); + Assert.assertEquals(result.getLowerBound(1), expected.getLowerBound(1)); + Assert.assertEquals(result.getRetainedEntries(), expected.getRetainedEntries()); + Assert.assertEquals(result.getNumValues(), expected.getNumValues()); + } + +} diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java new file mode 100644 index 000000000..92de38bf1 --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java @@ -0,0 +1,581 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.tuple2.arrayofdoubles; + +import static org.apache.datasketches.common.Util.zeroPad; +import static org.apache.datasketches.hash.MurmurHash3.hash; +import static org.testng.Assert.assertTrue; + +import org.apache.datasketches.thetacommon.ThetaUtil; +import org.testng.annotations.Test; + +public class CornerCaseArrayOfDoublesSetOperationsTest { + //Stateful Intersection with intersect(sketch A, combiner), followed by getResult() + //Essentially Stateless AnotB with update(Sketch A, Sketch B), followed by getResult() + //Stateful Union with union(Sketch A), followed by getResult() + + /* Hashes and Hash Equivalents + * Top8bits Hex Decimal + * MAX: 01111111, 7fffffffffffffff, 9223372036854775807 + * GT_MIDP: 01011101, 5d6906dac1b340ba, 6730918654704304314 3L + * MIDP_THETALONG:01000000, 4000000000000000, 4611686018427387904 + * GT_LOWP: 00010000, 10bc98fb132116fe, 1206007004353599230 6L + * LOWP_THETALONG:00010000, 1000000000000000, 1152921504606846976 + * LT_LOWP: 00001000, 83ddbc9e12ede40, 593872385995628096 4L + */ + + + private static final float MIDP_FLT = 0.5f; + private static final float LOWP_FLT = 0.125f; + private static final long GT_MIDP_KEY = 3L; + private static final long GT_LOWP_KEY = 6L; + private static final long LT_LOWP_KEY = 4L; + + private static final long MAX_LONG = Long.MAX_VALUE; + + private static final long HASH_GT_MIDP = getLongHash(GT_MIDP_KEY); + private static final long MIDP_THETALONG = (long)(MAX_LONG * MIDP_FLT); + + private static final long HASH_GT_LOWP = getLongHash(GT_LOWP_KEY); + private static final long LOWP_THETALONG = (long)(MAX_LONG * LOWP_FLT); + private static final long HASH_LT_LOWP = getLongHash(LT_LOWP_KEY); + + private static final String LS = System.getProperty("line.separator"); + + private enum SkType { + EMPTY, // { 1.0, 0, T} Bin: 101 Oct: 05 + EXACT, // { 1.0, >0, F} Bin: 110 Oct: 06, specify only value + ESTIMATION, // {<1.0, >0, F} Bin: 010 Oct: 02, specify only value + 
DEGENERATE // {<1.0, 0, F} Bin: 000 Oct: 0, specify p, value + } + + private static class MinCombiner implements ArrayOfDoublesCombiner { + MinCombiner() {} + + @Override + public double[] combine(double[] a, double[] b) { + return new double[] { Math.min(a[0], b[0]) }; + } + } + + private static MinCombiner minCombiner = new MinCombiner(); + + //================================= + + @Test + public void emptyEmpty() { + ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.EMPTY, 0, 0); + ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.EMPTY, 0, 0); + final double expectedIntersectTheta = 1.0; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = true; + final double expectedAnotbTheta = 1.0; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = true; + final double expectedUnionTheta = 1.0; + final int expectedUnionCount = 0; + final boolean expectedUnionEmpty = true; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void emptyExact() { + ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.EMPTY, 0, 0); + ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.EXACT, 0, GT_MIDP_KEY); + final double expectedIntersectTheta = 1.0; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = true; + final double expectedAnotbTheta = 1.0; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = true; + final double expectedUnionTheta = 1.0; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void 
emptyDegenerate() { + ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.EMPTY, 0, 0); + ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_KEY); + final double expectedIntersectTheta = 1.0; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = true; + final double expectedAnotbTheta = 1.0; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = true; + final double expectedUnionTheta = LOWP_FLT; + final int expectedUnionCount = 0; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void emptyEstimation() { + ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.EMPTY, 0, 0); + ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_KEY); + final double expectedIntersectTheta = 1.0; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = true; + final double expectedAnotbTheta = 1.0; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = true; + final double expectedUnionTheta = LOWP_FLT; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + //================================= + + @Test + public void exactEmpty() { + ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.EXACT, 0, GT_MIDP_KEY); + ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.EMPTY, 0, 0); + final double expectedIntersectTheta = 1.0; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = true; + final double 
expectedAnotbTheta = 1.0; + final int expectedAnotbCount = 1; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = 1.0; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void exactExact() { + ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.EXACT, 0, GT_MIDP_KEY); + ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.EXACT, 0, GT_MIDP_KEY); + final double expectedIntersectTheta = 1.0; + final int expectedIntersectCount = 1; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = 1.0; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = true; + final double expectedUnionTheta = 1.0; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void exactDegenerate() { + ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.EXACT, 0, LT_LOWP_KEY); + ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_KEY); //entries = 0 + final double expectedIntersectTheta = LOWP_FLT; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = LOWP_FLT; + final int expectedAnotbCount = 1; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP_FLT; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + 
expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void exactEstimation() { + ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.EXACT, 0, LT_LOWP_KEY); + ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_KEY); + final double expectedIntersectTheta = LOWP_FLT; + final int expectedIntersectCount = 1; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = LOWP_FLT; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP_FLT; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + //================================= + + @Test + public void estimationEmpty() { + ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_KEY); + ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.EMPTY, 0, 0); + final double expectedIntersectTheta = 1.0; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = true; + final double expectedAnotbTheta = LOWP_FLT; + final int expectedAnotbCount = 1; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP_FLT; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void estimationExact() { + ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_KEY); + 
ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.EXACT, 0, LT_LOWP_KEY); + final double expectedIntersectTheta = LOWP_FLT; + final int expectedIntersectCount = 1; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = LOWP_FLT; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP_FLT; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void estimationDegenerate() { + ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.ESTIMATION, MIDP_FLT, LT_LOWP_KEY); + ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_KEY); + final double expectedIntersectTheta = LOWP_FLT; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = LOWP_FLT; + final int expectedAnotbCount = 1; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP_FLT; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void estimationEstimation() { + ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.ESTIMATION, MIDP_FLT, LT_LOWP_KEY); + ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_KEY); + final double expectedIntersectTheta = LOWP_FLT; + final int expectedIntersectCount = 1; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = LOWP_FLT; + final int 
expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP_FLT; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + //================================= + + @Test + public void degenerateEmpty() { + ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_KEY); //entries = 0 + ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.EMPTY, 0, 0); + final double expectedIntersectTheta = 1.0; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = true; + final double expectedAnotbTheta = LOWP_FLT; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP_FLT; + final int expectedUnionCount = 0; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void degenerateExact() { + ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_KEY); //entries = 0 + ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.EXACT, 0, LT_LOWP_KEY); + final double expectedIntersectTheta = LOWP_FLT; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = LOWP_FLT; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP_FLT; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, 
expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void degenerateDegenerate() { + ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.DEGENERATE, MIDP_FLT, GT_MIDP_KEY); //entries = 0 + ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_KEY); + final double expectedIntersectTheta = LOWP_FLT; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = LOWP_FLT; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP_FLT; + final int expectedUnionCount = 0; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + @Test + public void degenerateEstimation() { + ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.DEGENERATE, MIDP_FLT, GT_MIDP_KEY); //entries = 0 + ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_KEY); + final double expectedIntersectTheta = LOWP_FLT; + final int expectedIntersectCount = 0; + final boolean expectedIntersectEmpty = false; + final double expectedAnotbTheta = LOWP_FLT; + final int expectedAnotbCount = 0; + final boolean expectedAnotbEmpty = false; + final double expectedUnionTheta = LOWP_FLT; + final int expectedUnionCount = 1; + final boolean expectedUnionEmpty = false; + + checks(thetaA, thetaB, + expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, + expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, + expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + //================================= + //================================= + 
+ private static void checks( + ArrayOfDoublesUpdatableSketch tupleA, + ArrayOfDoublesUpdatableSketch tupleB, + double expectedIntersectTheta, + int expectedIntersectCount, + boolean expectedIntersectEmpty, + double expectedAnotbTheta, + int expectedAnotbCount, + boolean expectedAnotbEmpty, + double expectedUnionTheta, + int expectedUnionCount, + boolean expectedUnionEmpty) { + ArrayOfDoublesCompactSketch csk; + ArrayOfDoublesIntersection inter = new ArrayOfDoublesSetOperationBuilder().buildIntersection(); + ArrayOfDoublesAnotB anotb = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); + ArrayOfDoublesUnion union = new ArrayOfDoublesSetOperationBuilder().buildUnion(); + + //Intersection Tuple, Tuple Updatable Stateful + inter.intersect(tupleA, minCombiner); + inter.intersect(tupleB, minCombiner); + csk = inter.getResult(); + inter.reset(); + checkResult("Intersect Stateless Theta, Theta", csk, expectedIntersectTheta, expectedIntersectCount, + expectedIntersectEmpty); + //Intersection Tuple, Tuple Compact Stateful + inter.intersect(tupleA.compact(), minCombiner); + inter.intersect(tupleB.compact(), minCombiner); + csk = inter.getResult(); + inter.reset(); + checkResult("Intersect Stateless Theta, Theta", csk, expectedIntersectTheta, expectedIntersectCount, + expectedIntersectEmpty); + + //AnotB Stateless Tuple, Tuple Updatable + anotb.update(tupleA, tupleB); + csk = anotb.getResult(); + checkResult("AnotB Stateless Theta, Theta", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); + //AnotB Stateless Tuple, Tuple Compact + anotb.update(tupleA, tupleB); + csk = anotb.getResult(); + checkResult("AnotB Stateless Theta, Theta", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); + + //Union Stateful Tuple, Tuple Updatable + union.union(tupleA); + union.union(tupleB); + csk = union.getResult(); + union.reset(); + checkResult("Union Stateless Theta, Theta", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + //Union Stateful 
Tuple, Tuple Compact + union.union(tupleA.compact()); + union.union(tupleB.compact()); + csk = union.getResult(); + union.reset(); + checkResult("Union Stateless Theta, Theta", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); + } + + private static void checkResult( + String comment, + ArrayOfDoublesCompactSketch csk, + double expectedTheta, + int expectedEntries, + boolean expectedEmpty) { + double actualTheta = csk.getTheta(); + int actualEntries = csk.getRetainedEntries(); + boolean actualEmpty = csk.isEmpty(); + + boolean thetaOk = actualTheta == expectedTheta; + boolean entriesOk = actualEntries == expectedEntries; + boolean emptyOk = actualEmpty == expectedEmpty; + if (!thetaOk || !entriesOk || !emptyOk) { + StringBuilder sb = new StringBuilder(); + sb.append(comment + ": "); + if (!thetaOk) { sb.append("Theta: expected " + expectedTheta + ", got " + actualTheta + "; "); } + if (!entriesOk) { sb.append("Entries: expected " + expectedEntries + ", got " + actualEntries + "; "); } + if (!emptyOk) { sb.append("Empty: expected " + expectedEmpty + ", got " + actualEmpty + "."); } + throw new IllegalArgumentException(sb.toString()); + } + } + + private static ArrayOfDoublesUpdatableSketch getSketch( + SkType skType, + float p, + long updateKey) { + + ArrayOfDoublesUpdatableSketchBuilder bldr = new ArrayOfDoublesUpdatableSketchBuilder(); + bldr.setNominalEntries(16); + //Assume defaults: 1 double value, resize factor, seed + double[] summaryVal = {1.0}; + + ArrayOfDoublesUpdatableSketch sk; + switch(skType) { + case EMPTY: { // { 1.0, 0, T} p and value are not used + sk = bldr.build(); + break; + } + case EXACT: { // { 1.0, >0, F} p is not used + sk = bldr.build(); + sk.update(updateKey, summaryVal); + break; + } + case ESTIMATION: { // {<1.0, >0, F} + checkValidUpdate(p, updateKey); + bldr.setSamplingProbability(p); + sk = bldr.build(); + sk.update(updateKey, summaryVal); + break; + } + case DEGENERATE: { // {<1.0, 0, F} + checkInvalidUpdate(p, 
updateKey); + bldr.setSamplingProbability(p); + sk = bldr.build(); + sk.update(updateKey, summaryVal); // > theta + break; + } + + default: { return null; } // should not happen + } + return sk; + } + + private static void checkValidUpdate(float p, long updateKey) { + assertTrue( getLongHash(updateKey) < (long) (p * Long.MAX_VALUE)); + } + + private static void checkInvalidUpdate(float p, long updateKey) { + assertTrue( getLongHash(updateKey) > (long) (p * Long.MAX_VALUE)); + } + + //******************************************* + //Helper functions for setting the hash values + + //@Test + public void printTable() { + println(" Top8bits Hex Decimal"); + printf("MAX: %8s, %16x, %19d" + LS, getTop8(MAX_LONG), MAX_LONG, MAX_LONG); + printf("GT_MIDP: %8s, %16x, %19d" + LS, getTop8(HASH_GT_MIDP), HASH_GT_MIDP, HASH_GT_MIDP); + printf("MIDP_THETALONG:%8s, %16x, %19d" + LS, getTop8(MIDP_THETALONG), MIDP_THETALONG, MIDP_THETALONG); + printf("GT_LOWP: %8s, %16x, %19d" + LS, getTop8(HASH_GT_LOWP), HASH_GT_LOWP, HASH_GT_LOWP); + printf("LOWP_THETALONG:%8s, %16x, %19d" + LS, getTop8(LOWP_THETALONG), LOWP_THETALONG, LOWP_THETALONG); + printf("LT_LOWP: %8s, %16x, %19d" + LS, getTop8(HASH_LT_LOWP), HASH_LT_LOWP, HASH_LT_LOWP); + println(LS +"Doubles"); + + println(LS + "Longs"); + for (long v = 1L; v < 10; v++) { + long hash = (hash(v, ThetaUtil.DEFAULT_UPDATE_SEED)[0]) >>> 1; + printLong(v, hash); + } + } + + static long getLongHash(long v) { + return (hash(v, ThetaUtil.DEFAULT_UPDATE_SEED)[0]) >>> 1; + } + + static void printLong(long v, long hash) { + System.out.printf(" %8d, %8s, %16x, %19d" + LS,v, getTop8(hash), hash, hash); + } + + static String getTop8(final long v) { + int i = (int) (v >>> 56); + String s = Integer.toBinaryString(i); + return zeroPad(s, 8); + } + + private static void println(Object o) { + System.out.println(o.toString()); + } + + private static void printf(String fmt, Object ...args) { + System.out.printf(fmt, args); + } +} + diff --git 
a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java new file mode 100644 index 000000000..6c39dd67a --- /dev/null +++ b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.apache.datasketches.tuple2.Util;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+public class DirectArrayOfDoublesCompactSketchTest { // Tests compact sketches produced by compact(MemorySegment).
+
+  @Test // compacting a sketch that received no updates
+  public void emptyFromQuickSelectSketch() {
+    ArrayOfDoublesUpdatableSketch us =
+        new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000]));
+    ArrayOfDoublesCompactSketch sketch = us.compact(MemorySegment.ofArray(new byte[1000000])); // separate segment for the compact form
+    Assert.assertTrue(sketch.isEmpty());
+    Assert.assertFalse(sketch.isEstimationMode());
+    Assert.assertEquals(sketch.getEstimate(), 0.0);
+    Assert.assertEquals(sketch.getLowerBound(1), 0.0);
+    Assert.assertEquals(sketch.getUpperBound(1), 0.0);
+    Assert.assertEquals(sketch.getRetainedEntries(), 0);
+    Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE);
+    Assert.assertEquals(sketch.getTheta(), 1.0);
+    Assert.assertNotNull(sketch.getValues()); // expects an empty array rather than null
+    Assert.assertEquals(sketch.getValues().length, 0);
+    ArrayOfDoublesSketchIterator it = sketch.iterator();
+    while (it.next()) { // any successful next() means the sketch is not empty
+      Assert.fail("empty sketch expected");
+    }
+  }
+
+  @Test // three distinct keys, each updated twice, compacted without entering estimation mode
+  public void exactModeFromQuickSelectSketch() {
+    ArrayOfDoublesUpdatableSketch us =
+        new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000]));
+    us.update(1, new double[] {1.0});
+    us.update(2, new double[] {1.0});
+    us.update(3, new double[] {1.0});
+    us.update(1, new double[] {1.0}); // second round repeats the same three keys
+    us.update(2, new double[] {1.0});
+    us.update(3, new double[] {1.0});
+    ArrayOfDoublesCompactSketch sketch = us.compact(MemorySegment.ofArray(new byte[1000000]));
+    Assert.assertFalse(sketch.isEmpty());
+    Assert.assertFalse(sketch.isEstimationMode());
+    Assert.assertEquals(sketch.getEstimate(), 3.0);
+    Assert.assertEquals(sketch.getLowerBound(1), 3.0);
+    Assert.assertEquals(sketch.getUpperBound(1), 3.0);
+    Assert.assertEquals(sketch.getRetainedEntries(), 3);
+    Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE);
+    Assert.assertEquals(sketch.getTheta(), 1.0);
+    Assert.assertEquals(sketch.getSeedHash(), Util.computeSeedHash(ThetaUtil.DEFAULT_UPDATE_SEED));
+    double[][] values = sketch.getValues();
+    Assert.assertEquals(values.length, 3);
+    for (double[] array: values) {
+      Assert.assertEquals(array[0], 2.0); // the two 1.0 updates per key are expected to add up to 2.0
+    }
+  }
+
+  @Test // round trip of a small compact sketch through toByteArray() and wrapSketch()
+  public void serializeDeserializeSmallExact() {
+    ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000]));
+    us.update("a", new double[] {1.0});
+    us.update("b", new double[] {1.0});
+    us.update("c", new double[] {1.0});
+    ArrayOfDoublesCompactSketch sketch1 = us.compact(MemorySegment.ofArray(new byte[1000000]));
+    ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketches.wrapSketch(MemorySegment.ofArray(sketch1.toByteArray()));
+    Assert.assertFalse(sketch2.isEmpty());
+    Assert.assertFalse(sketch2.isEstimationMode());
+    Assert.assertEquals(sketch2.getEstimate(), 3.0);
+    Assert.assertEquals(sketch2.getLowerBound(1), 3.0);
+    Assert.assertEquals(sketch2.getUpperBound(1), 3.0);
+    Assert.assertEquals(sketch2.getRetainedEntries(), 3);
+    Assert.assertEquals(sketch2.getThetaLong(), Long.MAX_VALUE);
+    Assert.assertEquals(sketch2.getTheta(), 1.0);
+    double[][] values = sketch2.getValues();
+    Assert.assertEquals(values.length, 3);
+    for (double[] array: values) {
+      Assert.assertEquals(array[0], 1.0); // each key was updated once
+    }
+  }
+
+  @Test // round trip after 8192 distinct keys; the wrapped copy must report the same estimate and theta
+  public void serializeDeserializeEstimation() {
+    ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000]));
+    for (int i = 0; i < 8192; i++) {
+      us.update(i, new double[] {1.0});
+    }
+    ArrayOfDoublesCompactSketch sketch1 = us.compact(MemorySegment.ofArray(new byte[1000000]));
+    ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketches.wrapSketch(MemorySegment.ofArray(sketch1.toByteArray()));
+    Assert.assertFalse(sketch2.isEmpty());
+    Assert.assertTrue(sketch2.isEstimationMode());
+    Assert.assertEquals(sketch2.getEstimate(), sketch1.getEstimate());
+    Assert.assertEquals(sketch2.getThetaLong(), sketch1.getThetaLong());
+  }
+
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void deserializeWithWrongSeed() { // the sketch is built with builder defaults but wrapped with an explicit second argument
+    ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000]));
+    for (int i = 0; i < 8192; i++) {
+      us.update(i, new double[] {1.0});
+    }
+    ArrayOfDoublesCompactSketch sketch1 = us.compact(MemorySegment.ofArray(new byte[1000000]));
+    ArrayOfDoublesSketches.wrapSketch(MemorySegment.ofArray(sketch1.toByteArray()), 123); // presumably 123 is a seed that differs from the default; expected to throw
+  }
+
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void fromQuickSelectSketchNotEnoughMemory() { // compacting one entry into a 39-byte segment is expected to throw
+    ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000]));
+    us.update(1, new double[] {1.0});
+    us.compact(MemorySegment.ofArray(new byte[39]));
+  }
+
+}
diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchTest.java
new file mode 100644
index 000000000..0a4e226aa
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchTest.java
@@ -0,0 +1,281 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import java.lang.foreign.MemorySegment;
+import java.nio.ByteBuffer;
+
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+public class DirectArrayOfDoublesQuickSelectSketchTest { // Tests updatable sketches built over a caller-supplied MemorySegment.
+  @Test // a freshly built sketch reports empty, exact mode, zero estimate/bounds and theta 1.0
+  public void isEmpty() {
+    final ArrayOfDoublesUpdatableSketch sketch =
+        new ArrayOfDoublesUpdatableSketchBuilder().
+        build(MemorySegment.ofArray(new byte[1000000]));
+    Assert.assertTrue(sketch.isEmpty());
+    Assert.assertFalse(sketch.isEstimationMode());
+    Assert.assertEquals(sketch.getEstimate(), 0.0);
+    Assert.assertEquals(sketch.getUpperBound(1), 0.0);
+    Assert.assertEquals(sketch.getLowerBound(1), 0.0);
+    Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE);
+    Assert.assertEquals(sketch.getTheta(), 1.0);
+    final ArrayOfDoublesSketchIterator it = sketch.iterator();
+    while (it.next()) { // any successful next() means the sketch is not empty
+      Assert.fail("empty sketch expected");
+    }
+  }
+
+  @Test // with sampling probability 0.1 and no updates, theta is still expected to read 1.0
+  public void isEmptyWithSampling() {
+    final float samplingProbability = 0.1f;
+    final ArrayOfDoublesUpdatableSketch sketch =
+        new ArrayOfDoublesUpdatableSketchBuilder().
+        setSamplingProbability(samplingProbability).
+        build(MemorySegment.ofArray(new byte[1000000]));
+    Assert.assertTrue(sketch.isEmpty());
+    Assert.assertTrue(((DirectArrayOfDoublesQuickSelectSketch)sketch).isInSamplingMode()); // the cast also checks the concrete type returned by build(MemorySegment)
+    Assert.assertFalse(sketch.isEstimationMode());
+    Assert.assertEquals(sketch.getEstimate(), 0.0);
+    Assert.assertEquals(sketch.getUpperBound(1), 0.0);
+    Assert.assertEquals(sketch.getLowerBound(1), 0.0);
+    Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE);
+    Assert.assertEquals(sketch.getTheta(), 1.0);
+  }
+
+  @Test
+  // very low probability of being sampled
+  // once an input value is chosen so that it is rejected, the test will continue to work
+  // unless the hash function or the seed is changed (NOTE(review): original wording was "are the same"; presumably a slip)
+  public void sampling() {
+    final float samplingProbability = 0.001f;
+    final ArrayOfDoublesUpdatableSketch sketch =
+        new ArrayOfDoublesUpdatableSketchBuilder().
+        setSamplingProbability(samplingProbability).
+        build(MemorySegment.ofArray(new byte[1000000]));
+    sketch.update("a", new double[] {1.0});
+    Assert.assertFalse(sketch.isEmpty()); // an update was offered, so the sketch is no longer empty
+    Assert.assertTrue(sketch.isEstimationMode());
+    Assert.assertEquals(sketch.getEstimate(), 0.0); // the estimate stays 0, i.e. "a" is expected to be rejected
+    Assert.assertTrue(sketch.getUpperBound(1) > 0.0);
+    Assert.assertEquals(sketch.getLowerBound(1), 0.0, 0.0000001);
+    Assert.assertEquals(
+        (float)(sketch.getThetaLong() / (double) Long.MAX_VALUE), samplingProbability); // compared at float precision
+    Assert.assertEquals((float)sketch.getTheta(), samplingProbability);
+  }
+
+  @Test // 4096 distinct keys are expected to stay in exact mode; reset() must return the sketch to the empty state
+  public void exactMode() {
+    final ArrayOfDoublesUpdatableSketch sketch =
+        new ArrayOfDoublesUpdatableSketchBuilder().
+        build(MemorySegment.ofArray(new byte[1000000]));
+    Assert.assertTrue(sketch.isEmpty());
+    Assert.assertEquals(sketch.getEstimate(), 0.0);
+    for (int i = 0; i < 4096; i++) {
+      sketch.update(i, new double[] {1.0});
+    }
+    Assert.assertFalse(sketch.isEmpty());
+    Assert.assertFalse(sketch.isEstimationMode());
+    Assert.assertEquals(sketch.getEstimate(), 4096.0);
+    Assert.assertEquals(sketch.getUpperBound(1), 4096.0);
+    Assert.assertEquals(sketch.getLowerBound(1), 4096.0);
+    Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE);
+    Assert.assertEquals(sketch.getTheta(), 1.0);
+
+    final double[][] values = sketch.getValues();
+    Assert.assertEquals(values.length, 4096);
+    int count = 0; // number of non-null rows returned by getValues()
+    for (int i = 0; i < values.length; i++) {
+      if (values[i] != null) {
+        count++;
+      }
+    }
+    Assert.assertEquals(count, 4096);
+    for (int i = 0; i < 4096; i++) {
+      Assert.assertEquals(values[i][0], 1.0); // each key was updated once with 1.0
+    }
+
+    sketch.reset();
+    Assert.assertTrue(sketch.isEmpty());
+    Assert.assertFalse(sketch.isEstimationMode());
+    Assert.assertEquals(sketch.getEstimate(), 0.0);
+    Assert.assertEquals(sketch.getUpperBound(1), 0.0);
+    Assert.assertEquals(sketch.getLowerBound(1), 0.0);
+    Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE);
+    Assert.assertEquals(sketch.getTheta(), 1.0);
+    final ArrayOfDoublesSketchIterator it = sketch.iterator();
+    while (it.next()) {
+      Assert.fail("empty sketch expected");
+    }
+  }
+
+  @Test
+  // The moment of going into the estimation mode is, to some extent, an implementation detail
+  // Here we assume that presenting as many unique values as twice the nominal size of the sketch
+  // will result in estimation mode
+  public void estimationMode() {
+    final ArrayOfDoublesUpdatableSketch sketch =
+        new ArrayOfDoublesUpdatableSketchBuilder().
+        build(MemorySegment.ofArray(new byte[4096 * 2 * 16 + 32])); // presumably the exact footprint for the default configuration; TODO confirm
+    Assert.assertEquals(sketch.getEstimate(), 0.0);
+    for (int i = 1; i <= 8192; i++) {
+      sketch.update(i, new double[] {1.0});
+    }
+    Assert.assertTrue(sketch.isEstimationMode());
+    Assert.assertEquals(sketch.getEstimate(), 8192, 8192 * 0.01); // within 1% of the true count
+    Assert.assertTrue(sketch.getEstimate() >= sketch.getLowerBound(1));
+    Assert.assertTrue(sketch.getEstimate() < sketch.getUpperBound(1));
+
+    final double[][] values = sketch.getValues();
+    Assert.assertTrue(values.length >= 4096);
+    int count = 0; // number of non-null rows; must equal values.length
+    for (final double[] array: values) {
+      if (array != null) {
+        count++;
+        Assert.assertEquals(array.length, 1);
+        Assert.assertEquals(array[0], 1.0);
+      }
+    }
+    Assert.assertEquals(count, values.length);
+
+    sketch.reset();
+    Assert.assertTrue(sketch.isEmpty());
+    Assert.assertFalse(sketch.isEstimationMode());
+    Assert.assertEquals(sketch.getEstimate(), 0.0);
+    Assert.assertEquals(sketch.getUpperBound(1), 0.0);
+    Assert.assertEquals(sketch.getLowerBound(1), 0.0);
+    Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE);
+    Assert.assertEquals(sketch.getTheta(), 1.0);
+    Assert.assertEquals(sketch.getSamplingProbability(), 1.0F);
+    final ArrayOfDoublesSketchIterator it = sketch.iterator();
+    while (it.next()) {
+      Assert.fail("empty sketch expected");
+    }
+  }
+
+  @Test // exercises the update() overloads; eight updates are expected to yield seven distinct keys
+  public void updatesOfAllKeyTypes() {
+    final ArrayOfDoublesUpdatableSketch sketch =
+        new ArrayOfDoublesUpdatableSketchBuilder().
+        build(MemorySegment.ofArray(new byte[1000000]));
+    sketch.update(1L, new double[] {1.0});
+    sketch.update(2.0, new double[] {1.0});
+    final byte[] bytes = new byte[] {3, 4};
+    sketch.update(bytes, new double[] {1.0});
+    sketch.update(ByteBuffer.wrap(bytes), new double[] {1.0}); // same as previous
+    sketch.update(ByteBuffer.wrap(bytes, 0, 1), new double[] {1.0}); // slice
+    sketch.update(new int[] {4}, new double[] {1.0});
+    sketch.update(new long[] {5L}, new double[] {1.0});
+    sketch.update("a", new double[] {1.0});
+    Assert.assertEquals(sketch.getEstimate(), 7.0);
+  }
+
+  @Test // repeated updates of one key are expected to accumulate: 1.0, then 1.7, then 2.5
+  public void doubleSum() {
+    final ArrayOfDoublesUpdatableSketch sketch =
+        new ArrayOfDoublesUpdatableSketchBuilder().
+        build(MemorySegment.ofArray(new byte[1000000]));
+    sketch.update(1, new double[] {1.0});
+    Assert.assertEquals(sketch.getRetainedEntries(), 1);
+    Assert.assertEquals(sketch.getValues()[0][0], 1.0);
+    sketch.update(1, new double[] {0.7});
+    Assert.assertEquals(sketch.getRetainedEntries(), 1);
+    Assert.assertEquals(sketch.getValues()[0][0], 1.7);
+    sketch.update(1, new double[] {0.8});
+    Assert.assertEquals(sketch.getRetainedEntries(), 1);
+    Assert.assertEquals(sketch.getValues()[0][0], 2.5);
+  }
+
+  @Test // a wrapped copy of the serialized bytes must still accept updates
+  public void serializeDeserializeExact() throws Exception {
+    final ArrayOfDoublesUpdatableSketch sketch1 =
+        new ArrayOfDoublesUpdatableSketchBuilder().
+        build(MemorySegment.ofArray(new byte[1000000]));
+    sketch1.update(1, new double[] {1.0});
+
+    final ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.wrap(MemorySegment.ofArray(sketch1.toByteArray()));
+
+    Assert.assertEquals(sketch2.getEstimate(), 1.0);
+    final double[][] values = sketch2.getValues();
+    Assert.assertEquals(values.length, 1);
+    Assert.assertEquals(values[0][0], 1.0);
+
+    // the same key, so still one unique
+    sketch2.update(1, new double[] {1.0});
+    Assert.assertEquals(sketch2.getEstimate(), 1.0);
+
+    sketch2.update(2, new double[] {1.0});
+    Assert.assertEquals(sketch2.getEstimate(), 2.0);
+  }
+
+  @Test // ResizeFactor.X1; every key is presented ten times before serialization
+  public void serializeDeserializeEstimationNoResize() throws Exception {
+    final ArrayOfDoublesUpdatableSketch sketch1 =
+        new ArrayOfDoublesUpdatableSketchBuilder().setResizeFactor(ResizeFactor.X1).
+        build(MemorySegment.ofArray(new byte[1000000]));
+    for (int j = 0; j < 10; j++) {
+      for (int i = 0; i < 8192; i++) {
+        sketch1.update(i, new double[] {1.0});
+      }
+    }
+    final byte[] byteArray = sketch1.toByteArray();
+
+    //for visual testing
+    //TestUtil.writeBytesToFile(byteArray, "ArrayOfDoublesQuickSelectSketch4K.data");
+
+    final ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketch.wrap(MemorySegment.ofArray(byteArray));
+    Assert.assertTrue(sketch2.isEstimationMode());
+    Assert.assertEquals(sketch2.getEstimate(), 8192, 8192 * 0.99); // NOTE(review): 99% tolerance is far looser than the 1% used in estimationMode(); confirm it is intended
+    Assert.assertEquals(sketch1.getTheta(), sketch2.getTheta());
+    final double[][] values = sketch2.getValues();
+    Assert.assertTrue(values.length >= 4096);
+    for (final double[] array: values) {
+      Assert.assertEquals(array[0], 10.0); // ten passes of 1.0 per retained key
+    }
+  }
+
+  @Test // sampling probability 0.5 with 16384 keys: about half retained, estimate within 1% of the truth
+  public void serializeDeserializeSampling() {
+    final int sketchSize = 16384;
+    final int numberOfUniques = sketchSize;
+    final ArrayOfDoublesUpdatableSketch sketch1 =
+        new ArrayOfDoublesUpdatableSketchBuilder().
+        setNominalEntries(sketchSize).setSamplingProbability(0.5f).
+        build(MemorySegment.ofArray(new byte[1000000]));
+    for (int i = 0; i < numberOfUniques; i++) {
+      sketch1.update(i, new double[] {1.0});
+    }
+    final ArrayOfDoublesSketch sketch2 =
+        ArrayOfDoublesSketch.wrap(MemorySegment.ofArray(sketch1.toByteArray()));
+    Assert.assertTrue(sketch2.isEstimationMode());
+    Assert.assertEquals(sketch2.getEstimate() / numberOfUniques, 1.0, 0.01);
+    Assert.assertEquals(sketch2.getRetainedEntries() / (double) numberOfUniques, 0.5, 0.01);
+    Assert.assertEquals(sketch1.getTheta(), sketch2.getTheta());
+  }
+
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void notEnoughMemory() { // building a 32-nominal-entry sketch in a 1055-byte segment is expected to throw
+    new ArrayOfDoublesUpdatableSketchBuilder().
+    setNominalEntries(32).build(MemorySegment.ofArray(new byte[1055]));
+  }
+}
diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketchTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketchTest.java
new file mode 100644
index 000000000..77d5a5fe0
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketchTest.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License. 
+ */
+
+package org.apache.datasketches.tuple2.arrayofdoubles;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+public class HeapArrayOfDoublesCompactSketchTest { // Tests compact sketches produced by the no-argument build() and compact().
+
+  @Test // compacting a sketch that received no updates
+  public void emptyFromQuickSelectSketch() {
+    ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build();
+    ArrayOfDoublesCompactSketch sketch = us.compact();
+    Assert.assertTrue(sketch.isEmpty());
+    Assert.assertFalse(sketch.isEstimationMode());
+    Assert.assertEquals(sketch.getEstimate(), 0.0);
+    Assert.assertEquals(sketch.getLowerBound(1), 0.0);
+    Assert.assertEquals(sketch.getUpperBound(1), 0.0);
+    Assert.assertEquals(sketch.getRetainedEntries(), 0);
+    Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE);
+    Assert.assertEquals(sketch.getTheta(), 1.0);
+    Assert.assertNotNull(sketch.getValues()); // expects an empty array rather than null
+    Assert.assertEquals(sketch.getValues().length, 0);
+    ArrayOfDoublesSketchIterator it = sketch.iterator();
+    while (it.next()) { // any successful next() means the sketch is not empty
+      Assert.fail("empty sketch expected");
+    }
+  }
+
+  @Test // three distinct keys, each updated twice, compacted without entering estimation mode
+  public void exactModeFromQuickSelectSketch() {
+    ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build();
+    us.update(1, new double[] {1.0});
+    us.update(2, new double[] {1.0});
+    us.update(3, new double[] {1.0});
+    us.update(1, new double[] {1.0}); // second round repeats the same three keys
+    us.update(2, new double[] {1.0});
+    us.update(3, new double[] {1.0});
+    ArrayOfDoublesCompactSketch sketch = us.compact();
+    Assert.assertFalse(sketch.isEmpty());
+    Assert.assertFalse(sketch.isEstimationMode());
+    Assert.assertEquals(sketch.getEstimate(), 3.0);
+    Assert.assertEquals(sketch.getLowerBound(1), 3.0);
+    Assert.assertEquals(sketch.getUpperBound(1), 3.0);
+    Assert.assertEquals(sketch.getRetainedEntries(), 3);
+    Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE);
+    Assert.assertEquals(sketch.getTheta(), 1.0);
+    double[][] values = sketch.getValues();
+    Assert.assertEquals(values.length, 3);
+    for (double[] array: values) {
+      Assert.assertEquals(array[0], 2.0); // the two 1.0 updates per key are expected to add up to 2.0
+    }
+  }
+
+  @Test // round trip of a small compact sketch through toByteArray() and heapifySketch()
+  public void serializeDeserializeSmallExact() {
+    ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build();
+    us.update("a", new double[] {1.0});
+    us.update("b", new double[] {1.0});
+    us.update("c", new double[] {1.0});
+    ArrayOfDoublesCompactSketch sketch1 = us.compact();
+    ArrayOfDoublesSketch sketch2 =
+        ArrayOfDoublesSketches.heapifySketch(MemorySegment.ofArray(sketch1.toByteArray()));
+    Assert.assertFalse(sketch2.isEmpty());
+    Assert.assertFalse(sketch2.isEstimationMode());
+    Assert.assertEquals(sketch2.getEstimate(), 3.0);
+    Assert.assertEquals(sketch2.getLowerBound(1), 3.0);
+    Assert.assertEquals(sketch2.getUpperBound(1), 3.0);
+    Assert.assertEquals(sketch2.getRetainedEntries(), 3);
+    Assert.assertEquals(sketch2.getThetaLong(), Long.MAX_VALUE);
+    Assert.assertEquals(sketch2.getTheta(), 1.0);
+    double[][] values = sketch2.getValues();
+    Assert.assertEquals(values.length, 3);
+    for (double[] array: values) {
+      Assert.assertEquals(array[0], 1.0); // each key was updated once
+    }
+  }
+
+  @Test // after 8192 distinct keys: wrapped updatable, heapified updatable and heapified compact copies must match the source
+  public void serializeDeserializeEstimation() {
+    ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build();
+    for (int i = 0; i < 8192; i++) {
+      us.update(i, new double[] {1.0});
+    }
+    MemorySegment wseg = MemorySegment.ofArray(us.toByteArray()); // serialized updatable form, used for both wrap and heapify
+    ArrayOfDoublesUpdatableSketch wrappedUS = ArrayOfDoublesSketches.wrapUpdatableSketch(wseg);
+    Assert.assertFalse(wrappedUS.isEmpty());
+    Assert.assertTrue(wrappedUS.isEstimationMode());
+    Assert.assertEquals(wrappedUS.getEstimate(), us.getEstimate());
+    Assert.assertEquals(wrappedUS.getThetaLong(), us.getThetaLong());
+
+    ArrayOfDoublesUpdatableSketch heapUS = ArrayOfDoublesSketches.heapifyUpdatableSketch(wseg);
+    Assert.assertFalse(heapUS.isEmpty());
+    Assert.assertTrue(heapUS.isEstimationMode());
+    Assert.assertEquals(heapUS.getEstimate(), us.getEstimate());
+    Assert.assertEquals(heapUS.getThetaLong(), us.getThetaLong());
+
+    ArrayOfDoublesCompactSketch sketch1 = us.compact(); // compact form, serialized separately below
+    ArrayOfDoublesSketch sketch2 =
+        ArrayOfDoublesSketches.heapifySketch(MemorySegment.ofArray(sketch1.toByteArray()));
+    Assert.assertFalse(sketch2.isEmpty());
+    Assert.assertTrue(sketch2.isEstimationMode());
+    Assert.assertEquals(sketch2.getEstimate(), sketch1.getEstimate());
+    Assert.assertEquals(sketch2.getThetaLong(), sketch1.getThetaLong());
+  }
+
+  @Test(expectedExceptions = SketchesArgumentException.class)
+  public void deserializeWithWrongSeed() { // the sketch is built with builder defaults but heapified with an explicit second argument
+    ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build();
+    for (int i = 0; i < 8192; i++) {
+      us.update(i, new double[] {1.0});
+    }
+    ArrayOfDoublesCompactSketch sketch1 = us.compact();
+    MemorySegment seg = MemorySegment.ofArray(sketch1.toByteArray());
+    ArrayOfDoublesSketches.heapifySketch(seg, 123); // presumably 123 is a seed that differs from the default; expected to throw
+  }
+}
diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketchTest.java
new file mode 100644
index 000000000..5a6b1d3f5
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketchTest.java
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.arrayofdoubles; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.ResizeFactor; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class HeapArrayOfDoublesQuickSelectSketchTest { + @Test + public void isEmpty() { + final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build(); + Assert.assertTrue(sketch.isEmpty()); + Assert.assertFalse(sketch.isEstimationMode()); + Assert.assertEquals(sketch.getEstimate(), 0.0); + Assert.assertEquals(sketch.getUpperBound(1), 0.0); + Assert.assertEquals(sketch.getLowerBound(1), 0.0); + Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); + Assert.assertEquals(sketch.getTheta(), 1.0); + final ArrayOfDoublesSketchIterator it = sketch.iterator(); + while (it.next()) { + Assert.fail("empty sketch expected"); + } + Assert.assertNotNull(sketch.toString()); + } + + @Test + public void isEmptyWithSampling() { + final float samplingProbability = 0.1f; + final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setSamplingProbability(samplingProbability).build(); + Assert.assertTrue(sketch.isEmpty()); + Assert.assertFalse(sketch.isEstimationMode()); + Assert.assertEquals(sketch.getEstimate(), 0.0); + Assert.assertEquals(sketch.getUpperBound(1), 0.0); + Assert.assertEquals(sketch.getLowerBound(1), 0.0); + Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); + Assert.assertEquals(sketch.getTheta(), 1.0); + } + + @Test + public void sampling() { + 
final float samplingProbability = 0.001f; + final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setSamplingProbability(samplingProbability).build(); + sketch.update("a", new double[] {1.0}); + Assert.assertFalse(sketch.isEmpty()); + Assert.assertTrue(sketch.isEstimationMode()); + Assert.assertEquals(sketch.getEstimate(), 0.0); + Assert.assertTrue(sketch.getUpperBound(1) > 0.0); + Assert.assertEquals(sketch.getLowerBound(1), 0.0, 0.0000001); + Assert.assertEquals((float)(sketch.getThetaLong() / (double) Long.MAX_VALUE), samplingProbability); + Assert.assertEquals((float)sketch.getTheta(), samplingProbability); + } + + @Test + public void exactMode() { + final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build(); + Assert.assertTrue(sketch.isEmpty()); + Assert.assertEquals(sketch.getEstimate(), 0.0); + for (int i = 1; i <= 4096; i++) { + sketch.update(i, new double[] {1.0}); + } + Assert.assertFalse(sketch.isEmpty()); + Assert.assertFalse(sketch.isEstimationMode()); + Assert.assertEquals(sketch.getEstimate(), 4096.0); + Assert.assertEquals(sketch.getUpperBound(1), 4096.0); + Assert.assertEquals(sketch.getLowerBound(1), 4096.0); + Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); + Assert.assertEquals(sketch.getTheta(), 1.0); + + final double[][] values = sketch.getValues(); + Assert.assertEquals(values.length, 4096); + int count = 0; + for (int i = 0; i < values.length; i++) { + if (values[i] != null) { + count++; + } + } + Assert.assertEquals(count, 4096); + for (int i = 0; i < 4096; i++) { + Assert.assertEquals(values[i][0], 1.0); + } + + sketch.reset(); + Assert.assertTrue(sketch.isEmpty()); + Assert.assertFalse(sketch.isEstimationMode()); + Assert.assertEquals(sketch.getEstimate(), 0.0); + Assert.assertEquals(sketch.getUpperBound(1), 0.0); + Assert.assertEquals(sketch.getLowerBound(1), 0.0); + Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); + 
Assert.assertEquals(sketch.getTheta(), 1.0); + final ArrayOfDoublesSketchIterator it = sketch.iterator(); + while (it.next()) { + Assert.fail("empty sketch expected"); + } + } + + @Test + // The moment of going into the estimation mode is, to some extent, an implementation detail + // Here we assume that presenting as many unique values as twice the nominal size of the sketch will result in estimation mode + public void estimationMode() { + final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build(); + Assert.assertEquals(sketch.getEstimate(), 0.0); + for (int i = 1; i <= 8192; i++) { + sketch.update(i, new double[] {1.0}); + } + Assert.assertTrue(sketch.isEstimationMode()); + Assert.assertEquals(sketch.getEstimate(), 8192, 8192 * 0.01); + Assert.assertTrue(sketch.getEstimate() >= sketch.getLowerBound(1)); + Assert.assertTrue(sketch.getEstimate() < sketch.getUpperBound(1)); + Assert.assertTrue(sketch.getRetainedEntries() > 4096); + sketch.trim(); + Assert.assertEquals(sketch.getRetainedEntries(), 4096); + + final double[][] values = sketch.getValues(); + int count = 0; + for (final double[] array: values) { + if (array != null) { + count++; + Assert.assertEquals(array.length, 1); + Assert.assertEquals(array[0], 1.0); + } + } + Assert.assertEquals(count, values.length); + + sketch.reset(); + Assert.assertTrue(sketch.isEmpty()); + Assert.assertFalse(sketch.isEstimationMode()); + Assert.assertEquals(sketch.getEstimate(), 0.0); + Assert.assertEquals(sketch.getUpperBound(1), 0.0); + Assert.assertEquals(sketch.getLowerBound(1), 0.0); + Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); + Assert.assertEquals(sketch.getTheta(), 1.0); + final ArrayOfDoublesSketchIterator it = sketch.iterator(); + while (it.next()) { + Assert.fail("empty sketch expected"); + } + } + + @Test + public void updatesOfAllKeyTypes() { + final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build(); + sketch.update(1L, 
new double[] {1.0}); + sketch.update(2.0, new double[] {1.0}); + sketch.update(new byte[] {3}, new double[] {1.0}); + sketch.update(new int[] {4}, new double[] {1.0}); + sketch.update(new long[] {5L}, new double[] {1.0}); + sketch.update("a", new double[] {1.0}); + Assert.assertEquals(sketch.getEstimate(), 6.0); + } + + @Test + public void doubleSum() { + final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build(); + sketch.update(1, new double[] {1.0}); + Assert.assertEquals(sketch.getRetainedEntries(), 1); + Assert.assertEquals(sketch.getValues()[0][0], 1.0); + sketch.update(1, new double[] {0.7}); + Assert.assertEquals(sketch.getRetainedEntries(), 1); + Assert.assertEquals(sketch.getValues()[0][0], 1.7); + sketch.update(1, new double[] {0.8}); + Assert.assertEquals(sketch.getRetainedEntries(), 1); + Assert.assertEquals(sketch.getValues()[0][0], 2.5); + } + + @Test + public void serializeDeserializeExact() { + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); + sketch1.update(1, new double[] {1.0}); + + final ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.heapify(MemorySegment.ofArray(sketch1.toByteArray())); + + Assert.assertEquals(sketch2.getEstimate(), 1.0); + final double[][] values = sketch2.getValues(); + Assert.assertEquals(values.length, 1); + Assert.assertEquals(values[0][0], 1.0); + + // the same key, so still one unique + sketch2.update(1, new double[] {1.0}); + Assert.assertEquals(sketch2.getEstimate(), 1.0); + + sketch2.update(2, new double[] {1.0}); + Assert.assertEquals(sketch2.getEstimate(), 2.0); + } + + @Test + public void serializeDeserializeEstimationNoResize() throws Exception { + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder(). 
+ setResizeFactor(ResizeFactor.X1).build(); + for (int j = 0; j < 10; j++) { + for (int i = 0; i < 8192; i++) { + sketch1.update(i, new double[] {1.0}); + } + } + final byte[] byteArray = sketch1.toByteArray(); + + //for visual testing + //TestUtil.writeBytesToFile(byteArray, "ArrayOfDoublesQuickSelectSketch4K.data"); + + final ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketch.heapify(MemorySegment.ofArray(byteArray)); + Assert.assertTrue(sketch2.isEstimationMode()); + Assert.assertEquals(sketch2.getEstimate(), 8192, 8192 * 0.99); + Assert.assertEquals(sketch1.getTheta(), sketch2.getTheta()); + final double[][] values = sketch2.getValues(); + Assert.assertTrue(values.length >= 4096); + for (final double[] array: values) { + Assert.assertEquals(array[0], 10.0); + } + } + + @Test + public void serializeDeserializeSampling() { + final int sketchSize = 16384; + final int numberOfUniques = sketchSize; + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(sketchSize).setSamplingProbability(0.5f).build(); + for (int i = 0; i < numberOfUniques; i++) { + sketch1.update(i, new double[] {1.0}); + } + final ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketch.heapify(MemorySegment.ofArray(sketch1.toByteArray())); + Assert.assertTrue(sketch2.isEstimationMode()); + Assert.assertEquals(sketch2.getEstimate() / numberOfUniques, 1.0, 0.01); + Assert.assertEquals(sketch2.getRetainedEntries() / (double) numberOfUniques, 0.5, 0.01); + Assert.assertEquals(sketch1.getTheta(), sketch2.getTheta()); + } + +} From a00f7325d96e12ad4b05c52f53eb928dacdd2c0f Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Tue, 24 Jun 2025 18:48:12 -0700 Subject: [PATCH 16/25] Finished all of theta and tuple --- .../org/apache/datasketches/theta2/AnotB.java | 6 +- .../datasketches/theta2/CompactSketch.java | 7 +- .../theta2/DirectQuickSelectSketch.java | 24 +- .../theta2/DirectQuickSelectSketchR.java | 4 +- .../theta2/ForwardCompatibility.java | 6 +- 
.../datasketches/theta2/HeapAlphaSketch.java | 10 +- .../theta2/HeapQuickSelectSketch.java | 10 +- .../datasketches/theta2/Intersection.java | 8 +- .../datasketches/theta2/IntersectionImpl.java | 30 +- .../datasketches/theta2/PreambleUtil.java | 10 +- .../datasketches/theta2/SingleItemSketch.java | 2 +- .../apache/datasketches/theta2/Sketch.java | 13 +- .../apache/datasketches/theta2/UnionImpl.java | 1 - .../datasketches/theta2/UpdateSketch.java | 14 +- .../ArrayOfStringsSummaryDeserializer.java | 4 +- .../datasketches/theta2/AnotBimplTest.java | 26 +- .../theta2/CompactSketchTest.java | 28 +- ...ConcurrentDirectQuickSelectSketchTest.java | 4 +- .../theta2/DirectIntersectionTest.java | 102 +++---- .../theta2/DirectQuickSelectSketchTest.java | 20 +- .../datasketches/theta2/DirectUnionTest.java | 262 +++++++++--------- .../apache/datasketches/theta2/EmptyTest.java | 20 +- .../theta2/ForwardCompatibilityTest.java | 1 - .../theta2/HeapAlphaSketchTest.java | 20 +- .../theta2/HeapIntersectionTest.java | 8 +- .../theta2/HeapQuickSelectSketchTest.java | 36 +-- .../datasketches/theta2/HeapUnionTest.java | 38 +-- .../theta2/HeapifyWrapSerVer1and2Test.java | 91 +++--- .../datasketches/theta2/IteratorTest.java | 4 +- .../datasketches/theta2/PreambleUtilTest.java | 98 +++---- .../datasketches/theta2/SetOperationTest.java | 104 +++---- .../theta2/SingleItemSketchTest.java | 58 ++-- .../datasketches/theta2/UnionImplTest.java | 114 ++++---- .../strings/ArrayOfStringsSketchTest.java | 134 +++++++++ .../strings/ArrayOfStringsSummaryTest.java | 98 +++++++ 35 files changed, 820 insertions(+), 595 deletions(-) create mode 100644 src/test/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSketchTest.java create mode 100644 src/test/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryTest.java diff --git a/src/main/java/org/apache/datasketches/theta2/AnotB.java b/src/main/java/org/apache/datasketches/theta2/AnotB.java index 72f7e89b2..5e6efe16b 100644 --- 
a/src/main/java/org/apache/datasketches/theta2/AnotB.java +++ b/src/main/java/org/apache/datasketches/theta2/AnotB.java @@ -134,12 +134,11 @@ public Family getFamily() { * See Destination Ordered. * * @param dstSeg if not null the given MemorySegment will be the target location of the result. - * See Destination MemorySegment. * * @param reset If true, clears this operator to the empty state after this result is * returned. Set this to false if you wish to obtain an intermediate result. * - * @return the result of this operation as a {@link CompactSketch} in the given dstMem. + * @return the result of this operation as a {@link CompactSketch} in the given dstSeg. */ public abstract CompactSketch getResult(boolean dstOrdered, MemorySegment dstSeg, boolean reset); @@ -195,8 +194,7 @@ public CompactSketch aNotB(final Sketch skA, final Sketch skB) { * @param skB The incoming sketch for the second argument. It must not be null. * @param dstOrdered * See Destination Ordered. - * @param dstSeg - * See Destination MemorySegment. + * @param dstSeg the destination MemorySegment * @return the result as a CompactSketch. */ public abstract CompactSketch aNotB(Sketch skA, Sketch skB, boolean dstOrdered, diff --git a/src/main/java/org/apache/datasketches/theta2/CompactSketch.java b/src/main/java/org/apache/datasketches/theta2/CompactSketch.java index 6ee764678..acd51cc21 100644 --- a/src/main/java/org/apache/datasketches/theta2/CompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/CompactSketch.java @@ -92,7 +92,6 @@ public static CompactSketch heapify(final MemorySegment srcSeg) { * so the resulting heapified CompactSketch will be given the hash of the expectedSeed.

* * @param srcSeg an image of a CompactSketch that was created using the given expectedSeed. - * See Memory. * @param expectedSeed the seed used to validate the given Memory image. * See Update Hash Seed. * @return a CompactSketch on the heap. @@ -115,7 +114,7 @@ private static CompactSketch heapify(final MemorySegment srcSeg, final long seed final int flags = extractFlags(srcSeg); final boolean srcOrdered = (flags & ORDERED_FLAG_MASK) != 0; final boolean empty = (flags & EMPTY_FLAG_MASK) != 0; - if (enforceSeed && !empty) { PreambleUtil.checkMemorySeedHash(srcSeg, seed); } + if (enforceSeed && !empty) { PreambleUtil.checkSegmentSeedHash(srcSeg, seed); } return CompactOperations.memoryToCompact(srcSeg, srcOrdered, null); } //not SerVer 3, assume compact stored form @@ -326,7 +325,7 @@ private static CompactSketch wrap(final byte[] bytes, final long seed, final boo //Sketch Overrides @Override - public abstract CompactSketch compact(final boolean dstOrdered, final MemorySegment dstMem); + public abstract CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg); @Override public int getCompactBytes() { @@ -452,7 +451,7 @@ private static CompactSketch heapifyV4(final MemorySegment srcSeg, final long se final int entryBits = extractEntryBitsV4(srcSeg); final int numEntriesBytes = extractNumEntriesBytesV4(srcSeg); final short seedHash = (short) extractSeedHash(srcSeg); - if (enforceSeed) { PreambleUtil.checkMemorySeedHash(srcSeg, seed); } + if (enforceSeed) { PreambleUtil.checkSegmentSeedHash(srcSeg, seed); } int offsetBytes = 8; long theta = Long.MAX_VALUE; if (preLongs > 1) { diff --git a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java index 213dd7f4a..64423a21c 100644 --- a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java @@ -34,7 +34,7 @@ 
import static org.apache.datasketches.theta2.PreambleUtil.extractLgArrLongs; import static org.apache.datasketches.theta2.PreambleUtil.extractLgNomLongs; import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs; -import static org.apache.datasketches.theta2.PreambleUtil.getMemBytes; +import static org.apache.datasketches.theta2.PreambleUtil.getSegBytes; import static org.apache.datasketches.theta2.PreambleUtil.insertCurCount; import static org.apache.datasketches.theta2.PreambleUtil.insertFamilyID; import static org.apache.datasketches.theta2.PreambleUtil.insertFlags; @@ -91,12 +91,12 @@ private DirectQuickSelectSketch( * @param seed See Update Hash Seed. * @param p * See Sampling Probability, p - * @param rf Currently internally fixed at 2. Unless dstMem is not configured with a valid + * @param rf Currently internally fixed at 2. Unless dstSeg is not configured with a valid * MemoryRequest, in which case the rf is effectively 1, which is no resizing at all and the - * dstMem must be large enough for a full sketch. + * dstSeg must be large enough for a full sketch. * See Resize Factor * @param memReqSvr the given MemoryRequestServer - * @param dstSeg the given Memory object destination. It cannot be null. + * @param dstSeg the given MemorySegment object destination. It cannot be null. * It will be cleared prior to use. * @param unionGadget true if this sketch is implementing the Union gadget function. * Otherwise, it is behaving as a normal QuickSelectSketch. @@ -109,7 +109,7 @@ private DirectQuickSelectSketch( final MemorySegment dstSeg, final boolean unionGadget) { this( - checkMemSize(lgNomLongs, rf, dstSeg, unionGadget), + checkSegSize(lgNomLongs, rf, dstSeg, unionGadget), //SpotBugs CT_CONSTRUCTOR_THROW is false positive. 
//this construction scheme is compliant with SEI CERT Oracle Coding Standard for Java / OBJ11-J lgNomLongs, @@ -171,16 +171,16 @@ private DirectQuickSelectSketch( hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); } - private static final boolean checkMemSize( + private static final boolean checkSegSize( final int lgNomLongs, final ResizeFactor rf, final MemorySegment dstSeg, final boolean unionGadget) { final int preambleLongs = (unionGadget) ? Family.UNION.getMinPreLongs() : Family.QUICKSELECT.getMinPreLongs(); final int lgRF = rf.lg(); final int lgArrLongs = (lgRF == 0) ? lgNomLongs + 1 : ThetaUtil.MIN_LG_ARR_LONGS; - final int minReqBytes = getMemBytes(lgArrLongs, preambleLongs); - final long curMemCapBytes = dstSeg.byteSize(); - if (curMemCapBytes < minReqBytes) { + final int minReqBytes = getSegBytes(lgArrLongs, preambleLongs); + final long curSegCapBytes = dstSeg.byteSize(); + if (curSegCapBytes < minReqBytes) { throw new SketchesArgumentException( - "Memory capacity is too small: " + curMemCapBytes + " < " + minReqBytes); + "MemorySegment capacity is too small: " + curSegCapBytes + " < " + minReqBytes); } return true; } @@ -198,7 +198,7 @@ static DirectQuickSelectSketch writableWrap(final MemorySegment srcSeg, final lo final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4 UpdateSketch.checkUnionQuickSelectFamily(srcSeg, preambleLongs, lgNomLongs); - checkMemIntegrity(srcSeg, seed, preambleLongs, lgNomLongs, lgArrLongs); + checkSegIntegrity(srcSeg, seed, preambleLongs, lgNomLongs, lgArrLongs); if (isResizeFactorIncorrect(srcSeg, lgNomLongs, lgArrLongs)) { //If incorrect it sets it to X2 which always works. @@ -297,7 +297,7 @@ UpdateReturnState hashUpdate(final long hash) { return InsertedCountIncrementedRebuilt; } //end of rebuild, exit - else { //Not at full size, resize. Should not get here if lgRF = 0 and memCap is too small. + else { //Not at full size, resize. Should not get here if lgRF = 0 and segCap is too small. 
final int lgRF = getLgRF(); final int actLgRF = actLgResizeFactor(wseg_.byteSize(), lgArrLongs, preambleLongs, lgRF); int tgtLgArrLongs = Math.min(lgArrLongs + actLgRF, lgNomLongs + 1); diff --git a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java index b7c47de47..adbc8757c 100644 --- a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java +++ b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java @@ -87,7 +87,7 @@ static DirectQuickSelectSketchR readOnlyWrap(final MemorySegment srcSeg, final l final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4 UpdateSketch.checkUnionQuickSelectFamily(srcSeg, preambleLongs, lgNomLongs); - checkMemIntegrity(srcSeg, seed, preambleLongs, lgNomLongs, lgArrLongs); + checkSegIntegrity(srcSeg, seed, preambleLongs, lgNomLongs, lgArrLongs); final DirectQuickSelectSketchR dqssr = new DirectQuickSelectSketchR(seed, srcSeg); @@ -172,7 +172,7 @@ public HashIterator iterator() { } @Override - public byte[] toByteArray() { //MY_FAMILY is stored in wmem_ + public byte[] toByteArray() { //MY_FAMILY is stored in wseg_ checkIllegalCurCountAndEmpty(isEmpty(), extractCurCount(wseg_)); final int lengthBytes = getCurrentBytes(); final byte[] byteArray = new byte[lengthBytes]; diff --git a/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java b/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java index a6635653c..ea23b4033 100644 --- a/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java +++ b/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java @@ -155,11 +155,11 @@ static final CompactSketch heapify2to3(final MemorySegment srcSeg, final short s throw new SketchesArgumentException("PreLongs must be 1,2, or 3: " + preLongs); } - private static final void validateInputSize(final int reqBytesIn, final int memCap) { - if (reqBytesIn > 
memCap) { + private static final void validateInputSize(final int reqBytesIn, final int segCap) { + if (reqBytesIn > segCap) { throw new SketchesArgumentException( "Input Memory or byte[] size is too small: Required Bytes: " + reqBytesIn - + ", bytesIn: " + memCap); + + ", bytesIn: " + segCap); } } diff --git a/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java index a3b7bd145..7450253cc 100644 --- a/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java @@ -128,21 +128,21 @@ static HeapAlphaSketch heapifyInstance(final MemorySegment srcSeg, final long ex final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4 checkAlphaFamily(srcSeg, preambleLongs, lgNomLongs); - checkMemIntegrity(srcSeg, expectedSeed, preambleLongs, lgNomLongs, lgArrLongs); + checkSegIntegrity(srcSeg, expectedSeed, preambleLongs, lgNomLongs, lgArrLongs); final float p = extractP(srcSeg); //bytes 12-15 - final int memlgRF = extractLgResizeFactor(srcSeg); //byte 0 - ResizeFactor memRF = ResizeFactor.getRF(memlgRF); + final int seglgRF = extractLgResizeFactor(srcSeg); //byte 0 + ResizeFactor segRF = ResizeFactor.getRF(seglgRF); final double nomLongs = (1L << lgNomLongs); final double alpha = nomLongs / (nomLongs + 1.0); final long split1 = (long) (((p * (alpha + 1.0)) / 2.0) * LONG_MAX_VALUE_AS_DOUBLE); if (isResizeFactorIncorrect(srcSeg, lgNomLongs, lgArrLongs)) { - memRF = ResizeFactor.X2; //X2 always works. + segRF = ResizeFactor.X2; //X2 always works. 
} - final HeapAlphaSketch has = new HeapAlphaSketch(lgNomLongs, expectedSeed, p, memRF, alpha, split1); + final HeapAlphaSketch has = new HeapAlphaSketch(lgNomLongs, expectedSeed, p, segRF, alpha, split1); has.lgArrLongs_ = lgArrLongs; has.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs); has.curCount_ = extractCurCount(srcSeg); diff --git a/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java index 082259c21..720f68ea5 100644 --- a/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java @@ -113,19 +113,19 @@ static HeapQuickSelectSketch heapifyInstance(final MemorySegment srcSeg, final l final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4 checkUnionQuickSelectFamily(srcSeg, preambleLongs, lgNomLongs); - checkMemIntegrity(srcSeg, seed, preambleLongs, lgNomLongs, lgArrLongs); + checkSegIntegrity(srcSeg, seed, preambleLongs, lgNomLongs, lgArrLongs); final float p = extractP(srcSeg); //bytes 12-15 - final int memlgRF = extractLgResizeFactor(srcSeg); //byte 0 - ResizeFactor memRF = ResizeFactor.getRF(memlgRF); + final int seglgRF = extractLgResizeFactor(srcSeg); //byte 0 + ResizeFactor segRF = ResizeFactor.getRF(seglgRF); final int familyID = extractFamilyID(srcSeg); final Family family = Family.idToFamily(familyID); if (isResizeFactorIncorrect(srcSeg, lgNomLongs, lgArrLongs)) { - memRF = ResizeFactor.X2; //X2 always works. + segRF = ResizeFactor.X2; //X2 always works. 
} - final HeapQuickSelectSketch hqss = new HeapQuickSelectSketch(lgNomLongs, seed, p, memRF, + final HeapQuickSelectSketch hqss = new HeapQuickSelectSketch(lgNomLongs, seed, p, segRF, preambleLongs, family); hqss.lgArrLongs_ = lgArrLongs; hqss.hashTableThreshold_ = getHashTableThreshold(lgNomLongs, lgArrLongs); diff --git a/src/main/java/org/apache/datasketches/theta2/Intersection.java b/src/main/java/org/apache/datasketches/theta2/Intersection.java index 685dd3eac..2dd82bce0 100644 --- a/src/main/java/org/apache/datasketches/theta2/Intersection.java +++ b/src/main/java/org/apache/datasketches/theta2/Intersection.java @@ -60,7 +60,7 @@ public CompactSketch getResult() { } /** - * Gets the result of this operation as a CompactSketch in the given dstMem. + * Gets the result of this operation as a CompactSketch in the given dstSeg. * This does not disturb the underlying data structure of this intersection. * The {@link #intersect(Sketch)} method must have been called at least once, otherwise an * exception will be thrown. This is because a virgin Intersection object represents the @@ -78,7 +78,7 @@ public CompactSketch getResult() { * * @param dstSeg the destination MemorySegment. * - * @return the result of this operation as a CompactSketch stored in the given dstMem, + * @return the result of this operation as a CompactSketch stored in the given dstSeg, * which can be either on or off-heap.. */ public abstract CompactSketch getResult(boolean dstOrdered, MemorySegment dstSeg); @@ -188,7 +188,7 @@ static final long[] compactCachePart(final long[] srcCache, final int lgArrLongs return cacheOut; } - protected static void memChecks(final MemorySegment srcSeg) { + protected static void segChecks(final MemorySegment srcSeg) { //Get Preamble //Note: Intersection does not use lgNomLongs (or k), per se. 
//seedHash loaded and checked in private constructor @@ -209,7 +209,7 @@ protected static void memChecks(final MemorySegment srcSeg) { if (empty) { if (curCount != 0) { throw new SketchesArgumentException( - "srcMem empty state inconsistent with curCount: " + empty + "," + curCount); + "srcSeg empty state inconsistent with curCount: " + empty + "," + curCount); } //empty = true AND curCount_ = 0: OK } //else empty = false, curCount could be anything diff --git a/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java b/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java index 74228b1fa..855c1d953 100644 --- a/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java +++ b/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java @@ -96,20 +96,20 @@ final class IntersectionImpl extends Intersection { * Constructor: Sets the class finals and computes, sets and checks the seedHash. * @param wseg Can be either a Source(e.g. wrap) or Destination (new offHeap) MemorySegment. * @param seed Used to validate incoming sketch arguments. - * @param dstMemFlag The given MemorySegment is a Destination (new offHeap) MemorySegment. + * @param dstSegFlag The given MemorySegment is a Destination (new offHeap) MemorySegment. * @param readOnly True if MemorySegment is to be treated as read only. */ - protected IntersectionImpl(final MemorySegment wseg, final long seed, final boolean dstMemFlag, + protected IntersectionImpl(final MemorySegment wseg, final long seed, final boolean dstSegFlag, final boolean readOnly) { readOnly_ = readOnly; if (wseg != null) { wseg_ = wseg; - if (dstMemFlag) { //DstMem: compute & store seedHash, no seedHash checking + if (dstSegFlag) { //DstSeg: compute & store seedHash, no seedHash checking checkMinSizeMemory(wseg); maxLgArrLongs_ = !readOnly ? 
getMaxLgArrLongs(wseg) : 0; //Only Off Heap seedHash_ = ThetaUtil.computeSeedHash(seed); wseg_.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, seedHash_); - } else { //SrcMem:gets and stores the seedHash, checks mem_seedHash against the seed + } else { //SrcSeg:gets and stores the seedHash, checks seg_seedHash against the seed seedHash_ = wseg_.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT); ThetaUtil.checkSeedHashes(seedHash_, ThetaUtil.computeSeedHash(seed)); //check for seed hash conflict maxLgArrLongs_ = 0; @@ -129,9 +129,9 @@ protected IntersectionImpl(final MemorySegment wseg, final long seed, final bool * @return a new IntersectionImpl on the Java heap */ static IntersectionImpl initNewHeapInstance(final long seed) { - final boolean dstMemFlag = false; + final boolean dstSegFlag = false; final boolean readOnly = false; - final IntersectionImpl impl = new IntersectionImpl(null, seed, dstMemFlag, readOnly); + final IntersectionImpl impl = new IntersectionImpl(null, seed, dstSegFlag, readOnly); impl.hardReset(); return impl; } @@ -162,9 +162,9 @@ static IntersectionImpl initNewDirectInstance(final long seed, final MemorySegme //thetaLong set by hardReset //Initialize - final boolean dstMemFlag = true; + final boolean dstSegFlag = true; final boolean readOnly = false; - final IntersectionImpl impl = new IntersectionImpl(dstSeg, seed, dstMemFlag, readOnly); + final IntersectionImpl impl = new IntersectionImpl(dstSeg, seed, dstSegFlag, readOnly); impl.hardReset(); return impl; } @@ -176,10 +176,10 @@ static IntersectionImpl initNewDirectInstance(final long seed, final MemorySegme * @return a IntersectionImpl instance on the Java heap */ static IntersectionImpl heapifyInstance(final MemorySegment srcSeg, final long seed) { - final boolean dstMemFlag = false; + final boolean dstSegFlag = false; final boolean readOnly = false; - final IntersectionImpl impl = new IntersectionImpl(null, seed, dstMemFlag, readOnly); - memChecks(srcSeg); + final IntersectionImpl impl = new 
IntersectionImpl(null, seed, dstSegFlag, readOnly); + segChecks(srcSeg); //Initialize impl.lgArrLongs_ = extractLgArrLongs(srcSeg); @@ -207,9 +207,9 @@ static IntersectionImpl wrapInstance( final MemorySegment srcSeg, final long seed, final boolean readOnly) { - final boolean dstMemFlag = false; - final IntersectionImpl impl = new IntersectionImpl(srcSeg, seed, dstMemFlag, readOnly); - memChecks(srcSeg); + final boolean dstSegFlag = false; + final IntersectionImpl impl = new IntersectionImpl(srcSeg, seed, dstSegFlag, readOnly); + segChecks(srcSeg); impl.lgArrLongs_ = extractLgArrLongs(srcSeg); impl.curCount_ = extractCurCount(srcSeg); impl.thetaLong_ = extractThetaLong(srcSeg); @@ -266,7 +266,7 @@ public void intersect(final Sketch sketchIn) { if (curCount_ == 0 || sketchInEntries == 0) { curCount_ = 0; if (wseg_ != null) { insertCurCount(wseg_, 0); } - hashTable_ = null; //No need for a HT. Don't bother clearing mem if valid + hashTable_ = null; //No need for a HT. Don't bother clearing seg if valid } //end of states 1,2,3,6 // state 5 diff --git a/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java b/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java index 564446ed4..931e3ccfe 100644 --- a/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java +++ b/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java @@ -208,7 +208,7 @@ private PreambleUtil() {} * @param preambleLongs current preamble size * @return the size in bytes */ - static final int getMemBytes(final int lgArrLongs, final int preambleLongs) { + static final int getSegBytes(final int lgArrLongs, final int preambleLongs) { return (8 << lgArrLongs) + (preambleLongs << 3); } @@ -515,10 +515,10 @@ static int getAndCheckPreLongs(final MemorySegment seg) { return preLongs; } - static final short checkMemorySeedHash(final MemorySegment seg, final long seed) { - final short seedHashMem = (short) extractSeedHash(seg); - ThetaUtil.checkSeedHashes(seedHashMem, 
ThetaUtil.computeSeedHash(seed)); //throws if bad seedHash - return seedHashMem; + static final short checkSegmentSeedHash(final MemorySegment seg, final long seed) { + final short seedHashSeg = (short) extractSeedHash(seg); + ThetaUtil.checkSeedHashes(seedHashSeg, ThetaUtil.computeSeedHash(seed)); //throws if bad seedHash + return seedHashSeg; } private static void throwNotBigEnough(final long cap, final int required) { diff --git a/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java b/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java index 222eef877..99d2dd71b 100644 --- a/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java @@ -79,7 +79,7 @@ private SingleItemSketch(final long hash) { * Creates a SingleItemSketch on the heap given a SingleItemSketch MemorySegment image and a seedHash. * Checks the seed hash of the given MemorySegment against the given seedHash. * @param srcSeg the MemorySegment to be heapified. - * @param expectedSeedHash the given seedHash to be checked against the srcMem seedHash + * @param expectedSeedHash the given seedHash to be checked against the srcSeg seedHash * @return a SingleItemSketch */ //does not override Sketch static SingleItemSketch heapify(final MemorySegment srcSeg, final short expectedSeedHash) { diff --git a/src/main/java/org/apache/datasketches/theta2/Sketch.java b/src/main/java/org/apache/datasketches/theta2/Sketch.java index 5d619c580..bec936a8c 100644 --- a/src/main/java/org/apache/datasketches/theta2/Sketch.java +++ b/src/main/java/org/apache/datasketches/theta2/Sketch.java @@ -123,7 +123,6 @@ public static Sketch heapify(final MemorySegment srcSeg, final long expectedSeed * correct hash seed, so it is not checked.

* * @param srcSeg an image of a Sketch. - * See Memory. * @return a Sketch backed by the given MemorySegment */ public static Sketch wrap(final MemorySegment srcSeg) { @@ -221,9 +220,9 @@ public CompactSketch compact() { *

A CompactSketch is always immutable.

* *

A new CompactSketch object is created:

- *
  • if dstMem != null
  • - *
  • if dstMem == null and this.hasMemorySegment() == true
  • - *
  • if dstMem == null and this has more than 1 item and this.isOrdered() == false + *
    • if dstSeg!= null
    • + *
    • if dstSeg == null and this.hasMemorySegment() == true
    • + *
    • if dstSeg == null and this has more than 1 item and this.isOrdered() == false * and dstOrdered == true.
    • *
    * @@ -536,11 +535,11 @@ public static String toString(final byte[] byteArr) { /** * Returns a human readable string of the preamble of a MemorySegment image of a Theta Sketch. - * @param mem the given Memory object + * @param seg the given MemorySegment object * @return a human readable string of the preamble of a MemorySegment image of a Theta Sketch. */ - public static String toString(final MemorySegment mem) { - return PreambleUtil.preambleToString(mem); + public static String toString(final MemorySegment seg) { + return PreambleUtil.preambleToString(seg); } //Restricted methods diff --git a/src/main/java/org/apache/datasketches/theta2/UnionImpl.java b/src/main/java/org/apache/datasketches/theta2/UnionImpl.java index f3bdbe2f4..1b931b974 100644 --- a/src/main/java/org/apache/datasketches/theta2/UnionImpl.java +++ b/src/main/java/org/apache/datasketches/theta2/UnionImpl.java @@ -95,7 +95,6 @@ static UnionImpl initNewHeapInstance( * @param seed See seed * @param p See Sampling Probability, p * @param rf See Resize Factor - * @param memReqSvr a given instance of a MemoryRequestServer * @param dstSeg the given MemorySegment object destination. It will be cleared prior to use. 
* @return this class */ diff --git a/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java b/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java index 8a8dc5fd3..891792cac 100644 --- a/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java @@ -33,14 +33,14 @@ import static org.apache.datasketches.theta2.PreambleUtil.READ_ONLY_FLAG_MASK; import static org.apache.datasketches.theta2.PreambleUtil.SER_VER; import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.checkMemorySeedHash; +import static org.apache.datasketches.theta2.PreambleUtil.checkSegmentSeedHash; import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID; import static org.apache.datasketches.theta2.PreambleUtil.extractFlags; import static org.apache.datasketches.theta2.PreambleUtil.extractLgResizeFactor; import static org.apache.datasketches.theta2.PreambleUtil.extractP; import static org.apache.datasketches.theta2.PreambleUtil.extractSerVer; import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong; -import static org.apache.datasketches.theta2.PreambleUtil.getMemBytes; +import static org.apache.datasketches.theta2.PreambleUtil.getSegBytes; import static org.apache.datasketches.theta2.UpdateReturnState.RejectedNullOrEmpty; import java.lang.foreign.MemorySegment; @@ -420,7 +420,7 @@ else if (family.equals(Family.QUICKSELECT)) { } } - static void checkMemIntegrity(final MemorySegment srcSeg, final long expectedSeed, final int preambleLongs, + static void checkSegIntegrity(final MemorySegment srcSeg, final long expectedSeed, final int preambleLongs, final int lgNomLongs, final int lgArrLongs) { //Check SerVer @@ -436,16 +436,16 @@ static void checkMemIntegrity(final MemorySegment srcSeg, final long expectedSee ORDERED_FLAG_MASK | COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | BIG_ENDIAN_FLAG_MASK; if ((flags & 
flagsMask) > 0) { throw new SketchesArgumentException( - "Possible corruption: Input srcMem cannot be: big-endian, compact, ordered, or read-only"); + "Possible corruption: Input srcSeg cannot be: big-endian, compact, ordered, nor read-only"); } //Check seed hashes - final short seedHash = checkMemorySeedHash(srcSeg, expectedSeed); //byte 6,7 + final short seedHash = checkSegmentSeedHash(srcSeg, expectedSeed); //byte 6,7 ThetaUtil.checkSeedHashes(seedHash, ThetaUtil.computeSeedHash(expectedSeed)); - //Check mem capacity, lgArrLongs + //Check seg capacity, lgArrLongs final long curCapBytes = srcSeg.byteSize(); - final int minReqBytes = getMemBytes(lgArrLongs, preambleLongs); + final int minReqBytes = getSegBytes(lgArrLongs, preambleLongs); if (curCapBytes < minReqBytes) { throw new SketchesArgumentException( "Possible corruption: Current Memory size < min required size: " diff --git a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryDeserializer.java b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryDeserializer.java index f48f6c95e..3a82da6df 100644 --- a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryDeserializer.java +++ b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryDeserializer.java @@ -34,7 +34,7 @@ public class ArrayOfStringsSummaryDeserializer implements SummaryDeserializer heapifySummary(final MemorySegment seg) { - return ArrayOfStringsSummaryDeserializer.fromMemory(seg); + return ArrayOfStringsSummaryDeserializer.fromMemorySegment(seg); } /** @@ -42,7 +42,7 @@ public DeserializeResult heapifySummary(final MemorySegme * @param seg the given MemorySegment * @return the DeserializeResult */ - static DeserializeResult fromMemory(final MemorySegment seg) { + static DeserializeResult fromMemorySegment(final MemorySegment seg) { final ArrayOfStringsSummary nsum = new ArrayOfStringsSummary(seg); final int totBytes = seg.get(JAVA_INT_UNALIGNED, 0); return new 
DeserializeResult<>(nsum, totBytes); diff --git a/src/test/java/org/apache/datasketches/theta2/AnotBimplTest.java b/src/test/java/org/apache/datasketches/theta2/AnotBimplTest.java index a192e9875..1dba0bb4b 100644 --- a/src/test/java/org/apache/datasketches/theta2/AnotBimplTest.java +++ b/src/test/java/org/apache/datasketches/theta2/AnotBimplTest.java @@ -214,12 +214,12 @@ public void checkAnotBnotC() { final UpdateSketch cU = UpdateSketch.builder().setNominalEntries(k).build(); for (int i=k/2; i<3*k/4; i++) { cU.update(i); } //third 256 - final int memBytes = Sketch.getMaxUpdateSketchBytes(k); + final int segBytes = Sketch.getMaxUpdateSketchBytes(k); CompactSketch result1, result2, result3; - final MemorySegment wmem1 = MemorySegment.ofArray(new byte[memBytes]); - final MemorySegment wmem2 = MemorySegment.ofArray(new byte[memBytes]); - final MemorySegment wmem3 = MemorySegment.ofArray(new byte[memBytes]); + final MemorySegment wseg1 = MemorySegment.ofArray(new byte[segBytes]); + final MemorySegment wseg2 = MemorySegment.ofArray(new byte[segBytes]); + final MemorySegment wseg3 = MemorySegment.ofArray(new byte[segBytes]); final AnotB aNb = SetOperation.builder().buildANotB(); @@ -227,11 +227,11 @@ public void checkAnotBnotC() { aNb.setA(aU); //stateful - result1 = aNb.aNotB(aU, bU, ordered, wmem1); //stateless + result1 = aNb.aNotB(aU, bU, ordered, wseg1); //stateless aNb.notB(bU); //stateful - result2 = aNb.aNotB(result1, cU, ordered, wmem2); //stateless + result2 = aNb.aNotB(result1, cU, ordered, wseg2); //stateless aNb.notB(cU); //stateful @@ -239,13 +239,13 @@ public void checkAnotBnotC() { println("est: "+est2); assertEquals(est2, k/4.0, 0.0); - result3 = aNb.getResult(ordered, wmem3, true); //stateful result, then reset + result3 = aNb.getResult(ordered, wseg3, true); //stateful result, then reset final double est3 = result3.getEstimate(); assertEquals(est3, k/4.0, 0.0); } @Test - public void checkAnotBnotC_sameMemory() { + public void 
checkAnotBnotC_sameMemorySegment() { final int k = 1024; final boolean ordered = true; @@ -258,8 +258,8 @@ public void checkAnotBnotC_sameMemory() { final UpdateSketch c = UpdateSketch.builder().setNominalEntries(k).build(); for (int i=k/2; i<3*k/4; i++) { c.update(i); } //third 256 - final int memBytes = Sketch.getMaxCompactSketchBytes(a.getRetainedEntries(true)); - final MemorySegment mem = MemorySegment.ofArray(new byte[memBytes]); + final int segBytes = Sketch.getMaxCompactSketchBytes(a.getRetainedEntries(true)); + final MemorySegment seg = MemorySegment.ofArray(new byte[segBytes]); CompactSketch result1, result2; final AnotB aNb = SetOperation.builder().buildANotB(); @@ -268,15 +268,15 @@ public void checkAnotBnotC_sameMemory() { aNb.setA(a); //stateful - result1 = aNb.aNotB(a, b, ordered, mem); //stateless + result1 = aNb.aNotB(a, b, ordered, seg); //stateless aNb.notB(b); //stateful - result1 = aNb.aNotB(result1, c, ordered, mem); //stateless + result1 = aNb.aNotB(result1, c, ordered, seg); //stateless aNb.notB(c); //stateful - result2 = aNb.getResult(ordered, mem, true); //stateful result, then reset + result2 = aNb.getResult(ordered, seg, true); //stateful result, then reset final double est1 = result1.getEstimate(); //check stateless result println("est: "+est1); diff --git a/src/test/java/org/apache/datasketches/theta2/CompactSketchTest.java b/src/test/java/org/apache/datasketches/theta2/CompactSketchTest.java index 6ad5e8cdc..91b1a40a2 100644 --- a/src/test/java/org/apache/datasketches/theta2/CompactSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta2/CompactSketchTest.java @@ -184,7 +184,7 @@ public void checkDirectSingleItemSketch() { } @Test(expectedExceptions = SketchesArgumentException.class) - public void checkMemTooSmall() { + public void checkSegTooSmall() { int k = 512; int u = k; boolean ordered = false; @@ -200,7 +200,7 @@ public void checkMemTooSmall() { } @Test(expectedExceptions = SketchesArgumentException.class) - public 
void checkMemTooSmallOrdered() { + public void checkSegTooSmallOrdered() { int k = 512; int u = k; boolean ordered = true; @@ -235,7 +235,7 @@ public void checkCompactCachePart() { @Test /** - * Empty, memory-based Compact sketches are always ordered + * Empty, segment-based Compact sketches are always ordered */ public void checkEmptyMemoryCompactSketch() { UpdateSketch sk = Sketches.updateSketchBuilder().build(); @@ -275,7 +275,7 @@ public void checkEmptyMemoryCompactSketch() { @Test /** - * Single-Item, memory-based Compact sketches are always ordered: + * Single-Item, segment-based Compact sketches are always ordered: */ public void checkSingleItemMemoryCompactSketch() { UpdateSketch sk = Sketches.updateSketchBuilder().build(); @@ -290,14 +290,14 @@ public void checkSingleItemMemoryCompactSketch() { CompactSketch csk2 = sk.compact(false, wseg2); //the first parameter is ignored when single item state1.check(csk2); - assertNotEquals(csk1, csk2); //different object because memory is valid + assertNotEquals(csk1, csk2); //different object because segment is valid assertFalse(csk1 == csk2); MemorySegment wseg3 = MemorySegment.ofArray(new byte[16]); CompactSketch csk3 = csk1.compact(false, wseg3); state1.check(csk3); - assertNotEquals(csk1, csk3); //different object because memory is valid + assertNotEquals(csk1, csk3); //different object because segment is valid assertFalse(csk1 == csk3); CompactSketch cskc = csk1.compact(); @@ -325,14 +325,14 @@ public void checkMultipleItemMemoryCompactSketch() { State state2 = new State("DirectCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, MEMORY, !ORDERED, !ESTIMATION); state2.check(csk2); - assertNotEquals(csk1, csk2); //different object because memory is valid + assertNotEquals(csk1, csk2); //different object because segment is valid assertFalse(csk1 == csk2); MemorySegment wseg3 = MemorySegment.ofArray(new byte[50]); CompactSketch csk3 = csk1.compact(false, wseg3); state2.check(csk3); - assertNotEquals(csk1, csk3); 
//different object because memory is valid + assertNotEquals(csk1, csk3); //different object because segment is valid assertFalse(csk1 == csk3); CompactSketch cskc = csk1.compact(); @@ -426,10 +426,10 @@ public void checkMultipleItemHeapCompactSketch() { CompactSketch csk3 = csk1.compact(true, null); state1.check(csk3); - assertEquals(csk1, csk3); //the same object because wmem = null and csk1.ordered = dstOrdered + assertEquals(csk1, csk3); //the same object because wseg = null and csk1.ordered = dstOrdered assertTrue(csk1 == csk3); - assertNotEquals(csk2, csk3); //different object because wmem = null and csk2.ordered = false && dstOrdered = true + assertNotEquals(csk2, csk3); //different object because wseg = null and csk2.ordered = false && dstOrdered = true assertFalse(csk2 == csk3); CompactSketch cskc = csk1.compact(); @@ -629,19 +629,19 @@ private static class State { boolean compact = false; boolean empty = false; boolean direct = false; - boolean memSeg = false; + boolean hasSeg = false; boolean ordered = false; boolean estimation = false; State(String classType, int count, int bytes, boolean compact, boolean empty, boolean direct, - boolean memory, boolean ordered, boolean estimation) { + boolean hasSeg, boolean ordered, boolean estimation) { this.classType = classType; this.count = count; this.bytes = bytes; this.compact = compact; this.empty = empty; this.direct = direct; - this.memSeg = memory; + this.hasSeg = hasSeg; this.ordered = ordered; this.estimation = estimation; } @@ -653,7 +653,7 @@ void check(CompactSketch csk) { assertEquals(csk.isCompact(), compact, "Compact"); assertEquals(csk.isEmpty(), empty, "Empty"); assertEquals(csk.isDirect(), direct, "Direct"); - assertEquals(csk.hasMemorySegment(), memSeg, "MemorySegment"); + assertEquals(csk.hasMemorySegment(), hasSeg, "MemorySegment"); assertEquals(csk.isOrdered(), ordered, "Ordered"); assertEquals(csk.isEstimationMode(), estimation, "Estimation"); } diff --git 
a/src/test/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketchTest.java index 7a7b89cef..60d409cde 100644 --- a/src/test/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketchTest.java @@ -525,7 +525,7 @@ public void checkNullMemory() { //checks Alex's bug where lgArrLongs > lgNomLongs +1. @Test - public void checkResizeInBigMem() { + public void checkResizeInBigSeg() { int lgK = 14; int u = 1 << 20; boolean useSeg = true; @@ -543,7 +543,7 @@ public void checkConstructorKtooSmall() { } @Test(expectedExceptions = SketchesArgumentException.class) - public void checkConstructorMemTooSmall() { + public void checkConstructorSegTooSmall() { int lgK = 4; int k = 1 << lgK; MemorySegment wseg = MemorySegment.ofArray(new byte[k/2]); diff --git a/src/test/java/org/apache/datasketches/theta2/DirectIntersectionTest.java b/src/test/java/org/apache/datasketches/theta2/DirectIntersectionTest.java index c27c3b085..20eb9a382 100644 --- a/src/test/java/org/apache/datasketches/theta2/DirectIntersectionTest.java +++ b/src/test/java/org/apache/datasketches/theta2/DirectIntersectionTest.java @@ -63,9 +63,9 @@ public void checkExactIntersectionNoOverlap() { final int segBytes = getMaxIntersectionBytes(k); final byte[] segArr = new byte[segBytes]; - final MemorySegment iMem = MemorySegment.ofArray(segArr); + final MemorySegment iSeg = MemorySegment.ofArray(segArr); - inter = SetOperation.builder().buildIntersection(iMem); + inter = SetOperation.builder().buildIntersection(iSeg); inter.intersect(usk1); inter.intersect(usk2); @@ -112,9 +112,9 @@ public void checkExactIntersectionFullOverlap() { final int segBytes = getMaxIntersectionBytes(k); final byte[] segArr = new byte[segBytes]; - final MemorySegment iMem = MemorySegment.ofArray(segArr); + final MemorySegment iSeg = 
MemorySegment.ofArray(segArr); - inter = SetOperation.builder().buildIntersection(iMem); + inter = SetOperation.builder().buildIntersection(iSeg); inter.intersect(usk1); inter.intersect(usk2); @@ -156,13 +156,13 @@ public void checkIntersectionEarlyStop() { final int segBytes = getMaxIntersectionBytes(k); final byte[] segArr = new byte[segBytes]; - final MemorySegment iMem = MemorySegment.ofArray(segArr); + final MemorySegment iSeg = MemorySegment.ofArray(segArr); final CompactSketch csk1 = usk1.compact(true, null); final CompactSketch csk2 = usk2.compact(true, null); final Intersection inter = - SetOperation.builder().buildIntersection(iMem); + SetOperation.builder().buildIntersection(iSeg); inter.intersect(csk1); inter.intersect(csk2); @@ -179,9 +179,9 @@ public void checkNoCall() { final int segBytes = getMaxIntersectionBytes(k); final byte[] segArr = new byte[segBytes]; - final MemorySegment iMem = MemorySegment.ofArray(segArr); + final MemorySegment iSeg = MemorySegment.ofArray(segArr); - inter = SetOperation.builder().buildIntersection(iMem); + inter = SetOperation.builder().buildIntersection(iSeg); assertFalse(inter.hasResult()); inter.getResult(false, null); } @@ -192,8 +192,8 @@ public void checkIntersectionNull() { final int k = 1< lgNomLongs +1. 
@Test - public void checkResizeInBigMem() { + public void checkResizeInBigSeg() { int k = 1 << 14; int u = 1 << 20; MemorySegment seg = MemorySegment.ofArray(new byte[(8*k*16) +24]); diff --git a/src/test/java/org/apache/datasketches/theta2/DirectUnionTest.java b/src/test/java/org/apache/datasketches/theta2/DirectUnionTest.java index ae3e84de6..2d911497a 100644 --- a/src/test/java/org/apache/datasketches/theta2/DirectUnionTest.java +++ b/src/test/java/org/apache/datasketches/theta2/DirectUnionTest.java @@ -62,8 +62,8 @@ public void checkExactUnionNoOverlap() { assertEquals(u, usk1.getEstimate() + usk2.getEstimate(), 0.0); //exact, no overlap - final MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); - final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); + final MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); + final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); union.union(usk1); //update with heap UpdateSketch union.union(usk2); //update with heap UpdateSketch @@ -87,8 +87,8 @@ public void checkEstUnionNoOverlap() { usk2.update(i); //2*k no overlap } - final MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); - final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); + final MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); + final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); union.union(usk1); //update with heap UpdateSketch union.union(usk2); //update with heap UpdateSketch @@ -114,8 +114,8 @@ public void checkExactUnionWithOverlap() { assertEquals(u, usk1.getEstimate() + usk2.getEstimate()/2, 0.0); //exact, overlapped - final MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); - final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); + final MemorySegment uSeg = MemorySegment.ofArray(new 
byte[getMaxUnionBytes(k)]); + final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); union.union(usk1); //update with heap UpdateSketch union.union(usk2); //update with heap UpdateSketch @@ -141,8 +141,8 @@ public void checkHeapifyExact() { assertEquals(u, usk1.getEstimate() + usk2.getEstimate(), 0.0); //exact, no overlap - final MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); - final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); + final MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); + final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); union.union(usk1); //update with heap UpdateSketch union.union(usk2); //update with heap UpdateSketch @@ -173,8 +173,8 @@ public void checkWrapExact() { assertEquals(u, usk1.getEstimate() + usk2.getEstimate(), 0.0); //exact, no overlap - final MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); - final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); + final MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); + final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); union.union(usk1); //update with heap UpdateSketch union.union(usk2); //update with heap UpdateSketch @@ -202,8 +202,8 @@ public void checkWrapEstNoOverlap() { usk2.update(i); //2k no overlap, exact } - final MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); - final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); + final MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); + final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); union.union(usk1); //update with heap UpdateSketch union.union(usk2); //update with heap UpdateSketch, early stop not possible @@ -233,8 +233,8 @@ public void checkWrapEstNoOverlapOrderedIn() { final 
CompactSketch cosk2 = usk2.compact(true, null); - final MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); - final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); + final MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); + final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); union.union(usk1); //update with heap UpdateSketch union.union(cosk2); //update with heap Compact, Ordered input, early stop @@ -270,11 +270,11 @@ public void checkWrapEstNoOverlapOrderedDirectIn() { usk2.update(i); //2k no overlap, exact, will force early stop } - final MemorySegment cskMem2 = MemorySegment.ofArray(new byte[usk2.getCompactBytes()]); - final CompactSketch cosk2 = usk2.compact(true, cskMem2); //ordered, loads the cskMem2 as ordered + final MemorySegment cskSeg2 = MemorySegment.ofArray(new byte[usk2.getCompactBytes()]); + final CompactSketch cosk2 = usk2.compact(true, cskSeg2); //ordered, loads the cskSeg2 as ordered - final MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union memory - final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); + final MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union segment + final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); union.union(usk1); //updates with heap UpdateSketch union.union(cosk2); //updates with direct CompactSketch, ordered, use early stop @@ -295,7 +295,7 @@ public void checkWrapEstNoOverlapOrderedDirectIn() { } @Test - public void checkWrapEstNoOverlapOrderedMemIn() { + public void checkWrapEstNoOverlapOrderedSegIn() { final int lgK = 12; //4096 final int k = 1 << lgK; final int u = 4*k; @@ -310,14 +310,14 @@ public void checkWrapEstNoOverlapOrderedMemIn() { usk2.update(i); //2k no overlap, exact, will force early stop } - final MemorySegment cskMem2 = MemorySegment.ofArray(new byte[usk2.getCompactBytes()]); 
- usk2.compact(true, cskMem2); //ordered, loads the cskMem2 as ordered + final MemorySegment cskSeg2 = MemorySegment.ofArray(new byte[usk2.getCompactBytes()]); + usk2.compact(true, cskSeg2); //ordered, loads the cskSeg2 as ordered - final MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union memory - final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); + final MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union segment + final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); union.union(usk1); //updates with heap UpdateSketch - union.union(cskMem2); //updates with direct CompactSketch, ordered, use early stop + union.union(cskSeg2); //updates with direct CompactSketch, ordered, use early stop UpdateSketch emptySketch = UpdateSketch.builder().setNominalEntries(k).build(); union.union(emptySketch); //updates with empty sketch @@ -335,7 +335,7 @@ public void checkWrapEstNoOverlapOrderedMemIn() { } @Test - public void checkWrapEstNoOverlapUnorderedMemIn() { + public void checkWrapEstNoOverlapUnorderedSegIn() { final int lgK = 12; //4096 final int k = 1 << lgK; final int u = 4*k; @@ -350,14 +350,14 @@ public void checkWrapEstNoOverlapUnorderedMemIn() { usk2.update(i); //2k no overlap, exact, will force early stop } - final MemorySegment cskMem2 = MemorySegment.ofArray(new byte[usk2.getCompactBytes()]); - usk2.compact(false, cskMem2); //unordered, loads the cskMem2 as unordered + final MemorySegment cskSeg2 = MemorySegment.ofArray(new byte[usk2.getCompactBytes()]); + usk2.compact(false, cskSeg2); //unordered, loads the cskSeg2 as unordered - final MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union memory - final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); + final MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union segment + final Union union = 
SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); union.union(usk1); //updates with heap UpdateSketch - union.union(cskMem2); //updates with direct CompactSketch, ordered, use early stop + union.union(cskSeg2); //updates with direct CompactSketch, ordered, use early stop UpdateSketch emptySketch = UpdateSketch.builder().setNominalEntries(k).build(); union.union(emptySketch); //updates with empty sketch @@ -404,8 +404,8 @@ public void checkMultiUnion() { } v += u; - final MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union memory - final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); + final MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union segment + final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); union.union(usk1); //updates with heap UpdateSketch union.union(usk2); //updates with heap UpdateSketch @@ -418,7 +418,7 @@ public void checkMultiUnion() { } @Test - public void checkDirectMemoryIn() { + public void checkDirectSegmentIn() { final int lgK = 12; //4096 final int k = 1 << lgK; final int u1 = 2*k; @@ -435,14 +435,14 @@ public void checkDirectMemoryIn() { usk2.update(i); //2*k + 1024 no overlap } - final MemorySegment skMem1 = MemorySegment.ofArray(usk1.compact(false, null).toByteArray()).asReadOnly(); - final MemorySegment skMem2 = MemorySegment.ofArray(usk2.compact(true, null).toByteArray()).asReadOnly(); + final MemorySegment skSeg1 = MemorySegment.ofArray(usk1.compact(false, null).toByteArray()).asReadOnly(); + final MemorySegment skSeg2 = MemorySegment.ofArray(usk2.compact(true, null).toByteArray()).asReadOnly(); - final CompactSketch csk1 = (CompactSketch)Sketch.wrap(skMem1); - final CompactSketch csk2 = (CompactSketch)Sketch.wrap(skMem2); + final CompactSketch csk1 = (CompactSketch)Sketch.wrap(skSeg1); + final CompactSketch csk2 = (CompactSketch)Sketch.wrap(skSeg2); - final MemorySegment uMem = 
MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union memory - final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); + final MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union segment + final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); union.union(csk1); union.union(csk2); @@ -469,14 +469,14 @@ public void checkSerVer1Handling() { usk2.update(i); //2*k + 1024 no overlap } - final MemorySegment v1mem1 = convertSerVer3toSerVer1(usk1.compact(true, null)).asReadOnly(); - final MemorySegment v1mem2 = convertSerVer3toSerVer1(usk2.compact(true, null)).asReadOnly(); + final MemorySegment v1seg1 = convertSerVer3toSerVer1(usk1.compact(true, null)).asReadOnly(); + final MemorySegment v1seg2 = convertSerVer3toSerVer1(usk2.compact(true, null)).asReadOnly(); - final MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union memory - final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); + final MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union segment + final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); - union.union(v1mem1); - union.union(v1mem2); + union.union(v1seg1); + union.union(v1seg2); final CompactSketch cOut = union.getResult(true, null); assertEquals(cOut.getEstimate(), totU, .05*k); @@ -500,58 +500,58 @@ public void checkSerVer2Handling() { usk2.update(i); //2*k + 1024 no overlap } - final MemorySegment v2mem1 = convertSerVer3toSerVer2(usk1.compact(true, null), ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly(); - final MemorySegment v2mem2 = convertSerVer3toSerVer2(usk2.compact(true, null), ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly(); + final MemorySegment v2seg1 = convertSerVer3toSerVer2(usk1.compact(true, null), ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly(); + final MemorySegment v2seg2 = convertSerVer3toSerVer2(usk2.compact(true, null), 
ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly(); - final MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union memory - final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); + final MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union segment + final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); - union.union(v2mem1); - union.union(v2mem2); + union.union(v2seg1); + union.union(v2seg2); final CompactSketch cOut = union.getResult(true, null); assertEquals(cOut.getEstimate(), totU, .05*k); } @Test - public void checkUpdateMemorySpecialCases() { + public void checkUpdateSegmentSpecialCases() { final int lgK = 12; //4096 final int k = 1 << lgK; final UpdateSketch usk1 = UpdateSketch.builder().setNominalEntries(k).build(); final CompactSketch usk1c = usk1.compact(true, null); - MemorySegment v3mem1 = MemorySegment.ofArray(usk1c.toByteArray()); + MemorySegment v3seg1 = MemorySegment.ofArray(usk1c.toByteArray()); - final MemorySegment v1mem1 = convertSerVer3toSerVer1(usk1c).asReadOnly(); + final MemorySegment v1seg1 = convertSerVer3toSerVer1(usk1c).asReadOnly(); - MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union memory - Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); - union.union(v1mem1); + MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union segment + Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); + union.union(v1seg1); CompactSketch cOut = union.getResult(true, null); assertEquals(cOut.getEstimate(), 0.0, 0.0); - final MemorySegment v2mem1 = convertSerVer3toSerVer2(usk1c, ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly(); + final MemorySegment v2seg1 = convertSerVer3toSerVer2(usk1c, ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly(); - uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union memory - union = 
SetOperation.builder().setNominalEntries(k).buildUnion(uMem); - union.union(v2mem1); + uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union segment + union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); + union.union(v2seg1); cOut = union.getResult(true, null); assertEquals(cOut.getEstimate(), 0.0, 0.0); - uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union memory - union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); - union.union(v3mem1); + uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union segment + union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); + union.union(v3seg1); cOut = union.getResult(true, null); assertEquals(cOut.getEstimate(), 0.0, 0.0); - uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union memory - union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); + uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union segment + union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); cOut = union.getResult(true, null); assertEquals(cOut.getEstimate(), 0.0, 0.0); } @Test - public void checkUpdateMemorySpecialCases2() { + public void checkUpdateSegmentSpecialCases2() { final int lgK = 12; //4096 final int k = 1 << lgK; final int u = 2*k; @@ -562,15 +562,15 @@ public void checkUpdateMemorySpecialCases2() { usk1.update(i); //force prelongs to 3 } final CompactSketch usk1c = usk1.compact(true, null); - final MemorySegment v3mem1 = MemorySegment.ofArray(usk1c.toByteArray()); + final MemorySegment v3seg1 = MemorySegment.ofArray(usk1c.toByteArray()); - final MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union memory - final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); - union.union(v3mem1); + final MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union segment + final Union union = 
SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); + union.union(v3seg1); } @Test(expectedExceptions = SketchesArgumentException.class) - public void checkMemBadSerVer() { + public void checkSegBadSerVer() { final int lgK = 12; //4096 final int k = 1 << lgK; @@ -578,14 +578,14 @@ public void checkMemBadSerVer() { usk1.update(1); usk1.update(2); final CompactSketch usk1c = usk1.compact(true, null); - final MemorySegment v3mem1 = MemorySegment.ofArray(usk1c.toByteArray()); + final MemorySegment v3seg1 = MemorySegment.ofArray(usk1c.toByteArray()); //corrupt SerVer - v3mem1.set(JAVA_BYTE, SER_VER_BYTE, (byte)0); + v3seg1.set(JAVA_BYTE, SER_VER_BYTE, (byte)0); - final MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union memory - final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); + final MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union segment + final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); - union.union(v3mem1); + union.union(v3seg1); } @Test @@ -596,27 +596,27 @@ public void checkEmptySerVer2and3() { final CompactSketch usk1c = usk1.compact(true, null); final byte[] skArr = usk1c.toByteArray(); final byte[] skArr2 = Arrays.copyOf(skArr, skArr.length * 2); - final MemorySegment v3mem1 = MemorySegment.ofArray(skArr2); + final MemorySegment v3seg1 = MemorySegment.ofArray(skArr2); - MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union memory - Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); - union.union(v3mem1); + MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union segment + Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); + union.union(v3seg1); - final MemorySegment v2mem1 = convertSerVer3toSerVer2(usk1c, ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly(); - final MemorySegment v2mem2 = MemorySegment.ofArray(new byte[16]); - 
MemorySegment.copy(v2mem1, 0, v2mem2, 0, 8); + final MemorySegment v2seg1 = convertSerVer3toSerVer2(usk1c, ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly(); + final MemorySegment v2seg2 = MemorySegment.ofArray(new byte[16]); + MemorySegment.copy(v2seg1, 0, v2seg2, 0, 8); - uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union memory - union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); - union.union(v2mem2); + uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union segment + union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); + union.union(v2seg2); } //Special DirectUnion cases @Test //Himanshu's issue public void checkDirectWrap() { final int nomEntries = 16; - final MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(nomEntries)]); - SetOperation.builder().setNominalEntries(nomEntries).buildUnion(uMem); + final MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(nomEntries)]); + SetOperation.builder().setNominalEntries(nomEntries).buildUnion(uSeg); final UpdateSketch sk1 = UpdateSketch.builder().setNominalEntries(nomEntries).build(); sk1.update("a"); @@ -626,10 +626,10 @@ public void checkDirectWrap() { sk2.update("c"); sk2.update("d"); - Union union = Sketches.wrapUnion(uMem); + Union union = Sketches.wrapUnion(uSeg); union.union(sk1); - union = Sketches.wrapUnion(uMem); + union = Sketches.wrapUnion(uSeg); union.union(sk2); final CompactSketch sketch = union.getResult(true, null); @@ -640,11 +640,11 @@ public void checkDirectWrap() { public void checkEmptyUnionCompactResult() { final int k = 64; - final MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union memory - final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); + final MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union segment + final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); - final 
MemorySegment mem = MemorySegment.ofArray(new byte[Sketch.getMaxCompactSketchBytes(0)]); - final CompactSketch csk = union.getResult(false, mem); //DirectCompactSketch + final MemorySegment seg = MemorySegment.ofArray(new byte[Sketch.getMaxCompactSketchBytes(0)]); + final CompactSketch csk = union.getResult(false, seg); //DirectCompactSketch assertTrue(csk.isEmpty()); } @@ -652,20 +652,20 @@ public void checkEmptyUnionCompactResult() { public void checkEmptyUnionCompactOrderedResult() { final int k = 64; - final MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union memory - final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); + final MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union segment + final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); - final MemorySegment mem = MemorySegment.ofArray(new byte[Sketch.getMaxCompactSketchBytes(0)]); - final CompactSketch csk = union.getResult(true, mem); //DirectCompactSketch + final MemorySegment seg = MemorySegment.ofArray(new byte[Sketch.getMaxCompactSketchBytes(0)]); + final CompactSketch csk = union.getResult(true, seg); //DirectCompactSketch assertTrue(csk.isEmpty()); } @Test - public void checkUnionMemToString() { + public void checkUnionSegToString() { final int k = 64; - final MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union memory - SetOperation.builder().setNominalEntries(k).buildUnion(uMem); + final MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union segment + SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); } @Test @@ -673,11 +673,11 @@ public void checkGetResult() { final int k = 1024; final UpdateSketch sk = Sketches.updateSketchBuilder().build(); - final int memBytes = getMaxUnionBytes(k); - final byte[] memArr = new byte[memBytes]; - final MemorySegment iMem = MemorySegment.ofArray(memArr); + final int segBytes = 
getMaxUnionBytes(k); + final byte[] segArr = new byte[segBytes]; + final MemorySegment iSeg = MemorySegment.ofArray(segArr); - final Union union = Sketches.setOperationBuilder().setNominalEntries(k).buildUnion(iMem); + final Union union = Sketches.setOperationBuilder().setNominalEntries(k).buildUnion(iSeg); union.union(sk); final CompactSketch csk = union.getResult(); assertEquals(csk.getCompactBytes(), 8); @@ -686,8 +686,8 @@ public void checkGetResult() { @Test public void checkPrimitiveUpdates() { final int k = 32; - final MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); - final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); + final MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); + final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); union.update(1L); union.update(1.5); //#1 double @@ -729,35 +729,35 @@ public void checkPrimitiveUpdates() { @Test public void checkGetFamily() { final int k = 16; - final MemorySegment mem = MemorySegment.ofArray(new byte[k*16 +32]); - final SetOperation setOp = new SetOperationBuilder().setNominalEntries(k).build(Family.UNION, mem); + final MemorySegment seg = MemorySegment.ofArray(new byte[k*16 +32]); + final SetOperation setOp = new SetOperationBuilder().setNominalEntries(k).build(Family.UNION, seg); assertEquals(setOp.getFamily(), Family.UNION); } @Test(expectedExceptions = SketchesArgumentException.class) public void checkPreambleLongsCorruption() { final int k = 16; - final MemorySegment mem = MemorySegment.ofArray(new byte[k*16 +32]); + final MemorySegment seg = MemorySegment.ofArray(new byte[k*16 +32]); - final SetOperation setOp = new SetOperationBuilder().setNominalEntries(k).build(Family.UNION, mem); + final SetOperation setOp = new SetOperationBuilder().setNominalEntries(k).build(Family.UNION, seg); println(setOp.toString()); - final int familyID = PreambleUtil.extractFamilyID(mem); - final int preLongs = 
PreambleUtil.extractPreLongs(mem); + final int familyID = PreambleUtil.extractFamilyID(seg); + final int preLongs = PreambleUtil.extractPreLongs(seg); assertEquals(familyID, Family.UNION.getID()); assertEquals(preLongs, Family.UNION.getMaxPreLongs()); - PreambleUtil.insertPreLongs(mem, 3); //Corrupt with 3; correct value is 4 - DirectQuickSelectSketch.writableWrap(mem, ThetaUtil.DEFAULT_UPDATE_SEED); + PreambleUtil.insertPreLongs(seg, 3); //Corrupt with 3; correct value is 4 + DirectQuickSelectSketch.writableWrap(seg, ThetaUtil.DEFAULT_UPDATE_SEED); } @Test(expectedExceptions = SketchesArgumentException.class) public void checkSizeTooSmall() { final int k = 16; - final MemorySegment mem = MemorySegment.ofArray(new byte[k*16 +32]); //initialized - final SetOperation setOp = new SetOperationBuilder().setNominalEntries(k).build(Family.UNION, mem); + final MemorySegment seg = MemorySegment.ofArray(new byte[k*16 +32]); //initialized + final SetOperation setOp = new SetOperationBuilder().setNominalEntries(k).build(Family.UNION, seg); println(setOp.toString()); - final MemorySegment mem2 = MemorySegment.ofArray(new byte[32]); //for just preamble - MemorySegment.copy(mem, 0, mem2, 0, 32); //too small - DirectQuickSelectSketch.writableWrap(mem2, ThetaUtil.DEFAULT_UPDATE_SEED); + final MemorySegment seg2 = MemorySegment.ofArray(new byte[32]); //for just preamble + MemorySegment.copy(seg, 0, seg2, 0, 32); //too small + DirectQuickSelectSketch.writableWrap(seg2, ThetaUtil.DEFAULT_UPDATE_SEED); } @Test @@ -771,9 +771,9 @@ public void checkForDruidBug() { final Sketch s = usk.compact(); - //create empty target union in off-heap mem - final MemorySegment mem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); - final Union union1 = SetOperation.builder().setNominalEntries(k).buildUnion(mem); + //create empty target union in off-heap segment + final MemorySegment seg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); + final Union union1 = 
SetOperation.builder().setNominalEntries(k).buildUnion(seg); union1.union(s); @@ -787,21 +787,21 @@ public void checkForDruidBug() { } @Test - public void checkForDruidBug2() { //update union with just sketch memory reference + public void checkForDruidBug2() { //update union with just sketch segment reference final int k = 16384; final UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(); for (int i = 0; i < 100000; i++) { usk.update(Integer.toString(i)); } usk.rebuild(); //optional but created the symptom - final MemorySegment memIn = MemorySegment.ofArray(new byte[usk.getCompactBytes()]); - usk.compact(true, memIn); //side effect of loading the memIn + final MemorySegment segIn = MemorySegment.ofArray(new byte[usk.getCompactBytes()]); + usk.compact(true, segIn); //side effect of loading the segIn - //create empty target union in off-heap mem - final MemorySegment mem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); - final Union union1 = SetOperation.builder().setNominalEntries(k).buildUnion(mem); + //create empty target union in off-heap segment + final MemorySegment seg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); + final Union union1 = SetOperation.builder().setNominalEntries(k).buildUnion(seg); - union1.union(memIn); + union1.union(segIn); final CompactSketch csk = union1.getResult(); diff --git a/src/test/java/org/apache/datasketches/theta2/EmptyTest.java b/src/test/java/org/apache/datasketches/theta2/EmptyTest.java index c5492cf34..01e16cf25 100644 --- a/src/test/java/org/apache/datasketches/theta2/EmptyTest.java +++ b/src/test/java/org/apache/datasketches/theta2/EmptyTest.java @@ -113,23 +113,23 @@ public void checkBackwardCompatibility1() { final int k = 16; final int bytes = Sketches.getMaxUnionBytes(k); //288 final Union union = SetOperation.builder().buildUnion(MemorySegment.ofArray(new byte[bytes])); - final MemorySegment mem = badEmptySk(); - final Sketch wsk = Sketches.wrapSketch(mem); - union.union(wsk); //union 
has memory + final MemorySegment seg = badEmptySk(); + final Sketch wsk = Sketches.wrapSketch(seg); + union.union(wsk); //union has segment } @Test public void checkBackwardCompatibility2() { final Union union = SetOperation.builder().setNominalEntries(16).buildUnion(); - final MemorySegment mem = badEmptySk(); - final Sketch wsk = Sketches.wrapSketch(mem); + final MemorySegment seg = badEmptySk(); + final Sketch wsk = Sketches.wrapSketch(seg); union.union(wsk); //heap union } @Test public void checkBackwardCompatibility3() { - final MemorySegment mem = badEmptySk(); - Sketches.heapifySketch(mem); + final MemorySegment seg = badEmptySk(); + Sketches.heapifySketch(seg); } @Test @@ -154,9 +154,9 @@ private static MemorySegment badEmptySk() { final long seedHash = 0x93CC; final long badEmptySk = seedHash << 48 | flags << 40 | family << 16 | serVer << 8 | preLongs; - final MemorySegment wmem = MemorySegment.ofArray(new byte[8]); - wmem.set(JAVA_LONG_UNALIGNED, 0, badEmptySk); - return wmem; + final MemorySegment wseg = MemorySegment.ofArray(new byte[8]); + wseg.set(JAVA_LONG_UNALIGNED, 0, badEmptySk); + return wseg; } /** diff --git a/src/test/java/org/apache/datasketches/theta2/ForwardCompatibilityTest.java b/src/test/java/org/apache/datasketches/theta2/ForwardCompatibilityTest.java index f04b01829..546d0524f 100644 --- a/src/test/java/org/apache/datasketches/theta2/ForwardCompatibilityTest.java +++ b/src/test/java/org/apache/datasketches/theta2/ForwardCompatibilityTest.java @@ -190,7 +190,6 @@ public void checkSerVer2_3PreLongs_1Value_ThLessthan1() { MemorySegment srcWseg = MemorySegment.ofArray(new byte[32]); MemorySegment.copy(srcSeg, 0, srcWseg, 0, 32); srcWseg.set(JAVA_INT_UNALIGNED, 8, 1); //corrupt curCount = 1 - //srcMemW.putLong(16, Long.MAX_VALUE); long[] cache = csk.getCache(); srcWseg.set(JAVA_LONG_UNALIGNED, 24, cache[0]); //corrupt cache with only one value diff --git a/src/test/java/org/apache/datasketches/theta2/HeapAlphaSketchTest.java 
b/src/test/java/org/apache/datasketches/theta2/HeapAlphaSketchTest.java index 47420ae0e..a0d6cadae 100644 --- a/src/test/java/org/apache/datasketches/theta2/HeapAlphaSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta2/HeapAlphaSketchTest.java @@ -86,7 +86,7 @@ public void checkConstructorKtooSmall() { } @Test(expectedExceptions = SketchesArgumentException.class) - public void checkAlphaIncompatibleWithMem() { + public void checkAlphaIncompatibleWithSeg() { MemorySegment seg = MemorySegment.ofArray(new byte[(512*16)+24]); UpdateSketch.builder().setFamily(Family.ALPHA).setNominalEntries(512).build(seg); } @@ -112,7 +112,7 @@ public void checkIllegalSketchID_UpdateSketch() { MemorySegment seg = MemorySegment.ofArray(byteArray); seg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte - //try to heapify the corrupted mem + //try to heapify the corrupted seg Sketch.heapify(seg, seed); } @@ -266,8 +266,8 @@ public void checkAlphaToCompactForms() { int bytes = usk.getCompactBytes(); int alphaBytes = sk1.getRetainedEntries(true) * 8; assertEquals(bytes, alphaBytes + (Family.COMPACT.getMaxPreLongs() << 3)); - byte[] memArr2 = new byte[bytes]; - MemorySegment seg2 = MemorySegment.ofArray(memArr2); + byte[] segArr2 = new byte[bytes]; + MemorySegment seg2 = MemorySegment.ofArray(segArr2); comp3 = usk.compact(false, seg2); @@ -587,7 +587,7 @@ public void checkNegativeHashes() { } @Test - public void checkMemDeSerExceptions() { + public void checkSegDeSerExceptions() { int k = 1024; UpdateSketch sk1 = UpdateSketch.builder().setFamily(ALPHA).setNominalEntries(k).build(); sk1.update(1L); //forces preLongs to 3 @@ -595,16 +595,16 @@ public void checkMemDeSerExceptions() { MemorySegment seg = MemorySegment.ofArray(bytearray1); long pre0 = seg.get(JAVA_LONG_UNALIGNED, 0); - tryBadMem(seg, PREAMBLE_LONGS_BYTE, 2); //Corrupt PreLongs + tryBadSeg(seg, PREAMBLE_LONGS_BYTE, 2); //Corrupt PreLongs seg.set(JAVA_LONG_UNALIGNED, 0, pre0); //restore - 
tryBadMem(seg, SER_VER_BYTE, 2); //Corrupt SerVer + tryBadSeg(seg, SER_VER_BYTE, 2); //Corrupt SerVer seg.set(JAVA_LONG_UNALIGNED, 0, pre0); //restore - tryBadMem(seg, FAMILY_BYTE, 2); //Corrupt Family + tryBadSeg(seg, FAMILY_BYTE, 2); //Corrupt Family seg.set(JAVA_LONG_UNALIGNED, 0, pre0); //restore - tryBadMem(seg, FLAGS_BYTE, 2); //Corrupt READ_ONLY to true + tryBadSeg(seg, FLAGS_BYTE, 2); //Corrupt READ_ONLY to true seg.set(JAVA_LONG_UNALIGNED, 0, pre0); //restore final long origThetaLong = seg.get(JAVA_LONG_UNALIGNED, THETA_LONG); @@ -633,7 +633,7 @@ public void checkMemDeSerExceptions() { assertEquals(rf, ResizeFactor.X2);//ResizeFactor recovered to X2, which always works. } - private static void tryBadMem(MemorySegment seg, int byteOffset, int byteValue) { + private static void tryBadSeg(MemorySegment seg, int byteOffset, int byteValue) { try { seg.set(JAVA_BYTE, byteOffset, (byte) byteValue); //Corrupt HeapAlphaSketch.heapifyInstance(seg, ThetaUtil.DEFAULT_UPDATE_SEED); diff --git a/src/test/java/org/apache/datasketches/theta2/HeapIntersectionTest.java b/src/test/java/org/apache/datasketches/theta2/HeapIntersectionTest.java index 0abbe26b1..7ba1fecf2 100644 --- a/src/test/java/org/apache/datasketches/theta2/HeapIntersectionTest.java +++ b/src/test/java/org/apache/datasketches/theta2/HeapIntersectionTest.java @@ -115,12 +115,12 @@ public void checkExactIntersectionFullOverlap() { final int bytes = rsk1.getCompactBytes(); final byte[] byteArray = new byte[bytes]; - final MemorySegment mem = MemorySegment.ofArray(byteArray); + final MemorySegment seg = MemorySegment.ofArray(byteArray); - rsk1 = inter.getResult(!ordered, mem); //executed twice to fully exercise the internal state machine + rsk1 = inter.getResult(!ordered, seg); //executed twice to fully exercise the internal state machine assertEquals(rsk1.getEstimate(), k); - rsk1 = inter.getResult(ordered, mem); + rsk1 = inter.getResult(ordered, seg); assertEquals(rsk1.getEstimate(), k); } @@ -353,7 +353,7 @@ 
public void checkHeapifyAndWrap() { assertEquals(inter1est, cSk1Est, 0.0); println("Inter1Est: " + inter1est); - //Put the intersection into memory + //Put the intersection into segment final byte[] byteArray = inter.toByteArray(); final MemorySegment seg = MemorySegment.ofArray(byteArray); //Heapify diff --git a/src/test/java/org/apache/datasketches/theta2/HeapQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta2/HeapQuickSelectSketchTest.java index 6aa002853..0584e34b3 100644 --- a/src/test/java/org/apache/datasketches/theta2/HeapQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta2/HeapQuickSelectSketchTest.java @@ -99,7 +99,7 @@ public void checkIllegalSketchID_UpdateSketch() { MemorySegment seg = MemorySegment.ofArray(byteArray); seg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte - //try to heapify the corrupted mem + //try to heapify the corrupted seg Sketch.heapify(seg, seed); } @@ -110,16 +110,16 @@ public void checkHeapifySeedConflict() { long seed2 = ThetaUtil.DEFAULT_UPDATE_SEED; UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed1).setNominalEntries(k).build(); byte[] byteArray = usk.toByteArray(); - MemorySegment srcMem = MemorySegment.ofArray(byteArray).asReadOnly(); - Sketch.heapify(srcMem, seed2); + MemorySegment srcSeg = MemorySegment.ofArray(byteArray).asReadOnly(); + Sketch.heapify(srcSeg, seed2); } @Test(expectedExceptions = SketchesArgumentException.class) public void checkHeapifyCorruptLgNomLongs() { UpdateSketch usk = UpdateSketch.builder().setNominalEntries(16).build(); - MemorySegment srcMem = MemorySegment.ofArray(usk.toByteArray()); - srcMem.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte)2); //corrupt - Sketch.heapify(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + MemorySegment srcSeg = MemorySegment.ofArray(usk.toByteArray()); + srcSeg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte)2); //corrupt + Sketch.heapify(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); } @Test @@ -167,8 
+167,8 @@ public void checkHeapifyByteArrayEstimating() { assertEquals(usk.isEstimationMode(), true); byte[] byteArray = usk.toByteArray(); - MemorySegment srcMem = MemorySegment.ofArray(byteArray).asReadOnly(); - UpdateSketch usk2 = UpdateSketch.heapify(srcMem, seed); + MemorySegment srcSeg = MemorySegment.ofArray(byteArray).asReadOnly(); + UpdateSketch usk2 = UpdateSketch.heapify(srcSeg, seed); assertEquals(usk2.getEstimate(), uskEst); assertEquals(usk2.getLowerBound(2), uskLB); assertEquals(usk2.getUpperBound(2), uskUB); @@ -195,9 +195,9 @@ public void checkHeapifyMemoryEstimating() { assertTrue(sk1.isEstimationMode()); byte[] byteArray = sk1.toByteArray(); - MemorySegment mem = MemorySegment.ofArray(byteArray).asReadOnly(); + MemorySegment seg = MemorySegment.ofArray(byteArray).asReadOnly(); - UpdateSketch sk2 = UpdateSketch.heapify(mem, ThetaUtil.DEFAULT_UPDATE_SEED); + UpdateSketch sk2 = UpdateSketch.heapify(seg, ThetaUtil.DEFAULT_UPDATE_SEED); assertEquals(sk2.getEstimate(), sk1est); assertEquals(sk2.getLowerBound(2), sk1lb); @@ -264,7 +264,7 @@ public void checkHQStoCompactForms() { byte[] segArr = new byte[uskCompBytes]; MemorySegment seg = MemorySegment.ofArray(segArr); //allocate seg for compact form - comp3 = usk.compact(false, seg); //load the mem2 + comp3 = usk.compact(false, seg); //load the seg2 assertEquals(comp3.getEstimate(), uskEst); assertEquals(comp3.getLowerBound(2), uskLB); @@ -569,7 +569,7 @@ public void checkFamily() { } @Test - public void checkMemSerDeExceptions() { + public void checkSegSerDeExceptions() { int k = 1024; UpdateSketch sk1 = UpdateSketch.builder().setFamily(QUICKSELECT).setNominalEntries(k).build(); sk1.update(1L); //forces preLongs to 3 @@ -577,19 +577,19 @@ public void checkMemSerDeExceptions() { MemorySegment seg = MemorySegment.ofArray(bytearray1); long pre0 = seg.get(JAVA_LONG_UNALIGNED, 0); - tryBadMem(seg, PREAMBLE_LONGS_BYTE, 2); //Corrupt PreLongs + tryBadSeg(seg, PREAMBLE_LONGS_BYTE, 2); //Corrupt PreLongs 
seg.set(JAVA_LONG_UNALIGNED, 0, pre0); //restore - tryBadMem(seg, SER_VER_BYTE, 2); //Corrupt SerVer + tryBadSeg(seg, SER_VER_BYTE, 2); //Corrupt SerVer seg.set(JAVA_LONG_UNALIGNED, 0, pre0); //restore - tryBadMem(seg, FAMILY_BYTE, 1); //Corrupt Family + tryBadSeg(seg, FAMILY_BYTE, 1); //Corrupt Family seg.set(JAVA_LONG_UNALIGNED, 0, pre0); //restore - tryBadMem(seg, FLAGS_BYTE, 2); //Corrupt READ_ONLY to true + tryBadSeg(seg, FLAGS_BYTE, 2); //Corrupt READ_ONLY to true seg.set(JAVA_LONG_UNALIGNED, 0, pre0); //restore - tryBadMem(seg, FAMILY_BYTE, 4); //Corrupt, Family to Union + tryBadSeg(seg, FAMILY_BYTE, 4); //Corrupt, Family to Union seg.set(JAVA_LONG_UNALIGNED, 0, pre0); //restore final long origThetaLong = seg.get(JAVA_LONG_UNALIGNED, THETA_LONG); @@ -617,7 +617,7 @@ public void checkMemSerDeExceptions() { assertEquals(hqss.getResizeFactor(), ResizeFactor.X2); // force-promote to X2 } - private static void tryBadMem(MemorySegment seg, int byteOffset, int byteValue) { + private static void tryBadSeg(MemorySegment seg, int byteOffset, int byteValue) { try { seg.set(JAVA_BYTE, byteOffset, (byte) byteValue); //Corrupt HeapQuickSelectSketch.heapifyInstance(seg, ThetaUtil.DEFAULT_UPDATE_SEED); diff --git a/src/test/java/org/apache/datasketches/theta2/HeapUnionTest.java b/src/test/java/org/apache/datasketches/theta2/HeapUnionTest.java index 69e25ef51..dceea2a2b 100644 --- a/src/test/java/org/apache/datasketches/theta2/HeapUnionTest.java +++ b/src/test/java/org/apache/datasketches/theta2/HeapUnionTest.java @@ -27,10 +27,10 @@ import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; +import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; import java.util.Arrays; -import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.thetacommon.ThetaUtil; @@ -229,8 +229,8 @@ public void 
checkWrapEstNoOverlapOrderedDirectIn() { usk2.update(i); //2k no overlap, exact, will force early stop } - final MemorySegment cskMem2 = MemorySegment.ofArray(new byte[usk2.getCompactBytes()]); - final CompactSketch cosk2 = usk2.compact(true, cskMem2); //ordered, loads the cskMem2 as ordered + final MemorySegment cskSeg2 = MemorySegment.ofArray(new byte[usk2.getCompactBytes()]); + final CompactSketch cosk2 = usk2.compact(true, cskSeg2); //ordered, loads the cskSeg2 as ordered final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(); @@ -253,7 +253,7 @@ public void checkWrapEstNoOverlapOrderedDirectIn() { } @Test - public void checkHeapifyEstNoOverlapOrderedMemIn() { + public void checkHeapifyEstNoOverlapOrderedSegIn() { final int lgK = 12; //4096 final int k = 1 << lgK; final int u = 4*k; @@ -268,13 +268,13 @@ public void checkHeapifyEstNoOverlapOrderedMemIn() { usk2.update(i); //2k no overlap, exact, will force early stop } - final MemorySegment cskMem2 = MemorySegment.ofArray(new byte[usk2.getCompactBytes()]); - usk2.compact(true, cskMem2); //ordered, loads the cskMem2 as ordered + final MemorySegment cskSeg2 = MemorySegment.ofArray(new byte[usk2.getCompactBytes()]); + usk2.compact(true, cskSeg2); //ordered, loads the cskSeg2 as ordered final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(); union.union(usk1); //updates with heap UpdateSketch - union.union(cskMem2); //updates with direct CompactSketch, ordered, use early stop + union.union(cskSeg2); //updates with direct CompactSketch, ordered, use early stop UpdateSketch emptySketch = UpdateSketch.builder().setNominalEntries(k).build(); union.union(emptySketch); //updates with empty sketch @@ -292,7 +292,7 @@ public void checkHeapifyEstNoOverlapOrderedMemIn() { } @Test - public void checkHeapifyEstNoOverlapUnorderedMemIn() { + public void checkHeapifyEstNoOverlapUnorderedSegIn() { final int lgK = 12; //4096 final int k = 1 << lgK; final int u = 4*k; @@ -307,13 +307,13 @@ 
public void checkHeapifyEstNoOverlapUnorderedMemIn() { usk2.update(i); //2k no overlap, exact, will force early stop } - final MemorySegment cskMem2 = MemorySegment.ofArray(new byte[usk2.getCompactBytes()]); - usk2.compact(false, cskMem2); //unordered, loads the cskMem2 as unordered + final MemorySegment cskSeg2 = MemorySegment.ofArray(new byte[usk2.getCompactBytes()]); + usk2.compact(false, cskSeg2); //unordered, loads the cskSeg2 as unordered final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(); union.union(usk1); //updates with heap UpdateSketch - union.union(cskMem2); //updates with direct CompactSketch, ordered, use early stop + union.union(cskSeg2); //updates with direct CompactSketch, ordered, use early stop UpdateSketch emptySketch = UpdateSketch.builder().setNominalEntries(k).build(); union.union(emptySketch); //updates with empty sketch @@ -373,7 +373,7 @@ public void checkMultiUnion() { } @Test - public void checkDirectMemoryIn() { + public void checkDirectSegmentIn() { final int lgK = 12; //4096 final int k = 1 << lgK; final int u1 = 2*k; @@ -390,11 +390,11 @@ public void checkDirectMemoryIn() { usk2.update(i); //2*k + 1024 no overlap } - final MemorySegment skMem1 = MemorySegment.ofArray(usk1.compact(false, null).toByteArray()); - final MemorySegment skMem2 = MemorySegment.ofArray(usk2.compact(true, null).toByteArray()); + final MemorySegment skSeg1 = MemorySegment.ofArray(usk1.compact(false, null).toByteArray()); + final MemorySegment skSeg2 = MemorySegment.ofArray(usk2.compact(true, null).toByteArray()); - final CompactSketch csk1 = (CompactSketch)Sketch.wrap(skMem1); - final CompactSketch csk2 = (CompactSketch)Sketch.wrap(skMem2); + final CompactSketch csk1 = (CompactSketch)Sketch.wrap(skSeg1); + final CompactSketch csk2 = (CompactSketch)Sketch.wrap(skSeg2); final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(); @@ -466,7 +466,7 @@ public void checkSerVer2Handling() { } @Test - public void 
checkUpdateMemorySpecialCases() { + public void checkUpdateSegmentSpecialCases() { final int lgK = 12; //4096 final int k = 1 << lgK; @@ -499,7 +499,7 @@ public void checkUpdateMemorySpecialCases() { } @Test - public void checkUpdateMemorySpecialCases2() { + public void checkUpdateSegmentSpecialCases2() { final int lgK = 12; //4096 final int k = 1 << lgK; final int u = 2*k; @@ -517,7 +517,7 @@ public void checkUpdateMemorySpecialCases2() { } @Test(expectedExceptions = SketchesArgumentException.class) - public void checkMemBadSerVer() { + public void checkSegBadSerVer() { final int lgK = 12; //4096 final int k = 1 << lgK; final UpdateSketch usk1 = UpdateSketch.builder().setNominalEntries(k).build(); diff --git a/src/test/java/org/apache/datasketches/theta2/HeapifyWrapSerVer1and2Test.java b/src/test/java/org/apache/datasketches/theta2/HeapifyWrapSerVer1and2Test.java index 5d389704c..2e9519562 100644 --- a/src/test/java/org/apache/datasketches/theta2/HeapifyWrapSerVer1and2Test.java +++ b/src/test/java/org/apache/datasketches/theta2/HeapifyWrapSerVer1and2Test.java @@ -26,7 +26,6 @@ import java.lang.foreign.Arena; import java.lang.foreign.MemorySegment; -import org.apache.datasketches.memory.Memory; import org.apache.datasketches.thetacommon.ThetaUtil; import org.apache.datasketches.tuple.Util; import org.testng.annotations.Test; @@ -74,23 +73,23 @@ public void checkHeapifyCompactSketchAssumedDifferentSeed() { for (int i = 0; i < k; i++) { usk.update(i); } CompactSketch csk = usk.compact(); - MemorySegment cskMem = MemorySegment.ofArray(csk.toByteArray()).asReadOnly(); + MemorySegment cskSeg = MemorySegment.ofArray(csk.toByteArray()).asReadOnly(); CompactSketch cskResult; //SerialVersion3 test - cskResult = Sketches.heapifyCompactSketch(cskMem); + cskResult = Sketches.heapifyCompactSketch(cskSeg); assertEquals(cskResult.getEstimate(), usk.getEstimate()); assertEquals(cskResult.getSeedHash(), seedHash); //SerialVersion2 test - MemorySegment sv2cskMem = 
BackwardConversions.convertSerVer3toSerVer2(csk, seed).asReadOnly(); - cskResult = Sketches.heapifyCompactSketch(sv2cskMem); + MemorySegment sv2cskSeg = BackwardConversions.convertSerVer3toSerVer2(csk, seed).asReadOnly(); + cskResult = Sketches.heapifyCompactSketch(sv2cskSeg); assertEquals(cskResult.getEstimate(), usk.getEstimate()); assertEquals(cskResult.getSeedHash(), seedHash); //SerialVersion1 test - MemorySegment sv1cskMem = BackwardConversions.convertSerVer3toSerVer1(csk).asReadOnly(); - cskResult = Sketches.heapifyCompactSketch(sv1cskMem); + MemorySegment sv1cskSeg = BackwardConversions.convertSerVer3toSerVer1(csk).asReadOnly(); + cskResult = Sketches.heapifyCompactSketch(sv1cskSeg); assertEquals(cskResult.getEstimate(), usk.getEstimate()); assertEquals(cskResult.getSeedHash(), defaultSeedHash); } @@ -104,23 +103,23 @@ public void checkHeapifyCompactSketchGivenDefaultSeed() { for (int i = 0; i < k; i++) { usk.update(i); } CompactSketch csk = usk.compact(); - MemorySegment cskMem = MemorySegment.ofArray(csk.toByteArray()).asReadOnly(); + MemorySegment cskSeg = MemorySegment.ofArray(csk.toByteArray()).asReadOnly(); CompactSketch cskResult; //SerialVersion3 test - cskResult = Sketches.heapifyCompactSketch(cskMem, seed); + cskResult = Sketches.heapifyCompactSketch(cskSeg, seed); assertEquals(cskResult.getEstimate(), usk.getEstimate()); assertEquals(cskResult.getSeedHash(), seedHash); //SerialVersion2 test - MemorySegment sv2cskMem = BackwardConversions.convertSerVer3toSerVer2(csk, seed).asReadOnly(); - cskResult = Sketches.heapifyCompactSketch(sv2cskMem, seed); + MemorySegment sv2cskSeg = BackwardConversions.convertSerVer3toSerVer2(csk, seed).asReadOnly(); + cskResult = Sketches.heapifyCompactSketch(sv2cskSeg, seed); assertEquals(cskResult.getEstimate(), usk.getEstimate()); assertEquals(cskResult.getSeedHash(), seedHash); //SerialVersion1 test - MemorySegment sv1cskMem = BackwardConversions.convertSerVer3toSerVer1(csk).asReadOnly(); - cskResult = 
Sketches.heapifyCompactSketch(sv1cskMem, seed); + MemorySegment sv1cskSeg = BackwardConversions.convertSerVer3toSerVer1(csk).asReadOnly(); + cskResult = Sketches.heapifyCompactSketch(sv1cskSeg, seed); assertEquals(cskResult.getEstimate(), usk.getEstimate()); assertEquals(cskResult.getSeedHash(), seedHash); } @@ -134,23 +133,23 @@ public void checkHeapifyCompactSketchGivenDifferentSeed() { for (int i = 0; i < k; i++) { usk.update(i); } CompactSketch csk = usk.compact(); - MemorySegment cskMem = MemorySegment.ofArray(csk.toByteArray()).asReadOnly(); + MemorySegment cskSeg = MemorySegment.ofArray(csk.toByteArray()).asReadOnly(); CompactSketch cskResult; //SerialVersion3 test - cskResult = Sketches.heapifyCompactSketch(cskMem, seed); + cskResult = Sketches.heapifyCompactSketch(cskSeg, seed); assertEquals(cskResult.getEstimate(), usk.getEstimate()); assertEquals(cskResult.getSeedHash(), seedHash); //SerialVersion2 test - MemorySegment sv2cskMem = BackwardConversions.convertSerVer3toSerVer2(csk, seed).asReadOnly(); - cskResult = Sketches.heapifyCompactSketch(sv2cskMem, seed); + MemorySegment sv2cskSeg = BackwardConversions.convertSerVer3toSerVer2(csk, seed).asReadOnly(); + cskResult = Sketches.heapifyCompactSketch(sv2cskSeg, seed); assertEquals(cskResult.getEstimate(), usk.getEstimate()); assertEquals(cskResult.getSeedHash(), seedHash); //SerialVersion1 test - MemorySegment sv1cskMem = BackwardConversions.convertSerVer3toSerVer1(csk).asReadOnly(); - cskResult = Sketches.heapifyCompactSketch(sv1cskMem, seed); + MemorySegment sv1cskSeg = BackwardConversions.convertSerVer3toSerVer1(csk).asReadOnly(); + cskResult = Sketches.heapifyCompactSketch(sv1cskSeg, seed); assertEquals(cskResult.getEstimate(), usk.getEstimate()); assertEquals(cskResult.getSeedHash(), seedHash); } @@ -164,23 +163,23 @@ public void checkHeapifySketchAssumedDefaultSeed() { for (int i = 0; i < k; i++) { usk.update(i); } CompactSketch csk = usk.compact(); - MemorySegment cskMem = 
MemorySegment.ofArray(csk.toByteArray()).asReadOnly(); + MemorySegment cskSeg = MemorySegment.ofArray(csk.toByteArray()).asReadOnly(); CompactSketch cskResult; //SerialVersion3 test - cskResult = (CompactSketch) Sketches.heapifySketch(cskMem); + cskResult = (CompactSketch) Sketches.heapifySketch(cskSeg); assertEquals(cskResult.getEstimate(), usk.getEstimate()); assertEquals(cskResult.getSeedHash(), seedHash); //SerialVersion2 test - MemorySegment sv2cskMem = BackwardConversions.convertSerVer3toSerVer2(csk, seed).asReadOnly(); - cskResult = (CompactSketch) Sketches.heapifySketch(sv2cskMem); + MemorySegment sv2cskSeg = BackwardConversions.convertSerVer3toSerVer2(csk, seed).asReadOnly(); + cskResult = (CompactSketch) Sketches.heapifySketch(sv2cskSeg); assertEquals(cskResult.getEstimate(), usk.getEstimate()); assertEquals(cskResult.getSeedHash(), seedHash); //SerialVersion1 test - MemorySegment sv1cskMem = BackwardConversions.convertSerVer3toSerVer1(csk).asReadOnly(); - cskResult = (CompactSketch) Sketches.heapifySketch(sv1cskMem); + MemorySegment sv1cskSeg = BackwardConversions.convertSerVer3toSerVer1(csk).asReadOnly(); + cskResult = (CompactSketch) Sketches.heapifySketch(sv1cskSeg); assertEquals(cskResult.getEstimate(), usk.getEstimate()); assertEquals(cskResult.getSeedHash(), seedHash); } @@ -194,23 +193,23 @@ public void checkHeapifySketchAssumedDifferentSeed() { for (int i = 0; i < k; i++) { usk.update(i); } CompactSketch csk = usk.compact(); - MemorySegment cskMem = MemorySegment.ofArray(csk.toByteArray()).asReadOnly(); + MemorySegment cskSeg = MemorySegment.ofArray(csk.toByteArray()).asReadOnly(); CompactSketch cskResult; //SerialVersion3 test - cskResult = (CompactSketch) Sketches.heapifySketch(cskMem); + cskResult = (CompactSketch) Sketches.heapifySketch(cskSeg); assertEquals(cskResult.getEstimate(), usk.getEstimate()); assertEquals(cskResult.getSeedHash(), seedHash); //SerialVersion2 test - MemorySegment sv2cskMem = 
BackwardConversions.convertSerVer3toSerVer2(csk, seed).asReadOnly(); - cskResult = (CompactSketch) Sketches.heapifySketch(sv2cskMem); + MemorySegment sv2cskSeg = BackwardConversions.convertSerVer3toSerVer2(csk, seed).asReadOnly(); + cskResult = (CompactSketch) Sketches.heapifySketch(sv2cskSeg); assertEquals(cskResult.getEstimate(), usk.getEstimate()); assertEquals(cskResult.getSeedHash(), seedHash); //SerialVersion1 test - MemorySegment sv1cskMem = BackwardConversions.convertSerVer3toSerVer1(csk).asReadOnly(); - cskResult = (CompactSketch) Sketches.heapifySketch(sv1cskMem); + MemorySegment sv1cskSeg = BackwardConversions.convertSerVer3toSerVer1(csk).asReadOnly(); + cskResult = (CompactSketch) Sketches.heapifySketch(sv1cskSeg); assertEquals(cskResult.getEstimate(), usk.getEstimate()); assertEquals(cskResult.getSeedHash(), defaultSeedHash); } @@ -224,23 +223,23 @@ public void checkHeapifySketchGivenDefaultSeed() { for (int i = 0; i < k; i++) { usk.update(i); } CompactSketch csk = usk.compact(); - MemorySegment cskMem = MemorySegment.ofArray(csk.toByteArray()).asReadOnly(); + MemorySegment cskSeg = MemorySegment.ofArray(csk.toByteArray()).asReadOnly(); CompactSketch cskResult; //SerialVersion3 test - cskResult = (CompactSketch) Sketches.heapifySketch(cskMem, seed); + cskResult = (CompactSketch) Sketches.heapifySketch(cskSeg, seed); assertEquals(cskResult.getEstimate(), usk.getEstimate()); assertEquals(cskResult.getSeedHash(), seedHash); //SerialVersion2 test - MemorySegment sv2cskMem = BackwardConversions.convertSerVer3toSerVer2(csk, seed).asReadOnly(); - cskResult = (CompactSketch) Sketches.heapifySketch(sv2cskMem, seed); + MemorySegment sv2cskSeg = BackwardConversions.convertSerVer3toSerVer2(csk, seed).asReadOnly(); + cskResult = (CompactSketch) Sketches.heapifySketch(sv2cskSeg, seed); assertEquals(cskResult.getEstimate(), usk.getEstimate()); assertEquals(cskResult.getSeedHash(), seedHash); //SerialVersion1 test - MemorySegment sv1cskMem = 
BackwardConversions.convertSerVer3toSerVer1(csk).asReadOnly(); - cskResult = (CompactSketch) Sketches.heapifySketch(sv1cskMem, seed); + MemorySegment sv1cskSeg = BackwardConversions.convertSerVer3toSerVer1(csk).asReadOnly(); + cskResult = (CompactSketch) Sketches.heapifySketch(sv1cskSeg, seed); assertEquals(cskResult.getEstimate(), usk.getEstimate()); assertEquals(cskResult.getSeedHash(), seedHash); } @@ -254,23 +253,23 @@ public void checkHeapifySketchGivenDifferentSeed() { for (int i = 0; i < k; i++) { usk.update(i); } CompactSketch csk = usk.compact(); - MemorySegment cskMem = MemorySegment.ofArray(csk.toByteArray()).asReadOnly(); + MemorySegment cskSeg = MemorySegment.ofArray(csk.toByteArray()).asReadOnly(); CompactSketch cskResult; //SerialVersion3 test - cskResult = (CompactSketch) Sketches.heapifySketch(cskMem, seed); + cskResult = (CompactSketch) Sketches.heapifySketch(cskSeg, seed); assertEquals(cskResult.getEstimate(), usk.getEstimate()); assertEquals(cskResult.getSeedHash(), seedHash); //SerialVersion2 test - MemorySegment sv2cskMem = BackwardConversions.convertSerVer3toSerVer2(csk, seed).asReadOnly(); - cskResult = (CompactSketch) Sketches.heapifySketch(sv2cskMem, seed); + MemorySegment sv2cskSeg = BackwardConversions.convertSerVer3toSerVer2(csk, seed).asReadOnly(); + cskResult = (CompactSketch) Sketches.heapifySketch(sv2cskSeg, seed); assertEquals(cskResult.getEstimate(), usk.getEstimate()); assertEquals(cskResult.getSeedHash(), seedHash); //SerialVersion1 test - MemorySegment sv1cskMem = BackwardConversions.convertSerVer3toSerVer1(csk).asReadOnly(); - cskResult = (CompactSketch) Sketches.heapifySketch(sv1cskMem, seed); + MemorySegment sv1cskSeg = BackwardConversions.convertSerVer3toSerVer1(csk).asReadOnly(); + cskResult = (CompactSketch) Sketches.heapifySketch(sv1cskSeg, seed); assertEquals(cskResult.getEstimate(), usk.getEstimate()); assertEquals(cskResult.getSeedHash(), seedHash); } @@ -589,9 +588,9 @@ public void checkWrapSketchGivenDifferentSeed() 
{ private static MemorySegment putOffHeap(MemorySegment heapSeg, Arena arena) { final long cap = heapSeg.byteSize(); - MemorySegment wmem = arena.allocate(cap); - MemorySegment.copy(heapSeg, 0, wmem, 0, cap); - return wmem; + MemorySegment wseg = arena.allocate(cap); + MemorySegment.copy(heapSeg, 0, wseg, 0, cap); + return wseg; } @Test diff --git a/src/test/java/org/apache/datasketches/theta2/IteratorTest.java b/src/test/java/org/apache/datasketches/theta2/IteratorTest.java index 684e368ba..09505ae85 100644 --- a/src/test/java/org/apache/datasketches/theta2/IteratorTest.java +++ b/src/test/java/org/apache/datasketches/theta2/IteratorTest.java @@ -35,8 +35,8 @@ public class IteratorTest { public void checkDirectCompactSketch() { int k = 16; int maxBytes = Sketch.getMaxUpdateSketchBytes(k); - MemorySegment wmem = MemorySegment.ofArray(new byte[maxBytes]); - UpdateSketch sk1 = Sketches.updateSketchBuilder().setNominalEntries(k).build(wmem); + MemorySegment wseg = MemorySegment.ofArray(new byte[maxBytes]); + UpdateSketch sk1 = Sketches.updateSketchBuilder().setNominalEntries(k).build(wseg); println(sk1.getClass().getSimpleName()); for (int i = 0; i < (k/2); i++) { sk1.update(i); } HashIterator itr1 = sk1.iterator(); diff --git a/src/test/java/org/apache/datasketches/theta2/PreambleUtilTest.java b/src/test/java/org/apache/datasketches/theta2/PreambleUtilTest.java index 477a8f7a5..e5db72838 100644 --- a/src/test/java/org/apache/datasketches/theta2/PreambleUtilTest.java +++ b/src/test/java/org/apache/datasketches/theta2/PreambleUtilTest.java @@ -73,9 +73,9 @@ public void checkToString() { final int u = 2*k; final int bytes = (k << 4) + (Family.QUICKSELECT.getMinPreLongs() << 3); final byte[] byteArray = new byte[bytes]; - final MemorySegment mem = MemorySegment.ofArray(byteArray); + final MemorySegment seg = MemorySegment.ofArray(byteArray); - final UpdateSketch quick1 = UpdateSketch.builder().setNominalEntries(k).build(mem); + final UpdateSketch quick1 = 
UpdateSketch.builder().setNominalEntries(k).build(seg); println(Sketch.toString(byteArray)); Assert.assertTrue(quick1.isEmpty()); @@ -88,12 +88,12 @@ public void checkToString() { assertEquals(quick1.getEstimate(), u, .05*u); assertTrue(quick1.getRetainedEntries(false) > k); println(quick1.toString()); - println(PreambleUtil.preambleToString(mem)); + println(PreambleUtil.preambleToString(seg)); - final MemorySegment uMem = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); - final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); + final MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); + final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); union.union(quick1); - println(PreambleUtil.preambleToString(uMem)); + println(PreambleUtil.preambleToString(uSeg)); } @Test @@ -156,64 +156,64 @@ public void checkPreLongs() { @Test public void checkInsertsAndExtracts() { final byte[] arr = new byte[32]; - final MemorySegment wmem = MemorySegment.ofArray(arr); + final MemorySegment wseg = MemorySegment.ofArray(arr); int v = 0; - insertPreLongs(wmem, ++v); - assertEquals(extractPreLongs(wmem), v); - insertPreLongs(wmem, 0); + insertPreLongs(wseg, ++v); + assertEquals(extractPreLongs(wseg), v); + insertPreLongs(wseg, 0); - insertLgResizeFactor(wmem, 3); //limited to 2 bits - assertEquals(extractLgResizeFactor(wmem), 3); - insertLgResizeFactor(wmem, 0); + insertLgResizeFactor(wseg, 3); //limited to 2 bits + assertEquals(extractLgResizeFactor(wseg), 3); + insertLgResizeFactor(wseg, 0); - insertSerVer(wmem, ++v); - assertEquals(extractSerVer(wmem), v); - insertSerVer(wmem, 0); + insertSerVer(wseg, ++v); + assertEquals(extractSerVer(wseg), v); + insertSerVer(wseg, 0); - insertFamilyID(wmem, ++v); - assertEquals(extractFamilyID(wmem), v); - insertFamilyID(wmem, 0); + insertFamilyID(wseg, ++v); + assertEquals(extractFamilyID(wseg), v); + insertFamilyID(wseg, 0); - insertLgNomLongs(wmem, ++v); - 
assertEquals(extractLgNomLongs(wmem), v); - insertLgNomLongs(wmem, 0); + insertLgNomLongs(wseg, ++v); + assertEquals(extractLgNomLongs(wseg), v); + insertLgNomLongs(wseg, 0); - insertLgArrLongs(wmem, ++v); - assertEquals(extractLgArrLongs(wmem), v); - insertLgArrLongs(wmem, 0); + insertLgArrLongs(wseg, ++v); + assertEquals(extractLgArrLongs(wseg), v); + insertLgArrLongs(wseg, 0); - insertFlags(wmem, 3); - assertEquals(extractFlags(wmem), 3); - assertEquals(extractLgResizeRatioV1(wmem), 3); //also at byte 5, limited to 2 bits - insertFlags(wmem, 0); + insertFlags(wseg, 3); + assertEquals(extractFlags(wseg), 3); + assertEquals(extractLgResizeRatioV1(wseg), 3); //also at byte 5, limited to 2 bits + insertFlags(wseg, 0); - insertSeedHash(wmem, ++v); - assertEquals(extractSeedHash(wmem), v); - assertEquals(extractFlagsV1(wmem), v); //also at byte 6 - insertSeedHash(wmem, 0); + insertSeedHash(wseg, ++v); + assertEquals(extractSeedHash(wseg), v); + assertEquals(extractFlagsV1(wseg), v); //also at byte 6 + insertSeedHash(wseg, 0); - insertCurCount(wmem, ++v); - assertEquals(extractCurCount(wmem), v); - insertCurCount(wmem, 0); + insertCurCount(wseg, ++v); + assertEquals(extractCurCount(wseg), v); + insertCurCount(wseg, 0); - insertP(wmem, (float) 1.0); - assertEquals(extractP(wmem), (float) 1.0); - insertP(wmem, (float) 0.0); + insertP(wseg, (float) 1.0); + assertEquals(extractP(wseg), (float) 1.0); + insertP(wseg, (float) 0.0); - insertThetaLong(wmem, ++v); - assertEquals(extractThetaLong(wmem), v); - insertThetaLong(wmem, 0L); + insertThetaLong(wseg, ++v); + assertEquals(extractThetaLong(wseg), v); + insertThetaLong(wseg, 0L); - insertUnionThetaLong(wmem, ++v); - assertEquals(extractUnionThetaLong(wmem), v); - insertUnionThetaLong(wmem, 0L); + insertUnionThetaLong(wseg, ++v); + assertEquals(extractUnionThetaLong(wseg), v); + insertUnionThetaLong(wseg, 0L); - setEmpty(wmem); - assertTrue(isEmptyFlag(wmem)); + setEmpty(wseg); + assertTrue(isEmptyFlag(wseg)); - 
clearEmpty(wmem); - assertFalse(isEmptyFlag(wmem)); + clearEmpty(wseg); + assertFalse(isEmptyFlag(wseg)); } @Test diff --git a/src/test/java/org/apache/datasketches/theta2/SetOperationTest.java b/src/test/java/org/apache/datasketches/theta2/SetOperationTest.java index 02efffd75..f7011c538 100644 --- a/src/test/java/org/apache/datasketches/theta2/SetOperationTest.java +++ b/src/test/java/org/apache/datasketches/theta2/SetOperationTest.java @@ -124,9 +124,9 @@ public void checkBuilderValidP() { } @Test(expectedExceptions = SketchesArgumentException.class) - public void checkBuilderAnotB_noMem() { - final MemorySegment mem = MemorySegment.ofArray(new byte[64]); - SetOperation.builder().build(Family.A_NOT_B, mem); + public void checkBuilderAnotB_noSeg() { + final MemorySegment seg = MemorySegment.ofArray(new byte[64]); + SetOperation.builder().build(Family.A_NOT_B, seg); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -167,8 +167,8 @@ public void checkIllegalSetOpHeapify() { usk1.update(i); //64 } final byte[] byteArray = usk1.toByteArray(); - final MemorySegment mem = MemorySegment.ofArray(byteArray).asReadOnly(); - SetOperation.heapify(mem); + final MemorySegment seg = MemorySegment.ofArray(byteArray).asReadOnly(); + SetOperation.heapify(seg); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -179,8 +179,8 @@ public void checkIllegalSetOpWrap() { usk1.update(i); //64 } final byte[] byteArray = usk1.toByteArray(); - final MemorySegment mem = MemorySegment.ofArray(byteArray).asReadOnly(); - Sketches.wrapIntersection(mem); + final MemorySegment seg = MemorySegment.ofArray(byteArray).asReadOnly(); + Sketches.wrapIntersection(seg); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -190,10 +190,10 @@ public void checkIllegalSetOpWrap2() { for (int i=0; i union = new Union<>(new ArrayOfStringsSummarySetOperations()); + union.union(sketch1); + union.union(sketch2); + CompactSketch csk = union.getResult(); + 
//printSummaries(csk.iterator()); + assertEquals(csk.getRetainedEntries(), 4); + + Intersection inter = + new Intersection<>(new ArrayOfStringsSummarySetOperations()); + inter.intersect(sketch1); + inter.intersect(sketch2); + csk = inter.getResult(); + assertEquals(csk.getRetainedEntries(), 3); + + AnotB aNotB = new AnotB<>(); + aNotB.setA(sketch2); + aNotB.notB(sketch1); + csk = aNotB.getResult(true); + assertEquals(csk.getRetainedEntries(), 1); + + } + + private static void checkSummaries(ArrayOfStringsSketch sk1, ArrayOfStringsSketch sk2) { + TupleSketchIterator it1 = sk1.iterator(); + TupleSketchIterator it2 = sk2.iterator(); + while(it1.next() && it2.next()) { + ArrayOfStringsSummary sum1 = it1.getSummary(); + ArrayOfStringsSummary sum2 = it2.getSummary(); + assertTrue(sum1.equals(sum2)); + } + } + + static void printSummaries(TupleSketchIterator it) { + while (it.next()) { + String[] strArr = it.getSummary().getValue(); + for (String s : strArr) { + print(s + ", "); + } + println(""); + } + } + + @Test + public void checkCopyCtor() { + ArrayOfStringsSketch sk1 = new ArrayOfStringsSketch(); + String[][] strArrArr = {{"a","b"},{"c","d"},{"e","f"}}; + int len = strArrArr.length; + for (int i = 0; i < len; i++) { + sk1.update(strArrArr[i], strArrArr[i]); + } + assertEquals(sk1.getRetainedEntries(), 3); + final ArrayOfStringsSketch sk2 = sk1.copy(); + assertEquals(sk2.getRetainedEntries(), 3); + } + + @Test + public void printlnTest() { + println("PRINTING: "+this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(String s) { + print(s + LS); + } + + /** + * @param s value to print + */ + static void print(String s) { + //System.out.print(s); //disable here + } + +} diff --git a/src/test/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryTest.java b/src/test/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryTest.java new file mode 100644 index 000000000..9fd17aecc --- /dev/null +++ 
b/src/test/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryTest.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.tuple2.strings; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import java.lang.foreign.MemorySegment; + +import org.testng.annotations.Test; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.tuple2.DeserializeResult; + +/** + * @author Lee Rhodes + */ +public class ArrayOfStringsSummaryTest { + + @Test + public void checkToByteArray() { + String[] strArr = new String[] {"abcd", "abcd", "abcd"}; + ArrayOfStringsSummary nsum = new ArrayOfStringsSummary(strArr); + ArrayOfStringsSummary copy = nsum.copy(); + assertTrue(copy.equals(nsum)); + byte[] out = nsum.toByteArray(); + + MemorySegment seg = MemorySegment.ofArray(out); + ArrayOfStringsSummary nsum2 = new ArrayOfStringsSummary(seg); + String[] nodesArr = nsum2.getValue(); + for (String s : nodesArr) { + println(s); + } + + println("\nfromMemory(seg)"); + DeserializeResult dres = 
ArrayOfStringsSummaryDeserializer.fromMemorySegment(seg); + ArrayOfStringsSummary nsum3 = dres.getObject(); + nodesArr = nsum3.getValue(); + for (String s : nodesArr) { + println(s); + } + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkNumNodes() { + ArrayOfStringsSummary.checkNumNodes(200); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkInBytes() { + MemorySegment seg = MemorySegment.ofArray(new byte[100]); + ArrayOfStringsSummary.checkInBytes(seg, 200); + } + + @SuppressWarnings("unlikely-arg-type") + @Test + public void checkHashCode() { + String[] strArr = new String[] {"abcd", "abcd", "abcd"}; + ArrayOfStringsSummary sum1 = new ArrayOfStringsSummary(strArr); + ArrayOfStringsSummary sum2 = new ArrayOfStringsSummary(strArr); + int hc1 = sum1.hashCode(); + int hc2 = sum2.hashCode(); + assertEquals(hc1, hc2); + assertTrue(sum1.equals(sum2)); + assertFalse(sum1.equals(hc2)); + assertFalse(sum1.equals(null)); + } + + @Test + public void printlnTest() { + println("PRINTING: "+this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(String s) { + //System.out.println(s); + } + +} From 9a556f42d4ec8e23144a033300b102958b1040fc Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Wed, 25 Jun 2025 22:02:57 -0700 Subject: [PATCH 17/25] Some needed clean up. 
--- .../apache/datasketches/theta2/AnotBimpl.java | 12 +- .../theta2/CompactOperations.java | 14 +- .../datasketches/theta2/CompactSketch.java | 20 +- .../ConcurrentDirectQuickSelectSketch.java | 2 +- .../theta2/ConcurrentHeapThetaBuffer.java | 2 +- .../theta2/DirectCompactCompressedSketch.java | 6 +- .../theta2/DirectCompactSketch.java | 8 +- .../theta2/DirectQuickSelectSketch.java | 25 +- .../theta2/DirectQuickSelectSketchR.java | 10 +- .../theta2/EmptyCompactSketch.java | 4 +- .../theta2/ForwardCompatibility.java | 2 +- .../datasketches/theta2/HeapAlphaSketch.java | 6 +- .../theta2/HeapCompactSketch.java | 6 +- .../theta2/HeapQuickSelectSketch.java | 6 +- .../datasketches/theta2/HeapUpdateSketch.java | 2 +- .../datasketches/theta2/Intersection.java | 4 +- .../datasketches/theta2/IntersectionImpl.java | 18 +- .../theta2/JaccardSimilarity.java | 8 +- ...SegmentCompactCompressedHashIterator.java} | 4 +- ...or.java => MemorySegmentHashIterator.java} | 4 +- .../datasketches/theta2/PreambleUtil.java | 12 +- .../apache/datasketches/theta2/Rebuilder.java | 28 +- .../datasketches/theta2/SetOperation.java | 2 +- .../theta2/SetOperationBuilder.java | 14 +- .../datasketches/theta2/SingleItemSketch.java | 2 +- .../apache/datasketches/theta2/Sketch.java | 22 +- .../apache/datasketches/theta2/Sketches.java | 9 +- .../apache/datasketches/theta2/UnionImpl.java | 8 +- .../datasketches/theta2/UpdateSketch.java | 16 +- .../theta2/UpdateSketchBuilder.java | 23 +- .../WrappedCompactCompressedSketch.java | 2 +- .../theta2/WrappedCompactSketch.java | 8 +- .../thetacommon/HashOperations.java | 104 ---- .../thetacommon2/BinomialBoundsN.java | 284 +++++++++++ .../BoundsOnRatiosInThetaSketchedSets.java | 121 +++++ .../BoundsOnRatiosInTupleSketchedSets.java | 204 ++++++++ .../thetacommon2/EquivTables.java | 292 ++++++++++++ .../thetacommon2/HashOperations.java | 401 ++++++++++++++++ .../thetacommon2/QuickSelect.java | 242 ++++++++++ .../thetacommon2/SetOperationCornerCases.java | 313 
++++++++++++ .../datasketches/thetacommon2/ThetaUtil.java | 152 ++++++ .../thetacommon2/package-info.java | 24 + .../org/apache/datasketches/tuple2/AnotB.java | 12 +- .../datasketches/tuple2/CompactSketch.java | 2 +- .../datasketches/tuple2/HashTables.java | 6 +- .../datasketches/tuple2/Intersection.java | 2 +- .../tuple2/JaccardSimilarity.java | 8 +- .../tuple2/QuickSelectSketch.java | 12 +- .../apache/datasketches/tuple2/Sketch.java | 2 +- .../org/apache/datasketches/tuple2/Union.java | 4 +- .../datasketches/tuple2/UpdatableSketch.java | 2 +- .../tuple2/UpdatableSketchBuilder.java | 2 +- .../org/apache/datasketches/tuple2/Util.java | 34 +- .../tuple2/adouble/DoubleSummary.java | 2 +- .../adouble/DoubleSummaryDeserializer.java | 2 +- .../tuple2/aninteger/IntegerSummary.java | 2 +- .../aninteger/IntegerSummaryDeserializer.java | 2 +- .../ArrayOfDoublesAnotBImpl.java | 20 +- .../ArrayOfDoublesIntersection.java | 7 +- .../ArrayOfDoublesQuickSelectSketch.java | 4 +- .../ArrayOfDoublesSetOperationBuilder.java | 12 +- .../arrayofdoubles/ArrayOfDoublesSketch.java | 4 +- .../ArrayOfDoublesSketches.java | 2 +- .../arrayofdoubles/ArrayOfDoublesUnion.java | 6 +- .../ArrayOfDoublesUpdatableSketch.java | 6 +- .../ArrayOfDoublesUpdatableSketchBuilder.java | 2 +- .../DirectArrayOfDoublesCompactSketch.java | 21 +- .../DirectArrayOfDoublesIntersection.java | 2 +- ...DirectArrayOfDoublesQuickSelectSketch.java | 30 +- .../DirectArrayOfDoublesSketchIterator.java | 4 +- .../DirectArrayOfDoublesUnion.java | 4 +- .../DirectArrayOfDoublesUnionR.java | 2 +- .../tuple2/arrayofdoubles/HashTables.java | 6 +- .../HeapArrayOfDoublesCompactSketch.java | 9 +- .../HeapArrayOfDoublesQuickSelectSketch.java | 8 +- .../tuple2/strings/ArrayOfStringsSketch.java | 4 +- .../tuple2/strings/ArrayOfStringsSummary.java | 4 +- .../datasketches/theta2/AnotBimplTest.java | 2 +- .../theta2/BackwardConversions.java | 2 +- .../theta2/CompactSketchTest.java | 26 +- ...ConcurrentDirectQuickSelectSketchTest.java | 22 
+- .../ConcurrentHeapQuickSelectSketchTest.java | 4 +- .../theta2/DirectIntersectionTest.java | 2 +- .../theta2/DirectQuickSelectSketchTest.java | 14 +- .../datasketches/theta2/DirectUnionTest.java | 2 +- .../theta2/ForwardCompatibilityTest.java | 2 +- .../theta2/HeapAlphaSketchTest.java | 4 +- .../theta2/HeapIntersectionTest.java | 2 +- .../theta2/HeapQuickSelectSketchTest.java | 4 +- .../datasketches/theta2/HeapUnionTest.java | 2 +- .../theta2/HeapifyWrapSerVer1and2Test.java | 37 +- .../datasketches/theta2/PreambleUtilTest.java | 2 +- ...st.java => ReadOnlyMemorySegmentTest.java} | 2 +- .../datasketches/theta2/SetOperationTest.java | 10 +- .../theta2/SetOpsCornerCasesTest.java | 18 +- .../theta2/SingleItemSketchTest.java | 2 +- .../datasketches/theta2/SketchTest.java | 11 +- .../datasketches/theta2/SketchesTest.java | 16 +- .../datasketches/theta2/UnionImplTest.java | 4 +- .../datasketches/theta2/UpdateSketchTest.java | 9 +- .../thetacommon2/BinomialBoundsNTest.java | 181 +++++++ ...oundsOnRatiosInThetaSketchedSetsTest.java} | 24 +- ...BoundsOnRatiosInTupleSketchedSetsTest.java | 159 +++++++ .../thetacommon2/HashOperationsTest.java | 218 +++++++++ .../thetacommon2/QuickSelectTest.java | 446 ++++++++++++++++++ .../thetacommon2/ThetaUtilTest.java | 44 ++ .../datasketches/tuple2/IntegerSummary.java | 2 +- .../tuple2/IntegerSummaryDeserializer.java | 2 +- .../apache/datasketches/tuple2/MiscTest.java | 2 +- ...st.java => ReadOnlyMemorySegmentTest.java} | 25 +- .../CornerCaseTupleSetOperationsTest.java | 2 +- .../ArrayOfDoublesCompactSketchTest.java | 2 +- ...erCaseArrayOfDoublesSetOperationsTest.java | 2 +- ...DirectArrayOfDoublesCompactSketchTest.java | 7 +- ...ctArrayOfDoublesQuickSelectSketchTest.java | 2 +- .../strings/ArrayOfStringsSummaryTest.java | 2 +- 116 files changed, 3480 insertions(+), 536 deletions(-) rename src/main/java/org/apache/datasketches/theta2/{MemoryCompactCompressedHashIterator.java => MemorySegmentCompactCompressedHashIterator.java} (95%) 
rename src/main/java/org/apache/datasketches/theta2/{MemoryHashIterator.java => MemorySegmentHashIterator.java} (90%) create mode 100644 src/main/java/org/apache/datasketches/thetacommon2/BinomialBoundsN.java create mode 100644 src/main/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInThetaSketchedSets.java create mode 100644 src/main/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInTupleSketchedSets.java create mode 100644 src/main/java/org/apache/datasketches/thetacommon2/EquivTables.java create mode 100644 src/main/java/org/apache/datasketches/thetacommon2/HashOperations.java create mode 100644 src/main/java/org/apache/datasketches/thetacommon2/QuickSelect.java create mode 100644 src/main/java/org/apache/datasketches/thetacommon2/SetOperationCornerCases.java create mode 100644 src/main/java/org/apache/datasketches/thetacommon2/ThetaUtil.java create mode 100644 src/main/java/org/apache/datasketches/thetacommon2/package-info.java rename src/test/java/org/apache/datasketches/theta2/{ReadOnlyMemoryTest.java => ReadOnlyMemorySegmentTest.java} (99%) create mode 100644 src/test/java/org/apache/datasketches/thetacommon2/BinomialBoundsNTest.java rename src/test/java/org/apache/datasketches/{thetacommon/BoundsOnRatiosInThetaSketchedSets2Test.java => thetacommon2/BoundsOnRatiosInThetaSketchedSetsTest.java} (76%) create mode 100644 src/test/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInTupleSketchedSetsTest.java create mode 100644 src/test/java/org/apache/datasketches/thetacommon2/HashOperationsTest.java create mode 100644 src/test/java/org/apache/datasketches/thetacommon2/QuickSelectTest.java create mode 100644 src/test/java/org/apache/datasketches/thetacommon2/ThetaUtilTest.java rename src/test/java/org/apache/datasketches/tuple2/{ReadOnlyMemoryTest.java => ReadOnlyMemorySegmentTest.java} (82%) diff --git a/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java b/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java index 
21872ecd6..3374c5992 100644 --- a/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java +++ b/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java @@ -20,17 +20,17 @@ package org.apache.datasketches.theta2; import static org.apache.datasketches.common.Util.exactLog2OfLong; -import static org.apache.datasketches.thetacommon.HashOperations.checkThetaCorruption; -import static org.apache.datasketches.thetacommon.HashOperations.continueCondition; -import static org.apache.datasketches.thetacommon.HashOperations.hashSearch; -import static org.apache.datasketches.thetacommon.HashOperations.hashSearchOrInsert; -import static org.apache.datasketches.thetacommon.HashOperations.minLgHashTableSize; +import static org.apache.datasketches.thetacommon2.HashOperations.checkThetaCorruption; +import static org.apache.datasketches.thetacommon2.HashOperations.continueCondition; +import static org.apache.datasketches.thetacommon2.HashOperations.hashSearch; +import static org.apache.datasketches.thetacommon2.HashOperations.hashSearchOrInsert; +import static org.apache.datasketches.thetacommon2.HashOperations.minLgHashTableSize; import java.lang.foreign.MemorySegment; import java.util.Arrays; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * Implements the A-and-not-B operations. diff --git a/src/main/java/org/apache/datasketches/theta2/CompactOperations.java b/src/main/java/org/apache/datasketches/theta2/CompactOperations.java index ab342a1f9..719b4b19c 100644 --- a/src/main/java/org/apache/datasketches/theta2/CompactOperations.java +++ b/src/main/java/org/apache/datasketches/theta2/CompactOperations.java @@ -93,7 +93,7 @@ static CompactSketch componentsToCompact( //No error checking flags |= single ? 
SINGLEITEM_FLAG_MASK : 0; final MemorySegment seg = - loadCompactMemory(hashArrOut, seedHash, curCount, thetaLong, dstWSeg, (byte)flags, preLongs); + loadCompactMemorySegment(hashArrOut, seedHash, curCount, thetaLong, dstWSeg, (byte)flags, preLongs); return new DirectCompactSketch(seg); } else { //Heap @@ -116,7 +116,7 @@ static CompactSketch componentsToCompact( //No error checking * @return a CompactSketch of the correct form. */ @SuppressWarnings("unused") - static CompactSketch memoryToCompact( + static CompactSketch segmentToCompact( final MemorySegment srcSeg, final boolean dstOrdered, final MemorySegment dstWSeg) @@ -187,7 +187,7 @@ static CompactSketch memoryToCompact( //load the destination. if (dstWSeg != null) { - final MemorySegment tgtSeg = loadCompactMemory(hashArr, srcSeedHash, curCount, thetaLong, dstWSeg, + final MemorySegment tgtSeg = loadCompactMemorySegment(hashArr, srcSeedHash, curCount, thetaLong, dstWSeg, (byte)flagsOut, srcPreLongs); return new DirectCompactSketch(tgtSeg); } else { //heap @@ -215,9 +215,9 @@ private static final void checkFamilyAndFlags( } //All arguments must be valid and correct including flags. 
- // Used as helper to create byte arrays as well as loading Memory for direct compact sketches + // Used as helper to create byte arrays as well as loading MemorySegment for direct compact sketches //Input must be writable, return can be Read Only - static final MemorySegment loadCompactMemory( + static final MemorySegment loadCompactMemorySegment( final long[] compactHashArr, final short seedHash, final int curCount, @@ -231,12 +231,12 @@ static final MemorySegment loadCompactMemory( final int outBytes = outLongs << 3; final int dstBytes = (int) dstWSeg.byteSize(); if (outBytes > dstBytes) { - throw new SketchesArgumentException("Insufficient Memory: " + dstBytes + throw new SketchesArgumentException("Insufficient Space in MemorySegment: " + dstBytes + ", Need: " + outBytes); } final byte famID = (byte) Family.COMPACT.getID(); - //Caution: The following loads directly into Memory without creating a heap byte[] first, + //Caution: The following loads directly into a MemorySegment without creating a heap byte[] first, // which would act as a pre-clearing, initialization mechanism. So it is important to make sure // that all fields are initialized, even those that are not used by the CompactSketch. // Otherwise, uninitialized fields could be filled with off-heap garbage, which could cause diff --git a/src/main/java/org/apache/datasketches/theta2/CompactSketch.java b/src/main/java/org/apache/datasketches/theta2/CompactSketch.java index acd51cc21..594b5076f 100644 --- a/src/main/java/org/apache/datasketches/theta2/CompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/CompactSketch.java @@ -46,7 +46,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * The parent class of all the CompactSketches. CompactSketches are never created directly. 
@@ -82,17 +82,17 @@ public static CompactSketch heapify(final MemorySegment srcSeg) { } /** - * Heapify takes a CompactSketch image in Memory and instantiates an on-heap CompactSketch. + * Heapify takes a CompactSketch image in a MemorySegment and instantiates an on-heap CompactSketch. * - *

    The resulting sketch will not retain any link to the source Memory and all of its data will be + *

    The resulting sketch will not retain any link to the source MemorySegment and all of its data will be * copied to the heap CompactSketch.

    * - *

    This method checks if the given expectedSeed was used to create the source Memory image. + *

    This method checks if the given expectedSeed was used to create the source MemorySegment image. * However, SerialVersion 1 sketch images cannot be checked as they don't have a seedHash field, * so the resulting heapified CompactSketch will be given the hash of the expectedSeed.

    * * @param srcSeg an image of a CompactSketch that was created using the given expectedSeed. - * @param expectedSeed the seed used to validate the given Memory image. + * @param expectedSeed the seed used to validate the given MemorySegment image. * See Update Hash Seed. * @return a CompactSketch on the heap. */ @@ -115,7 +115,7 @@ private static CompactSketch heapify(final MemorySegment srcSeg, final long seed final boolean srcOrdered = (flags & ORDERED_FLAG_MASK) != 0; final boolean empty = (flags & EMPTY_FLAG_MASK) != 0; if (enforceSeed && !empty) { PreambleUtil.checkSegmentSeedHash(srcSeg, seed); } - return CompactOperations.memoryToCompact(srcSeg, srcOrdered, null); + return CompactOperations.segmentToCompact(srcSeg, srcOrdered, null); } //not SerVer 3, assume compact stored form final short seedHash = ThetaUtil.computeSeedHash(seed); @@ -141,7 +141,7 @@ private static CompactSketch heapify(final MemorySegment srcSeg, final long seed * *

    Wrapping any subclass of this class that is empty or contains only a single item will * result in heapified forms of empty and single item sketch respectively. - * This is actually faster and consumes less overall memory.

    + * This is actually faster and consumes less overall space.

    * *

    This method assumes that the sketch image was created with the correct hash seed, so it is not checked. * However, Serial Version 1 sketch images do not have a seedHash field, @@ -166,7 +166,7 @@ public static CompactSketch wrap(final MemorySegment srcSeg) { * *

    Wrapping any subclass of this class that is empty or contains only a single item will * result in heapified forms of empty and single item sketch respectively. - * This is actually faster and consumes less overall memory.

    + * This is actually faster and consumes less overall space.

    * *

    This method checks if the given expectedSeed was used to create the source MemorySegment image. * However, SerialVersion 1 sketches cannot be checked as they don't have a seedHash field, @@ -239,7 +239,7 @@ else if (serVer == 2) { * *

    Wrapping any subclass of this class that is empty or contains only a single item will * result in heapified forms of empty and single item sketch respectively. - * This is actually faster and consumes less overall memory.

    + * This is actually faster and consumes less overall space.

    * *

    This method checks if the DEFAULT_UPDATE_SEED was used to create the source MemorySegment image. * Note that SerialVersion 1 sketches cannot be checked as they don't have a seedHash field, @@ -265,7 +265,7 @@ public static CompactSketch wrap(final byte[] bytes) { * *

    Wrapping any subclass of this class that is empty or contains only a single item will * result in heapified forms of empty and single item sketch respectively. - * This is actually faster and consumes less overall memory.

    + * This is actually faster and consumes less overall space.

    * *

    This method checks if the given expectedSeed was used to create the source MemorySegment image. * Note that SerialVersion 1 sketches cannot be checked as they don't have a seedHash field, diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketch.java index af5917123..317db8180 100644 --- a/src/main/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketch.java @@ -64,7 +64,7 @@ final class ConcurrentDirectQuickSelectSketch extends DirectQuickSelectSketch private volatile long epoch_; /** - * Construct a new sketch instance and initialize the given Memory as its backing store. + * Construct a new sketch instance and initialize the given MemorySegment as its backing store. * * @param lgNomLongs See lgNomLongs. * @param seed See Update Hash Seed. diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java index f8f5a0947..b2867b5a5 100644 --- a/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java +++ b/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java @@ -27,7 +27,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.thetacommon.HashOperations; +import org.apache.datasketches.thetacommon2.HashOperations; /** * This is a theta filtering, bounded size buffer that operates in the context of a single writing diff --git a/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java b/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java index 8ed907321..521334c22 100644 --- a/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java +++ 
b/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java @@ -29,7 +29,7 @@ import java.lang.foreign.MemorySegment; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * An off-heap (Direct), compact, compressed, read-only sketch. It is not empty, not a single item and ordered. @@ -51,7 +51,7 @@ final class DirectCompactCompressedSketch extends DirectCompactSketch { /** * Wraps the given MemorySegment, which must be a SerVer 4 compressed CompactSketch image. - * Must check the validity of the Memory before calling. + * Must check the validity of the MemorySegment before calling. * @param srcSeg The source MemorySegment * @param seedHash The update seedHash. * See Seed Hash. @@ -117,7 +117,7 @@ public boolean isOrdered() { @Override public HashIterator iterator() { - return new MemoryCompactCompressedHashIterator( + return new MemorySegmentCompactCompressedHashIterator( seg_, (extractPreLongs(seg_) > 1 ? 
START_PACKED_DATA_ESTIMATION_MODE : START_PACKED_DATA_EXACT_MODE) + extractNumEntriesBytesV4(seg_), diff --git a/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java index ef0e4b604..f6eda994f 100644 --- a/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java @@ -22,7 +22,7 @@ import static java.lang.foreign.ValueLayout.JAVA_BYTE; import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static org.apache.datasketches.theta2.CompactOperations.checkIllegalCurCountAndEmpty; -import static org.apache.datasketches.theta2.CompactOperations.memoryToCompact; +import static org.apache.datasketches.theta2.CompactOperations.segmentToCompact; import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK; import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount; import static org.apache.datasketches.theta2.PreambleUtil.extractFlags; @@ -34,7 +34,7 @@ import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * An off-heap (Direct), compact, read-only sketch. 
The internal hash array can be either ordered @@ -76,7 +76,7 @@ static DirectCompactSketch wrapInstance(final MemorySegment srcSeg, final short @Override public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) { - return memoryToCompact(seg_, dstOrdered, dstSeg); + return segmentToCompact(seg_, dstOrdered, dstSeg); } @Override @@ -132,7 +132,7 @@ public boolean isSameResource(final MemorySegment that) { @Override public HashIterator iterator() { - return new MemoryHashIterator(seg_, getRetainedEntries(true), getThetaLong()); + return new MemorySegmentHashIterator(seg_, getRetainedEntries(true), getThetaLong()); } @Override diff --git a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java index 64423a21c..f4a9e72f6 100644 --- a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java @@ -62,15 +62,15 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.HashOperations; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.HashOperations; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * The default Theta Sketch using the QuickSelect algorithm. * This subclass implements methods, which affect the state (update, rebuild, reset) * - *

    This implementation uses data in a given Memory that is owned and managed by the caller. - * This Memory can be off-heap, which if managed properly will greatly reduce the need for + *

    This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

    * * @author Lee Rhodes @@ -91,11 +91,8 @@ private DirectQuickSelectSketch( * @param seed See Update Hash Seed. * @param p * See Sampling Probability, p - * @param rf Currently internally fixed at 2. Unless dstSeg is not configured with a valid - * MemoryRequest, in which case the rf is effectively 1, which is no resizing at all and the - * dstSeg must be large enough for a full sketch. + * @param rf Resize Factor * See Resize Factor - * @param memReqSvr the given MemoryRequestServer * @param dstSeg the given MemorySegment object destination. It cannot be null. * It will be cleared prior to use. * @param unionGadget true if this sketch is implementing the Union gadget function. @@ -213,7 +210,7 @@ static DirectQuickSelectSketch writableWrap(final MemorySegment srcSeg, final lo /** * Fast-wrap a sketch around the given source MemorySegment containing sketch data that originated from - * this sketch. This does NO validity checking of the given Memory. + * this sketch. This does NO validity checking of the given MemorySegment. * @param srcSeg The given MemorySegment must be in hash table form and not read only. 
* @param seed See Update Hash Seed * @return instance of this sketch @@ -279,7 +276,7 @@ UpdateReturnState hashUpdate(final long hash) { //The duplicate test final int index = - HashOperations.hashSearchOrInsertMemory(wseg_, lgArrLongs, hash, preambleLongs << 3); + HashOperations.hashSearchOrInsertMemorySegment(wseg_, lgArrLongs, hash, preambleLongs << 3); if (index >= 0) { return RejectedDuplicate; //Duplicate, not inserted } @@ -302,15 +299,15 @@ UpdateReturnState hashUpdate(final long hash) { final int actLgRF = actLgResizeFactor(wseg_.byteSize(), lgArrLongs, preambleLongs, lgRF); int tgtLgArrLongs = Math.min(lgArrLongs + actLgRF, lgNomLongs + 1); - if (actLgRF > 0) { //Expand in current Memory + if (actLgRF > 0) { //Expand in current MemorySegment //lgArrLongs will change; thetaLong, curCount will not resize(wseg_, preambleLongs, lgArrLongs, tgtLgArrLongs); hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, tgtLgArrLongs); return InsertedCountIncrementedResized; - } //end of Expand in current memory, exit. + } //end of Expand in current MemorySegment, exit. else { - //Request more memory, then resize. lgArrLongs will change; thetaLong, curCount will not + //Request more space, then resize. 
lgArrLongs will change; thetaLong, curCount will not final int preBytes = preambleLongs << 3; tgtLgArrLongs = Math.min(lgArrLongs + lgRF, lgNomLongs + 1); final int tgtArrBytes = 8 << tgtLgArrLongs; @@ -322,7 +319,7 @@ UpdateReturnState hashUpdate(final long hash) { hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, tgtLgArrLongs); return InsertedCountIncrementedResized; - } //end of Request more memory to resize + } //end of Request more space to resize } //end of resize } //end of isOutOfSpace return InsertedCountIncremented; diff --git a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java index adbc8757c..e2ba9f1fe 100644 --- a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java +++ b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java @@ -48,14 +48,14 @@ import org.apache.datasketches.common.SketchesReadOnlyException; import org.apache.datasketches.common.SuppressFBWarnings; import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * The default Theta Sketch using the QuickSelect algorithm. * This is the read-only implementation with non-functional methods, which affect the state. * - *

    This implementation uses data in a given Memory that is owned and managed by the caller. - * This Memory can be off-heap, which if managed properly will greatly reduce the need for + *

    This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

    * * @author Lee Rhodes @@ -65,7 +65,7 @@ class DirectQuickSelectSketchR extends UpdateSketch { static final double DQS_RESIZE_THRESHOLD = 15.0 / 16.0; //tuned for space final long seed_; //provided, kept only on heap, never serialized. int hashTableThreshold_; //computed, kept only on heap, never serialized. - MemorySegment wseg_; //A WritableMemory for child class, but no write methods here + MemorySegment wseg_; //A MemorySegment for child class, but no write methods here //only called by DirectQuickSelectSketch and below DirectQuickSelectSketchR(final long seed, final MemorySegment wseg) { @@ -168,7 +168,7 @@ public boolean isSameResource(final MemorySegment that) { @Override public HashIterator iterator() { - return new MemoryHashIterator(wseg_, 1 << getLgArrLongs(), getThetaLong()); + return new MemorySegmentHashIterator(wseg_, 1 << getLgArrLongs(), getThetaLong()); } @Override diff --git a/src/main/java/org/apache/datasketches/theta2/EmptyCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/EmptyCompactSketch.java index c4679374b..28cf578f8 100644 --- a/src/main/java/org/apache/datasketches/theta2/EmptyCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/EmptyCompactSketch.java @@ -55,8 +55,8 @@ static synchronized EmptyCompactSketch getHeapInstance(final MemorySegment srcSe return EMPTY_COMPACT_SKETCH; } final long maskedPre0 = pre0 & EMPTY_SKETCH_MASK; - throw new SketchesArgumentException("Input Memory does not match required Preamble. " - + "Memory Pre0: " + Long.toHexString(maskedPre0) + throw new SketchesArgumentException("Input MemorySegment does not match required Preamble. 
" + + "MemorySegment Pre0: " + Long.toHexString(maskedPre0) + ", required Pre0: " + Long.toHexString(EMPTY_SKETCH_TEST)); } diff --git a/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java b/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java index ea23b4033..6758c98e2 100644 --- a/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java +++ b/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java @@ -158,7 +158,7 @@ static final CompactSketch heapify2to3(final MemorySegment srcSeg, final short s private static final void validateInputSize(final int reqBytesIn, final int segCap) { if (reqBytesIn > segCap) { throw new SketchesArgumentException( - "Input Memory or byte[] size is too small: Required Bytes: " + reqBytesIn + "Input MemorySegment or byte[] size is too small: Required Bytes: " + reqBytesIn + ", bytesIn: " + segCap); } } diff --git a/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java index 7450253cc..b534ee8e2 100644 --- a/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java @@ -37,7 +37,7 @@ import static org.apache.datasketches.theta2.UpdateReturnState.InsertedCountNotIncremented; import static org.apache.datasketches.theta2.UpdateReturnState.RejectedDuplicate; import static org.apache.datasketches.theta2.UpdateReturnState.RejectedOverTheta; -import static org.apache.datasketches.thetacommon.HashOperations.STRIDE_MASK; +import static org.apache.datasketches.thetacommon2.HashOperations.STRIDE_MASK; import java.lang.foreign.MemorySegment; import java.util.Objects; @@ -45,8 +45,8 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.HashOperations; -import 
org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.HashOperations; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * This sketch uses the diff --git a/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java index 1c0cbb0cc..d7040841c 100644 --- a/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java @@ -24,7 +24,7 @@ import static org.apache.datasketches.theta2.CompactOperations.computeCompactPreLongs; import static org.apache.datasketches.theta2.CompactOperations.correctThetaOnCompact; import static org.apache.datasketches.theta2.CompactOperations.isSingleItem; -import static org.apache.datasketches.theta2.CompactOperations.loadCompactMemory; +import static org.apache.datasketches.theta2.CompactOperations.loadCompactMemorySegment; import static org.apache.datasketches.theta2.PreambleUtil.COMPACT_FLAG_MASK; import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK; import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK; @@ -133,7 +133,7 @@ short getSeedHash() { return seedHash_; } - //use of Memory is convenient. The byteArray and Memory are loaded simultaneously. + //use of a MemorySegment is convenient. The byteArray and MemorySegment are loaded simultaneously. 
@Override public byte[] toByteArray() { final int bytes = getCurrentBytes(); @@ -145,7 +145,7 @@ public byte[] toByteArray() { final byte flags = (byte) (emptyBit | READ_ONLY_FLAG_MASK | COMPACT_FLAG_MASK | orderedBit | singleItemBit); final int preLongs = getCompactPreambleLongs(); - loadCompactMemory(getCache(), getSeedHash(), getRetainedEntries(true), getThetaLong(), + loadCompactMemorySegment(getCache(), getSeedHash(), getRetainedEntries(true), getThetaLong(), dstSeg, flags, preLongs); return byteArray; } diff --git a/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java index 720f68ea5..abba02515 100644 --- a/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java @@ -36,14 +36,14 @@ import static org.apache.datasketches.theta2.UpdateReturnState.InsertedCountIncrementedResized; import static org.apache.datasketches.theta2.UpdateReturnState.RejectedDuplicate; import static org.apache.datasketches.theta2.UpdateReturnState.RejectedOverTheta; -import static org.apache.datasketches.thetacommon.QuickSelect.selectExcludingZeros; +import static org.apache.datasketches.thetacommon2.QuickSelect.selectExcludingZeros; import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.thetacommon.HashOperations; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.HashOperations; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * @author Lee Rhodes diff --git a/src/main/java/org/apache/datasketches/theta2/HeapUpdateSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapUpdateSketch.java index 87737bfa8..b50c25af0 100644 --- a/src/main/java/org/apache/datasketches/theta2/HeapUpdateSketch.java +++ 
b/src/main/java/org/apache/datasketches/theta2/HeapUpdateSketch.java @@ -39,7 +39,7 @@ import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * The parent class for Heap Updatable Theta Sketches. diff --git a/src/main/java/org/apache/datasketches/theta2/Intersection.java b/src/main/java/org/apache/datasketches/theta2/Intersection.java index 2dd82bce0..143a5a1fb 100644 --- a/src/main/java/org/apache/datasketches/theta2/Intersection.java +++ b/src/main/java/org/apache/datasketches/theta2/Intersection.java @@ -33,7 +33,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * The API for intersection operations @@ -148,7 +148,7 @@ protected static int getMaxLgArrLongs(final MemorySegment dstSeg) { return Integer.numberOfTrailingZeros(floorPowerOf2((int)(cap - preBytes)) >>> 3); } - protected static void checkMinSizeMemory(final MemorySegment seg) { + protected static void checkMinSizeMemorySegment(final MemorySegment seg) { final int minBytes = (CONST_PREAMBLE_LONGS << 3) + (8 << ThetaUtil.MIN_LG_ARR_LONGS);//280 final long cap = seg.byteSize(); if (cap < minBytes) { diff --git a/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java b/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java index 855c1d953..e16faa6e8 100644 --- a/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java +++ b/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java @@ -52,11 +52,11 @@ import static org.apache.datasketches.theta2.PreambleUtil.insertSerVer; import static org.apache.datasketches.theta2.PreambleUtil.insertThetaLong; import static org.apache.datasketches.theta2.PreambleUtil.setEmpty; -import static 
org.apache.datasketches.thetacommon.HashOperations.continueCondition; -import static org.apache.datasketches.thetacommon.HashOperations.hashInsertOnly; -import static org.apache.datasketches.thetacommon.HashOperations.hashInsertOnlyMemory; -import static org.apache.datasketches.thetacommon.HashOperations.hashSearch; -import static org.apache.datasketches.thetacommon.HashOperations.minLgHashTableSize; +import static org.apache.datasketches.thetacommon2.HashOperations.continueCondition; +import static org.apache.datasketches.thetacommon2.HashOperations.hashInsertOnly; +import static org.apache.datasketches.thetacommon2.HashOperations.hashInsertOnlyMemorySegment; +import static org.apache.datasketches.thetacommon2.HashOperations.hashSearch; +import static org.apache.datasketches.thetacommon2.HashOperations.minLgHashTableSize; import java.lang.foreign.MemorySegment; import java.util.Arrays; @@ -66,7 +66,7 @@ import org.apache.datasketches.common.SketchesReadOnlyException; import org.apache.datasketches.common.SketchesStateException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * Intersection operation for Theta Sketches. @@ -105,7 +105,7 @@ protected IntersectionImpl(final MemorySegment wseg, final long seed, final bool if (wseg != null) { wseg_ = wseg; if (dstSegFlag) { //DstSeg: compute & store seedHash, no seedHash checking - checkMinSizeMemory(wseg); + checkMinSizeMemorySegment(wseg); maxLgArrLongs_ = !readOnly ? 
getMaxLgArrLongs(wseg) : 0; //Only Off Heap seedHash_ = ThetaUtil.computeSeedHash(seed); wseg_.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, seedHash_); @@ -496,7 +496,7 @@ private void moveDataToTgt(final long[] arr, final int count) { for (int i = 0; i < arrLongsIn; i++ ) { final long hashIn = arr[i]; if (continueCondition(thetaLong, hashIn)) { continue; } - hashInsertOnlyMemory(wseg_, lgArrLongs, hashIn, preBytes); + hashInsertOnlyMemorySegment(wseg_, lgArrLongs, hashIn, preBytes); tmpCnt++; } } else { //On Heap. Assumes HT exists and is large enough @@ -521,7 +521,7 @@ private void moveDataToTgt(final Sketch sketch) { while (it.next()) { final long hash = it.get(); if (continueCondition(thetaLong, hash)) { continue; } - hashInsertOnlyMemory(wseg_, lgArrLongs, hash, preBytes); + hashInsertOnlyMemorySegment(wseg_, lgArrLongs, hash, preBytes); tmpCnt++; } } else { //On Heap. Assumes HT exists and is large enough diff --git a/src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java b/src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java index de5fff58c..2ecdf18a0 100644 --- a/src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java +++ b/src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java @@ -22,11 +22,11 @@ import static java.lang.Math.max; import static java.lang.Math.min; import static org.apache.datasketches.common.Util.ceilingPowerOf2; -import static org.apache.datasketches.thetacommon.BoundsOnRatiosInThetaSketchedSets2.getEstimateOfBoverA; -import static org.apache.datasketches.thetacommon.BoundsOnRatiosInThetaSketchedSets2.getLowerBoundForBoverA; -import static org.apache.datasketches.thetacommon.BoundsOnRatiosInThetaSketchedSets2.getUpperBoundForBoverA; +import static org.apache.datasketches.thetacommon2.BoundsOnRatiosInThetaSketchedSets.getEstimateOfBoverA; +import static org.apache.datasketches.thetacommon2.BoundsOnRatiosInThetaSketchedSets.getLowerBoundForBoverA; +import static 
org.apache.datasketches.thetacommon2.BoundsOnRatiosInThetaSketchedSets.getUpperBoundForBoverA; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * Jaccard similarity of two Theta Sketches. diff --git a/src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java b/src/main/java/org/apache/datasketches/theta2/MemorySegmentCompactCompressedHashIterator.java similarity index 95% rename from src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java rename to src/main/java/org/apache/datasketches/theta2/MemorySegmentCompactCompressedHashIterator.java index 11d0168a0..89804f63b 100644 --- a/src/main/java/org/apache/datasketches/theta2/MemoryCompactCompressedHashIterator.java +++ b/src/main/java/org/apache/datasketches/theta2/MemorySegmentCompactCompressedHashIterator.java @@ -30,7 +30,7 @@ /* * This is to uncompress serial version 4 sketch incrementally */ -final class MemoryCompactCompressedHashIterator implements HashIterator, MemorySegmentStatus { +final class MemorySegmentCompactCompressedHashIterator implements HashIterator, MemorySegmentStatus { private MemorySegment seg; private int offset; private int entryBits; @@ -43,7 +43,7 @@ final class MemoryCompactCompressedHashIterator implements HashIterator, MemoryS private boolean isBlockMode; private boolean isFirstUnpack1; - MemoryCompactCompressedHashIterator( + MemorySegmentCompactCompressedHashIterator( final MemorySegment srcSeg, final int offset, final int entryBits, diff --git a/src/main/java/org/apache/datasketches/theta2/MemoryHashIterator.java b/src/main/java/org/apache/datasketches/theta2/MemorySegmentHashIterator.java similarity index 90% rename from src/main/java/org/apache/datasketches/theta2/MemoryHashIterator.java rename to src/main/java/org/apache/datasketches/theta2/MemorySegmentHashIterator.java index 3022d59ff..aefc44ffd 100644 --- 
a/src/main/java/org/apache/datasketches/theta2/MemoryHashIterator.java +++ b/src/main/java/org/apache/datasketches/theta2/MemorySegmentHashIterator.java @@ -26,7 +26,7 @@ /** * @author Lee Rhodes */ -final class MemoryHashIterator implements HashIterator { +final class MemorySegmentHashIterator implements HashIterator { private MemorySegment seg; private int arrLongs; private long thetaLong; @@ -34,7 +34,7 @@ final class MemoryHashIterator implements HashIterator { private int index; private long hash; - MemoryHashIterator(final MemorySegment srcSeg, final int arrLongs, final long thetaLong) { + MemorySegmentHashIterator(final MemorySegment srcSeg, final int arrLongs, final long thetaLong) { this.seg = srcSeg; this.arrLongs = arrLongs; this.thetaLong = thetaLong; diff --git a/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java b/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java index 931e3ccfe..403ba704c 100644 --- a/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java +++ b/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java @@ -34,7 +34,7 @@ import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; //@formatter:off @@ -227,12 +227,12 @@ static String preambleToString(final byte[] byteArr) { } /** - * Returns a human readable string summary of the preamble state of the given Memory. - * Note: other than making sure that the given Memory size is large + * Returns a human readable string summary of the preamble state of the given MemorySegment. + * Note: other than making sure that the given MemorySegment size is large * enough for just the preamble, this does not do much value checking of the contents of the * preamble as this is primarily a tool for debugging the preamble visually. 
* - * @param seg the given Memory. + * @param seg the given MemorySegment. * @return the summary preamble string. */ static String preambleToString(final MemorySegment seg) { @@ -498,7 +498,7 @@ static boolean isEmptyFlag(final MemorySegment seg) { } /** - * Checks Memory for capacity to hold the preamble and returns the extracted preLongs. + * Checks MemorySegment for capacity to hold the preamble and returns the extracted preLongs. * @param seg the given MemorySegment * @return the extracted prelongs value. */ @@ -523,7 +523,7 @@ static final short checkSegmentSeedHash(final MemorySegment seg, final long seed private static void throwNotBigEnough(final long cap, final int required) { throw new SketchesArgumentException( - "Possible Corruption: Size of byte array or Memory not large enough: Size: " + cap + "Possible Corruption: Size of byte array or MemorySegment not large enough: Size: " + cap + ", Required: " + required); } diff --git a/src/main/java/org/apache/datasketches/theta2/Rebuilder.java b/src/main/java/org/apache/datasketches/theta2/Rebuilder.java index 5e92447f9..b436a4e19 100644 --- a/src/main/java/org/apache/datasketches/theta2/Rebuilder.java +++ b/src/main/java/org/apache/datasketches/theta2/Rebuilder.java @@ -28,21 +28,21 @@ import static org.apache.datasketches.theta2.PreambleUtil.insertCurCount; import static org.apache.datasketches.theta2.PreambleUtil.insertLgArrLongs; import static org.apache.datasketches.theta2.PreambleUtil.insertThetaLong; -import static org.apache.datasketches.thetacommon.QuickSelect.selectExcludingZeros; +import static org.apache.datasketches.thetacommon2.QuickSelect.selectExcludingZeros; import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon.HashOperations; +import org.apache.datasketches.thetacommon2.HashOperations; /** - * This class performs resize, rebuild and move operations where the input and output are Theta sketch images in MemorySegments. 
- * + * This class performs resize, rebuild and move operations where the input and output are Theta sketch images in MemorySegments. + * *

    NOTE: These operations copy data from the input MemorySegment into local arrays, perform the required operations on the * arrays, and then copies the result to the destination MemorySegment. Attempting to perform these operations directly on the - * MemorySegments would be slower due to MemorySegment internal checks. Meanwhile, he bulk copies performed by the MemorySegments are + * MemorySegments would be slower due to MemorySegment internal checks. Meanwhile, he bulk copies performed by the MemorySegments are * vectorized at the machine level and are quite fast. Measurements reveal that this is a good tradeoff.

    - * + * * @author Lee Rhodes */ final class Rebuilder { @@ -87,7 +87,7 @@ static final void quickSelectAndRebuild(final MemorySegment seg, final int pream * Moves me (the entire updatable sketch) to a new larger MemorySegment location and rebuilds the hash table. * This assumes a MemorySegment preamble of standard form with the correct value of thetaLong. * Afterwards, the caller must update the local MemorySegment reference, lgArrLongs - * and hashTableThreshold from the dstMemory and free the source MemorySegment. + * and hashTableThreshold from the destination MemorySegment and free the source MemorySegment. * * @param srcSeg the source MemorySegment * @param preambleLongs size of preamble in longs @@ -124,7 +124,7 @@ static final void moveAndResize(final MemorySegment srcSeg, final int preambleLo * Resizes existing hash array into a larger one within a single MemorySegment, assuming enough space. * This assumes a preamble of standard form with the correct value of thetaLong. * The lgArrLongs will change. - * Afterwards, the caller must update the caller's local copies of lgArrLongs and hashTableThreshold + * Afterwards, the caller must update the caller's local copies of lgArrLongs and hashTableThreshold * from the returned MemorySegment. 
* * @param seg the source and destination MemorySegment @@ -137,21 +137,21 @@ static final void resize(final MemorySegment seg, final int preambleLongs, //Preamble stays in place final int preBytes = preambleLongs << 3; - + //Bulk copy source to on-heap buffer final int srcHTLen = 1 << srcLgArrLongs; //current value final long[] srcHTArr = new long[srcHTLen]; //on-heap src buffer //seg.getLongArray(preBytes, srcHTArr, 0, srcHTLen); MemorySegment.copy(seg, JAVA_LONG_UNALIGNED, preBytes, srcHTArr, 0, srcHTLen); - + //Create destination on-heap buffer final int dstHTLen = 1 << tgtLgArrLongs; final long[] dstHTArr = new long[dstHTLen]; //on-heap dst buffer - + //Rebuild hash table in destination buffer HashOperations.hashArrayInsert(srcHTArr, dstHTArr, tgtLgArrLongs, extractThetaLong(seg)); - - //Bulk copy to destination memory + + //Bulk copy to destination segment MemorySegment.copy(dstHTArr, 0, seg, JAVA_LONG_UNALIGNED, preBytes, dstHTLen); insertLgArrLongs(seg, tgtLgArrLongs); //update in mem } @@ -159,7 +159,7 @@ static final void resize(final MemorySegment seg, final int preambleLongs, /** * Returns the actual log2 Resize Factor that can be used to grow the hash table. 
This will be * an integer value between zero and the given lgRF, inclusive; - * @param capBytes the current memory capacity in bytes + * @param capBytes the current MemorySegment capacity in bytes * @param lgArrLongs the current lg hash table size in longs * @param preLongs the current preamble size in longs * @param lgRF the configured lg Resize Factor diff --git a/src/main/java/org/apache/datasketches/theta2/SetOperation.java b/src/main/java/org/apache/datasketches/theta2/SetOperation.java index 55dda17b0..795712dca 100644 --- a/src/main/java/org/apache/datasketches/theta2/SetOperation.java +++ b/src/main/java/org/apache/datasketches/theta2/SetOperation.java @@ -31,7 +31,7 @@ import org.apache.datasketches.common.MemorySegmentStatus; import org.apache.datasketches.common.SketchesArgumentException; //import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * The parent API for all Set Operations diff --git a/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java b/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java index 5a05a7d1f..c57eaddc6 100644 --- a/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java +++ b/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java @@ -28,7 +28,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * For building a new SetOperation. @@ -45,11 +45,11 @@ public final class SetOperationBuilder { * Constructor for building a new SetOperation. The default configuration is *
      *
    • Max Nominal Entries (max K): - * {@value org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_NOMINAL_ENTRIES}
    • - *
    • Seed: {@value org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_UPDATE_SEED}
    • + * {@value org.apache.datasketches.thetacommon2.ThetaUtil#DEFAULT_NOMINAL_ENTRIES} + *
    • Seed: {@value org.apache.datasketches.thetacommon2.ThetaUtil#DEFAULT_UPDATE_SEED}
    • *
    • {@link ResizeFactor#X8}
    • *
    • Input Sampling Probability: 1.0
    • - *
    • Memory: null
    • + *
    • MemorySegment: null
    • *
    */ public SetOperationBuilder() { @@ -168,7 +168,7 @@ public SetOperation build(final Family family) { /** * Returns a SetOperation with the current configuration of this Builder, the given Family - * and the given destination memory. Note that the destination MemorySegment cannot be used with AnotB. + * and the given destination MemorySegment. Note that the destination MemorySegment cannot be used with AnotB. * @param family the chosen SetOperation family * @param dstSeg The destination MemorySegment. * @return a SetOperation @@ -243,8 +243,8 @@ public Intersection buildIntersection() { /** * Convenience method, returns a configured SetOperation Intersection with * Default Nominal Entries - * and the given destination memory. - * @param dstSeg The destination Memory. + * and the given destination MemorySegment. + * @param dstSeg The destination MemorySegment. * @return an Intersection object */ public Intersection buildIntersection(final MemorySegment dstSeg) { diff --git a/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java b/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java index 99d2dd71b..8c92766d1 100644 --- a/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java @@ -34,7 +34,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * A CompactSketch that holds only one item hash. 
diff --git a/src/main/java/org/apache/datasketches/theta2/Sketch.java b/src/main/java/org/apache/datasketches/theta2/Sketch.java index bec936a8c..b915ec6ba 100644 --- a/src/main/java/org/apache/datasketches/theta2/Sketch.java +++ b/src/main/java/org/apache/datasketches/theta2/Sketch.java @@ -30,15 +30,15 @@ import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK; import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE; -import static org.apache.datasketches.thetacommon.HashOperations.count; +import static org.apache.datasketches.thetacommon2.HashOperations.count; import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.MemorySegmentStatus; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.BinomialBoundsN; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.BinomialBoundsN; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * The top-level class for all theta sketches. This class is never constructed directly. @@ -74,7 +74,7 @@ public static Sketch heapify(final MemorySegment srcSeg) { if (family == Family.COMPACT) { return CompactSketch.heapify(srcSeg); } - return heapifyUpdateFromMemory(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + return heapifyUpdateFromMemorySegment(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); } /** @@ -97,7 +97,7 @@ public static Sketch heapify(final MemorySegment srcSeg, final long expectedSeed if (family == Family.COMPACT) { return CompactSketch.heapify(srcSeg, expectedSeed); } - return heapifyUpdateFromMemory(srcSeg, expectedSeed); + return heapifyUpdateFromMemorySegment(srcSeg, expectedSeed); } /** @@ -113,7 +113,7 @@ public static Sketch heapify(final MemorySegment srcSeg, final long expectedSeed * *

    Wrapping any subclass of this class that is empty or contains only a single item will * result in on-heap equivalent forms of empty and single item sketch respectively. - * This is actually faster and consumes less overall memory.

    + * This is actually faster and consumes less overall space.

    * *

    For Update Sketches this method checks if the * Default Update Seed

    @@ -158,7 +158,7 @@ public static Sketch wrap(final MemorySegment srcSeg) { * *

    Wrapping any subclass of this class that is empty or contains only a single item will * result in on-heap equivalent forms of empty and single item sketch respectively. - * This is actually faster and consumes less overall memory.

    + * This is actually faster and consumes less overall space.

    * *

    For Update and Compact Sketches this method checks if the given expectedSeed was used to * create the source MemorySegment image. However, SerialVersion 1 sketches cannot be checked.

    @@ -166,7 +166,7 @@ public static Sketch wrap(final MemorySegment srcSeg) { * @param srcSeg a MemorySegment with an image of a Sketch. * @param expectedSeed the seed used to validate the given MemorySegment image. * See Update Hash Seed. - * @return a UpdateSketch backed by the given Memory except as above. + * @return a UpdateSketch backed by the given MemorySegment except as above. */ public static Sketch wrap(final MemorySegment srcSeg, final long expectedSeed) { final int preLongs = srcSeg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; @@ -595,10 +595,10 @@ static final boolean isValidSketchID(final int id) { } /** - * Checks Ordered and Compact flags for integrity between sketch and Memory + * Checks Ordered and Compact flags for integrity between sketch and a MemorySegment * @param sketch the given sketch */ - static final void checkSketchAndMemoryFlags(final Sketch sketch) { + static final void checkSketchAndMemorySegmentFlags(final Sketch sketch) { final MemorySegment seg = sketch.getMemorySegment(); if (seg == null) { return; } final int flags = PreambleUtil.extractFlags(seg); @@ -639,7 +639,7 @@ private static final boolean estMode(final long thetaLong, final boolean empty) * See Update Hash Seed. 
* @return a Sketch */ - private static final Sketch heapifyUpdateFromMemory(final MemorySegment srcSeg, final long expectedSeed) { + private static final Sketch heapifyUpdateFromMemorySegment(final MemorySegment srcSeg, final long expectedSeed) { final long cap = srcSeg.byteSize(); if (cap < 8) { throw new SketchesArgumentException( diff --git a/src/main/java/org/apache/datasketches/theta2/Sketches.java b/src/main/java/org/apache/datasketches/theta2/Sketches.java index cbcfac7d0..377345967 100644 --- a/src/main/java/org/apache/datasketches/theta2/Sketches.java +++ b/src/main/java/org/apache/datasketches/theta2/Sketches.java @@ -34,7 +34,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * This class brings together the common sketch and set operation creation methods and @@ -209,7 +209,7 @@ public static SetOperation heapifySetOperation(final MemorySegment srcSeg) { * Ref: {@link SetOperation#heapify(MemorySegment, long) SetOperation.heapify(MemorySegment, long)} * @param srcSeg Ref: {@link SetOperation#heapify(MemorySegment, long) SetOperation.heapify(MemorySegment, long)}, * {@code srcSeg} - * @param expectedSeed the seed used to validate the given Memory image. + * @param expectedSeed the seed used to validate the given MemorySegment image. * Ref: {@link SetOperation#heapify(MemorySegment, long) SetOperation.heapify(MemorySegment, long)}, * {@code expectedSeed} * @return {@link SetOperation SetOperation} @@ -302,8 +302,7 @@ public static SetOperation wrapSetOperation(final MemorySegment srcSeg) { * Ref: {@link SetOperation#wrap(MemorySegment, long) SetOperation.wrap(MemorySegment, long)} * @param srcSeg Ref: {@link SetOperation#wrap(MemorySegment, long) SetOperation.wrap(MemorySegment, long)}, {@code srcSeg} * @param expectedSeed the seed used to validate the given MemorySegment image. 
- * Ref: {@link SetOperation#wrap(MemorySegment, long) SetOperation.wrap(Memory, long)}, - * {@code expectedSeed} + * Ref: {@link SetOperation#wrap(MemorySegment, long) SetOperation.wrap(MemorySegment, long)}, {@code expectedSeed} * @return {@link SetOperation SetOperation} */ public static SetOperation wrapSetOperation(final MemorySegment srcSeg, final long expectedSeed) { @@ -364,7 +363,7 @@ public static UpdateSketch wrapUpdateSketch(final MemorySegment srcSeg, final lo static void checkIfValidThetaSketch(final MemorySegment srcSeg) { final int fam = srcSeg.get(JAVA_BYTE, FAMILY_BYTE); if (!Sketch.isValidSketchID(fam)) { - throw new SketchesArgumentException("Source Memory not a valid Sketch. Family: " + throw new SketchesArgumentException("Source MemorySegment not a valid Sketch. Family: " + Family.idToFamily(fam).toString()); } } diff --git a/src/main/java/org/apache/datasketches/theta2/UnionImpl.java b/src/main/java/org/apache/datasketches/theta2/UnionImpl.java index 1b931b974..68d72b6e9 100644 --- a/src/main/java/org/apache/datasketches/theta2/UnionImpl.java +++ b/src/main/java/org/apache/datasketches/theta2/UnionImpl.java @@ -26,7 +26,7 @@ import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID; import static org.apache.datasketches.theta2.PreambleUtil.extractUnionThetaLong; import static org.apache.datasketches.theta2.PreambleUtil.insertUnionThetaLong; -import static org.apache.datasketches.thetacommon.QuickSelect.selectExcludingZeros; +import static org.apache.datasketches.thetacommon2.QuickSelect.selectExcludingZeros; import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; @@ -34,8 +34,8 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.thetacommon.HashOperations; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.HashOperations; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * 
Shared code for the HeapUnion and DirectUnion implementations. @@ -279,7 +279,7 @@ public void union(final Sketch sketchIn) { gadget_.hashUpdate(sketchIn.getCache()[0]); return; } - Sketch.checkSketchAndMemoryFlags(sketchIn); + Sketch.checkSketchAndMemorySegmentFlags(sketchIn); unionThetaLong_ = min(min(unionThetaLong_, sketchIn.getThetaLong()), gadget_.getThetaLong()); //Theta rule unionEmpty_ = false; diff --git a/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java b/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java index 891792cac..9d7b7a1a5 100644 --- a/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java @@ -50,7 +50,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * The parent class for the Update Sketch families, such as QuickSelect and Alpha. @@ -67,7 +67,7 @@ public abstract class UpdateSketch extends Sketch { * Wrap takes the writable sketch image in MemorySegment and refers to it directly. There is no data copying onto * the java heap. Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have * been explicitly stored as writable, direct objects can be wrapped. This method assumes the - * {@link org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_UPDATE_SEED}. + * {@link org.apache.datasketches.thetacommon2.ThetaUtil#DEFAULT_UPDATE_SEED}. * Default Update Seed. * @param srcWSeg an image of a writable sketch where the image seed hash matches the default seed hash. * It must have a size of at least 24 bytes. @@ -111,7 +111,7 @@ public static UpdateSketch wrap(final MemorySegment srcWSeg, final long expected /** * Instantiates an on-heap UpdateSketch from a MemorySegment. 
This method assumes the - * {@link org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_UPDATE_SEED}. + * {@link org.apache.datasketches.thetacommon2.ThetaUtil#DEFAULT_UPDATE_SEED}. * @param srcSeg the given MemorySegment with a sketch image. * It must have a size of at least 24 bytes. * @return an UpdateSketch @@ -129,7 +129,7 @@ public static UpdateSketch heapify(final MemorySegment srcSeg) { * @return an UpdateSketch */ public static UpdateSketch heapify(final MemorySegment srcSeg, final long expectedSeed) { - Objects.requireNonNull(srcSeg, "Source Memory must not be null"); + Objects.requireNonNull(srcSeg, "Source MemorySegment must not be null"); checkBounds(0, 24, srcSeg.byteSize()); //need min 24 bytes final Family family = Family.idToFamily(srcSeg.get(JAVA_BYTE, FAMILY_BYTE)); if (family.equals(Family.ALPHA)) { @@ -415,7 +415,7 @@ else if (family.equals(Family.QUICKSELECT)) { //Check lgNomLongs if (lgNomLongs < ThetaUtil.MIN_LG_NOM_LONGS) { throw new SketchesArgumentException( - "Possible corruption: Current Memory lgNomLongs < min required size: " + "Possible corruption: Current MemorySegment lgNomLongs < min required size: " + lgNomLongs + " < " + ThetaUtil.MIN_LG_NOM_LONGS); } } @@ -448,7 +448,7 @@ static void checkSegIntegrity(final MemorySegment srcSeg, final long expectedSee final int minReqBytes = getSegBytes(lgArrLongs, preambleLongs); if (curCapBytes < minReqBytes) { throw new SketchesArgumentException( - "Possible corruption: Current Memory size < min required size: " + "Possible corruption: Current MemorySegment size < min required size: " + curCapBytes + " < " + minReqBytes); } //check Theta, p @@ -464,12 +464,12 @@ static void checkSegIntegrity(final MemorySegment srcSeg, final long expectedSee } /** - * This checks to see if the memory RF factor was set correctly as early versions may not + * This checks to see if the MemorySegment RF factor was set correctly as early versions may not * have set it. 
* @param srcSeg the source MemorySegment * @param lgNomLongs the current lgNomLongs * @param lgArrLongs the current lgArrLongs - * @return true if the the memory RF factor is incorrect and the caller can either + * @return true if the MemorySegment RF factor is incorrect and the caller can either * correct it or throw an error. */ static boolean isResizeFactorIncorrect(final MemorySegment srcSeg, final int lgNomLongs, diff --git a/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java b/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java index 0326ceb06..7580f59b4 100644 --- a/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java +++ b/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java @@ -30,7 +30,7 @@ import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; import org.apache.datasketches.common.SuppressFBWarnings; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * For building a new UpdateSketch. @@ -54,22 +54,21 @@ public final class UpdateSketchBuilder { /** * Constructor for building a new UpdateSketch. The default configuration is *
      - *
    • Nominal Entries: {@value org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_NOMINAL_ENTRIES}
    • - *
    • Seed: {@value org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_UPDATE_SEED}
    • + *
    • Nominal Entries: {@value org.apache.datasketches.thetacommon2.ThetaUtil#DEFAULT_NOMINAL_ENTRIES}
    • + *
    • Seed: {@value org.apache.datasketches.thetacommon2.ThetaUtil#DEFAULT_UPDATE_SEED}
    • *
    • Input Sampling Probability: 1.0
    • *
    • Family: {@link org.apache.datasketches.common.Family#QUICKSELECT}
    • *
    • Resize Factor: The default for sketches on the Java heap is {@link ResizeFactor#X8}. - * For direct sketches, which are targeted for native memory off the Java heap, this value will + * For direct sketches, which are targeted for off-heap memory, this value will * be fixed at either {@link ResizeFactor#X1} or {@link ResizeFactor#X2}.
    • - *
    • MemoryRequestServer (Direct only): - * {@link org.apache.datasketches.memory.DefaultMemoryRequestServer}.
    • *
    * Parameters unique to the concurrent sketches only: *
      - *
    • Number of local Nominal Entries: 4
    • *
    • Concurrent NumPoolThreads: 3
    • + *
    • Number of local Nominal Entries: 4
    • *
    • Concurrent PropagateOrderedCompact: true
    • *
    • Concurrent MaxConcurrencyError: 0
    • + *
    • Concurrent MaxNumLocalThreads: 1
    • *
    */ public UpdateSketchBuilder() { @@ -339,7 +338,7 @@ public UpdateSketch build(final MemorySegment dstSeg) { sketch = HeapAlphaSketch.newHeapInstance(bLgNomLongs, bSeed, bP, bRF); } else { - throw new SketchesArgumentException("AlphaSketch cannot be made Direct to Memory."); + throw new SketchesArgumentException("AlphaSketch cannot be backed by a MemorySegment."); } break; } @@ -383,9 +382,9 @@ public UpdateSketch buildShared() { } /** - * Returns a direct (potentially off-heap) concurrent shared UpdateSketch with the current - * configuration of the Builder and the given destination WritableMemory. If the destination - * WritableMemory is null, this defaults to an on-heap concurrent shared UpdateSketch. + * Returns a concurrent shared UpdateSketch with the current + * configuration of the Builder and the given destination MemorySegment. If the destination + * MemorySegment is null, this defaults to an on-heap concurrent shared UpdateSketch. * *

    The parameters unique to the shared concurrent sketch are: *

      @@ -396,7 +395,7 @@ public UpdateSketch buildShared() { *

      Key parameters that are in common with other Theta sketches: *

        *
      • Nominal Entries or Log Nominal Entries (for the shared concurrent sketch)
      • - *
      • Destination Writable Memory (if not null, returned sketch is Direct. Default is null.)
      • + *
      • Destination MemorySegment (if not null, returned sketch is Direct. Default is null.)
      • *
      * * @param dstSeg the given MemorySegment for Direct, otherwise null. diff --git a/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java b/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java index 3ba16c3fa..b2d3a8d4b 100644 --- a/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java @@ -25,7 +25,7 @@ import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; import org.apache.datasketches.common.ByteArrayUtil; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * Wrapper around a serialized compact compressed read-only sketch. It is not empty, not a single item. diff --git a/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java index 09a20b19e..0da98d5fc 100644 --- a/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java @@ -22,7 +22,7 @@ import static org.apache.datasketches.common.ByteArrayUtil.getIntLE; import static org.apache.datasketches.common.ByteArrayUtil.getLongLE; import static org.apache.datasketches.common.ByteArrayUtil.getShortLE; -import static org.apache.datasketches.theta2.CompactOperations.memoryToCompact; +import static org.apache.datasketches.theta2.CompactOperations.segmentToCompact; import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK; import static org.apache.datasketches.theta2.PreambleUtil.FLAGS_BYTE; import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK; @@ -34,7 +34,7 @@ import java.lang.foreign.MemorySegment; import java.util.Arrays; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * Wrapper 
around a serialized compact read-only sketch. It is not empty, not a single item. @@ -53,7 +53,7 @@ class WrappedCompactSketch extends CompactSketch { } /** - * Wraps the given Memory, which must be a SerVer 3 CompactSketch image. + * Wraps the given byteArray, which must be a SerVer 3 CompactSketch image. * @param bytes representation of serialized compressed compact sketch. * @param seedHash The update seedHash. * See Seed Hash. @@ -68,7 +68,7 @@ static WrappedCompactSketch wrapInstance(final byte[] bytes, final short seedHas @Override public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) { - return memoryToCompact(MemorySegment.ofArray(bytes_), dstOrdered, dstSeg); + return segmentToCompact(MemorySegment.ofArray(bytes_), dstOrdered, dstSeg); } @Override diff --git a/src/main/java/org/apache/datasketches/thetacommon/HashOperations.java b/src/main/java/org/apache/datasketches/thetacommon/HashOperations.java index 2cdb99a0a..79bff95a1 100644 --- a/src/main/java/org/apache/datasketches/thetacommon/HashOperations.java +++ b/src/main/java/org/apache/datasketches/thetacommon/HashOperations.java @@ -19,12 +19,9 @@ package org.apache.datasketches.thetacommon; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static java.lang.Math.max; import static org.apache.datasketches.common.Util.ceilingPowerOf2; -import java.lang.foreign.MemorySegment; - import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; import org.apache.datasketches.memory.Memory; @@ -286,107 +283,6 @@ public static int hashSearchOrInsertMemory(final WritableMemory wmem, final int } while (curProbe != loopIndex); throw new SketchesArgumentException("Key not found and no empty slot in table!"); } - - //With MemorySegment - - /** - * This is a classical Knuth-style Open Addressing, Double Hash (OADH) search scheme for MemorySegment. - * Returns the index if found, -1 if not found. 
The input MemorySegment may be read only. - * - * @param seg The MemorySegment containing the hash table to search. - * The hash table portion must be a power of 2 in size. - * @param lgArrLongs The log_base2(hashTable.length). - * See lgArrLongs. - * @param hash The hash value to search for. Must not be zero. - * @param segOffsetBytes offset in the MemorySegment where the hashTable starts - * @return Current probe index if found, -1 if not found. - */ - public static int hashSearchMemory(final MemorySegment seg, final int lgArrLongs, final long hash, - final int segOffsetBytes) { - if (hash == 0) { - throw new SketchesArgumentException("Given hash must not be zero: " + hash); - } - final int arrayMask = (1 << lgArrLongs) - 1; - final int stride = getStride(hash, lgArrLongs); - int curProbe = (int) (hash & arrayMask); - final int loopIndex = curProbe; - do { - final int curProbeOffsetBytes = (curProbe << 3) + segOffsetBytes; - final long curArrayHash = seg.get(JAVA_LONG_UNALIGNED, curProbeOffsetBytes); - if (curArrayHash == EMPTY) { return -1; } - else if (curArrayHash == hash) { return curProbe; } - curProbe = (curProbe + stride) & arrayMask; - } while (curProbe != loopIndex); - return -1; - } - - /** - * This is a classical Knuth-style Open Addressing, Double Hash (OADH) insert scheme for MemorySegment. - * This method assumes that the input hash is not a duplicate. - * Useful for rebuilding tables to avoid unnecessary comparisons. - * Returns the index of insertion, which is always positive or zero. - * Throws an exception if table has no empty slot. - * - * @param wseg The writable MemorySegment that contains the hashTable to insert into. - * The size of the hashTable portion must be a power of 2. - * @param lgArrLongs The log_base2(hashTable.length. - * See lgArrLongs. - * @param hash value that must not be zero and will be inserted into the array into an empty slot. 
- * @param memOffsetBytes offset in the writable MemorySegment where the hashTable starts - * @return index of insertion. Always positive or zero. - */ - public static int hashInsertOnlyMemory(final MemorySegment wseg, final int lgArrLongs, - final long hash, final int memOffsetBytes) { - final int arrayMask = (1 << lgArrLongs) - 1; // current Size -1 - final int stride = getStride(hash, lgArrLongs); - int curProbe = (int) (hash & arrayMask); - // search for duplicate or zero - final int loopIndex = curProbe; - do { - final int curProbeOffsetBytes = (curProbe << 3) + memOffsetBytes; - final long curArrayHash = wseg.get(JAVA_LONG_UNALIGNED, curProbeOffsetBytes); - if (curArrayHash == EMPTY) { - wseg.set(JAVA_LONG_UNALIGNED, curProbeOffsetBytes, hash); - return curProbe; - } - curProbe = (curProbe + stride) & arrayMask; - } while (curProbe != loopIndex); - throw new SketchesArgumentException("No empty slot in table!"); - } - - /** - * This is a classical Knuth-style Open Addressing, Double Hash insert scheme, but inserts - * values directly into a writable MemorySegment. - * Returns index ≥ 0 if found (duplicate); < 0 if inserted, inserted at -(index + 1). - * Throws an exception if the value is not found and table has no empty slot. - * - * @param wseg The writable MemorySegment that contains the hashTable to insert into. - * @param lgArrLongs The log_base2(hashTable.length). - * See lgArrLongs. - * @param hash The hash value to be potentially inserted into an empty slot only if it is not - * a duplicate of any other hash value in the table. It must not be zero. - * @param memOffsetBytes offset in the writable MemorySegment where the hash array starts - * @return index ≥ 0 if found (duplicate); < 0 if inserted, inserted at -(index + 1). 
- */ - public static int hashSearchOrInsertMemory(final MemorySegment wseg, final int lgArrLongs, - final long hash, final int memOffsetBytes) { - final int arrayMask = (1 << lgArrLongs) - 1; // current Size -1 - final int stride = getStride(hash, lgArrLongs); - int curProbe = (int) (hash & arrayMask); - // search for duplicate or zero - final int loopIndex = curProbe; - do { - final int curProbeOffsetBytes = (curProbe << 3) + memOffsetBytes; - final long curArrayHash = wseg.get(JAVA_LONG_UNALIGNED, curProbeOffsetBytes); - if (curArrayHash == EMPTY) { - wseg.set(JAVA_LONG_UNALIGNED, curProbeOffsetBytes, hash); - return ~curProbe; - } else if (curArrayHash == hash) { return curProbe; } // curArrayHash is a duplicate - // curArrayHash is not a duplicate and not zero, continue searching - curProbe = (curProbe + stride) & arrayMask; - } while (curProbe != loopIndex); - throw new SketchesArgumentException("Key not found and no empty slot in table!"); - } //Other related methods diff --git a/src/main/java/org/apache/datasketches/thetacommon2/BinomialBoundsN.java b/src/main/java/org/apache/datasketches/thetacommon2/BinomialBoundsN.java new file mode 100644 index 000000000..233eae34f --- /dev/null +++ b/src/main/java/org/apache/datasketches/thetacommon2/BinomialBoundsN.java @@ -0,0 +1,284 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.thetacommon2; + +import org.apache.datasketches.common.SketchesArgumentException; + +/** + * This class enables the estimation of error bounds given a sample set size, the sampling + * probability theta, the number of standard deviations and a simple noDataSeen flag. This can + * be used to estimate error bounds for fixed threshold sampling as well as the error bounds + * calculations for sketches. + * + * @author Kevin Lang + */ +// BTW, the suffixes "NStar", "NPrimeB", and "NPrimeF" correspond to variables in the formal +// writeup of this scheme. 
+public final class BinomialBoundsN { + + private BinomialBoundsN() {} + + private static final double[] deltaOfNumSDev = + { + 0.5000000000000000000, // = 0.5 (1 + erf(0)) + 0.1586553191586026479, // = 0.5 (1 + erf((-1/sqrt(2)))) + 0.0227502618904135701, // = 0.5 (1 + erf((-2/sqrt(2)))) + 0.0013498126861731796 // = 0.5 (1 + erf((-3/sqrt(2)))) + }; + + // our "classic" bounds, but now with continuity correction + + private static double contClassicLB(final double numSamplesF, final double theta, + final double numSDev) { + final double nHat = (numSamplesF - 0.5) / theta; + final double b = numSDev * Math.sqrt((1.0 - theta) / theta); + final double d = 0.5 * b * Math.sqrt((b * b) + (4.0 * nHat)); + final double center = nHat + (0.5 * (b * b)); + return (center - d); + } + + private static double contClassicUB(final double numSamplesF, final double theta, + final double numSDev) { + final double nHat = (numSamplesF + 0.5) / theta; + final double b = numSDev * Math.sqrt((1.0 - theta) / theta); + final double d = 0.5 * b * Math.sqrt((b * b) + (4.0 * nHat)); + final double center = nHat + (0.5 * (b * b)); + return (center + d); + } + + // This is a special purpose calculator for NStar, using a computational + // strategy inspired by its Bayesian definition. It is only appropriate + // for a very limited set of inputs. However, the procedure computeApproxBinoLB () + // below does in fact only call it for suitably limited inputs. + // Outside of this limited range, two different bad things will happen. + // First, because we are not using logarithms, the values of intermediate + // quantities will exceed the dynamic range of doubles. Second, even if that + // problem were fixed, the running time of this procedure is essentially linear + // in est = (numSamples / p), and that can be Very, Very Big. 
+ + private static long specialNStar(final long numSamplesI, final double p, final double delta) { + final double q, numSamplesF; + double tot, curTerm; + long m; + assertTrue(numSamplesI >= 1); + assertTrue((0.0 < p) && (p < 1.0)); + assertTrue((0.0 < delta) && (delta < 1.0)); + q = 1.0 - p; + numSamplesF = numSamplesI; + // Use a different algorithm if the following isn't true; this one will be too slow, or worse. + assertTrue((numSamplesF / p) < 500.0); + curTerm = Math.pow(p, numSamplesF); // curTerm = posteriorProbability (k, k, p) + assertTrue(curTerm > 1e-100); // sanity check for non-use of logarithms + tot = curTerm; + m = numSamplesI; + while (tot <= delta) { // this test can fail even the first time + curTerm = (curTerm * q * (m)) / ((m + 1) - numSamplesI); + tot += curTerm; + m += 1; + } + // we have reached a state where tot > delta, so back up one + return (m - 1); + } + + // The following procedure has very limited applicability. + // The above remarks about specialNStar() also apply here. + private static long specialNPrimeB(final long numSamplesI, final double p, final double delta) { + final double q, numSamplesF, oneMinusDelta; + double tot, curTerm; + long m; + assertTrue(numSamplesI >= 1); + assertTrue((0.0 < p) && (p < 1.0)); + assertTrue((0.0 < delta) && (delta < 1.0)); + q = 1.0 - p; + oneMinusDelta = 1.0 - delta; + numSamplesF = numSamplesI; + curTerm = Math.pow(p, numSamplesF); // curTerm = posteriorProbability (k, k, p) + assertTrue(curTerm > 1e-100); // sanity check for non-use of logarithms + tot = curTerm; + m = numSamplesI; + while (tot < oneMinusDelta) { + curTerm = (curTerm * q * (m)) / ((m + 1) - numSamplesI); + tot += curTerm; + m += 1; + } + return (m); // don't need to back up + } + + private static long specialNPrimeF(final long numSamplesI, final double p, final double delta) { + // Use a different algorithm if the following isn't true; this one will be too slow, or worse. 
+ assertTrue(((numSamplesI) / p) < 500.0); //A super-small delta could also make it slow. + return (specialNPrimeB(numSamplesI + 1, p, delta)); + } + + // The following computes an approximation to the lower bound of + // a Frequentist confidence interval based on the tails of the Binomial distribution. + private static double computeApproxBinoLB(final long numSamplesI, final double theta, + final int numSDev) { + if (theta == 1.0) { + return (numSamplesI); + } + + else if (numSamplesI == 0) { + return (0.0); + } + + else if (numSamplesI == 1) { + final double delta = deltaOfNumSDev[numSDev]; + final double rawLB = (Math.log(1.0 - delta)) / (Math.log(1.0 - theta)); + return (Math.floor(rawLB)); // round down + } + + else if (numSamplesI > 120) { + // plenty of samples, so gaussian approximation to binomial distribution isn't too bad + final double rawLB = contClassicLB( numSamplesI, theta, numSDev); + return (rawLB - 0.5); // fake round down + } + + // at this point we know 2 <= numSamplesI <= 120 + + else if (theta > (1.0 - 1e-5)) { // empirically-determined threshold + return (numSamplesI); + } + + else if (theta < ((numSamplesI) / 360.0)) { // empirically-determined threshold + // here we use the gaussian approximation, but with a modified "numSDev" + final int index; + final double rawLB; + index = (3 * ((int) numSamplesI)) + (numSDev - 1); + rawLB = contClassicLB(numSamplesI, theta, EquivTables.getLB(index)); + return (rawLB - 0.5); // fake round down + } + + else { // This is the most difficult range to approximate; we will compute an "exact" LB. + // We know that est <= 360, so specialNStar() shouldn't be ridiculously slow. + final double delta = deltaOfNumSDev[numSDev]; + final long nstar = specialNStar(numSamplesI, theta, delta); + return (nstar); // don't need to round + } + } + + // The following computes an approximation to the upper bound of + // a Frequentist confidence interval based on the tails of the Binomial distribution. 
+ private static double computeApproxBinoUB(final long numSamplesI, final double theta, + final int numSDev) { + if (theta == 1.0) { + return (numSamplesI); + } + + else if (numSamplesI == 0) { + final double delta = deltaOfNumSDev[numSDev]; + final double rawUB = (Math.log(delta)) / (Math.log(1.0 - theta)); + return (Math.ceil(rawUB)); // round up + } + + else if (numSamplesI > 120) { + // plenty of samples, so gaussian approximation to binomial distribution isn't too bad + final double rawUB = contClassicUB(numSamplesI, theta, numSDev); + return (rawUB + 0.5); // fake round up + } + + // at this point we know 1 <= numSamplesI <= 120 + + else if (theta > (1.0 - 1e-5)) { // empirically-determined threshold + return (numSamplesI + 1); + } + + else if (theta < ((numSamplesI) / 360.0)) { // empirically-determined threshold + // here we use the gaussian approximation, but with a modified "numSDev" + final int index; + final double rawUB; + index = (3 * ((int) numSamplesI)) + (numSDev - 1); + rawUB = contClassicUB(numSamplesI, theta, EquivTables.getUB(index)); + return (rawUB + 0.5); // fake round up + } + + else { // This is the most difficult range to approximate; we will compute an "exact" UB. + // We know that est <= 360, so specialNPrimeF() shouldn't be ridiculously slow. + final double delta = deltaOfNumSDev[numSDev]; + final long nprimef = specialNPrimeF(numSamplesI, theta, delta); + return (nprimef); // don't need to round + } + } + + // The following two procedures enforce some extra rules that help + // to prevent the return of bounds that might be confusing to users. + /** + * Returns the approximate lower bound value + * @param numSamples the number of samples in the sample set + * @param theta the sampling probability + * @param numSDev the number of "standard deviations" from the mean for the tail bounds. + * This must be an integer value of 1, 2 or 3. + * @param noDataSeen this is normally false. 
However, in the case where you have zero samples + * and a theta < 1.0, this flag enables the distinction between a virgin case when no actual + * data has been seen and the case where the estimate may be zero but an upper error bound may + * still exist. + * @return the approximate lower bound value + */ + public static double getLowerBound(final long numSamples, final double theta, final int numSDev, + final boolean noDataSeen) { + //in earlier code numSamples was called numSamplesI + if (noDataSeen) { return 0.0; } + checkArgs(numSamples, theta, numSDev); + final double lb = computeApproxBinoLB(numSamples, theta, numSDev); + final double numSamplesF = numSamples; + final double est = numSamplesF / theta; + return (Math.min(est, Math.max(numSamplesF, lb))); + } + + /** + * Returns the approximate upper bound value + * @param numSamples the number of samples in the sample set + * @param theta the sampling probability + * @param numSDev the number of "standard deviations" from the mean for the tail bounds. + * This must be an integer value of 1, 2 or 3. + * @param noDataSeen this is normally false. However, in the case where you have zero samples + * and a theta < 1.0, this flag enables the distinction between a virgin case when no actual + * data has been seen and the case where the estimate may be zero but an upper error bound may + * still exist. 
+ * @return the approximate upper bound value + */ + public static double getUpperBound(final long numSamples, final double theta, final int numSDev, + final boolean noDataSeen) { + //in earlier code numSamples was called numSamplesI + if (noDataSeen) { return 0.0; } + checkArgs(numSamples, theta, numSDev); + final double ub = computeApproxBinoUB(numSamples, theta, numSDev); + final double numSamplesF = numSamples; + final double est = numSamplesF / theta; + return (Math.max(est, ub)); + } + + //exposed only for test + static void checkArgs(final long numSamples, final double theta, final int numSDev) { + if ((numSDev | (numSDev - 1) | (3 - numSDev) | numSamples) < 0) { + throw new SketchesArgumentException( + "numSDev must only be 1,2, or 3 and numSamples must >= 0: numSDev=" + + numSDev + ", numSamples=" + numSamples); + } + if ((theta < 0.0) || (theta > 1.0)) { + throw new SketchesArgumentException("0.0 < theta <= 1.0: " + theta); + } + } + + private static void assertTrue(final boolean truth) { + assert (truth); + } + +} // end of class "BinomialBoundsN" diff --git a/src/main/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInThetaSketchedSets.java b/src/main/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInThetaSketchedSets.java new file mode 100644 index 000000000..2ae14747c --- /dev/null +++ b/src/main/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInThetaSketchedSets.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.thetacommon2; + +import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; + +import org.apache.datasketches.common.BoundsOnRatiosInSampledSets; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.theta2.Sketch; + +/** + * This class is used to compute the bounds on the estimate of the ratio B / A, where: + *
        + *
      • A is a Theta Sketch of population PopA.
      • + *
      • B is a Theta Sketch of population PopB that is a subset of A, + * obtained by an intersection of A with some other Theta Sketch C, + * which acts like a predicate or selection clause.
      • + *
      • The estimate of the ratio PopB/PopA is + * BoundsOnRatiosInThetaSketchedSets.getEstimateOfBoverA(A, B).
      • + *
      • The Upper Bound estimate on the ratio PopB/PopA is + * BoundsOnRatiosInThetaSketchedSets.getUpperBoundForBoverA(A, B).
      • + *
      • The Lower Bound estimate on the ratio PopB/PopA is + * BoundsOnRatiosInThetaSketchedSets.getLowerBoundForBoverA(A, B).
      • + *
      + * Note: The theta of B cannot be greater than the theta of A. + * If B is formed as an intersection of A and some other set C, + * then the theta of B is guaranteed to be less than or equal to the theta of A. + * + * @author Kevin Lang + * @author Lee Rhodes + */ +public final class BoundsOnRatiosInThetaSketchedSets { + + private BoundsOnRatiosInThetaSketchedSets() {} + + /** + * Gets the approximate lower bound for B over A based on a 95% confidence interval + * @param sketchA the sketch A + * @param sketchB the sketch B + * @return the approximate lower bound for B over A + */ + public static double getLowerBoundForBoverA(final Sketch sketchA, final Sketch sketchB) { + final long thetaLongA = sketchA.getThetaLong(); + final long thetaLongB = sketchB.getThetaLong(); + checkThetas(thetaLongA, thetaLongB); + + final int countB = sketchB.getRetainedEntries(true); + final int countA = (thetaLongB == thetaLongA) + ? sketchA.getRetainedEntries(true) + : sketchA.getCountLessThanThetaLong(thetaLongB); + + if (countA <= 0) { return 0; } + final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; + return BoundsOnRatiosInSampledSets.getLowerBoundForBoverA(countA, countB, f); + } + + /** + * Gets the approximate upper bound for B over A based on a 95% confidence interval + * @param sketchA the sketch A + * @param sketchB the sketch B + * @return the approximate upper bound for B over A + */ + public static double getUpperBoundForBoverA(final Sketch sketchA, final Sketch sketchB) { + final long thetaLongA = sketchA.getThetaLong(); + final long thetaLongB = sketchB.getThetaLong(); + checkThetas(thetaLongA, thetaLongB); + + final int countB = sketchB.getRetainedEntries(true); + final int countA = (thetaLongB == thetaLongA) + ? 
sketchA.getRetainedEntries(true) + : sketchA.getCountLessThanThetaLong(thetaLongB); + + if (countA <= 0) { return 1.0; } + final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; + return BoundsOnRatiosInSampledSets.getUpperBoundForBoverA(countA, countB, f); + } + + /** + * Gets the estimate for B over A + * @param sketchA the sketch A + * @param sketchB the sketch B + * @return the estimate for B over A + */ + public static double getEstimateOfBoverA(final Sketch sketchA, final Sketch sketchB) { + final long thetaLongA = sketchA.getThetaLong(); + final long thetaLongB = sketchB.getThetaLong(); + checkThetas(thetaLongA, thetaLongB); + + final int countB = sketchB.getRetainedEntries(true); + final int countA = (thetaLongB == thetaLongA) + ? sketchA.getRetainedEntries(true) + : sketchA.getCountLessThanThetaLong(thetaLongB); + + if (countA <= 0) { return 0.5; } + + return (double) countB / (double) countA; + } + + static void checkThetas(final long thetaLongA, final long thetaLongB) { + if (thetaLongB > thetaLongA) { + throw new SketchesArgumentException("ThetaLongB cannot be > ThetaLongA."); + } + } +} diff --git a/src/main/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInTupleSketchedSets.java b/src/main/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInTupleSketchedSets.java new file mode 100644 index 000000000..1b16a383b --- /dev/null +++ b/src/main/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInTupleSketchedSets.java @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.thetacommon2; + +import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; + +import org.apache.datasketches.common.BoundsOnRatiosInSampledSets; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.tuple2.Sketch; +import org.apache.datasketches.tuple2.Summary; + +/** + * This class is used to compute the bounds on the estimate of the ratio B / A, where: + *
        + *
      • A is a Tuple Sketch of population PopA.
      • + *
      • B is a Tuple or Theta Sketch of population PopB that is a subset of A, + * obtained by an intersection of A with some other Tuple or Theta Sketch C, + * which acts like a predicate or selection clause.
      • + *
      • The estimate of the ratio PopB/PopA is + * BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(A, B).
      • + *
      • The Upper Bound estimate on the ratio PopB/PopA is + * BoundsOnRatiosInTupleSketchedSets.getUpperBoundForBoverA(A, B).
      • + *
      • The Lower Bound estimate on the ratio PopB/PopA is + * BoundsOnRatiosInTupleSketchedSets.getLowerBoundForBoverA(A, B).
      • + *
      + * Note: The theta of B cannot be greater than the theta of A. + * If B is formed as an intersection of A and some other set C, + * then the theta of B is guaranteed to be less than or equal to the theta of A. + * + * @author Kevin Lang + * @author Lee Rhodes + * @author David Cromberge + */ +public final class BoundsOnRatiosInTupleSketchedSets { + + private BoundsOnRatiosInTupleSketchedSets() {} + + /** + * Gets the approximate lower bound for B over A based on a 95% confidence interval + * @param sketchA the Tuple sketch A with summary type S + * @param sketchB the Tuple sketch B with summary type S + * @param Summary + * @return the approximate lower bound for B over A + */ + public static double getLowerBoundForBoverA( + final Sketch sketchA, + final Sketch sketchB) { + final long thetaLongA = sketchA.getThetaLong(); + final long thetaLongB = sketchB.getThetaLong(); + checkThetas(thetaLongA, thetaLongB); + + final int countB = sketchB.getRetainedEntries(); + final int countA = thetaLongB == thetaLongA + ? sketchA.getRetainedEntries() + : sketchA.getCountLessThanThetaLong(thetaLongB); + + if (countA <= 0) { return 0; } + final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; + return BoundsOnRatiosInSampledSets.getLowerBoundForBoverA(countA, countB, f); + } + + /** + * Gets the approximate lower bound for B over A based on a 95% confidence interval + * @param sketchA the Tuple sketch A with summary type S + * @param sketchB the Theta sketch B + * @param Summary + * @return the approximate lower bound for B over A + */ + public static double getLowerBoundForBoverA( + final Sketch sketchA, + final org.apache.datasketches.theta2.Sketch sketchB) { + final long thetaLongA = sketchA.getThetaLong(); + final long thetaLongB = sketchB.getThetaLong(); + checkThetas(thetaLongA, thetaLongB); + + final int countB = sketchB.getRetainedEntries(); + final int countA = thetaLongB == thetaLongA + ? 
sketchA.getRetainedEntries() + : sketchA.getCountLessThanThetaLong(thetaLongB); + + if (countA <= 0) { return 0; } + final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; + return BoundsOnRatiosInSampledSets.getLowerBoundForBoverA(countA, countB, f); + } + + /** + * Gets the approximate upper bound for B over A based on a 95% confidence interval + * @param sketchA the Tuple sketch A with summary type S + * @param sketchB the Tuple sketch B with summary type S + * @param Summary + * @return the approximate upper bound for B over A + */ + public static double getUpperBoundForBoverA( + final Sketch sketchA, + final Sketch sketchB) { + final long thetaLongA = sketchA.getThetaLong(); + final long thetaLongB = sketchB.getThetaLong(); + checkThetas(thetaLongA, thetaLongB); + + final int countB = sketchB.getRetainedEntries(); + final int countA = thetaLongB == thetaLongA + ? sketchA.getRetainedEntries() + : sketchA.getCountLessThanThetaLong(thetaLongB); + + if (countA <= 0) { return 1.0; } + final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; + return BoundsOnRatiosInSampledSets.getUpperBoundForBoverA(countA, countB, f); + } + + /** + * Gets the approximate upper bound for B over A based on a 95% confidence interval + * @param sketchA the Tuple sketch A with summary type S + * @param sketchB the Theta sketch B + * @param Summary + * @return the approximate upper bound for B over A + */ + public static double getUpperBoundForBoverA( + final Sketch sketchA, + final org.apache.datasketches.theta2.Sketch sketchB) { + final long thetaLongA = sketchA.getThetaLong(); + final long thetaLongB = sketchB.getThetaLong(); + checkThetas(thetaLongA, thetaLongB); + + final int countB = sketchB.getRetainedEntries(true); + final int countA = thetaLongB == thetaLongA + ? 
sketchA.getRetainedEntries() + : sketchA.getCountLessThanThetaLong(thetaLongB); + + if (countA <= 0) { return 1.0; } + final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; + return BoundsOnRatiosInSampledSets.getUpperBoundForBoverA(countA, countB, f); + } + + /** + * Gets the estimate for B over A + * @param sketchA the Tuple sketch A with summary type S + * @param sketchB the Tuple sketch B with summary type S + * @param Summary + * @return the estimate for B over A + */ + public static double getEstimateOfBoverA( + final Sketch sketchA, + final Sketch sketchB) { + final long thetaLongA = sketchA.getThetaLong(); + final long thetaLongB = sketchB.getThetaLong(); + checkThetas(thetaLongA, thetaLongB); + + final int countB = sketchB.getRetainedEntries(); + final int countA = thetaLongB == thetaLongA + ? sketchA.getRetainedEntries() + : sketchA.getCountLessThanThetaLong(thetaLongB); + + if (countA <= 0) { return 0.5; } + + return (double) countB / (double) countA; + } + + /** + * Gets the estimate for B over A + * @param sketchA the Tuple sketch A with summary type S + * @param sketchB the Theta sketch B + * @param Summary + * @return the estimate for B over A + */ + public static double getEstimateOfBoverA( + final Sketch sketchA, + final org.apache.datasketches.theta2.Sketch sketchB) { + final long thetaLongA = sketchA.getThetaLong(); + final long thetaLongB = sketchB.getThetaLong(); + checkThetas(thetaLongA, thetaLongB); + + final int countB = sketchB.getRetainedEntries(true); + final int countA = thetaLongB == thetaLongA + ? 
sketchA.getRetainedEntries() + : sketchA.getCountLessThanThetaLong(thetaLongB); + + if (countA <= 0) { return 0.5; } + + return (double) countB / (double) countA; + } + + static void checkThetas(final long thetaLongA, final long thetaLongB) { + if (thetaLongB > thetaLongA) { + throw new SketchesArgumentException("ThetaLongB cannot be > ThetaLongA."); + } + } +} diff --git a/src/main/java/org/apache/datasketches/thetacommon2/EquivTables.java b/src/main/java/org/apache/datasketches/thetacommon2/EquivTables.java new file mode 100644 index 000000000..e824444a4 --- /dev/null +++ b/src/main/java/org/apache/datasketches/thetacommon2/EquivTables.java @@ -0,0 +1,292 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.thetacommon2; + +/** + * Tables for BinomialBoundsN calculations. + * + *

      These equivTables contain modified values for numSDevs that, if used + * WHEN THETA IS VERY SMALL, will cause the continuity-corrected version + * of our "classic" confidence intervals to be very close to "exact" confidence + * intervals based on the tails of the actual binomial distribution.

      + * + * @author Kevin Lang + */ +final class EquivTables { + + private EquivTables() {} + + static double getLB(final int index) { + return lbEquivTable[index]; + } + + static double getUB(final int index) { + return ubEquivTable[index]; + } + + private static double[] lbEquivTable = { + 1.0, 2.0, 3.0, // fake values for k = 0 + 0.78733703534118149, 3.14426768537558132, 13.56789685109913535, // k = 1 + 0.94091379266077979, 2.64699271711145911, 6.29302733018320737, // k = 2 + 0.96869128474958188, 2.46531676590527127, 4.97375283467403051, // k = 3 + 0.97933572521046131, 2.37418810664669877, 4.44899975481712318, // k = 4 + 0.98479165917274258, 2.31863116255024693, 4.16712379778553554, // k = 5 + 0.98806033915698777, 2.28075536565225434, 3.99010556144099837, // k = 6 + 0.99021896790580399, 2.25302005857281529, 3.86784477136922078, // k = 7 + 0.99174267079089873, 2.23168103978522936, 3.77784896945266269, // k = 8 + 0.99287147837287648, 2.21465899260871879, 3.70851932988722410, // k = 9 + 0.99373900046805375, 2.20070155496262032, 3.65326029076638292, // k = 10 + 0.99442519013851438, 2.18900651202670815, 3.60803817612955413, // k = 11 + 0.99498066823221620, 2.17903457780744247, 3.57024330407946877, // k = 12 + 0.99543899410224412, 2.17040883161922693, 3.53810982030634591, // k = 13 + 0.99582322541263579, 2.16285726913676513, 3.51039837124298515, // k = 14 + 0.99614973311747690, 2.15617827879603396, 3.48621230377099778, // k = 15 + 0.99643042892560629, 2.15021897666090922, 3.46488605693562590, // k = 16 + 0.99667418783778317, 2.14486114872480016, 3.44591466064832730, // k = 17 + 0.99688774875812669, 2.14001181420209718, 3.42890765690452781, // k = 18 + 0.99707632299691795, 2.13559675336844634, 3.41355809420343803, // k = 19 + 0.99724399084971083, 2.13155592217421486, 3.39962113251016262, // k = 20 + 0.99739400151915447, 2.12784018863251845, 3.38689892877548004, // k = 21 + 0.99752896842633731, 2.12440890875851096, 3.37522975271599535, // k = 22 + 0.99765101725122918, 
2.12122815311133195, 3.36448003577621080, // k = 23 + 0.99776189496810730, 2.11826934724291505, 3.35453840911279144, // k = 24 + 0.99786304821586214, 2.11550823850916458, 3.34531123809287578, // k = 25 + 0.99795568665180667, 2.11292409529477254, 3.33671916527694634, // k = 26 + 0.99804083063483517, 2.11049908609763293, 3.32869446834217797, // k = 27 + 0.99811933910984862, 2.10821776918189130, 3.32117898316676019, // k = 28 + 0.99819195457286014, 2.10606671027090897, 3.31412243534683171, // k = 29 + 0.99825930555178388, 2.10403415237001923, 3.30748113008135647, // k = 30 + 0.99832193858154028, 2.10210975877822648, 3.30121691946897045, // k = 31 + 0.99838032666573895, 2.10028440670842542, 3.29529629751144171, // k = 32 + 0.99843488390555990, 2.09855000145353188, 3.28968974413223236, // k = 33 + 0.99848596721417948, 2.09689934193824001, 3.28437111460505093, // k = 34 + 0.99853390005924325, 2.09532599155502908, 3.27931717312372939, // k = 35 + 0.99857895741078551, 2.09382418262592296, 3.27450718840060517, // k = 36 + 0.99862138880970974, 2.09238872751677718, 3.26992261182860489, // k = 37 + 0.99866141580770318, 2.09101494715108061, 3.26554677962434425, // k = 38 + 0.99869923565267982, 2.08969860402822860, 3.26136468165239535, // k = 39 + 0.99873502010169091, 2.08843585627218431, 3.25736275677081721, // k = 40 + 0.99876893292508839, 2.08722321436752623, 3.25352872241415980, // k = 41 + 0.99880111078502409, 2.08605749165553789, 3.24985141664350863, // k = 42 + 0.99883168573342118, 2.08493577529222307, 3.24632068399498053, // k = 43 + 0.99886077231613513, 2.08385540129560809, 3.24292724848112357, // k = 44 + 0.99888847451828155, 2.08281392374021834, 3.23966263299664092, // k = 45 + 0.99891488795844907, 2.08180908991394631, 3.23651906111521726, // k = 46 + 0.99894010085196783, 2.08083882998420222, 3.23348939240611344, // k = 47 + 0.99896419358239541, 2.07990122528650545, 3.23056705515594444, // k = 48 + 0.99898723510594323, 2.07899450946285924, 3.22774598963252402, // k = 
49 + 0.99900929266780736, 2.07811704477046533, 3.22502059972006805, // k = 50 + 0.99903043086155208, 2.07726730587160091, 3.22238570890294795, // k = 51 + 0.99905070073845081, 2.07644388314946582, 3.21983651940365689, // k = 52 + 0.99907015770423868, 2.07564546080757850, 3.21736857351049821, // k = 53 + 0.99908884779227947, 2.07487081196367740, 3.21497773796417619, // k = 54 + 0.99910681586905525, 2.07411879634256024, 3.21266015316183484, // k = 55 + 0.99912410177549305, 2.07338834403498140, 3.21041222805715165, // k = 56 + 0.99914074347179849, 2.07267845454973099, 3.20823061166797174, // k = 57 + 0.99915677607464204, 2.07198819052374006, 3.20611216970604573, // k = 58 + 0.99917223149395795, 2.07131667846186929, 3.20405396962596001, // k = 59 + 0.99918714153457699, 2.07066309019154460, 3.20205326110445299, // k = 60 + 0.99920153247185794, 2.07002665203046377, 3.20010746990493544, // k = 61 + 0.99921543193525508, 2.06940663431663552, 3.19821417453343315, // k = 62 + 0.99922886570365677, 2.06880235245998279, 3.19637109973109546, // k = 63 + 0.99924185357357942, 2.06821315729285971, 3.19457610621114441, // k = 64 + 0.99925441845175555, 2.06763843812092318, 3.19282717869864996, // k = 65 + 0.99926658263325407, 2.06707761824370095, 3.19112241228646099, // k = 66 + 0.99927836173816331, 2.06653015295219689, 3.18946001739936946, // k = 67 + 0.99928977431994781, 2.06599552505539918, 3.18783829446098821, // k = 68 + 0.99930083753795884, 2.06547324585920933, 3.18625564538041317, // k = 69 + 0.99931156864562354, 2.06496285191821016, 3.18471055124089730, // k = 70 + 0.99932197985521043, 2.06446390392778767, 3.18320157510865442, // k = 71 + 0.99933208559809827, 2.06397598606787369, 3.18172735837393361, // k = 72 + 0.99934190032416836, 2.06349869971447220, 3.18028661102792398, // k = 73 + 0.99935143390791836, 2.06303166975550312, 3.17887810481605015, // k = 74 + 0.99936070171270330, 2.06257453607466346, 3.17750067581857820, // k = 75 + 0.99936971103502970, 2.06212696042919674, 
3.17615321728274580, // k = 76 + 0.99937847392385493, 2.06168861430600714, 3.17483467831510779, // k = 77 + 0.99938700168914352, 2.06125918927764928, 3.17354405480557489, // k = 78 + 0.99939530099953799, 2.06083838987589729, 3.17228039269048168, // k = 79 + 0.99940338278830154, 2.06042593411496000, 3.17104278166036124, // k = 80 + 0.99941125463777780, 2.06002155276328835, 3.16983035274597569, // k = 81 + 0.99941892470027938, 2.05962498741951094, 3.16864227952240185, // k = 82 + 0.99942640059737187, 2.05923599161263837, 3.16747776846497686, // k = 83 + 0.99943368842187397, 2.05885433061945378, 3.16633606416374391, // k = 84 + 0.99944079790603269, 2.05847977868873500, 3.16521644518826406, // k = 85 + 0.99944773295734990, 2.05811212058944193, 3.16411821883858124, // k = 86 + 0.99945450059186669, 2.05775114781260982, 3.16304072400711789, // k = 87 + 0.99946110646314423, 2.05739666442039493, 3.16198332650733960, // k = 88 + 0.99946755770463369, 2.05704847678819647, 3.16094541781455973, // k = 89 + 0.99947385746861528, 2.05670640500335367, 3.15992641851471490, // k = 90 + 0.99948001256305474, 2.05637027420314666, 3.15892576988736096, // k = 91 + 0.99948602689656241, 2.05603991286400856, 3.15794293484717059, // k = 92 + 0.99949190674294641, 2.05571516158917689, 3.15697740043813724, // k = 93 + 0.99949765436329585, 2.05539586490317561, 3.15602867309343083, // k = 94 + 0.99950327557880314, 2.05508187237845164, 3.15509627710042651, // k = 95 + 0.99950877461972709, 2.05477304104951486, 3.15417975753007340, // k = 96 + 0.99951415481862682, 2.05446923022574879, 3.15327867462917766, // k = 97 + 0.99951942042375208, 2.05417030908833453, 3.15239260700215596, // k = 98 + 0.99952457390890004, 2.05387614661762541, 3.15152114915238712, // k = 99 + 0.99952962005008317, 2.05358662050909402, 3.15066390921020911, // k = 100 + 0.99953456216121594, 2.05330161104427589, 3.14982051097524618, // k = 101 + 0.99953940176368405, 2.05302100378725072, 3.14899059183684926, // k = 102 + 
0.99954414373920031, 2.05274468493067275, 3.14817379948561893, // k = 103 + 0.99954879047621148, 2.05247255013657082, 3.14736979964868624, // k = 104 + 0.99955334485656522, 2.05220449388099269, 3.14657826610371671, // k = 105 + 0.99955780993869325, 2.05194041831310869, 3.14579888316276879, // k = 106 + 0.99956218652590678, 2.05168022402710903, 3.14503134811607765, // k = 107 + 0.99956647932785359, 2.05142381889103831, 3.14427536967733090, // k = 108 + 0.99957069025060719, 2.05117111251445294, 3.14353066260227365, // k = 109 + 0.99957482032178291, 2.05092201793428330, 3.14279695558593630, // k = 110 + 0.99957887261450651, 2.05067645094720774, 3.14207398336887422, // k = 111 + 0.99958284988383639, 2.05043432833224415, 3.14136149076028914, // k = 112 + 0.99958675435604505, 2.05019557189746138, 3.14065923143530767, // k = 113 + 0.99959058650074439, 2.04996010556124020, 3.13996696426707445, // k = 114 + 0.99959434898201494, 2.04972785368377686, 3.13928445867830419, // k = 115 + 0.99959804437042976, 2.04949874512311681, 3.13861149103462367, // k = 116 + 0.99960167394553423, 2.04927271043337100, 3.13794784369528656, // k = 117 + 0.99960523957651048, 2.04904968140490951, 3.13729330661277572, // k = 118 + 0.99960874253329735, 2.04882959397491504, 3.13664767767019725, // k = 119 + 0.99961218434327748, 2.04861238220240693, 3.13601075688413289 // k = 120 + }; + + private static double[] ubEquivTable = { + 1.0, 2.0, 3.0, // fake values for k = 0 + 0.99067760836669549, 1.75460517119302040, 2.48055626001627161, // k = 1 + 0.99270518097577565, 1.78855957509907171, 2.53863835259832626, // k = 2 + 0.99402032633599902, 1.81047286499563143, 2.57811676180597260, // k = 3 + 0.99492607629539975, 1.82625928017762362, 2.60759550546498531, // k = 4 + 0.99558653966013821, 1.83839160339161367, 2.63086812358551470, // k = 5 + 0.99608981951632813, 1.84812399034444752, 2.64993712523727254, // k = 6 + 0.99648648035983456, 1.85617372053235385, 2.66598485907860550, // k = 7 + 0.99680750790483330, 
1.86298655802610824, 2.67976541374471822, // k = 8 + 0.99707292880049181, 1.86885682585270274, 2.69178781407745760, // k = 9 + 0.99729614928489241, 1.87398826101983218, 2.70241106542158604, // k = 10 + 0.99748667952445658, 1.87852708449801753, 2.71189717290596377, // k = 11 + 0.99765127712748836, 1.88258159501103250, 2.72044290303773550, // k = 12 + 0.99779498340305395, 1.88623391878036273, 2.72819957382063194, // k = 13 + 0.99792160418357412, 1.88954778748873764, 2.73528576807902368, // k = 14 + 0.99803398604944960, 1.89257337682371940, 2.74179612106766513, // k = 15 + 0.99813449883217231, 1.89535099316557876, 2.74780718300419835, // k = 16 + 0.99822494122659577, 1.89791339232732525, 2.75338173141955167, // k = 17 + 0.99830679915913834, 1.90028752122407241, 2.75857186416826039, // k = 18 + 0.99838117410831728, 1.90249575897183831, 2.76342117562634826, // k = 19 + 0.99844913407071090, 1.90455689090418900, 2.76796659454200267, // k = 20 + 0.99851147736424650, 1.90648682834171268, 2.77223944710058845, // k = 21 + 0.99856879856019987, 1.90829917277082473, 2.77626682032629901, // k = 22 + 0.99862183849734265, 1.91000561415842185, 2.78007199816156003, // k = 23 + 0.99867096266018507, 1.91161621560812023, 2.78367524259661536, // k = 24 + 0.99871656986212543, 1.91313978579765376, 2.78709435016625662, // k = 25 + 0.99875907577771272, 1.91458400425526065, 2.79034488416175463, // k = 26 + 0.99879885565047744, 1.91595563175945927, 2.79344064132371273, // k = 27 + 0.99883610756373287, 1.91726064301425936, 2.79639384757751941, // k = 28 + 0.99887095169674467, 1.91850441099725799, 2.79921543574803877, // k = 29 + 0.99890379414739527, 1.91969155477030995, 2.80191513182441554, // k = 30 + 0.99893466279047516, 1.92082633358913313, 2.80450167352080371, // k = 31 + 0.99896392088177777, 1.92191254955568525, 2.80698295731653502, // k = 32 + 0.99899147889385631, 1.92295362479495680, 2.80936614404217266, // k = 33 + 0.99901764688726757, 1.92395267400968351, 2.81165765979318394, // k = 34 
+ 0.99904238606342233, 1.92491244978191389, 2.81386337393604435, // k = 35 + 0.99906590152386343, 1.92583552644848055, 2.81598868034527072, // k = 36 + 0.99908829040739988, 1.92672418013918900, 2.81803841726804194, // k = 37 + 0.99910959420023460, 1.92758051694144683, 2.82001709302821268, // k = 38 + 0.99912996403594434, 1.92840654943159961, 2.82192875763732332, // k = 39 + 0.99914930224576892, 1.92920397044028391, 2.82377730628954282, // k = 40 + 0.99916781270195543, 1.92997447498220254, 2.82556612075063640, // k = 41 + 0.99918553179077207, 1.93071949211818605, 2.82729843191989971, // k = 42 + 0.99920250730914972, 1.93144048613876862, 2.82897728689417249, // k = 43 + 0.99921873345181211, 1.93213870990595638, 2.83060537017752267, // k = 44 + 0.99923435180002684, 1.93281536508689555, 2.83218527795750674, // k = 45 + 0.99924930425362390, 1.93347145882316340, 2.83371938965598247, // k = 46 + 0.99926370394567243, 1.93410820221384938, 2.83520990872793277, // k = 47 + 0.99927750755296074, 1.93472643138986200, 2.83665891945119597, // k = 48 + 0.99929082941537217, 1.93532697329771963, 2.83806833931606661, // k = 49 + 0.99930366295501472, 1.93591074716263734, 2.83943997143404658, // k = 50 + 0.99931598804721489, 1.93647857274021362, 2.84077557836653227, // k = 51 + 0.99932789059798210, 1.93703110239354714, 2.84207662106302905, // k = 52 + 0.99933946180485123, 1.93756904936378760, 2.84334468086129277, // k = 53 + 0.99935053819703512, 1.93809302131219852, 2.84458116874117195, // k = 54 + 0.99936126637970801, 1.93860365411038060, 2.84578731838604426, // k = 55 + 0.99937166229284458, 1.93910149816429112, 2.84696443486512862, // k = 56 + 0.99938169190727422, 1.93958709548454067, 2.84811369085281285, // k = 57 + 0.99939136927613959, 1.94006085573701625, 2.84923617230361970, // k = 58 + 0.99940074328745254, 1.94052339623206649, 2.85033291216254270, // k = 59 + 0.99940993070470086, 1.94097508636855309, 2.85140492437699322, // k = 60 + 0.99941868577388959, 1.94141633372043998, 
2.85245314430358121, // k = 61 + 0.99942734443487780, 1.94184757038001976, 2.85347839582286156, // k = 62 + 0.99943556385736088, 1.94226915100517772, 2.85448160365493209, // k = 63 + 0.99944374522542034, 1.94268143723749631, 2.85546346373061510, // k = 64 + 0.99945159955424856, 1.94308482059116727, 2.85642486111805738, // k = 65 + 0.99945915301904620, 1.94347956957849988, 2.85736639994965458, // k = 66 + 0.99946660663832176, 1.94386600964031686, 2.85828887832701639, // k = 67 + 0.99947383703224091, 1.94424436597356021, 2.85919278275500233, // k = 68 + 0.99948075442870277, 1.94461502153473020, 2.86007887186090670, // k = 69 + 0.99948766082269458, 1.94497821937304138, 2.86094774077355396, // k = 70 + 0.99949422748713346, 1.94533411296001191, 2.86179981848076181, // k = 71 + 0.99950070756119658, 1.94568300035135167, 2.86263579405672886, // k = 72 + 0.99950704321753392, 1.94602523449961495, 2.86345610449197352, // k = 73 + 0.99951320334216121, 1.94636083782822311, 2.86426125541271404, // k = 74 + 0.99951920293474927, 1.94669011080745236, 2.86505169255406145, // k = 75 + 0.99952501670378524, 1.94701327348536779, 2.86582788270862920, // k = 76 + 0.99953071209267819, 1.94733044372333097, 2.86659027602854621, // k = 77 + 0.99953632734991515, 1.94764180764266825, 2.86733927778843167, // k = 78 + 0.99954171164873173, 1.94794766430732125, 2.86807526143834934, // k = 79 + 0.99954699274462655, 1.94824807472994621, 2.86879864789403882, // k = 80 + 0.99955216611081710, 1.94854317889829076, 2.86950970901679625, // k = 81 + 0.99955730019613043, 1.94883320227168610, 2.87020887436986527, // k = 82 + 0.99956213770650493, 1.94911826561721568, 2.87089648477021342, // k = 83 + 0.99956704264963037, 1.94939848545763539, 2.87157281693902178, // k = 84 + 0.99957166306481327, 1.94967401618316671, 2.87223821840905202, // k = 85 + 0.99957632713136491, 1.94994497791333288, 2.87289293193450135, // k = 86 + 0.99958087233392234, 1.95021155752212394, 2.87353731228213860, // k = 87 + 
0.99958532555996271, 1.95047376805584349, 2.87417154907075201, // k = 88 + 0.99958956246481989, 1.95073180380688882, 2.87479599765507032, // k = 89 + 0.99959389351869277, 1.95098572880579013, 2.87541081987382086, // k = 90 + 0.99959807862052230, 1.95123574036898617, 2.87601637401948551, // k = 91 + 0.99960214057801977, 1.95148186921983324, 2.87661283691068093, // k = 92 + 0.99960607527256684, 1.95172415829728152, 2.87720042968334155, // k = 93 + 0.99960996433179616, 1.95196280898670693, 2.87777936649376898, // k = 94 + 0.99961379137860717, 1.95219787713926962, 2.87834989933620022, // k = 95 + 0.99961756088146103, 1.95242944583677058, 2.87891216133900230, // k = 96 + 0.99962125605327401, 1.95265762420910960, 2.87946647367488140, // k = 97 + 0.99962486179100551, 1.95288245314810638, 2.88001290210658567, // k = 98 + 0.99962843240297161, 1.95310404286672679, 2.88055166523392359, // k = 99 + 0.99963187276145504, 1.95332251980147475, 2.88108300006589957, // k = 100 + 0.99963525453173929, 1.95353785898848287, 2.88160703591438505, // k = 101 + 0.99963855412988778, 1.95375019354571577, 2.88212393551896184, // k = 102 + 0.99964190254169694, 1.95395953472205974, 2.88263389761985422, // k = 103 + 0.99964506565942202, 1.95416607430155409, 2.88313700661564098, // k = 104 + 0.99964834424233118, 1.95436972855640079, 2.88363350163803034, // k = 105 + 0.99965136548857458, 1.95457068540693513, 2.88412349413960101, // k = 106 + 0.99965436594726498, 1.95476896383092935, 2.88460710620208260, // k = 107 + 0.99965736463468602, 1.95496457504532373, 2.88508450078833789, // k = 108 + 0.99966034130443404, 1.95515761150707590, 2.88555580586194083, // k = 109 + 0.99966326130828520, 1.95534810382198998, 2.88602118761679094, // k = 110 + 0.99966601446035952, 1.95553622237747504, 2.88648066384146773, // k = 111 + 0.99966887679593697, 1.95572186728168163, 2.88693444915907094, // k = 112 + 0.99967161286551232, 1.95590523410490391, 2.88738271495714116, // k = 113 + 0.99967435412270333, 
1.95608626483223702, 2.88782540459769166, // k = 114 + 0.99967701261934394, 1.95626497627117146, 2.88826277189363623, // k = 115 + 0.99967963265157778, 1.95644153684824573, 2.88869486674335008, // k = 116 + 0.99968216317182623, 1.95661589936000269, 2.88912184353694101, // k = 117 + 0.99968479674396349, 1.95678821614791332, 2.88954376359643561, // k = 118 + 0.99968729031337489, 1.95695842061650183, 2.88996069422501023, // k = 119 + 0.99968963358631413, 1.95712651709766305, 2.89037285320668502 // k = 120 + }; + +} diff --git a/src/main/java/org/apache/datasketches/thetacommon2/HashOperations.java b/src/main/java/org/apache/datasketches/thetacommon2/HashOperations.java new file mode 100644 index 000000000..57b8f1ab2 --- /dev/null +++ b/src/main/java/org/apache/datasketches/thetacommon2/HashOperations.java @@ -0,0 +1,401 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.datasketches.thetacommon2;
+
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
+import static java.lang.Math.max;
+import static org.apache.datasketches.common.Util.ceilingPowerOf2;
+
+import java.lang.foreign.MemorySegment;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+import org.apache.datasketches.common.SketchesStateException;
+
+/**
+ * Helper class for the common hash table methods.
+ *
+ * <p>Every table handled here is an Open Addressing, Double Hashing (OADH) table of long hash
+ * values in which a slot holding zero is treated as empty. The tables live either in a
+ * {@code long[]} or in a {@link MemorySegment}.</p>
+ *
+ * @author Lee Rhodes
+ * @author Kevin Lang
+ */
+public final class HashOperations {
+  private static final int STRIDE_HASH_BITS = 7;
+  private static final int EMPTY = 0; //slot value that marks an unused table entry
+
+  /**
+   * The stride mask for the Open Address, Double Hashing (OADH) hash table algorithm.
+   */
+  public static final int STRIDE_MASK = (1 << STRIDE_HASH_BITS) - 1;
+
+  private HashOperations() {}
+
+  //Make the stride odd and independent of the index, assuming the lgArrLongs lowest bits of the
+  // hash were used for the index. This results in an 8 bit value that is always odd.
+  private static int getStride(final long hash, final int lgArrLongs) {
+    return (2 * (int) ((hash >>> lgArrLongs) & STRIDE_MASK) ) + 1;
+  }
+
+  //ON-HEAP
+
+  /**
+   * This is a classical Knuth-style Open Addressing, Double Hash (OADH) search scheme for on-heap.
+   * Returns the index if found, -1 if not found.
+   *
+   * @param hashTable The hash table to search. Its size must be a power of 2.
+   * @param lgArrLongs The log_base2(hashTable.length).
+   * See lgArrLongs.
+   * @param hash The hash value to search for. It must not be zero.
+   * @return Current probe index if found, -1 if not found.
+   * @throws SketchesArgumentException if the given hash is zero.
+   */
+  public static int hashSearch(final long[] hashTable, final int lgArrLongs, final long hash) {
+    if (hash == 0) {
+      throw new SketchesArgumentException("Given hash must not be zero: " + hash);
+    }
+    final int arrayMask = (1 << lgArrLongs) - 1; // current Size -1
+    final int stride = getStride(hash, lgArrLongs);
+    int curProbe = (int) (hash & arrayMask);
+
+    // search for duplicate or empty slot
+    final int loopIndex = curProbe;
+    do {
+      final long arrVal = hashTable[curProbe];
+      if (arrVal == EMPTY) {
+        return -1; // not found
+      } else if (arrVal == hash) {
+        return curProbe; // found
+      }
+      curProbe = (curProbe + stride) & arrayMask;
+    } while (curProbe != loopIndex);
+    return -1; // probed all the way back to the starting slot without a match
+  }
+
+  /**
+   * This is a classical Knuth-style Open Addressing, Double Hash (OADH) insert scheme for on-heap.
+   * This method assumes that the input hash is not a duplicate.
+   * Useful for rebuilding tables to avoid unnecessary comparisons.
+   * Returns the index of insertion, which is always positive or zero.
+   * Throws an exception if the table has no empty slot.
+   *
+   * @param hashTable the hash table to insert into. Its size must be a power of 2.
+   * @param lgArrLongs The log_base2(hashTable.length).
+   * See lgArrLongs.
+   * @param hash The hash value to be potentially inserted into an empty slot. It must not be zero.
+   * This method does not check for zero.
+   * @return index of insertion. Always positive or zero.
+   * @throws SketchesArgumentException if the table has no empty slot.
+   */
+  public static int hashInsertOnly(final long[] hashTable, final int lgArrLongs, final long hash) {
+    final int arrayMask = (1 << lgArrLongs) - 1; // current Size -1
+    final int stride = getStride(hash, lgArrLongs);
+    int curProbe = (int) (hash & arrayMask);
+
+    // search for an empty slot only; duplicates are not checked
+    final int loopIndex = curProbe; // int, matching the other probe loops in this class
+    do {
+      final long arrVal = hashTable[curProbe];
+      if (arrVal == EMPTY) {
+        hashTable[curProbe] = hash;
+        return curProbe;
+      }
+      curProbe = (curProbe + stride) & arrayMask;
+    } while (curProbe != loopIndex);
+    throw new SketchesArgumentException("No empty slot in table!");
+  }
+
+  /**
+   * This is a classical Knuth-style Open Addressing, Double Hash (OADH) insert scheme for on-heap.
+   * Returns index &ge; 0 if found (duplicate); &lt; 0 if inserted, inserted at -(index + 1).
+   * Throws an exception if the value is not found and table has no empty slot.
+   *
+   * @param hashTable The hash table to insert into. Its size must be a power of 2.
+   * @param lgArrLongs The log_base2(hashTable.length).
+   * See lgArrLongs.
+   * @param hash The hash value to be potentially inserted into an empty slot only if it is not
+   * a duplicate of any other hash value in the table. It must not be zero.
+   * This method does not check for zero.
+   * @return index &ge; 0 if found (duplicate); &lt; 0 if inserted, inserted at -(index + 1).
+   * @throws SketchesArgumentException if the hash is not found and the table has no empty slot.
+   */
+  public static int hashSearchOrInsert(final long[] hashTable, final int lgArrLongs,
+      final long hash) {
+    final int arrayMask = (1 << lgArrLongs) - 1; // current Size -1
+    final int stride = getStride(hash, lgArrLongs);
+    int curProbe = (int) (hash & arrayMask);
+
+    // search for duplicate or zero
+    final int loopIndex = curProbe;
+    do {
+      final long arrVal = hashTable[curProbe];
+      if (arrVal == EMPTY) {
+        hashTable[curProbe] = hash; // insert value
+        return ~curProbe; // ~x == -(x + 1), so the result is always negative
+      } else if (arrVal == hash) {
+        return curProbe; // found a duplicate
+      }
+      curProbe = (curProbe + stride) & arrayMask;
+    } while (curProbe != loopIndex);
+    throw new SketchesArgumentException("Hash not found and no empty slots!");
+  }
+
+  /**
+   * Inserts the given long array into the given OADH hashTable of the target size,
+   * ignores duplicates and counts the values inserted.
+   * The hash values must not be negative. Zero values and values &ge; thetaLong are ignored.
+   * The given hash table may have values, but they must have been inserted by this method or one
+   * of the other OADH insert methods in this class.
+   * This method performs additional checks against potentially invalid hash values or theta values.
+   * Returns the count of values actually inserted.
+   *
+   * @param srcArr the source hash array to be potentially inserted
+   * @param hashTable The hash table to insert into. Its size must be a power of 2.
+   * @param lgArrLongs The log_base2(hashTable.length).
+   * See lgArrLongs.
+   * @param thetaLong The theta value that all input hash values are compared against.
+   * It must be greater than zero.
+   * See Theta Long
+   * @return the count of values actually inserted
+   * @throws SketchesStateException if thetaLong is negative or zero.
+   * @throws SketchesArgumentException if a source hash is negative, or if a new hash
+   * finds no empty slot in the table.
+   */
+  public static int hashArrayInsert(final long[] srcArr, final long[] hashTable,
+      final int lgArrLongs, final long thetaLong) {
+    int count = 0;
+    final int arrLen = srcArr.length;
+    checkThetaCorruption(thetaLong);
+    for (int i = 0; i < arrLen; i++ ) { // scan source array, build target array
+      final long hash = srcArr[i];
+      checkHashCorruption(hash);
+      if (continueCondition(thetaLong, hash) ) {
+        continue; // zero, or not below theta: skip
+      }
+      if (hashSearchOrInsert(hashTable, lgArrLongs, hash) < 0) {
+        count++ ; // a negative result means the hash was newly inserted
+      }
+    }
+    return count;
+  }
+
+  //With MemorySegment
+
+  /**
+   * This is a classical Knuth-style Open Addressing, Double Hash (OADH) search scheme for MemorySegment.
+   * Returns the index if found, -1 if not found. The input MemorySegment may be read only.
+   *
+   * @param seg The MemorySegment containing the hash table to search.
+   * The hash table portion must be a power of 2 in size.
+   * @param lgArrLongs The log_base2(hashTable.length).
+   * See lgArrLongs.
+   * @param hash The hash value to search for. Must not be zero.
+   * @param segOffsetBytes offset in the MemorySegment where the hashTable starts
+   * @return Current probe index if found, -1 if not found.
+   * @throws SketchesArgumentException if the given hash is zero.
+   */
+  public static int hashSearchMemorySegment(final MemorySegment seg, final int lgArrLongs, final long hash,
+      final int segOffsetBytes) {
+    if (hash == 0) {
+      throw new SketchesArgumentException("Given hash must not be zero: " + hash);
+    }
+    final int arrayMask = (1 << lgArrLongs) - 1;
+    final int stride = getStride(hash, lgArrLongs);
+    int curProbe = (int) (hash & arrayMask);
+    final int loopIndex = curProbe;
+    do {
+      final int curProbeOffsetBytes = (curProbe << 3) + segOffsetBytes; // 8 bytes per slot
+      final long curArrayHash = seg.get(JAVA_LONG_UNALIGNED, curProbeOffsetBytes);
+      if (curArrayHash == EMPTY) { return -1; }
+      else if (curArrayHash == hash) { return curProbe; }
+      curProbe = (curProbe + stride) & arrayMask;
+    } while (curProbe != loopIndex);
+    return -1;
+  }
+
+  /**
+   * This is a classical Knuth-style Open Addressing, Double Hash (OADH) insert scheme for MemorySegment.
+   * This method assumes that the input hash is not a duplicate.
+   * Useful for rebuilding tables to avoid unnecessary comparisons.
+   * Returns the index of insertion, which is always positive or zero.
+   * Throws an exception if table has no empty slot.
+   *
+   * @param wseg The writable MemorySegment that contains the hashTable to insert into.
+   * The size of the hashTable portion must be a power of 2.
+   * @param lgArrLongs The log_base2(hashTable.length).
+   * See lgArrLongs.
+   * @param hash value that must not be zero and will be inserted into the array into an empty slot.
+   * This method does not check for zero.
+   * @param memOffsetBytes offset in the writable MemorySegment where the hashTable starts
+   * @return index of insertion. Always positive or zero.
+   * @throws SketchesArgumentException if the table has no empty slot.
+   */
+  public static int hashInsertOnlyMemorySegment(final MemorySegment wseg, final int lgArrLongs,
+      final long hash, final int memOffsetBytes) {
+    final int arrayMask = (1 << lgArrLongs) - 1; // current Size -1
+    final int stride = getStride(hash, lgArrLongs);
+    int curProbe = (int) (hash & arrayMask);
+    // search for an empty slot only; duplicates are not checked
+    final int loopIndex = curProbe;
+    do {
+      final int curProbeOffsetBytes = (curProbe << 3) + memOffsetBytes; // 8 bytes per slot
+      final long curArrayHash = wseg.get(JAVA_LONG_UNALIGNED, curProbeOffsetBytes);
+      if (curArrayHash == EMPTY) {
+        wseg.set(JAVA_LONG_UNALIGNED, curProbeOffsetBytes, hash);
+        return curProbe;
+      }
+      curProbe = (curProbe + stride) & arrayMask;
+    } while (curProbe != loopIndex);
+    throw new SketchesArgumentException("No empty slot in table!");
+  }
+
+  /**
+   * This is a classical Knuth-style Open Addressing, Double Hash insert scheme, but inserts
+   * values directly into a writable MemorySegment.
+   * Returns index &ge; 0 if found (duplicate); &lt; 0 if inserted, inserted at -(index + 1).
+   * Throws an exception if the value is not found and table has no empty slot.
+   *
+   * @param wseg The writable MemorySegment that contains the hashTable to insert into.
+   * @param lgArrLongs The log_base2(hashTable.length).
+   * See lgArrLongs.
+   * @param hash The hash value to be potentially inserted into an empty slot only if it is not
+   * a duplicate of any other hash value in the table. It must not be zero.
+   * This method does not check for zero.
+   * @param memOffsetBytes offset in the writable MemorySegment where the hash array starts
+   * @return index &ge; 0 if found (duplicate); &lt; 0 if inserted, inserted at -(index + 1).
+   * @throws SketchesArgumentException if the hash is not found and the table has no empty slot.
+   */
+  public static int hashSearchOrInsertMemorySegment(final MemorySegment wseg, final int lgArrLongs,
+      final long hash, final int memOffsetBytes) {
+    final int arrayMask = (1 << lgArrLongs) - 1; // current Size -1
+    final int stride = getStride(hash, lgArrLongs);
+    int curProbe = (int) (hash & arrayMask);
+    // search for duplicate or zero
+    final int loopIndex = curProbe;
+    do {
+      final int curProbeOffsetBytes = (curProbe << 3) + memOffsetBytes; // 8 bytes per slot
+      final long curArrayHash = wseg.get(JAVA_LONG_UNALIGNED, curProbeOffsetBytes);
+      if (curArrayHash == EMPTY) {
+        wseg.set(JAVA_LONG_UNALIGNED, curProbeOffsetBytes, hash);
+        return ~curProbe; // ~x == -(x + 1), so the result is always negative
+      } else if (curArrayHash == hash) { return curProbe; } // curArrayHash is a duplicate
+      // curArrayHash is not a duplicate and not zero, continue searching
+      curProbe = (curProbe + stride) & arrayMask;
+    } while (curProbe != loopIndex);
+    throw new SketchesArgumentException("Key not found and no empty slot in table!");
+  }
+
+  //Other related methods
+
+  /**
+   * Checks that the given theta is not negative nor zero.
+   * @param thetaLong must be greater than zero otherwise throws an exception.
+   * See Theta Long
+   * @throws SketchesStateException if thetaLong is negative or zero.
+   */
+  public static void checkThetaCorruption(final long thetaLong) {
+    //if any one of the groups go negative it fails.
+    // thetaLong < 0 sets the sign bit directly; thetaLong == 0 sets it through (thetaLong - 1).
+    if (( thetaLong | (thetaLong - 1) ) < 0L ) {
+      throw new SketchesStateException(
+          "Data Corruption: thetaLong was negative or zero: " + "ThetaLong: " + thetaLong);
+    }
+  }
+
+  /**
+   * Checks that the given hash value is not negative.
+   * @param hash must be greater than -1 otherwise throws an exception.
+   * Note a hash of zero is normally ignored, but a negative hash is never allowed.
+   * @throws SketchesArgumentException if the hash is negative.
+   */
+  public static void checkHashCorruption(final long hash) {
+    if ( hash < 0L ) {
+      throw new SketchesArgumentException(
+          "Data Corruption: hash was negative: " + "Hash: " + hash);
+    }
+  }
+
+  /**
+   * Return true (continue) if hash is greater than or equal to thetaLong, or if hash == 0,
+   * or if hash == Long.MAX_VALUE.
+   * @param thetaLong must be greater than the hash value
+   * See Theta Long
+   * @param hash must be less than thetaLong and not less than or equal to zero.
+   * @return true (continue) if hash is greater than or equal to thetaLong, or if hash == 0,
+   * or if hash == Long.MAX_VALUE.
+   */
+  public static boolean continueCondition(final long thetaLong, final long hash) {
+    //if any one of the groups go negative it returns true
+    // (hash - 1) is negative when hash is zero; (thetaLong - hash - 1) is negative when
+    // hash is at or above thetaLong.
+    return (( (hash - 1L) | (thetaLong - hash - 1L)) < 0L );
+  }
+
+  /**
+   * Converts the given array to a hash table.
+   * @param hashArr The given array of hashes. Gaps are OK.
+   * @param count The number of valid hashes in the array
+   * @param thetaLong Any hashes equal to or greater than thetaLong will be ignored
+   * @param rebuildThreshold The fill fraction for the hash table forcing a rebuild or resize.
+   * @return a HashTable
+   */
+  public static long[] convertToHashTable(
+      final long[] hashArr,
+      final int count,
+      final long thetaLong,
+      final double rebuildThreshold) {
+    final int lgArrLongs = minLgHashTableSize(count, rebuildThreshold);
+    final int arrLongs = 1 << lgArrLongs;
+    final long[] hashTable = new long[arrLongs];
+    hashArrayInsert(hashArr, hashTable, lgArrLongs, thetaLong);
+    return hashTable;
+  }
+
+  /**
+   * Returns the smallest log hash table size given the count of items and the rebuild threshold.
+   * The result is never smaller than {@code ThetaUtil.MIN_LG_ARR_LONGS}.
+   * @param count the given count of items
+   * @param rebuild_threshold the rebuild threshold as a fraction between zero and one.
+   * @return the smallest log hash table size
+   */
+  public static int minLgHashTableSize(final int count, final double rebuild_threshold) {
+    final int upperCount = (int) Math.ceil(count / rebuild_threshold);
+    final int arrLongs = max(ceilingPowerOf2(upperCount), 1 << ThetaUtil.MIN_LG_ARR_LONGS);
+    // arrLongs is expected to be a power of 2, so its trailing-zero count is its log base 2
+    final int newLgArrLongs = Integer.numberOfTrailingZeros(arrLongs);
+    return newLgArrLongs;
+  }
+
+  /**
+   * Counts the cardinality of the first 2^lgArrLongs values of the given source array.
+   * Only values that are non-zero and less than thetaLong are counted.
+   * @param srcArr the given source array
+   * @param lgArrLongs See lgArrLongs
+   * @param thetaLong See Theta Long
+   * @return the cardinality
+   */
+  public static int countPart(final long[] srcArr, final int lgArrLongs, final long thetaLong) {
+    int cnt = 0;
+    final int len = 1 << lgArrLongs;
+    for (int i = len; i-- > 0;) {
+      final long hash = srcArr[i];
+      if (continueCondition(thetaLong, hash) ) {
+        continue;
+      }
+      cnt++ ;
+    }
+    return cnt;
+  }
+
+  /**
+   * Counts the cardinality of the given source array.
+   * Only values that are non-zero and less than thetaLong are counted.
+   * @param srcArr the given source array
+   * @param thetaLong See Theta Long
+   * @return the cardinality
+   */
+  public static int count(final long[] srcArr, final long thetaLong) {
+    int cnt = 0;
+    final int len = srcArr.length;
+    for (int i = len; i-- > 0;) {
+      final long hash = srcArr[i];
+      if (continueCondition(thetaLong, hash) ) {
+        continue;
+      }
+      cnt++ ;
+    }
+    return cnt;
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/thetacommon2/QuickSelect.java b/src/main/java/org/apache/datasketches/thetacommon2/QuickSelect.java
new file mode 100644
index 000000000..e2e80e0a7
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/thetacommon2/QuickSelect.java
@@ -0,0 +1,242 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.thetacommon2;
+
+/**
+ * QuickSelect algorithm improved from Sedgewick. Gets the kth order value
+ * (1-based or 0-based) from the array.
+ * Warning! This changes the ordering of elements in the given array!<br>
+ * Also see:<br>
+ * blog.teamleadnet.com/2012/07/quick-select-algorithm-find-kth-element.html<br>
+ * See QuickSelectTest for examples and testNG tests.
+ *
+ * @author Lee Rhodes
+ */
+public final class QuickSelect {
+
+  private QuickSelect() {}
+
+  /**
+   * Returns the value that would sit at the 0-based index <i>pivot</i> if the range
+   * {@code arr[lo..hi]} were sorted ascending. Warning! This re-orders the elements of the
+   * given array!
+   *
+   * @param arr The array to be re-arranged.
+   * @param lo The lowest 0-based index to be considered.
+   * @param hi The highest 0-based index to be considered.
+   * @param pivot The 0-based index of the value to pivot on.
+   * @return The value of the smallest (n)th element where n is 0-based.
+   */
+  public static long select(final long[] arr, int lo, int hi, final int pivot) {
+    while (lo < hi) {
+      final int split = partition(arr, lo, hi);
+      if (split < pivot) {
+        lo = split + 1; //target lies to the right of the split
+      } else if (split > pivot) {
+        hi = split - 1; //target lies to the left of the split
+      } else {
+        break; //the split landed exactly on the requested index
+      }
+    }
+    return arr[pivot];
+  }
+
+  /**
+   * Returns the 1-based kth order statistic of the whole array, where any zero values take
+   * part in the ordering like every other value. Warning! This re-orders the elements of the
+   * given array!
+   *
+   * @param arr The hash array.
+   * @param pivot The 1-based index of the value that is chosen as the pivot for the array.
+   * After the operation all values below this 1-based index will be less than this value
+   * and all values above this index will be greater. The 0-based index of the pivot will be
+   * pivot-1.
+   * @return The value of the smallest (N)th element including zeros, where N is 1-based.
+   */
+  public static long selectIncludingZeros(final long[] arr, final int pivot) {
+    final int lastIdx = arr.length - 1;
+    return select(arr, 0, lastIdx, pivot - 1);
+  }
+
+  /**
+   * Returns the 1-based kth order statistic of the array, skipping over the zero values.
+   * Returns zero when <i>pivot</i> is larger than <i>nonZeros</i>.
+   * Warning! This re-orders the elements of the given array!
+   *
+   * @param arr The hash array.
+   * @param nonZeros The number of non-zero values in the array.
+   * @param pivot The 1-based index of the value that is chosen as the pivot for the array.
+   * After the operation all values below this 1-based index will be less than this value
+   * and all values above this index will be greater. The 0-based index of the pivot will be
+   * pivot+arr.length-nonZeros-1.
+   * @return The value of the smallest (N)th element excluding zeros, where N is 1-based.
+   */
+  public static long selectExcludingZeros(final long[] arr, final int nonZeros, final int pivot) {
+    if (pivot > nonZeros) {
+      return 0L;
+    }
+    final int lastIdx = arr.length - 1;
+    //shift the requested rank upward by the number of zeros, (arr.length - nonZeros)
+    final int zeroBasedIdx = (lastIdx - nonZeros) + pivot;
+    return select(arr, 0, lastIdx, zeroBasedIdx);
+  }
+
+  /**
+   * Partitions {@code arr[lo..hi]} around the value initially found at {@code arr[lo]}.
+   * On return that value sits at the returned index, no value to its left within the range
+   * is greater than it and no value to its right within the range is smaller.
+   *
+   * @param arr The given array to partition
+   * @param lo the low index
+   * @param hi the high index
+   * @return the final index of the partitioning value.
+   */
+  private static int partition(final long[] arr, final int lo, final int hi) {
+    final long pivotVal = arr[lo]; //partitioning item value
+    int left = lo;       //left scan index
+    int right = hi + 1;  //right scan index
+    while (true) {
+      //scan right for a value that is not smaller than the partitioning value
+      do {
+        left++;
+      } while ((arr[left] < pivotVal) && (left != hi));
+      //scan left for a value that is not larger than the partitioning value
+      do {
+        right--;
+      } while ((pivotVal < arr[right]) && (right != lo));
+      if (left >= right) {
+        break; //scans have met or crossed
+      }
+      exchange(arr, left, right);
+    }
+    //move the partitioning value from arr[lo] into its final position
+    exchange(arr, lo, right);
+    return right;
+  }
+
+  //Swaps the values at the two given indices.
+  private static void exchange(final long[] arr, final int idxA, final int idxB) {
+    final long tmp = arr[idxA];
+    arr[idxA] = arr[idxB];
+    arr[idxB] = tmp;
+  }
+
+  //For double arrays
+
+  /**
+   * Returns the value that would sit at the 0-based index <i>pivot</i> if the range
+   * {@code arr[lo..hi]} were sorted ascending. Warning! This re-orders the elements of the
+   * given array!
+   *
+   * @param arr The array to be re-arranged.
+   * @param lo The lowest 0-based index to be considered.
+   * @param hi The highest 0-based index to be considered.
+   * @param pivot The 0-based index of the value to pivot on.
+   * @return The value of the smallest (n)th element where n is 0-based.
+   */
+  public static double select(final double[] arr, int lo, int hi, final int pivot) {
+    while (lo < hi) {
+      final int split = partition(arr, lo, hi);
+      if (split < pivot) {
+        lo = split + 1; //target lies to the right of the split
+      } else if (split > pivot) {
+        hi = split - 1; //target lies to the left of the split
+      } else {
+        break; //the split landed exactly on the requested index
+      }
+    }
+    return arr[pivot];
+  }
+
+  /**
+   * Returns the 1-based kth order statistic of the whole array, where any zero values take
+   * part in the ordering like every other value. Warning! This re-orders the elements of the
+   * given array!
+   *
+   * @param arr The hash array.
+   * @param pivot The 1-based index of the value that is chosen as the pivot for the array.
+   * After the operation all values below this 1-based index will be less than this value
+   * and all values above this index will be greater. The 0-based index of the pivot will be
+   * pivot-1.
+   * @return The value of the smallest (N)th element including zeros, where N is 1-based.
+   */
+  public static double selectIncludingZeros(final double[] arr, final int pivot) {
+    final int lastIdx = arr.length - 1;
+    return select(arr, 0, lastIdx, pivot - 1);
+  }
+
+  /**
+   * Returns the 1-based kth order statistic of the array, skipping over the zero values.
+   * Returns zero when <i>pivot</i> is larger than <i>nonZeros</i>.
+   * Warning! This re-orders the elements of the given array!
+   *
+   * @param arr The hash array.
+   * @param nonZeros The number of non-zero values in the array.
+   * @param pivot The 1-based index of the value that is chosen as the pivot for the array.
+   * After the operation all values below this 1-based index will be less than this value
+   * and all values above this index will be greater. The 0-based index of the pivot will be
+   * pivot+arr.length-nonZeros-1.
+   * @return The value of the smallest (N)th element excluding zeros, where N is 1-based.
+   */
+  public static double selectExcludingZeros(final double[] arr, final int nonZeros, final int pivot) {
+    if (pivot > nonZeros) {
+      return 0.0;
+    }
+    final int lastIdx = arr.length - 1;
+    //shift the requested rank upward by the number of zeros, (arr.length - nonZeros)
+    final int zeroBasedIdx = (lastIdx - nonZeros) + pivot;
+    return select(arr, 0, lastIdx, zeroBasedIdx);
+  }
+
+  /**
+   * Partitions {@code arr[lo..hi]} around the value initially found at {@code arr[lo]}.
+   * On return that value sits at the returned index, no value to its left within the range
+   * is greater than it and no value to its right within the range is smaller.
+   *
+   * @param arr The given array to partition
+   * @param lo the low index
+   * @param hi the high index
+   * @return the final index of the partitioning value.
+   */
+  private static int partition(final double[] arr, final int lo, final int hi) {
+    final double pivotVal = arr[lo]; //partitioning item value
+    int left = lo;       //left scan index
+    int right = hi + 1;  //right scan index
+    while (true) {
+      //scan right for a value that is not smaller than the partitioning value
+      do {
+        left++;
+      } while ((arr[left] < pivotVal) && (left != hi));
+      //scan left for a value that is not larger than the partitioning value
+      do {
+        right--;
+      } while ((pivotVal < arr[right]) && (right != lo));
+      if (left >= right) {
+        break; //scans have met or crossed
+      }
+      exchange(arr, left, right);
+    }
+    //move the partitioning value from arr[lo] into its final position
+    exchange(arr, lo, right);
+    return right;
+  }
+
+  //Swaps the values at the two given indices.
+  private static void exchange(final double[] arr, final int idxA, final int idxB) {
+    final double tmp = arr[idxA];
+    arr[idxA] = arr[idxB];
+    arr[idxB] = tmp;
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/thetacommon2/SetOperationCornerCases.java b/src/main/java/org/apache/datasketches/thetacommon2/SetOperationCornerCases.java
new file mode 100644
index 000000000..f559cb9e8
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/thetacommon2/SetOperationCornerCases.java
@@ -0,0 +1,313 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.thetacommon2;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+
+/**
+ * Simplifies and speeds up set operations by resolving specific corner cases.
+ *
+ * <p>Each input sketch is reduced to a 3-bit state by
+ * {@link #sketchStateId(boolean, int, long)}. The states of sketch A and sketch B are packed
+ * into one ID by {@link #createCornerCaseId(long, int, boolean, long, int, boolean)}, and
+ * {@link CornerCase#caseIdToCornerCase(int)} maps that ID to the action each set operation
+ * should take.</p>
+ *
+ * @author Lee Rhodes
+ */
+
+public class SetOperationCornerCases {
+  private static final long MAX = Long.MAX_VALUE;
+
+  /** Intersection actions */
+  public enum IntersectAction {
+    /** Degenerate{MinTheta, 0, F} */
+    DEGEN_MIN_0_F("D", "Degenerate{MinTheta, 0, F}"),
+    /** Empty{1.0, 0, T} */
+    EMPTY_1_0_T("E", "Empty{1.0, 0, T}"),
+    /** Full Intersect */
+    FULL_INTERSECT("I", "Full Intersect");
+
+    //assigned once in the constructor
+    private final String actionId;
+    private final String actionDescription;
+
+    private IntersectAction(final String actionId, final String actionDescription) {
+      this.actionId = actionId;
+      this.actionDescription = actionDescription;
+    }
+
+    /**
+     * Gets the Action ID
+     * @return the actionId
+     */
+    public String getActionId() {
+      return actionId;
+    }
+
+    /**
+     * Gets the Action Description
+     * @return the actionDescription
+     */
+    public String getActionDescription() {
+      return actionDescription;
+    }
+  }
+
+  /** A not B actions */
+  public enum AnotbAction {
+    /** Sketch A Exactly */
+    SKETCH_A("A", "Sketch A Exactly"),
+    /** Trim Sketch A by MinTheta */
+    TRIM_A("TA", "Trim Sketch A by MinTheta"),
+    /** Degenerate{MinTheta, 0, F} */
+    DEGEN_MIN_0_F("D", "Degenerate{MinTheta, 0, F}"),
+    /** Degenerate{ThetaA, 0, F} */
+    DEGEN_THA_0_F("DA", "Degenerate{ThetaA, 0, F}"),
+    /** Empty{1.0, 0, T} */
+    EMPTY_1_0_T("E", "Empty{1.0, 0, T}"),
+    /** Full AnotB */
+    FULL_ANOTB("N", "Full AnotB");
+
+    //assigned once in the constructor
+    private final String actionId;
+    private final String actionDescription;
+
+    private AnotbAction(final String actionId, final String actionDescription) {
+      this.actionId = actionId;
+      this.actionDescription = actionDescription;
+    }
+
+    /**
+     * Gets the Action ID
+     * @return the actionId
+     */
+    public String getActionId() {
+      return actionId;
+    }
+
+    /**
+     * Gets the action description
+     * @return the action description
+     */
+    public String getActionDescription() {
+      return actionDescription;
+    }
+  }
+
+  /** List of union actions */
+  public enum UnionAction {
+    /** Sketch A Exactly */
+    SKETCH_A("A", "Sketch A Exactly"),
+    /** Trim Sketch A by MinTheta */
+    TRIM_A("TA", "Trim Sketch A by MinTheta"),
+    /** Sketch B Exactly */
+    SKETCH_B("B", "Sketch B Exactly"),
+    /** Trim Sketch B by MinTheta */
+    TRIM_B("TB", "Trim Sketch B by MinTheta"),
+    /** Degenerate{MinTheta, 0, F} */
+    DEGEN_MIN_0_F("D", "Degenerate{MinTheta, 0, F}"),
+    /** Degenerate{ThetaA, 0, F} */
+    DEGEN_THA_0_F("DA", "Degenerate{ThetaA, 0, F}"),
+    /** Degenerate{ThetaB, 0, F} */
+    DEGEN_THB_0_F("DB", "Degenerate{ThetaB, 0, F}"),
+    /** Empty{1.0, 0, T} */
+    EMPTY_1_0_T("E", "Empty{1.0, 0, T}"),
+    /** Full Union */
+    FULL_UNION("N", "Full Union");
+
+    //assigned once in the constructor
+    private final String actionId;
+    private final String actionDescription;
+
+    private UnionAction(final String actionId, final String actionDescription) {
+      this.actionId = actionId;
+      this.actionDescription = actionDescription;
+    }
+
+    /**
+     * Gets the action ID
+     * @return the actionId
+     */
+    public String getActionId() {
+      return actionId;
+    }
+
+    /**
+     * Gets the action description
+     * @return the actionDescription
+     */
+    public String getActionDescription() {
+      return actionDescription;
+    }
+  }
+
+  /**
+   * List of corner cases.
+   *
+   * <p>The case IDs are octal literals: the high octal digit is the state of sketch A and the
+   * low octal digit is the state of sketch B, as produced by
+   * {@link SetOperationCornerCases#sketchStateId(boolean, int, long)}.</p>
+   */
+  public enum CornerCase {
+    /** Empty Empty */
+    Empty_Empty(055, "A{ 1.0, 0, T} ; B{ 1.0, 0, T}",
+        IntersectAction.EMPTY_1_0_T, AnotbAction.EMPTY_1_0_T, UnionAction.EMPTY_1_0_T),
+    /** Empty Exact */
+    Empty_Exact(056, "A{ 1.0, 0, T} ; B{ 1.0,>0, F}",
+        IntersectAction.EMPTY_1_0_T, AnotbAction.EMPTY_1_0_T, UnionAction.SKETCH_B),
+    /** Empty Estimation */
+    Empty_Estimation(052, "A{ 1.0, 0, T} ; B{<1.0,>0, F}",
+        IntersectAction.EMPTY_1_0_T, AnotbAction.EMPTY_1_0_T, UnionAction.SKETCH_B),
+    /** Empty Degen */
+    Empty_Degen(050, "A{ 1.0, 0, T} ; B{<1.0, 0, F}",
+        IntersectAction.EMPTY_1_0_T, AnotbAction.EMPTY_1_0_T, UnionAction.DEGEN_THB_0_F),
+
+    /** Exact Empty */
+    Exact_Empty(065, "A{ 1.0,>0, F} ; B{ 1.0, 0, T}",
+        IntersectAction.EMPTY_1_0_T, AnotbAction.SKETCH_A, UnionAction.SKETCH_A),
+    /** Exact Exact */
+    Exact_Exact(066, "A{ 1.0,>0, F} ; B{ 1.0,>0, F}",
+        IntersectAction.FULL_INTERSECT, AnotbAction.FULL_ANOTB, UnionAction.FULL_UNION),
+    /** Exact Estimation */
+    Exact_Estimation(062, "A{ 1.0,>0, F} ; B{<1.0,>0, F}",
+        IntersectAction.FULL_INTERSECT, AnotbAction.FULL_ANOTB, UnionAction.FULL_UNION),
+    /** Exact Degen */
+    Exact_Degen(060, "A{ 1.0,>0, F} ; B{<1.0, 0, F}",
+        IntersectAction.DEGEN_MIN_0_F, AnotbAction.TRIM_A, UnionAction.TRIM_A),
+
+    /** Estimation_Empty */
+    Estimation_Empty(025, "A{<1.0,>0, F} ; B{ 1.0, 0, T}",
+        IntersectAction.EMPTY_1_0_T, AnotbAction.SKETCH_A, UnionAction.SKETCH_A),
+    /** Estimation_Exact */
+    Estimation_Exact(026, "A{<1.0,>0, F} ; B{ 1.0,>0, F}",
+        IntersectAction.FULL_INTERSECT, AnotbAction.FULL_ANOTB, UnionAction.FULL_UNION),
+    /** Estimation_Estimation */
+    Estimation_Estimation(022, "A{<1.0,>0, F} ; B{<1.0,>0, F}",
+        IntersectAction.FULL_INTERSECT, AnotbAction.FULL_ANOTB, UnionAction.FULL_UNION),
+    /** Estimation_Degen */
+    Estimation_Degen(020, "A{<1.0,>0, F} ; B{<1.0, 0, F}",
+        IntersectAction.DEGEN_MIN_0_F, AnotbAction.TRIM_A, UnionAction.TRIM_A),
+
+    /** Degen_Empty */
+    Degen_Empty(005, "A{<1.0, 0, F} ; B{ 1.0, 0, T}",
+        IntersectAction.EMPTY_1_0_T, AnotbAction.DEGEN_THA_0_F, UnionAction.DEGEN_THA_0_F),
+    /** Degen_Exact */
+    Degen_Exact(006, "A{<1.0, 0, F} ; B{ 1.0,>0, F}",
+        IntersectAction.DEGEN_MIN_0_F, AnotbAction.DEGEN_THA_0_F, UnionAction.TRIM_B),
+    /** Degen_Estimation */
+    Degen_Estimation(002, "A{<1.0, 0, F} ; B{<1.0,>0, F}",
+        IntersectAction.DEGEN_MIN_0_F, AnotbAction.DEGEN_MIN_0_F, UnionAction.TRIM_B),
+    /** Degen_Degen */
+    Degen_Degen(000, "A{<1.0, 0, F} ; B{<1.0, 0, F}",
+        IntersectAction.DEGEN_MIN_0_F, AnotbAction.DEGEN_MIN_0_F, UnionAction.DEGEN_MIN_0_F);
+
+    //Lookup from case ID to constant; filled by the static initializer below.
+    private static final Map<Integer, CornerCase> caseIdToCornerCaseMap = new HashMap<>();
+    //assigned once in the constructor
+    private final int caseId;
+    private final String caseDescription;
+    private final IntersectAction intersectAction;
+    private final AnotbAction anotbAction;
+    private final UnionAction unionAction;
+
+    static {
+      for (final CornerCase cc : values()) {
+        caseIdToCornerCaseMap.put(cc.getId(), cc);
+      }
+    }
+
+    private CornerCase(final int caseId, final String caseDescription,
+        final IntersectAction intersectAction, final AnotbAction anotbAction, final UnionAction unionAction) {
+      this.caseId = caseId;
+      this.caseDescription = caseDescription;
+      this.intersectAction = intersectAction;
+      this.anotbAction = anotbAction;
+      this.unionAction = unionAction;
+    }
+
+    /**
+     * Gets the case ID
+     * @return the caseId
+     */
+    public int getId() {
+      return caseId;
+    }
+
+    /**
+     * Gets the case description
+     * @return the caseDescription
+     */
+    public String getCaseDescription() {
+      return caseDescription;
+    }
+
+    /**
+     * Gets the intersect action
+     * @return the intersectAction
+     */
+    public IntersectAction getIntersectAction() {
+      return intersectAction;
+    }
+
+    /**
+     * Gets the AnotB action
+     * @return the anotbAction
+     */
+    public AnotbAction getAnotbAction() {
+      return anotbAction;
+    }
+
+    /**
+     * Gets the union action
+     * @return the unionAction
+     */
+    public UnionAction getUnionAction() {
+      return unionAction;
+    }
+
+    //See checkById test in /tuple/MiscTest.
+    /**
+     * Converts a case ID to its CornerCase constant.
+     * @param id the case ID
+     * @return the CornerCase that has the given ID
+     * @throws SketchesArgumentException if no CornerCase has the given ID.
+     */
+    public static CornerCase caseIdToCornerCase(final int id) {
+      final CornerCase cc = caseIdToCornerCaseMap.get(id);
+      if (cc == null) {
+        throw new SketchesArgumentException("Possible Corruption: Illegal CornerCase ID: " + Integer.toOctalString(id));
+      }
+      return cc;
+    }
+  } //end of enum CornerCase
+
+  /**
+   * Creates the CornerCase ID: the state ID of A shifted left by 3 bits, OR'ed with the
+   * state ID of B.
+   * @param thetaLongA the theta of A as a long
+   * @param countA the count of A
+   * @param emptyA true if A is empty
+   * @param thetaLongB the theta of B as a long
+   * @param countB the count of B
+   * @param emptyB true if B is empty
+   * @return the Corner Case ID
+   */
+  public static int createCornerCaseId(
+      final long thetaLongA, final int countA, final boolean emptyA,
+      final long thetaLongB, final int countB, final boolean emptyB) {
+    return (sketchStateId(emptyA, countA, thetaLongA) << 3) | sketchStateId(emptyB, countB, thetaLongB);
+  }
+
+  /**
+   * Returns the sketch state ID, a 3-bit value: bit 2 (value 4) is set when thetaLong is
+   * Long.MAX_VALUE or the sketch is empty, bit 1 (value 2) is set when numRetained is greater
+   * than zero, and bit 0 (value 1) is set when the sketch is empty.
+   * @param isEmpty true if empty
+   * @param numRetained the number of items retained
+   * @param thetaLong the value of theta as a long
+   * @return the sketch state ID
+   */
+  public static int sketchStateId(final boolean isEmpty, final int numRetained, final long thetaLong) {
+    // assume thetaLong = MAX if empty
+    return (((thetaLong == MAX) || isEmpty) ? 4 : 0) | ((numRetained > 0) ? 2 : 0) | (isEmpty ? 1 : 0);
+  }
+}
diff --git a/src/main/java/org/apache/datasketches/thetacommon2/ThetaUtil.java b/src/main/java/org/apache/datasketches/thetacommon2/ThetaUtil.java
new file mode 100644
index 000000000..52dd5c331
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/thetacommon2/ThetaUtil.java
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.thetacommon2; + +import static org.apache.datasketches.hash.MurmurHash3.hash; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; + +/** + * Utility methods for the Theta Family of sketches + * @author Lee Rhodes + * + */ +public final class ThetaUtil { + + /** + * The smallest Log2 nom entries allowed: 4. + */ + public static final int MIN_LG_NOM_LONGS = 4; + /** + * The largest Log2 nom entries allowed: 26. + */ + public static final int MAX_LG_NOM_LONGS = 26; + /** + * The hash table rebuild threshold = 15.0/16.0. + */ + public static final double REBUILD_THRESHOLD = 15.0 / 16.0; + /** + * The resize threshold = 0.5; tuned for speed. + */ + public static final double RESIZE_THRESHOLD = 0.5; + /** + * The default nominal entries is provided as a convenience for those cases where the + * nominal sketch size in number of entries is not provided. + * A sketch of 4096 entries has a Relative Standard Error (RSE) of +/- 1.56% at a confidence of + * 68%; or equivalently, a Relative Error of +/- 3.1% at a confidence of 95.4%. + * See Default Nominal Entries + */ + public static final int DEFAULT_NOMINAL_ENTRIES = 4096; + /** + * The seed 9001 used in the sketch update methods is a prime number that + * was chosen very early on in experimental testing. 
Choosing a seed is somewhat arbitrary, and + * the author cannot prove that this particular seed is somehow superior to other seeds. There + * was some early Internet discussion that a seed of 0 did not produce as clean avalanche diagrams + * as non-zero seeds, but this may have been more related to the MurmurHash2 release, which did + * have some issues. As far as the author can determine, MurmurHash3 does not have these problems. + * + *

      In order to perform set operations on two sketches it is critical that the hash + * function and seed are identical for both sketches; otherwise the assumed 1:1 relationship + * between the original source key value and the hashed bit string would be violated. Once + * you have developed a history of stored sketches you are stuck with that choice. + * + *

      WARNING: This seed is used internally by library sketches in different + * packages and thus must be declared public. However, this seed value must not be used by library + * users with the MurmurHash3 function. It should be viewed as existing for exclusive, private + * use by the library. + * + *

      See Default Update Seed + */ + public static final long DEFAULT_UPDATE_SEED = 9001L; + + private ThetaUtil() {} + + /** + * The smallest Log2 cache size allowed: 5. + */ + public static final int MIN_LG_ARR_LONGS = 5; + + /** + * Check if the two seed hashes are equal. If not, throw an SketchesArgumentException. + * @param seedHashA the seedHash A + * @param seedHashB the seedHash B + * @return seedHashA if they are equal + */ + public static short checkSeedHashes(final short seedHashA, final short seedHashB) { + if (seedHashA != seedHashB) { + throw new SketchesArgumentException( + "Incompatible Seed Hashes. " + Integer.toHexString(seedHashA & 0XFFFF) + + ", " + Integer.toHexString(seedHashB & 0XFFFF)); + } + return seedHashA; + } + + /** + * Computes and checks the 16-bit seed hash from the given long seed. + * The seed hash may not be zero in order to maintain compatibility with older serialized + * versions that did not have this concept. + * @param seed See Update Hash Seed + * @return the seed hash. + */ + public static short computeSeedHash(final long seed) { + final long[] seedArr = {seed}; + final short seedHash = (short)(hash(seedArr, 0L)[0] & 0xFFFFL); + if (seedHash == 0) { + throw new SketchesArgumentException( + "The given seed: " + seed + " produced a seedHash of zero. " + + "You must choose a different seed."); + } + return seedHash; + } + + /** + * Gets the smallest allowed exponent of 2 that it is a sub-multiple of the target by zero, + * one or more resize factors. + * + * @param lgTarget Log2 of the target size + * @param lgRF Log_base2 of Resize Factor. + * See Resize Factor + * @param lgMin Log2 of the minimum allowed starting size + * @return The Log2 of the starting size + */ + public static int startingSubMultiple(final int lgTarget, final int lgRF, + final int lgMin) { + return lgTarget <= lgMin ? lgMin : lgRF == 0 ? 
lgTarget : (lgTarget - lgMin) % lgRF + lgMin; + } + + /** + * Checks that the given nomLongs is within bounds and returns the Log2 of the ceiling power of 2 + * of the given nomLongs. + * @param nomLongs the given number of nominal longs. This can be any value from 16 to + * 67108864, inclusive. + * @return The Log2 of the ceiling power of 2 of the given nomLongs. + */ + public static int checkNomLongs(final int nomLongs) { + final int lgNomLongs = Integer.numberOfTrailingZeros(Util.ceilingPowerOf2(nomLongs)); + if (lgNomLongs > MAX_LG_NOM_LONGS || lgNomLongs < MIN_LG_NOM_LONGS) { + throw new SketchesArgumentException("Nominal Entries must be >= 16 and <= 67108864: " + + nomLongs); + } + return lgNomLongs; + } + +} + diff --git a/src/main/java/org/apache/datasketches/thetacommon2/package-info.java b/src/main/java/org/apache/datasketches/thetacommon2/package-info.java new file mode 100644 index 000000000..46fd1110b --- /dev/null +++ b/src/main/java/org/apache/datasketches/thetacommon2/package-info.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/** + * This package contains common tools and methods for the theta, + * tuple, tuple/* and fdt packages. 
+ */ +package org.apache.datasketches.thetacommon2; diff --git a/src/main/java/org/apache/datasketches/tuple2/AnotB.java b/src/main/java/org/apache/datasketches/tuple2/AnotB.java index 46ff084ae..72a94dfc3 100644 --- a/src/main/java/org/apache/datasketches/tuple2/AnotB.java +++ b/src/main/java/org/apache/datasketches/tuple2/AnotB.java @@ -21,8 +21,8 @@ import static java.lang.Math.min; import static org.apache.datasketches.common.Util.exactLog2OfLong; -import static org.apache.datasketches.thetacommon.HashOperations.convertToHashTable; -import static org.apache.datasketches.thetacommon.HashOperations.hashSearch; +import static org.apache.datasketches.thetacommon2.HashOperations.convertToHashTable; +import static org.apache.datasketches.thetacommon2.HashOperations.hashSearch; import java.lang.reflect.Method; import java.util.Arrays; @@ -30,10 +30,10 @@ import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; import org.apache.datasketches.common.SuppressFBWarnings; -import org.apache.datasketches.thetacommon.SetOperationCornerCases; -import org.apache.datasketches.thetacommon.SetOperationCornerCases.AnotbAction; -import org.apache.datasketches.thetacommon.SetOperationCornerCases.CornerCase; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.SetOperationCornerCases; +import org.apache.datasketches.thetacommon2.SetOperationCornerCases.AnotbAction; +import org.apache.datasketches.thetacommon2.SetOperationCornerCases.CornerCase; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * Computes a set difference, A-AND-NOT-B, of two generic tuple sketches. 
diff --git a/src/main/java/org/apache/datasketches/tuple2/CompactSketch.java b/src/main/java/org/apache/datasketches/tuple2/CompactSketch.java index 36d7a9b5f..63d1b07ba 100644 --- a/src/main/java/org/apache/datasketches/tuple2/CompactSketch.java +++ b/src/main/java/org/apache/datasketches/tuple2/CompactSketch.java @@ -22,7 +22,7 @@ import static java.lang.foreign.ValueLayout.JAVA_BYTE; import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static org.apache.datasketches.thetacommon.HashOperations.count; +import static org.apache.datasketches.thetacommon2.HashOperations.count; import java.lang.foreign.MemorySegment; import java.lang.reflect.Array; diff --git a/src/main/java/org/apache/datasketches/tuple2/HashTables.java b/src/main/java/org/apache/datasketches/tuple2/HashTables.java index 913c53196..be9ad48e3 100644 --- a/src/main/java/org/apache/datasketches/tuple2/HashTables.java +++ b/src/main/java/org/apache/datasketches/tuple2/HashTables.java @@ -23,12 +23,12 @@ import static java.lang.Math.max; import static java.lang.Math.min; import static org.apache.datasketches.common.Util.ceilingPowerOf2; -import static org.apache.datasketches.thetacommon.HashOperations.hashInsertOnly; -import static org.apache.datasketches.thetacommon.HashOperations.hashSearch; +import static org.apache.datasketches.thetacommon2.HashOperations.hashInsertOnly; +import static org.apache.datasketches.thetacommon2.HashOperations.hashSearch; import java.lang.reflect.Array; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; @SuppressWarnings("unchecked") class HashTables { diff --git a/src/main/java/org/apache/datasketches/tuple2/Intersection.java b/src/main/java/org/apache/datasketches/tuple2/Intersection.java index 8731df0d4..f3e0dde3c 100644 --- a/src/main/java/org/apache/datasketches/tuple2/Intersection.java +++ 
b/src/main/java/org/apache/datasketches/tuple2/Intersection.java @@ -26,7 +26,7 @@ import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** diff --git a/src/main/java/org/apache/datasketches/tuple2/JaccardSimilarity.java b/src/main/java/org/apache/datasketches/tuple2/JaccardSimilarity.java index 161478d86..79065453f 100644 --- a/src/main/java/org/apache/datasketches/tuple2/JaccardSimilarity.java +++ b/src/main/java/org/apache/datasketches/tuple2/JaccardSimilarity.java @@ -22,12 +22,12 @@ import static java.lang.Math.max; import static java.lang.Math.min; import static org.apache.datasketches.common.Util.ceilingPowerOf2; -import static org.apache.datasketches.thetacommon.BoundsOnRatiosInTupleSketchedSets2.getEstimateOfBoverA; -import static org.apache.datasketches.thetacommon.BoundsOnRatiosInTupleSketchedSets2.getLowerBoundForBoverA; -import static org.apache.datasketches.thetacommon.BoundsOnRatiosInTupleSketchedSets2.getUpperBoundForBoverA; +import static org.apache.datasketches.thetacommon2.BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA; +import static org.apache.datasketches.thetacommon2.BoundsOnRatiosInTupleSketchedSets.getLowerBoundForBoverA; +import static org.apache.datasketches.thetacommon2.BoundsOnRatiosInTupleSketchedSets.getUpperBoundForBoverA; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * Jaccard similarity of two Tuple Sketches, or alternatively, of a Tuple and Theta Sketch. 
diff --git a/src/main/java/org/apache/datasketches/tuple2/QuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple2/QuickSelectSketch.java index 6823cec3d..bdd376405 100644 --- a/src/main/java/org/apache/datasketches/tuple2/QuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple2/QuickSelectSketch.java @@ -26,7 +26,7 @@ import static org.apache.datasketches.common.Util.ceilingPowerOf2; import static org.apache.datasketches.common.Util.checkBounds; import static org.apache.datasketches.common.Util.exactLog2OfLong; -import static org.apache.datasketches.thetacommon.HashOperations.count; +import static org.apache.datasketches.thetacommon2.HashOperations.count; import java.lang.foreign.MemorySegment; import java.lang.reflect.Array; @@ -37,9 +37,9 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.HashOperations; -import org.apache.datasketches.thetacommon.QuickSelect; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.HashOperations; +import org.apache.datasketches.thetacommon2.QuickSelect; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * A generic tuple sketch using the QuickSelect algorithm. @@ -189,7 +189,7 @@ private QuickSelectSketch( /* * This private constructor is used to protect against "Finalizer attacks". * The private static inner class Validate performs validation and deserialization - * from the input Memory and may throw exceptions. In order to protect against the attack, we must + * from the input MemorySegment and may throw exceptions. In order to protect against the attack, we must * perform this validation prior to the constructor's super reaches the Object class. * Making QuickSelectSketch final won't work here because UpdatableSketch is a subclass. 
* Using an empty final finalizer() is not recommended and is deprecated as of Java9. @@ -228,7 +228,7 @@ private static final class Validate { long validate( final MemorySegment seg, final SummaryDeserializer deserializer) { - Objects.requireNonNull(seg, "SourceMemory must not be null."); + Objects.requireNonNull(seg, "Source MemorySegment must not be null."); Objects.requireNonNull(deserializer, "Deserializer must not be null."); checkBounds(0, 8, seg.byteSize()); diff --git a/src/main/java/org/apache/datasketches/tuple2/Sketch.java b/src/main/java/org/apache/datasketches/tuple2/Sketch.java index 20315b03d..fc85afb4c 100644 --- a/src/main/java/org/apache/datasketches/tuple2/Sketch.java +++ b/src/main/java/org/apache/datasketches/tuple2/Sketch.java @@ -21,7 +21,7 @@ import static org.apache.datasketches.common.Util.LS; -import org.apache.datasketches.thetacommon.BinomialBoundsN; +import org.apache.datasketches.thetacommon2.BinomialBoundsN; /** * This is an equivalent to org.apache.datasketches.theta2.Sketch with diff --git a/src/main/java/org/apache/datasketches/tuple2/Union.java b/src/main/java/org/apache/datasketches/tuple2/Union.java index 5945ad53c..8bb25b8b1 100644 --- a/src/main/java/org/apache/datasketches/tuple2/Union.java +++ b/src/main/java/org/apache/datasketches/tuple2/Union.java @@ -22,8 +22,8 @@ import static java.lang.Math.min; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.QuickSelect; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.QuickSelect; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * Compute the union of two or more generic tuple sketches or generic tuple sketches combined with diff --git a/src/main/java/org/apache/datasketches/tuple2/UpdatableSketch.java b/src/main/java/org/apache/datasketches/tuple2/UpdatableSketch.java index c3b761f14..9bede1dc4 100644 --- 
a/src/main/java/org/apache/datasketches/tuple2/UpdatableSketch.java +++ b/src/main/java/org/apache/datasketches/tuple2/UpdatableSketch.java @@ -23,7 +23,7 @@ import java.nio.ByteBuffer; import org.apache.datasketches.hash.MurmurHash3; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * An extension of QuickSelectSketch<S>, which can be updated with many types of keys. diff --git a/src/main/java/org/apache/datasketches/tuple2/UpdatableSketchBuilder.java b/src/main/java/org/apache/datasketches/tuple2/UpdatableSketchBuilder.java index e0343d302..de3dae88a 100644 --- a/src/main/java/org/apache/datasketches/tuple2/UpdatableSketchBuilder.java +++ b/src/main/java/org/apache/datasketches/tuple2/UpdatableSketchBuilder.java @@ -21,7 +21,7 @@ import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * For building a new generic tuple UpdatableSketch diff --git a/src/main/java/org/apache/datasketches/tuple2/Util.java b/src/main/java/org/apache/datasketches/tuple2/Util.java index f2f8227ea..24f326b01 100644 --- a/src/main/java/org/apache/datasketches/tuple2/Util.java +++ b/src/main/java/org/apache/datasketches/tuple2/Util.java @@ -21,14 +21,12 @@ import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.datasketches.common.Util.ceilingPowerOf2; -import static org.apache.datasketches.hash.MurmurHash3.hash; import static org.apache.datasketches.hash.XxHash.hashCharArr; import static org.apache.datasketches.hash.XxHash.hashString; import java.lang.reflect.Array; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * Common utility functions for Tuples @@ -58,36 +56,6 @@ public static final byte[] 
stringToByteArray(final String value) { return value.getBytes(UTF_8); } - /** - * Computes and checks the 16-bit seed hash from the given long seed. - * The seed hash may not be zero in order to maintain compatibility with older serialized - * versions that did not have this concept. - * @param seed See Update Hash Seed - * @return the seed hash. - */ - public static short computeSeedHash(final long seed) { - final long[] seedArr = {seed}; - final short seedHash = (short)((hash(seedArr, 0L)[0]) & 0xFFFFL); - if (seedHash == 0) { - throw new SketchesArgumentException( - "The given seed: " + seed + " produced a seedHash of zero. " - + "You must choose a different seed."); - } - return seedHash; - } - - /** - * Checks the two given seed hashes. If they are not equal, this method throws an Exception. - * @param seedHashA given seed hash A - * @param seedHashB given seed hash B - */ - public static final void checkSeedHashes(final short seedHashA, final short seedHashB) { - if (seedHashA != seedHashB) { - throw new SketchesArgumentException("Incompatible Seed Hashes. " + seedHashA + ", " - + seedHashB); - } - } - /** * Gets the starting capacity of a new sketch given the Nominal Entries and the log Resize Factor. 
* @param nomEntries the given Nominal Entries diff --git a/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummary.java b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummary.java index c27051423..80e5fd2cd 100644 --- a/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummary.java +++ b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummary.java @@ -154,7 +154,7 @@ public byte[] toByteArray() { * @return DeserializedResult object, which contains a DoubleSummary object and number of bytes * read from the MemorySegment */ - public static DeserializeResult fromMemory(final MemorySegment seg) { + public static DeserializeResult fromMemorySegment(final MemorySegment seg) { return new DeserializeResult<>(new DoubleSummary(seg.get(JAVA_DOUBLE_UNALIGNED, VALUE_INDEX), Mode.values()[seg.get(JAVA_BYTE, MODE_BYTE_INDEX)]), SERIALIZED_SIZE_BYTES); } diff --git a/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryDeserializer.java b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryDeserializer.java index ef9854d78..95b86002a 100644 --- a/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryDeserializer.java +++ b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryDeserializer.java @@ -32,7 +32,7 @@ public class DoubleSummaryDeserializer implements SummaryDeserializer heapifySummary(final MemorySegment seg) { - return DoubleSummary.fromMemory(seg); + return DoubleSummary.fromMemorySegment(seg); } } diff --git a/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummary.java b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummary.java index c7f25ecd9..047a8fc58 100644 --- a/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummary.java +++ b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummary.java @@ -154,7 +154,7 @@ public byte[] toByteArray() { * @return DeserializedResult object, which contains a IntegerSummary 
object and number of bytes * read from the MemorySegment */ - public static DeserializeResult fromMemory(final MemorySegment seg) { + public static DeserializeResult fromMemorySegment(final MemorySegment seg) { return new DeserializeResult<>(new IntegerSummary(seg.get(JAVA_INT_UNALIGNED, VALUE_INDEX), Mode.values()[seg.get(JAVA_BYTE, MODE_BYTE_INDEX)]), SERIALIZED_SIZE_BYTES); } diff --git a/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryDeserializer.java b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryDeserializer.java index b981e0db2..0b7387e60 100644 --- a/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryDeserializer.java +++ b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryDeserializer.java @@ -32,7 +32,7 @@ public class IntegerSummaryDeserializer implements SummaryDeserializer heapifySummary(final MemorySegment seg) { - return IntegerSummary.fromMemory(seg); + return IntegerSummary.fromMemorySegment(seg); } } diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBImpl.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBImpl.java index 30a18c1e4..a0f5044c3 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBImpl.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBImpl.java @@ -21,21 +21,21 @@ import static java.lang.Math.min; import static org.apache.datasketches.common.Util.exactLog2OfLong; -import static org.apache.datasketches.thetacommon.HashOperations.continueCondition; -import static org.apache.datasketches.thetacommon.HashOperations.convertToHashTable; -import static org.apache.datasketches.thetacommon.HashOperations.count; -import static org.apache.datasketches.thetacommon.HashOperations.hashSearch; +import static org.apache.datasketches.thetacommon2.HashOperations.continueCondition; +import static 
org.apache.datasketches.thetacommon2.HashOperations.convertToHashTable; +import static org.apache.datasketches.thetacommon2.HashOperations.count; +import static org.apache.datasketches.thetacommon2.HashOperations.hashSearch; +import static org.apache.datasketches.thetacommon2.ThetaUtil.computeSeedHash; import java.lang.foreign.MemorySegment; import java.util.Arrays; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SuppressFBWarnings; -import org.apache.datasketches.thetacommon.SetOperationCornerCases; -import org.apache.datasketches.thetacommon.SetOperationCornerCases.AnotbAction; -import org.apache.datasketches.thetacommon.SetOperationCornerCases.CornerCase; -import org.apache.datasketches.thetacommon.ThetaUtil; -import org.apache.datasketches.tuple2.Util; +import org.apache.datasketches.thetacommon2.SetOperationCornerCases; +import org.apache.datasketches.thetacommon2.SetOperationCornerCases.AnotbAction; +import org.apache.datasketches.thetacommon2.SetOperationCornerCases.CornerCase; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * Computes a set difference, A-AND-NOT-B, of two ArrayOfDoublesSketches. 
@@ -60,7 +60,7 @@ public class ArrayOfDoublesAnotBImpl extends ArrayOfDoublesAnotB { ArrayOfDoublesAnotBImpl(final int numValues, final long seed) { numValues_ = numValues; - seedHash_ = Util.computeSeedHash(seed); + seedHash_ = computeSeedHash(seed); } @Override diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersection.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersection.java index b2b26a30f..def239c71 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersection.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersection.java @@ -20,12 +20,13 @@ package org.apache.datasketches.tuple2.arrayofdoubles; import static java.lang.Math.min; +import static org.apache.datasketches.thetacommon2.ThetaUtil.checkSeedHashes; +import static org.apache.datasketches.thetacommon2.ThetaUtil.computeSeedHash; import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; -import org.apache.datasketches.tuple2.Util; /** * Computes the intersection of two or more tuple sketches of type ArrayOfDoubles. @@ -49,7 +50,7 @@ public abstract class ArrayOfDoublesIntersection { * @param seed the hash function update seed. 
*/ ArrayOfDoublesIntersection(final int numValues, final long seed) { - seedHash_ = Util.computeSeedHash(seed); + seedHash_ = computeSeedHash(seed); numValues_ = numValues; hashTables_ = null; empty_ = false; @@ -65,7 +66,7 @@ public abstract class ArrayOfDoublesIntersection { */ public void intersect(final ArrayOfDoublesSketch tupleSketch, final ArrayOfDoublesCombiner combiner) { if (tupleSketch == null) { throw new SketchesArgumentException("Sketch must not be null"); } - Util.checkSeedHashes(seedHash_, tupleSketch.getSeedHash()); + checkSeedHashes(seedHash_, tupleSketch.getSeedHash()); if (tupleSketch.numValues_ != numValues_) { throw new SketchesArgumentException( "Input tupleSketch cannot have different numValues from the internal numValues."); diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java index 7c29d7141..043fefe07 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java @@ -24,8 +24,8 @@ import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.QuickSelect; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.QuickSelect; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * Top level class for hash table based implementations of tuple sketch of type diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java index eaf486a15..43355a634 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java +++ 
b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java @@ -21,7 +21,7 @@ import java.lang.foreign.MemorySegment; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * Builds set operations object for tuple sketches of type ArrayOfDoubles. @@ -84,7 +84,7 @@ public ArrayOfDoublesSetOperationBuilder setSeed(final long seed) { /** * Creates an instance of ArrayOfDoublesUnion based on the current configuration of the builder. - * The new instance is allocated on the heap if the memory is not provided. + * The new instance is allocated on the heap. * @return an instance of ArrayOfDoublesUnion */ public ArrayOfDoublesUnion buildUnion() { @@ -104,7 +104,7 @@ public ArrayOfDoublesUnion buildUnion(final MemorySegment dstSeg) { /** * Creates an instance of ArrayOfDoublesIntersection based on the current configuration of the * builder. - * The new instance is allocated on the heap if the memory is not provided. + * The new instance is allocated on the heap. * The number of nominal entries is not relevant to this, so it is ignored. * @return an instance of ArrayOfDoublesIntersection */ @@ -113,9 +113,8 @@ public ArrayOfDoublesIntersection buildIntersection() { } /** - * Creates an instance of ArrayOfDoublesIntersection based on the current configuration of the - * builder. - * The new instance is allocated on the heap if the MemorySegment is not provided. + * Creates an instance of ArrayOfDoublesIntersection in the given MemorySegment and based on the + * current configuration of the builder. * The number of nominal entries is not relevant to this, so it is ignored. 
* @param dstSeg destination MemorySegment to be used by the sketch * @return an instance of ArrayOfDoublesIntersection @@ -126,7 +125,6 @@ public ArrayOfDoublesIntersection buildIntersection(final MemorySegment dstSeg) /** * Creates an instance of ArrayOfDoublesAnotB based on the current configuration of the builder. - * The memory is not relevant to this, so it is ignored if set. * The number of nominal entries is not relevant to this, so it is ignored. * @return an instance of ArrayOfDoublesAnotB */ diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketch.java index c4163fb1e..49bf36cbb 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketch.java @@ -23,8 +23,8 @@ import java.lang.foreign.MemorySegment; -import org.apache.datasketches.thetacommon.BinomialBoundsN; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.BinomialBoundsN; +import org.apache.datasketches.thetacommon2.ThetaUtil; import org.apache.datasketches.tuple2.SerializerDeserializer; /** diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketches.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketches.java index 36421e14d..a439d4899 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketches.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketches.java @@ -21,7 +21,7 @@ import java.lang.foreign.MemorySegment; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * Convenient static methods to instantiate tuple sketches of type ArrayOfDoubles. 
diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnion.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnion.java index a097ccf47..d00d21f86 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnion.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnion.java @@ -21,15 +21,15 @@ import static java.lang.foreign.ValueLayout.JAVA_BYTE; import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static org.apache.datasketches.thetacommon2.ThetaUtil.checkSeedHashes; import static java.lang.Math.min; import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; import org.apache.datasketches.tuple2.SerializerDeserializer; -import org.apache.datasketches.tuple2.Util; /** * The base class for unions of tuple sketches of type ArrayOfDoubles. 
@@ -108,7 +108,7 @@ public static ArrayOfDoublesUnion wrap(final MemorySegment srcSeg, final long se */ public void union(final ArrayOfDoublesSketch tupleSketch) { if (tupleSketch == null) { return; } - Util.checkSeedHashes(gadget_.getSeedHash(), tupleSketch.getSeedHash()); + checkSeedHashes(gadget_.getSeedHash(), tupleSketch.getSeedHash()); if (gadget_.getNumValues() != tupleSketch.getNumValues()) { throw new SketchesArgumentException("Incompatible sketches: number of values mismatch " + gadget_.getNumValues() + " and " + tupleSketch.getNumValues()); diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java index c61e8944d..1ebece067 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java @@ -19,12 +19,14 @@ package org.apache.datasketches.tuple2.arrayofdoubles; +import static org.apache.datasketches.thetacommon2.ThetaUtil.computeSeedHash; + import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.hash.MurmurHash3; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; import org.apache.datasketches.tuple2.Util; /** @@ -216,7 +218,7 @@ long getSeed() { @Override short getSeedHash() { - return Util.computeSeedHash(seed_); + return computeSeedHash(seed_); } /** diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java index a6fa5e118..9f2b98a42 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java +++ 
b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java @@ -23,7 +23,7 @@ import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * For building a new ArrayOfDoublesUpdatableSketch diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java index 727c9dccf..c6bce07cd 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java @@ -24,6 +24,8 @@ import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; +import static org.apache.datasketches.thetacommon2.ThetaUtil.checkSeedHashes; +import static org.apache.datasketches.thetacommon2.ThetaUtil.computeSeedHash; import java.lang.foreign.MemorySegment; import java.nio.ByteOrder; @@ -31,13 +33,12 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.tuple2.SerializerDeserializer; -import org.apache.datasketches.tuple2.Util; /** * Direct Compact Sketch of type ArrayOfDoubles. * - *

      This implementation uses data in a given Memory that is owned and managed by the caller. - * This Memory can be off-heap, which if managed properly will greatly reduce the need for + *

      This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

      */ final class DirectArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketch { @@ -65,7 +66,7 @@ final class DirectArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketc DirectArrayOfDoublesCompactSketch(final ArrayOfDoublesUpdatableSketch sketch, final long thetaLong, final MemorySegment dstSeg) { super(sketch.getNumValues()); - checkIfEnoughMemory(dstSeg, sketch.getRetainedEntries(), sketch.getNumValues()); + checkMemorySegmentSize(dstSeg, sketch.getRetainedEntries(), sketch.getNumValues()); seg_ = dstSeg; dstSeg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1); dstSeg.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID); @@ -81,7 +82,7 @@ final class DirectArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketc | (count > 0 ? 1 << Flags.HAS_ENTRIES.ordinal() : 0) )); dstSeg.set(JAVA_BYTE, NUM_VALUES_BYTE, (byte) numValues_); - dstSeg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, Util.computeSeedHash(sketch.getSeed())); + dstSeg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, computeSeedHash(sketch.getSeed())); thetaLong_ = Math.min(sketch.getThetaLong(), thetaLong); dstSeg.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); if (count > 0) { @@ -108,7 +109,7 @@ final class DirectArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketc DirectArrayOfDoublesCompactSketch(final long[] keys, final double[] values, final long thetaLong, final boolean isEmpty, final int numValues, final short seedHash, final MemorySegment dstSeg) { super(numValues); - checkIfEnoughMemory(dstSeg, values.length, numValues); + checkMemorySegmentSize(dstSeg, values.length, numValues); seg_ = dstSeg; dstSeg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1); dstSeg.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID); @@ -182,7 +183,7 @@ final class DirectArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketc if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { throw new SketchesArgumentException("Byte order mismatch"); } - 
Util.checkSeedHashes(seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT), Util.computeSeedHash(seed)); + checkSeedHashes(seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT), computeSeedHash(seed)); isEmpty_ = (seg_.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, THETA_LONG); } @@ -237,7 +238,7 @@ public double[][] getValues() { } @Override - //converts compact Memory array of long[] to compact long[] + //converts compact MemorySegment array of long[] to compact long[] long[] getKeys() { final int count = getRetainedEntries(); final long[] keys = new long[count]; @@ -275,12 +276,12 @@ short getSeedHash() { @Override MemorySegment getMemorySegment() { return seg_; } - private static void checkIfEnoughMemory(final MemorySegment seg, final int numEntries, + private static void checkMemorySegmentSize(final MemorySegment seg, final int numEntries, final int numValues) { final int sizeNeeded = ENTRIES_START + ((SIZE_OF_KEY_BYTES + (SIZE_OF_VALUE_BYTES * numValues)) * numEntries); if (sizeNeeded > seg.byteSize()) { - throw new SketchesArgumentException("Not enough memory: need " + sizeNeeded + throw new SketchesArgumentException("Not enough space: need " + sizeNeeded + " bytes, got " + seg.byteSize() + " bytes"); } } diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesIntersection.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesIntersection.java index c5771046e..52e8c24e3 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesIntersection.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesIntersection.java @@ -24,7 +24,7 @@ /** * Direct Intersection operation for tuple sketches of type ArrayOfDoubles. * - *

      This implementation uses data in a given Memory that is owned and managed by the caller. + *

      This implementation uses data in a given MemorySegment that is owned and managed by the caller. * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

      */ diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java index 249723323..d2fbbdfd8 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java @@ -25,6 +25,8 @@ import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; +import static org.apache.datasketches.thetacommon2.ThetaUtil.checkSeedHashes; +import static org.apache.datasketches.thetacommon2.ThetaUtil.computeSeedHash; import static org.apache.datasketches.common.Util.clear; import static org.apache.datasketches.common.Util.clearBits; import static org.apache.datasketches.common.Util.setBits; @@ -36,7 +38,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.HashOperations; +import org.apache.datasketches.thetacommon2.HashOperations; import org.apache.datasketches.tuple2.SerializerDeserializer; import org.apache.datasketches.tuple2.Util; @@ -78,7 +80,7 @@ class DirectArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelectSke final int numValues, final long seed, final MemorySegment dstSeg) { - this(checkMemory(nomEntries, lgResizeFactor, numValues, dstSeg), + this(checkMemorySegment(nomEntries, lgResizeFactor, numValues, dstSeg), //SpotBugs CT_CONSTRUCTOR_THROW is false positive. 
//this construction scheme is compliant with SEI CERT Oracle Coding Standard for Java / OBJ11-J nomEntries, @@ -112,7 +114,7 @@ private DirectArrayOfDoublesQuickSelectSketch( | (1 << Flags.IS_EMPTY.ordinal()) )); seg_.set(JAVA_BYTE, NUM_VALUES_BYTE, (byte) numValues); - seg_.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, Util.computeSeedHash(seed)); + seg_.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, computeSeedHash(seed)); thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability); seg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); seg_.set(JAVA_BYTE, LG_NOM_ENTRIES_BYTE, (byte) Integer.numberOfTrailingZeros(nomEntries)); @@ -127,13 +129,13 @@ private DirectArrayOfDoublesQuickSelectSketch( setRebuildThreshold(); } - private static final boolean checkMemory( + private static final boolean checkMemorySegment( final int nomEntries, final int lgResizeFactor, final int numValues, final MemorySegment dstSeg) { final int startingCapacity = Util.getStartingCapacity(nomEntries, lgResizeFactor); - checkIfEnoughMemory(dstSeg, startingCapacity, numValues); + checkMemorySegmentSize(dstSeg, startingCapacity, numValues); return true; } @@ -161,7 +163,7 @@ private DirectArrayOfDoublesQuickSelectSketch( SerializerDeserializer.validateType(seg_.get(JAVA_BYTE, SKETCH_TYPE_BYTE), SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch); - Util.checkSeedHashes(seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT), Util.computeSeedHash(seed)); + checkSeedHashes(seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT), computeSeedHash(seed)); keysOffset_ = ENTRIES_START; valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * getCurrentCapacity()); // to do: make parent take care of its own parts @@ -186,7 +188,7 @@ private static final boolean checkSerVer_Endianness(final MemorySegment seg) { } @Override - //converts Memory hashTable of double[] to compacted double[][] + //converts MemorySegment hashTable of double[] to compacted double[][] public double[][] getValues() { final int count = 
getRetainedEntries(); final double[][] values = new double[count][]; @@ -377,11 +379,11 @@ protected boolean isInSamplingMode() { return (seg_.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_IN_SAMPLING_MODE.ordinal())) != 0; } - // rebuild in the same memory + // rebuild in the same MemorySegment @Override protected void rebuild(final int newCapacity) { final int numValues = getNumValues(); - checkIfEnoughMemory(seg_, newCapacity, numValues); + checkMemorySegmentSize(seg_, newCapacity, numValues); final int currCapacity = getCurrentCapacity(); final long[] keys = new long[currCapacity]; final double[] values = new double[currCapacity * numValues]; @@ -403,17 +405,17 @@ protected void rebuild(final int newCapacity) { @Override protected int insertKey(final long key) { - return HashOperations.hashInsertOnlyMemory(seg_, lgCurrentCapacity_, key, ENTRIES_START); + return HashOperations.hashInsertOnlyMemorySegment(seg_, lgCurrentCapacity_, key, ENTRIES_START); } @Override protected int findOrInsertKey(final long key) { - return HashOperations.hashSearchOrInsertMemory(seg_, lgCurrentCapacity_, key, ENTRIES_START); + return HashOperations.hashSearchOrInsertMemorySegment(seg_, lgCurrentCapacity_, key, ENTRIES_START); } @Override protected double[] find(final long key) { - final int index = HashOperations.hashSearchMemory(seg_, lgCurrentCapacity_, key, ENTRIES_START); + final int index = HashOperations.hashSearchMemorySegment(seg_, lgCurrentCapacity_, key, ENTRIES_START); if (index == -1) { return null; } final double[] array = new double[numValues_]; MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, valuesOffset_ @@ -421,11 +423,11 @@ protected double[] find(final long key) { return array; } - private static void checkIfEnoughMemory(final MemorySegment seg, final int numEntries, final int numValues) { + private static void checkMemorySegmentSize(final MemorySegment seg, final int numEntries, final int numValues) { final int sizeNeeded = ENTRIES_START + ((SIZE_OF_KEY_BYTES + 
(SIZE_OF_VALUE_BYTES * numValues)) * numEntries); if (sizeNeeded > seg.byteSize()) { - throw new SketchesArgumentException("Not enough memory: need " + throw new SketchesArgumentException("Not enough space: need " + sizeNeeded + " bytes, got " + seg.byteSize() + " bytes"); } } diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java index 63b421f4d..953853b91 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java @@ -27,8 +27,8 @@ /** * Iterator over the off-heap, Direct tuple sketch of type ArrayOfDoubles (compact or hash table). * - *

      This implementation uses data in a given Memory that is owned and managed by the caller. - * This Memory can be off-heap, which if managed properly will greatly reduce the need for + *

      This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

      */ final class DirectArrayOfDoublesSketchIterator implements ArrayOfDoublesSketchIterator { diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnion.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnion.java index e546d4756..954a1d916 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnion.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnion.java @@ -31,8 +31,8 @@ /** * Direct Union operation for tuple sketches of type ArrayOfDoubles. * - *

      This implementation uses data in a given Memory that is owned and managed by the caller. - * This Memory can be off-heap, which if managed properly will greatly reduce the need for + *

      This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

      */ class DirectArrayOfDoublesUnion extends ArrayOfDoublesUnion { diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnionR.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnionR.java index 51568fd87..13d3add4d 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnionR.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnionR.java @@ -26,7 +26,7 @@ final class DirectArrayOfDoublesUnionR extends DirectArrayOfDoublesUnion { /** - * Wraps the given Memory. + * Wraps the given MemorySegment. * @param gadget the ArrayOfDoublesQuickSelectSketch * @param seg the destination MemorySegment */ diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HashTables.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HashTables.java index 4baa685d6..0da06c5fc 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HashTables.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HashTables.java @@ -23,10 +23,10 @@ import static java.lang.Math.max; import static java.lang.Math.min; import static org.apache.datasketches.common.Util.ceilingPowerOf2; -import static org.apache.datasketches.thetacommon.HashOperations.hashInsertOnly; -import static org.apache.datasketches.thetacommon.HashOperations.hashSearch; +import static org.apache.datasketches.thetacommon2.HashOperations.hashInsertOnly; +import static org.apache.datasketches.thetacommon2.HashOperations.hashSearch; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; class HashTables { private long[] hashTable = null; diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java index dc84da82a..0e118d5d5 
100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java @@ -24,6 +24,8 @@ import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; +import static org.apache.datasketches.thetacommon2.ThetaUtil.checkSeedHashes; +import static org.apache.datasketches.thetacommon2.ThetaUtil.computeSeedHash; import java.lang.foreign.MemorySegment; import java.nio.ByteOrder; @@ -31,9 +33,8 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; import org.apache.datasketches.tuple2.SerializerDeserializer; -import org.apache.datasketches.tuple2.Util; /** * The on-heap implementation of tuple Compact Sketch of type ArrayOfDoubles. 
@@ -62,7 +63,7 @@ final class HeapArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketch super(sketch.getNumValues()); isEmpty_ = sketch.isEmpty(); thetaLong_ = Math.min(sketch.getThetaLong(), thetaLong); - seedHash_ = Util.computeSeedHash(sketch.getSeed()); + seedHash_ = computeSeedHash(sketch.getSeed()); final int count = sketch.getRetainedEntries(); if (count > 0) { keys_ = new long[count]; @@ -133,7 +134,7 @@ final class HeapArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketch if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { throw new SketchesArgumentException("Byte order mismatch"); } - Util.checkSeedHashes(seedHash_, Util.computeSeedHash(seed)); + checkSeedHashes(seedHash_, computeSeedHash(seed)); isEmpty_ = (seg.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, THETA_LONG); final boolean hasEntries = diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java index 9e33f4e87..081750b5a 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java @@ -27,6 +27,8 @@ import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; import static org.apache.datasketches.common.Util.ceilingPowerOf2; import static org.apache.datasketches.common.Util.exactLog2OfLong; +import static org.apache.datasketches.thetacommon2.ThetaUtil.checkSeedHashes; +import static org.apache.datasketches.thetacommon2.ThetaUtil.computeSeedHash; import java.lang.foreign.MemorySegment; import java.nio.ByteOrder; @@ -35,7 +37,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import 
org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.HashOperations; +import org.apache.datasketches.thetacommon2.HashOperations; import org.apache.datasketches.tuple2.SerializerDeserializer; import org.apache.datasketches.tuple2.Util; @@ -103,7 +105,7 @@ final class HeapArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelec if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { throw new SketchesArgumentException("Byte order mismatch"); } - Util.checkSeedHashes(seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT), Util.computeSeedHash(seed)); + checkSeedHashes(seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT), computeSeedHash(seed)); isEmpty_ = (flags & (1 << Flags.IS_EMPTY.ordinal())) > 0; lgNomEntries_ = seg.get(JAVA_BYTE, LG_NOM_ENTRIES_BYTE); thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, THETA_LONG); @@ -244,7 +246,7 @@ void serializeInto(final MemorySegment seg) { | (count_ > 0 ? 1 << Flags.HAS_ENTRIES.ordinal() : 0) )); seg.set(JAVA_BYTE, NUM_VALUES_BYTE, (byte) numValues_); - seg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, Util.computeSeedHash(seed_)); + seg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, computeSeedHash(seed_)); seg.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); seg.set(JAVA_BYTE, LG_NOM_ENTRIES_BYTE, (byte) lgNomEntries_); seg.set(JAVA_BYTE, LG_CUR_CAPACITY_BYTE, (byte) Integer.numberOfTrailingZeros(keys_.length)); diff --git a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSketch.java b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSketch.java index 636c90d12..3c8129bc0 100644 --- a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSketch.java +++ b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSketch.java @@ -63,9 +63,9 @@ public ArrayOfStringsSketch(final int lgK, final ResizeFactor rf, final float p) } /** - * Constructs this sketch from a Memory image, which must be from an ArrayOfStringsSketch, 
and + * Constructs this sketch from a MemorySegment image, which must be from an ArrayOfStringsSketch, and * usually with data. - * @param seg the given Memory + * @param seg the given MemorySegment * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated. * This capability will be removed in a future release. * Heapifying a CompactSketch is not deprecated. diff --git a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummary.java b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummary.java index 66eed2a8f..1eed22fe1 100644 --- a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummary.java +++ b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummary.java @@ -48,7 +48,7 @@ public final class ArrayOfStringsSummary implements UpdatableSummary { checkNumNodes(stringArr.length); } - //used by fromMemory and in test + //used by fromMemorySegment and in test /** * This reads a MemorySegment that has a layout similar to the C struct: * {@snippet : @@ -154,7 +154,7 @@ static void checkNumNodes(final int numNodes) { //also used in test static void checkInBytes(final MemorySegment seg, final int totBytes) { if (seg.byteSize() < totBytes) { - throw new SketchesArgumentException("Incoming Memory has insufficient capacity."); + throw new SketchesArgumentException("Incoming MemorySegment has insufficient capacity."); } } diff --git a/src/test/java/org/apache/datasketches/theta2/AnotBimplTest.java b/src/test/java/org/apache/datasketches/theta2/AnotBimplTest.java index 1dba0bb4b..ae1730684 100644 --- a/src/test/java/org/apache/datasketches/theta2/AnotBimplTest.java +++ b/src/test/java/org/apache/datasketches/theta2/AnotBimplTest.java @@ -27,7 +27,7 @@ import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import 
org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; /** diff --git a/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java b/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java index 74aec9bb8..1addaa3b4 100644 --- a/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java +++ b/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java @@ -27,7 +27,7 @@ import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; /** * This class converts current compact sketches into prior SerVer 1 and SerVer 2 format for testing. diff --git a/src/test/java/org/apache/datasketches/theta2/CompactSketchTest.java b/src/test/java/org/apache/datasketches/theta2/CompactSketchTest.java index 91b1a40a2..d528d6f6e 100644 --- a/src/test/java/org/apache/datasketches/theta2/CompactSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta2/CompactSketchTest.java @@ -229,7 +229,7 @@ public void checkCompactCachePart() { private static final boolean COMPACT = true; private static final boolean EMPTY = true; private static final boolean DIRECT = true; - private static final boolean MEMORY = true; + private static final boolean SEGMENT = true; private static final boolean ORDERED = true; private static final boolean ESTIMATION = true; @@ -237,12 +237,12 @@ public void checkCompactCachePart() { /** * Empty, segment-based Compact sketches are always ordered */ - public void checkEmptyMemoryCompactSketch() { + public void checkEmptyMemorySegmentCompactSketch() { UpdateSketch sk = Sketches.updateSketchBuilder().build(); MemorySegment wseg1 = MemorySegment.ofArray(new byte[16]); CompactSketch csk1 = sk.compact(false, wseg1); //the first parameter is ignored when empty - State state1 = new 
State("DirectCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, MEMORY, ORDERED, !ESTIMATION); + State state1 = new State("DirectCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, SEGMENT, ORDERED, !ESTIMATION); state1.check(csk1); MemorySegment wseg2 = MemorySegment.ofArray(new byte[16]); @@ -260,7 +260,7 @@ public void checkEmptyMemoryCompactSketch() { assertFalse(csk1 == csk3); CompactSketch csk4 = csk1.compact(false, null); - State state4 = new State("EmptyCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION); + State state4 = new State("EmptyCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, !SEGMENT, ORDERED, !ESTIMATION); state4.check(csk4); assertNotEquals(csk1, csk4); //different object because on heap @@ -277,13 +277,13 @@ public void checkEmptyMemoryCompactSketch() { /** * Single-Item, segment-based Compact sketches are always ordered: */ - public void checkSingleItemMemoryCompactSketch() { + public void checkSingleItemMemorySegmentCompactSketch() { UpdateSketch sk = Sketches.updateSketchBuilder().build(); sk.update(1); MemorySegment wseg1 = MemorySegment.ofArray(new byte[16]); CompactSketch csk1 = sk.compact(false, wseg1); //the first parameter is ignored when single item - State state1 = new State("DirectCompactSketch", 1, 16, COMPACT, !EMPTY, !DIRECT, MEMORY, ORDERED, !ESTIMATION); + State state1 = new State("DirectCompactSketch", 1, 16, COMPACT, !EMPTY, !DIRECT, SEGMENT, ORDERED, !ESTIMATION); state1.check(csk1); MemorySegment wseg2 = MemorySegment.ofArray(new byte[16]); @@ -308,7 +308,7 @@ public void checkSingleItemMemoryCompactSketch() { } @Test - public void checkMultipleItemMemoryCompactSketch() { + public void checkMultipleItemMemorySegmentCompactSketch() { UpdateSketch sk = Sketches.updateSketchBuilder().build(); //This sequence is naturally out-of-order by the hash values. 
sk.update(1); @@ -317,12 +317,12 @@ public void checkMultipleItemMemoryCompactSketch() { MemorySegment wseg1 = MemorySegment.ofArray(new byte[50]); CompactSketch csk1 = sk.compact(true, wseg1); - State state1 = new State("DirectCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, MEMORY, ORDERED, !ESTIMATION); + State state1 = new State("DirectCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, SEGMENT, ORDERED, !ESTIMATION); state1.check(csk1); MemorySegment wseg2 = MemorySegment.ofArray(new byte[50]); CompactSketch csk2 = sk.compact(false, wseg2); - State state2 = new State("DirectCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, MEMORY, !ORDERED, !ESTIMATION); + State state2 = new State("DirectCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, SEGMENT, !ORDERED, !ESTIMATION); state2.check(csk2); assertNotEquals(csk1, csk2); //different object because segment is valid @@ -351,7 +351,7 @@ public void checkEmptyHeapCompactSketch() { UpdateSketch sk = Sketches.updateSketchBuilder().build(); CompactSketch csk1 = sk.compact(false, null); //the first parameter is ignored when empty - State state1 = new State("EmptyCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION); + State state1 = new State("EmptyCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, !SEGMENT, ORDERED, !ESTIMATION); state1.check(csk1); CompactSketch csk2 = sk.compact(false, null); //the first parameter is ignored when empty @@ -382,7 +382,7 @@ public void checkSingleItemHeapCompactSketch() { sk.update(1); CompactSketch csk1 = sk.compact(false, null); //the first parameter is ignored when single item - State state1 = new State("SingleItemSketch", 1, 16, COMPACT, !EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION); + State state1 = new State("SingleItemSketch", 1, 16, COMPACT, !EMPTY, !DIRECT, !SEGMENT, ORDERED, !ESTIMATION); state1.check(csk1); CompactSketch csk2 = sk.compact(false, null); //the first parameter is ignored when single item @@ -413,11 +413,11 @@ public void 
checkMultipleItemHeapCompactSketch() { sk.update(3); CompactSketch csk1 = sk.compact(true, null); //creates a new object - State state1 = new State("HeapCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION); + State state1 = new State("HeapCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, !SEGMENT, ORDERED, !ESTIMATION); state1.check(csk1); CompactSketch csk2 = sk.compact(false, null); //creates a new object, unordered - State state2 = new State("HeapCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, !MEMORY, !ORDERED, !ESTIMATION); + State state2 = new State("HeapCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, !SEGMENT, !ORDERED, !ESTIMATION); state2.check(csk2); assertNotEquals(csk1, csk2); //order is different and different objects diff --git a/src/test/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketchTest.java index 60d409cde..2ac9885cf 100644 --- a/src/test/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketchTest.java @@ -34,8 +34,8 @@ import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; import org.apache.datasketches.theta2.ConcurrentHeapQuickSelectSketchTest.SharedLocal; -import org.apache.datasketches.thetacommon.HashOperations; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.HashOperations; +import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; /** @@ -54,7 +54,7 @@ public void checkDirectCompactConversion() { } @Test - public void checkHeapifyMemoryEstimating() { + public void checkHeapifyMemorySegmentEstimating() { int lgK = 9; int k = 1 << lgK; int u = 2*k; @@ -74,7 +74,7 @@ public void checkHeapifyMemoryEstimating() { UpdateSketch sharedHeap = 
Sketches.heapifyUpdateSketch(sl.wseg); assertEquals(sharedHeap.getClass().getSimpleName(), "HeapQuickSelectSketch"); - checkMemoryDirectProxyMethods(local, shared); + checkMemorySegmentDirectProxyMethods(local, shared); checkOtherProxyMethods(local, shared); checkOtherProxyMethods(local, sharedHeap); @@ -161,7 +161,7 @@ public void checkHeapifyByteArrayEstimating() { } @Test - public void checkWrapMemoryEst() { + public void checkWrapMemorySegmentEst() { int lgK = 9; int k = 1 << lgK; int u = 2*k; @@ -422,7 +422,7 @@ public void checkResetAndStartingSubMultiple() { } @Test - public void checkExactModeMemoryArr() { + public void checkExactModeMemorySegmentArr() { int lgK = 12; int k = 1 << lgK; int u = k; @@ -440,7 +440,7 @@ public void checkExactModeMemoryArr() { } @Test - public void checkEstModeMemoryArr() { + public void checkEstModeMemorySegmentArr() { int lgK = 12; int k = 1 << lgK; boolean useSeg = true; @@ -459,7 +459,7 @@ public void checkEstModeMemoryArr() { } @Test - public void checkEstModeNativeMemory() { + public void checkEstModeNativeMemorySegment() { int lgK = 12; int k = 1 << lgK; boolean useSeg = true; @@ -477,7 +477,7 @@ public void checkEstModeNativeMemory() { } @Test - public void checkConstructReconstructFromMemory() { + public void checkConstructReconstructFromMemorySegment() { int lgK = 12; int k = 1 << lgK; boolean useSeg = true; @@ -514,7 +514,7 @@ public void checkConstructReconstructFromMemory() { } @Test - public void checkNullMemory() { + public void checkNullMemorySegment() { UpdateSketchBuilder bldr = new UpdateSketchBuilder(); final UpdateSketch sk = bldr.build(); for (int i = 0; i < 1000; i++) { sk.update(i); } @@ -697,7 +697,7 @@ static void println(String s) { //System.out.println(s); //disable here } - private static void checkMemoryDirectProxyMethods(Sketch local, Sketch shared) { + private static void checkMemorySegmentDirectProxyMethods(Sketch local, Sketch shared) { assertEquals( local.hasMemorySegment(), 
shared.hasMemorySegment()); diff --git a/src/test/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketchTest.java index 4685639ec..76734dfce 100644 --- a/src/test/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketchTest.java @@ -34,7 +34,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; /** @@ -217,7 +217,7 @@ public void checkHeapifyByteArrayEstimating() { } @Test - public void checkHeapifyMemoryEstimating() { + public void checkHeapifyMemorySegmentEstimating() { int lgK = 9; int k = 1 << lgK; int u = 2*k; //thus estimating diff --git a/src/test/java/org/apache/datasketches/theta2/DirectIntersectionTest.java b/src/test/java/org/apache/datasketches/theta2/DirectIntersectionTest.java index 20eb9a382..0418d2ddd 100644 --- a/src/test/java/org/apache/datasketches/theta2/DirectIntersectionTest.java +++ b/src/test/java/org/apache/datasketches/theta2/DirectIntersectionTest.java @@ -36,7 +36,7 @@ import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesReadOnlyException; import org.apache.datasketches.common.SketchesStateException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; /** diff --git a/src/test/java/org/apache/datasketches/theta2/DirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta2/DirectQuickSelectSketchTest.java index 1e61249f1..1eced4555 100644 --- a/src/test/java/org/apache/datasketches/theta2/DirectQuickSelectSketchTest.java 
+++ b/src/test/java/org/apache/datasketches/theta2/DirectQuickSelectSketchTest.java @@ -50,8 +50,8 @@ import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesReadOnlyException; -import org.apache.datasketches.thetacommon.HashOperations; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.HashOperations; +import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; /** @@ -286,7 +286,7 @@ public void checkHeapifyByteArrayEstimating() { } @Test - public void checkWrapMemoryEst() { + public void checkWrapMemorySegmentEst() { int k = 512; int u = 2*k; //thus estimating try (Arena arena = Arena.ofConfined()) { @@ -632,7 +632,7 @@ public void checkResetAndStartingSubMultiple() { } @Test - public void checkExactModeMemoryArr() { + public void checkExactModeMemorySegmentArr() { int k = 4096; int u = 4096; try (Arena arena = Arena.ofConfined()) { @@ -652,7 +652,7 @@ public void checkExactModeMemoryArr() { } @Test - public void checkEstModeMemoryArr() { + public void checkEstModeMemorySegmentArr() { int k = 4096; int u = 2*k; @@ -673,7 +673,7 @@ public void checkEstModeMemoryArr() { } @Test - public void checkEstModeNativeMemory() { + public void checkEstModeNativeMemorySegment() { int k = 4096; int u = 2*k; int segCapacity = (k << 4) + (Family.QUICKSELECT.getMinPreLongs() << 3); @@ -696,7 +696,7 @@ public void checkEstModeNativeMemory() { } @Test - public void checkConstructReconstructFromMemory() { + public void checkConstructReconstructFromMemorySegment() { int k = 4096; int u = 2*k; try (Arena arena = Arena.ofConfined()) { diff --git a/src/test/java/org/apache/datasketches/theta2/DirectUnionTest.java b/src/test/java/org/apache/datasketches/theta2/DirectUnionTest.java index 2d911497a..7194f70a7 100644 --- a/src/test/java/org/apache/datasketches/theta2/DirectUnionTest.java +++ 
b/src/test/java/org/apache/datasketches/theta2/DirectUnionTest.java @@ -36,7 +36,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; /** diff --git a/src/test/java/org/apache/datasketches/theta2/ForwardCompatibilityTest.java b/src/test/java/org/apache/datasketches/theta2/ForwardCompatibilityTest.java index 546d0524f..09d7cc88d 100644 --- a/src/test/java/org/apache/datasketches/theta2/ForwardCompatibilityTest.java +++ b/src/test/java/org/apache/datasketches/theta2/ForwardCompatibilityTest.java @@ -29,7 +29,7 @@ import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; /** diff --git a/src/test/java/org/apache/datasketches/theta2/HeapAlphaSketchTest.java b/src/test/java/org/apache/datasketches/theta2/HeapAlphaSketchTest.java index a0d6cadae..fbc80b091 100644 --- a/src/test/java/org/apache/datasketches/theta2/HeapAlphaSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta2/HeapAlphaSketchTest.java @@ -44,7 +44,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; /** @@ -185,7 +185,7 @@ public void checkHeapifyByteArrayEstimating() { } @Test - public void checkHeapifyMemoryEstimating() { + public void checkHeapifyMemorySegmentEstimating() { int k = 512; int u = 2*k; //thus estimating long seed = ThetaUtil.DEFAULT_UPDATE_SEED; diff --git 
a/src/test/java/org/apache/datasketches/theta2/HeapIntersectionTest.java b/src/test/java/org/apache/datasketches/theta2/HeapIntersectionTest.java index 7ba1fecf2..4dcbe934c 100644 --- a/src/test/java/org/apache/datasketches/theta2/HeapIntersectionTest.java +++ b/src/test/java/org/apache/datasketches/theta2/HeapIntersectionTest.java @@ -33,7 +33,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; /** diff --git a/src/test/java/org/apache/datasketches/theta2/HeapQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta2/HeapQuickSelectSketchTest.java index 0584e34b3..bbce7862e 100644 --- a/src/test/java/org/apache/datasketches/theta2/HeapQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta2/HeapQuickSelectSketchTest.java @@ -45,7 +45,7 @@ import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; /** @@ -179,7 +179,7 @@ public void checkHeapifyByteArrayEstimating() { } @Test - public void checkHeapifyMemoryEstimating() { + public void checkHeapifyMemorySegmentEstimating() { int k = 512; int u = 2*k; //thus estimating long seed = ThetaUtil.DEFAULT_UPDATE_SEED; diff --git a/src/test/java/org/apache/datasketches/theta2/HeapUnionTest.java b/src/test/java/org/apache/datasketches/theta2/HeapUnionTest.java index dceea2a2b..db8388a24 100644 --- a/src/test/java/org/apache/datasketches/theta2/HeapUnionTest.java +++ b/src/test/java/org/apache/datasketches/theta2/HeapUnionTest.java @@ -33,7 +33,7 @@ import 
org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; /** diff --git a/src/test/java/org/apache/datasketches/theta2/HeapifyWrapSerVer1and2Test.java b/src/test/java/org/apache/datasketches/theta2/HeapifyWrapSerVer1and2Test.java index 2e9519562..b946fb775 100644 --- a/src/test/java/org/apache/datasketches/theta2/HeapifyWrapSerVer1and2Test.java +++ b/src/test/java/org/apache/datasketches/theta2/HeapifyWrapSerVer1and2Test.java @@ -26,19 +26,18 @@ import java.lang.foreign.Arena; import java.lang.foreign.MemorySegment; -import org.apache.datasketches.thetacommon.ThetaUtil; -import org.apache.datasketches.tuple.Util; +import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; @SuppressWarnings("resource") public class HeapifyWrapSerVer1and2Test { - private static final short defaultSeedHash = Util.computeSeedHash(ThetaUtil.DEFAULT_UPDATE_SEED); + private static final short defaultSeedHash = ThetaUtil.computeSeedHash(ThetaUtil.DEFAULT_UPDATE_SEED); @Test public void checkHeapifyCompactSketchAssumedDefaultSeed() { final int k = 64; final long seed = ThetaUtil.DEFAULT_UPDATE_SEED; - final short seedHash = Util.computeSeedHash(seed); + final short seedHash = ThetaUtil.computeSeedHash(seed); UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).setSeed(seed).build(); for (int i = 0; i < k; i++) { usk.update(i); } @@ -68,7 +67,7 @@ public void checkHeapifyCompactSketchAssumedDefaultSeed() { public void checkHeapifyCompactSketchAssumedDifferentSeed() { final int k = 64; final long seed = 128L; - final short seedHash = Util.computeSeedHash(seed); + final short seedHash = ThetaUtil.computeSeedHash(seed); UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).setSeed(seed).build(); for (int i = 0; i < k; i++) { usk.update(i); } @@ -98,7 
+97,7 @@ public void checkHeapifyCompactSketchAssumedDifferentSeed() { public void checkHeapifyCompactSketchGivenDefaultSeed() { final int k = 64; final long seed = ThetaUtil.DEFAULT_UPDATE_SEED; - final short seedHash = Util.computeSeedHash(seed); + final short seedHash = ThetaUtil.computeSeedHash(seed); UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).setSeed(seed).build(); for (int i = 0; i < k; i++) { usk.update(i); } @@ -128,7 +127,7 @@ public void checkHeapifyCompactSketchGivenDefaultSeed() { public void checkHeapifyCompactSketchGivenDifferentSeed() { final int k = 64; final long seed = 128L; - final short seedHash = Util.computeSeedHash(seed); + final short seedHash = ThetaUtil.computeSeedHash(seed); UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).setSeed(seed).build(); for (int i = 0; i < k; i++) { usk.update(i); } @@ -158,7 +157,7 @@ public void checkHeapifyCompactSketchGivenDifferentSeed() { public void checkHeapifySketchAssumedDefaultSeed() { final int k = 64; final long seed = ThetaUtil.DEFAULT_UPDATE_SEED; - final short seedHash = Util.computeSeedHash(seed); + final short seedHash = ThetaUtil.computeSeedHash(seed); UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).setSeed(seed).build(); for (int i = 0; i < k; i++) { usk.update(i); } @@ -188,7 +187,7 @@ public void checkHeapifySketchAssumedDefaultSeed() { public void checkHeapifySketchAssumedDifferentSeed() { final int k = 64; final long seed = 128L; - final short seedHash = Util.computeSeedHash(seed); + final short seedHash = ThetaUtil.computeSeedHash(seed); UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).setSeed(seed).build(); for (int i = 0; i < k; i++) { usk.update(i); } @@ -218,7 +217,7 @@ public void checkHeapifySketchAssumedDifferentSeed() { public void checkHeapifySketchGivenDefaultSeed() { final int k = 64; final long seed = ThetaUtil.DEFAULT_UPDATE_SEED; - final short seedHash = Util.computeSeedHash(seed); + final short seedHash = 
ThetaUtil.computeSeedHash(seed); UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).setSeed(seed).build(); for (int i = 0; i < k; i++) { usk.update(i); } @@ -248,7 +247,7 @@ public void checkHeapifySketchGivenDefaultSeed() { public void checkHeapifySketchGivenDifferentSeed() { final int k = 64; final long seed = 128L; - final short seedHash = Util.computeSeedHash(seed); + final short seedHash = ThetaUtil.computeSeedHash(seed); UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).setSeed(seed).build(); for (int i = 0; i < k; i++) { usk.update(i); } @@ -278,7 +277,7 @@ public void checkHeapifySketchGivenDifferentSeed() { public void checkWrapCompactSketchAssumedDefaultSeed() { final int k = 64; final long seed = ThetaUtil.DEFAULT_UPDATE_SEED; - final short seedHash = Util.computeSeedHash(seed); + final short seedHash = ThetaUtil.computeSeedHash(seed); UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).setSeed(seed).build(); for (int i = 0; i < k; i++) { usk.update(i); } CompactSketch cskResult; @@ -317,7 +316,7 @@ public void checkWrapCompactSketchAssumedDefaultSeed() { public void checkWrapCompactSketchAssumedDifferentSeed() { final int k = 64; final long seed = 128L; - final short seedHash = Util.computeSeedHash(seed); + final short seedHash = ThetaUtil.computeSeedHash(seed); UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).setSeed(seed).build(); for (int i = 0; i < k; i++) { usk.update(i); } CompactSketch cskResult; @@ -356,7 +355,7 @@ public void checkWrapCompactSketchAssumedDifferentSeed() { public void checkWrapCompactSketchGivenDefaultSeed() { final int k = 64; final long seed = ThetaUtil.DEFAULT_UPDATE_SEED; - final short seedHash = Util.computeSeedHash(seed); + final short seedHash = ThetaUtil.computeSeedHash(seed); UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).setSeed(seed).build(); for (int i = 0; i < k; i++) { usk.update(i); } CompactSketch cskResult; @@ -395,7 +394,7 @@ public void 
checkWrapCompactSketchGivenDefaultSeed() { public void checkWrapCompactSketchGivenDifferentSeed() { final int k = 64; final long seed = 128L; - final short seedHash = Util.computeSeedHash(seed); + final short seedHash = ThetaUtil.computeSeedHash(seed); UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).setSeed(seed).build(); for (int i = 0; i < k; i++) { usk.update(i); } CompactSketch cskResult; @@ -434,7 +433,7 @@ public void checkWrapCompactSketchGivenDifferentSeed() { public void checkWrapSketchAssumedDefaultSeed() { final int k = 64; final long seed = ThetaUtil.DEFAULT_UPDATE_SEED; - final short seedHash = Util.computeSeedHash(seed); + final short seedHash = ThetaUtil.computeSeedHash(seed); UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).setSeed(seed).build(); for (int i = 0; i < k; i++) { usk.update(i); } CompactSketch cskResult; @@ -473,7 +472,7 @@ public void checkWrapSketchAssumedDefaultSeed() { public void checkWrapSketchAssumedDifferentSeed() { final int k = 64; final long seed = 128L; - final short seedHash = Util.computeSeedHash(seed); + final short seedHash = ThetaUtil.computeSeedHash(seed); UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).setSeed(seed).build(); for (int i = 0; i < k; i++) { usk.update(i); } CompactSketch cskResult; @@ -512,7 +511,7 @@ public void checkWrapSketchAssumedDifferentSeed() { public void checkWrapSketchGivenDefaultSeed() { final int k = 64; final long seed = ThetaUtil.DEFAULT_UPDATE_SEED; - final short seedHash = Util.computeSeedHash(seed); + final short seedHash = ThetaUtil.computeSeedHash(seed); UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).setSeed(seed).build(); for (int i = 0; i < k; i++) { usk.update(i); } CompactSketch cskResult; @@ -551,7 +550,7 @@ public void checkWrapSketchGivenDefaultSeed() { public void checkWrapSketchGivenDifferentSeed() { final int k = 64; final long seed = 128L; - final short seedHash = Util.computeSeedHash(seed); + final short seedHash 
= ThetaUtil.computeSeedHash(seed); UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).setSeed(seed).build(); for (int i = 0; i < k; i++) { usk.update(i); } CompactSketch cskResult; diff --git a/src/test/java/org/apache/datasketches/theta2/PreambleUtilTest.java b/src/test/java/org/apache/datasketches/theta2/PreambleUtilTest.java index e5db72838..f62cb2dc7 100644 --- a/src/test/java/org/apache/datasketches/theta2/PreambleUtilTest.java +++ b/src/test/java/org/apache/datasketches/theta2/PreambleUtilTest.java @@ -58,7 +58,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.Assert; import org.testng.annotations.Test; diff --git a/src/test/java/org/apache/datasketches/theta2/ReadOnlyMemoryTest.java b/src/test/java/org/apache/datasketches/theta2/ReadOnlyMemorySegmentTest.java similarity index 99% rename from src/test/java/org/apache/datasketches/theta2/ReadOnlyMemoryTest.java rename to src/test/java/org/apache/datasketches/theta2/ReadOnlyMemorySegmentTest.java index ab0ed1495..969643618 100644 --- a/src/test/java/org/apache/datasketches/theta2/ReadOnlyMemoryTest.java +++ b/src/test/java/org/apache/datasketches/theta2/ReadOnlyMemorySegmentTest.java @@ -31,7 +31,7 @@ import org.testng.Assert; import org.testng.annotations.Test; -public class ReadOnlyMemoryTest { +public class ReadOnlyMemorySegmentTest { @Test public void wrapAndTryUpdatingUpdateSketch() { diff --git a/src/test/java/org/apache/datasketches/theta2/SetOperationTest.java b/src/test/java/org/apache/datasketches/theta2/SetOperationTest.java index f7011c538..0b1657679 100644 --- a/src/test/java/org/apache/datasketches/theta2/SetOperationTest.java +++ b/src/test/java/org/apache/datasketches/theta2/SetOperationTest.java @@ -21,7 +21,7 @@ import static org.apache.datasketches.common.ResizeFactor.X4; import static 
org.apache.datasketches.theta2.Sketch.getMaxUpdateSketchBytes; -import static org.apache.datasketches.thetacommon.HashOperations.minLgHashTableSize; +import static org.apache.datasketches.thetacommon2.HashOperations.minLgHashTableSize; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; @@ -34,7 +34,7 @@ import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; /** @@ -232,7 +232,7 @@ public void checkComputeLgArrLongs() { */ @Test public void checkDirectUnionExample() { - //The first task is to compute how much direct memory we need and set the heap large enough. + //The first task is to compute how much off-heap space we need and set the heap large enough. //For the first trial, we will set the Union large enough for an exact result for THIS example. final int sketchNomEntries = 1 << 14; //16K int unionNomEntries = 1 << 15; //32K @@ -245,11 +245,11 @@ public void checkDirectUnionExample() { final byte[] backingArr = new byte[heapLayout[5]]; final ByteBuffer heapBuf = ByteBuffer.wrap(backingArr).order(ByteOrder.nativeOrder()); - // Attaches a MemorySegment object to the underlying memory of heapBuf. + // Attaches a MemorySegment object to the underlying heap space of heapBuf. // heapSeg will have a Read/Write view of the complete backing segment of heapBuf (direct or not). // Any R/W action from heapSeg will be visible via heapBuf and visa versa. // - // However, if you had created this WM object directly in raw, off-heap "native" memory + // However, if you had created this WM object off-heap // you would have the responsibility to close it when you are done. // But, since it was allocated via BB, it closes it for you. 
final MemorySegment heapSeg = MemorySegment.ofBuffer(heapBuf); diff --git a/src/test/java/org/apache/datasketches/theta2/SetOpsCornerCasesTest.java b/src/test/java/org/apache/datasketches/theta2/SetOpsCornerCasesTest.java index 6848c224e..12d3591aa 100644 --- a/src/test/java/org/apache/datasketches/theta2/SetOpsCornerCasesTest.java +++ b/src/test/java/org/apache/datasketches/theta2/SetOpsCornerCasesTest.java @@ -21,7 +21,7 @@ import static org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.EMPTY; import static org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.EST_HEAP; -import static org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.EST_MEMORY_UNORDERED; +import static org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.EST_SEGMENT_UNORDERED; import static org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.EXACT; import static org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.NULL; import static org.testng.Assert.assertEquals; @@ -87,20 +87,20 @@ public void compareCornerCases() { int k = 64; for (State stateA : State.values()) { for (State stateB : State.values()) { - if ((stateA == EST_MEMORY_UNORDERED) || (stateB == EST_MEMORY_UNORDERED)) { continue; } + if ((stateA == EST_SEGMENT_UNORDERED) || (stateB == EST_SEGMENT_UNORDERED)) { continue; } if ((stateA == NULL) || (stateB == NULL)) { continue; } cornerCaseChecks(stateA, stateB, k); - cornerCaseChecksMemory(stateA, stateB, k); + cornerCaseChecksMemorySegment(stateA, stateB, k); } } } // @Test // public void checkExactNullSpecificCase() { -// cornerCaseChecksMemory(State.EXACT, State.NULL, 64); +// cornerCaseChecksMemorySegment(State.EXACT, State.NULL, 64); // } - private static void cornerCaseChecksMemory(State stateA, State stateB, int k) { + private static void cornerCaseChecksMemorySegment(State stateA, State stateB, int k) { println("StateA: " + stateA + ", StateB: " + stateB); CompactSketch tcskA = generate(stateA, k); CompactSketch tcskB = generate(stateB, 
k); @@ -245,7 +245,7 @@ public void checkUnionNotOrdered() { CompactSketch skNull = generate(NULL, k); CompactSketch skEmpty = generate(EMPTY, k); CompactSketch skHeap = generate(EST_HEAP, k); - CompactSketch skHeapUO = generate(EST_MEMORY_UNORDERED, k); + CompactSketch skHeapUO = generate(EST_SEGMENT_UNORDERED, k); Union union = SetOperation.builder().setNominalEntries(k).buildUnion(); union.union(skNull, skHeapUO); union.union(skEmpty, skHeapUO); @@ -423,7 +423,7 @@ public void checkGenerator() { assertEquals(csk.hasMemorySegment(), false); assertEquals(csk.isOrdered(), true); - csk = generate(State.EST_MEMORY_UNORDERED, k); + csk = generate(State.EST_SEGMENT_UNORDERED, k); assertEquals(csk.isEmpty(), false); assertEquals(csk.isEstimationMode(), true); assertEquals(csk.getRetainedEntries(true) > k, true); @@ -433,7 +433,7 @@ public void checkGenerator() { assertEquals(csk.isOrdered(), false); } - enum State {NULL, EMPTY, SINGLE, EXACT, EST_HEAP, THLT1_CNT0_FALSE, THEQ1_CNT0_TRUE, EST_MEMORY_UNORDERED} + enum State {NULL, EMPTY, SINGLE, EXACT, EST_HEAP, THLT1_CNT0_FALSE, THEQ1_CNT0_TRUE, EST_SEGMENT_UNORDERED} private static CompactSketch generate(State state, int k) { UpdateSketch sk = null; @@ -483,7 +483,7 @@ private static CompactSketch generate(State state, int k) { csk = sk.compact(true, null); //compact as {Th < 1.0, 0, T} break; } - case EST_MEMORY_UNORDERED : { + case EST_SEGMENT_UNORDERED : { sk = Sketches.updateSketchBuilder().setNominalEntries(k).build(); for (int i = 0; i < (4 * k); i++) { sk.update(i); diff --git a/src/test/java/org/apache/datasketches/theta2/SingleItemSketchTest.java b/src/test/java/org/apache/datasketches/theta2/SingleItemSketchTest.java index c62cd8e16..bce80098d 100644 --- a/src/test/java/org/apache/datasketches/theta2/SingleItemSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta2/SingleItemSketchTest.java @@ -30,7 +30,7 @@ import java.lang.foreign.MemorySegment; import 
org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; /** diff --git a/src/test/java/org/apache/datasketches/theta2/SketchTest.java b/src/test/java/org/apache/datasketches/theta2/SketchTest.java index 686ad7ff9..fca348c45 100644 --- a/src/test/java/org/apache/datasketches/theta2/SketchTest.java +++ b/src/test/java/org/apache/datasketches/theta2/SketchTest.java @@ -45,8 +45,7 @@ import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -//import org.apache.datasketches.theta2.Skectches; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; /** @@ -332,7 +331,7 @@ public void checkIsSameResource() { DirectCompactSketch dcos = (DirectCompactSketch) sketch.compact(true, cseg); assertTrue(isSameResource(dcos.getMemorySegment(), cseg)); assertTrue(dcos.isOrdered()); - //never create 2 sketches with the same memory, so don't do as I do :) + //never create 2 sketches with the same MemorySegment, so don't do as I do :) DirectCompactSketch dcs = (DirectCompactSketch) sketch.compact(false, cseg); assertTrue(isSameResource(dcs.getMemorySegment(), cseg)); assertFalse(dcs.isOrdered()); @@ -353,7 +352,7 @@ public void checkCountLessThanTheta() { assertEquals(count, k); } - private static MemorySegment createCompactSketchMemory(int k, int u) { + private static MemorySegment createCompactSketchMemorySegment(int k, int u) { UpdateSketch usk = Sketches.updateSketchBuilder().setNominalEntries(k).build(); for (int i = 0; i < u; i++) { usk.update(i); } int bytes = Sketch.getMaxCompactSketchBytes(usk.getRetainedEntries(true)); @@ -364,7 +363,7 @@ private static MemorySegment createCompactSketchMemory(int k, int u) { @Test public void 
checkCompactFlagsOnWrap() { - MemorySegment wseg = createCompactSketchMemory(16, 32); + MemorySegment wseg = createCompactSketchMemorySegment(16, 32); Sketch sk = Sketch.wrap(wseg); assertTrue(sk instanceof CompactSketch); int flags = PreambleUtil.extractFlags(wseg); @@ -392,7 +391,7 @@ public void checkCompactFlagsOnWrap() { @Test public void checkCompactSizeAndFlagsOnHeapify() { - MemorySegment wseg = createCompactSketchMemory(16, 32); + MemorySegment wseg = createCompactSketchMemorySegment(16, 32); Sketch sk = Sketch.heapify(wseg); assertTrue(sk instanceof CompactSketch); int flags = PreambleUtil.extractFlags(wseg); diff --git a/src/test/java/org/apache/datasketches/theta2/SketchesTest.java b/src/test/java/org/apache/datasketches/theta2/SketchesTest.java index 277aae961..074a19459 100644 --- a/src/test/java/org/apache/datasketches/theta2/SketchesTest.java +++ b/src/test/java/org/apache/datasketches/theta2/SketchesTest.java @@ -40,7 +40,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; /** @@ -48,7 +48,7 @@ */ public class SketchesTest { - private static MemorySegment getCompactSketchMemory(final int k, final int from, final int to) { + private static MemorySegment getCompactSketchMemorySegment(final int k, final int from, final int to) { final UpdateSketch sk1 = updateSketchBuilder().setNominalEntries(k).build(); for (int i=from; i= min_p) { + lb = BinomialBoundsN.getLowerBound(numSamplesI, p, ci, false); + ub = BinomialBoundsN.getUpperBound(numSamplesI, p, ci, false); + + // if (numSamplesI == 300 && p > 0.365 && p < 0.367) { ub += 0.01; } // artificial discrepancy + + // the logarithm helps discrepancies to not be swamped out of the total + sum1 += Math.log(lb + 1.0); + sum2 += Math.log(ub + 1.0); + count += 2; + + if (p < 1.0) { + lb = 
BinomialBoundsN.getLowerBound(numSamplesI, 1.0 - p, ci, false); + ub = BinomialBoundsN.getUpperBound(numSamplesI, 1.0 - p, ci, false); + sum3 += Math.log(lb + 1.0); + sum4 += Math.log(ub + 1.0); + count += 2; + } + + p *= 0.99; + } + numSamplesI = Math.max(numSamplesI + 1, (1001 * numSamplesI) / 1000); + } + + println(String.format("{%.15e, %.15e, %.15e, %.15e, %d}", sum1, sum2, sum3, sum4, count)); + final double[] arrOut = {sum1, sum2, sum3, sum4, count}; + return arrOut; + } + + private static final double TOL = 1E-15; + + @Test + public static void checkBounds() { + int i = 0; + for (int ci = 1; ci <= 3; ci++, i++) { + final double[] arr = runTestAux(20, ci, 1e-3); + for (int j = 0; j < 5; j++) { + assertTrue(((arr[j] / std[i][j]) - 1.0) < TOL); + } + } + for (int ci = 1; ci <= 3; ci++, i++) { + final double[] arr = runTestAux(200, ci, 1e-5); + for (int j = 0; j < 5; j++) { + assertTrue(((arr[j] / std[i][j]) - 1.0) < TOL); + } + } + //comment last one out for a shorter test + // for (int ci = 1; ci <= 3; ci++, i++) { + // final double[] arr = runTestAux(2000, ci, 1e-7); + // for (int j = 0; j < 5; j++) { + // assertTrue(((arr[j] / std[i][j]) - 1.0) < TOL); + // } + //} + } + + // With all 3 enabled the test should produce in groups of 3 */ + private static final double[][] std = { + {7.083330682531043e+04, 8.530373642825481e+04, 3.273647725073409e+04, 3.734024243699785e+04, 57750}, + {6.539415269641498e+04, 8.945522372568645e+04, 3.222302546497840e+04, 3.904738469737429e+04, 57750}, + {6.006043493107306e+04, 9.318105731423477e+04, 3.186269956585285e+04, 4.096466221922520e+04, 57750}, + + {2.275584770163813e+06, 2.347586549014998e+06, 1.020399409477305e+06, 1.036729927598294e+06, 920982}, + {2.243569126699713e+06, 2.374663344107342e+06, 1.017017233582122e+06, 1.042597845553438e+06, 920982}, + {2.210056231903739e+06, 2.400441267999687e+06, 1.014081235946986e+06, 1.049480769755676e+06, 920982}, + + {4.688240115809608e+07, 4.718067204619278e+07, 
2.148362024482338e+07, 2.153118905212302e+07, 12834414}, + {4.674205938540214e+07, 4.731333757486791e+07, 2.146902141966406e+07, 2.154916650733873e+07, 12834414}, + {4.659896614422579e+07, 4.744404182094614e+07, 2.145525391547799e+07, 2.156815612325058e+07, 12834414} + }; + + @Test + public static void checkCheckArgs() { + try { + checkArgs(-1L, 1.0, 1); + checkArgs(10L, 0.0, 1); + checkArgs(10L, 1.01, 1); + checkArgs(10L, 1.0, 3); + checkArgs(10L, 1.0, 0); + checkArgs(10L, 1.0, 4); + fail("Expected SketchesArgumentException"); + } catch (final SketchesArgumentException e) { + //pass + } + } + + @Test + public static void checkComputeApproxBino_LB_UB() { + final long n = 100; + final double theta = (2.0 - 1e-5) / 2.0; + double result = getLowerBound(n, theta, 1, false); + assertEquals(result, n, 0.0); + result = getUpperBound(n, theta, 1, false); + assertEquals(result, n + 1, 0.0); + result = getLowerBound(n, theta, 1, true); + assertEquals(result, 0.0, 0.0); + result = getUpperBound(n, theta, 1, true); + assertEquals(result, 0.0, 0.0); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public static void checkThetaLimits1() { + BinomialBoundsN.getUpperBound(100, 1.1, 1, false); + } + + @Test + public static void boundsExample() { + println("BinomialBoundsN Example:"); + final int k = 500; + final double theta = 0.001; + final int stdDev = 2; + final double ub = BinomialBoundsN.getUpperBound(k, theta, stdDev, false); + final double est = k / theta; + final double lb = BinomialBoundsN.getLowerBound(k, theta, stdDev, false); + println("K=" + k + ", Theta=" + theta + ", SD=" + stdDev); + println("UB: " + ub); + println("Est: " + est); + println("LB: " + lb); + println(""); + } + + @Test + public void printlnTest() { + println("PRINTING: " + this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(final String s) { + //System.out.println(s); //disable here + } + +} diff --git 
a/src/test/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2Test.java b/src/test/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInThetaSketchedSetsTest.java similarity index 76% rename from src/test/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2Test.java rename to src/test/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInThetaSketchedSetsTest.java index 88dd009c0..f7ee77b32 100644 --- a/src/test/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2Test.java +++ b/src/test/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInThetaSketchedSetsTest.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.datasketches.thetacommon; +package org.apache.datasketches.thetacommon2; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; @@ -29,7 +29,7 @@ import org.apache.datasketches.theta2.UpdateSketch; import org.testng.annotations.Test; -public class BoundsOnRatiosInThetaSketchedSets2Test { +public class BoundsOnRatiosInThetaSketchedSetsTest { @Test public void checkNormalReturns() { @@ -44,9 +44,9 @@ public void checkNormalReturns() { inter.intersect(skC); final CompactSketch skB = inter.getResult(); - double est = BoundsOnRatiosInThetaSketchedSets2.getEstimateOfBoverA(skA, skB); - double lb = BoundsOnRatiosInThetaSketchedSets2.getLowerBoundForBoverA(skA, skB); - double ub = BoundsOnRatiosInThetaSketchedSets2.getUpperBoundForBoverA(skA, skB); + double est = BoundsOnRatiosInThetaSketchedSets.getEstimateOfBoverA(skA, skB); + double lb = BoundsOnRatiosInThetaSketchedSets.getLowerBoundForBoverA(skA, skB); + double ub = BoundsOnRatiosInThetaSketchedSets.getUpperBoundForBoverA(skA, skB); assertTrue(ub > est); assertTrue(est > lb); assertEquals(est, 0.5, .03); @@ -54,16 +54,16 @@ public void checkNormalReturns() { println("est: " + est); println("lb : " + lb); skA.reset(); //skA is now empty - est = 
BoundsOnRatiosInThetaSketchedSets2.getEstimateOfBoverA(skA, skB); - lb = BoundsOnRatiosInThetaSketchedSets2.getLowerBoundForBoverA(skA, skB); - ub = BoundsOnRatiosInThetaSketchedSets2.getUpperBoundForBoverA(skA, skB); + est = BoundsOnRatiosInThetaSketchedSets.getEstimateOfBoverA(skA, skB); + lb = BoundsOnRatiosInThetaSketchedSets.getLowerBoundForBoverA(skA, skB); + ub = BoundsOnRatiosInThetaSketchedSets.getUpperBoundForBoverA(skA, skB); println("ub : " + ub); println("est: " + est); println("lb : " + lb); skC.reset(); //Now both are empty - est = BoundsOnRatiosInThetaSketchedSets2.getEstimateOfBoverA(skA, skC); - lb = BoundsOnRatiosInThetaSketchedSets2.getLowerBoundForBoverA(skA, skC); - ub = BoundsOnRatiosInThetaSketchedSets2.getUpperBoundForBoverA(skA, skC); + est = BoundsOnRatiosInThetaSketchedSets.getEstimateOfBoverA(skA, skC); + lb = BoundsOnRatiosInThetaSketchedSets.getLowerBoundForBoverA(skA, skC); + ub = BoundsOnRatiosInThetaSketchedSets.getUpperBoundForBoverA(skA, skC); println("ub : " + ub); println("est: " + est); println("lb : " + lb); @@ -77,7 +77,7 @@ public void checkAbnormalReturns() { final int uC = 10000; for (int i = 0; i < uA; i++) { skA.update(i); } for (int i = 0; i < uC; i++) { skC.update(i + (uA / 2)); } - BoundsOnRatiosInThetaSketchedSets2.getEstimateOfBoverA(skA, skC); + BoundsOnRatiosInThetaSketchedSets.getEstimateOfBoverA(skA, skC); } @Test diff --git a/src/test/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInTupleSketchedSetsTest.java b/src/test/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInTupleSketchedSetsTest.java new file mode 100644 index 000000000..e4bd42478 --- /dev/null +++ b/src/test/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInTupleSketchedSetsTest.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.thetacommon2; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.theta2.UpdateSketch; +import org.apache.datasketches.theta2.UpdateSketchBuilder; +import org.apache.datasketches.tuple2.Sketch; +import org.apache.datasketches.tuple2.UpdatableSketch; +import org.apache.datasketches.tuple2.UpdatableSketchBuilder; +import org.apache.datasketches.tuple2.adouble.DoubleSummary; +import org.apache.datasketches.tuple2.adouble.DoubleSummaryFactory; +import org.apache.datasketches.tuple2.adouble.DoubleSummarySetOperations; +import org.apache.datasketches.tuple2.Intersection; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +/** + * @author Lee Rhodes + * @author David Cromberge + */ +public class BoundsOnRatiosInTupleSketchedSetsTest { + + private final DoubleSummary.Mode umode = DoubleSummary.Mode.Sum; + private final DoubleSummarySetOperations dsso = new DoubleSummarySetOperations(); + private final DoubleSummaryFactory factory = new DoubleSummaryFactory(umode); + private final UpdateSketchBuilder thetaBldr = UpdateSketch.builder(); + private final UpdatableSketchBuilder tupleBldr = new UpdatableSketchBuilder<>(factory); + private final Double constSummary = 1.0; + + @Test + public void checkNormalReturns1() { // tuple, 
tuple + final UpdatableSketch skA = tupleBldr.build(); //4K + final UpdatableSketch skC = tupleBldr.build(); + final int uA = 10000; + final int uC = 100000; + for (int i = 0; i < uA; i++) { skA.update(i, constSummary); } + for (int i = 0; i < uC; i++) { skC.update(i + (uA / 2), constSummary); } + final Intersection inter = new Intersection<>(dsso); + inter.intersect(skA); + inter.intersect(skC); + final Sketch skB = inter.getResult(); + + double est = BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(skA, skB); + double lb = BoundsOnRatiosInTupleSketchedSets.getLowerBoundForBoverA(skA, skB); + double ub = BoundsOnRatiosInTupleSketchedSets.getUpperBoundForBoverA(skA, skB); + assertTrue(ub > est); + assertTrue(est > lb); + assertEquals(est, 0.5, .03); + println("ub : " + ub); + println("est: " + est); + println("lb : " + lb); + skA.reset(); //skA is now empty + est = BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(skA, skB); + lb = BoundsOnRatiosInTupleSketchedSets.getLowerBoundForBoverA(skA, skB); + ub = BoundsOnRatiosInTupleSketchedSets.getUpperBoundForBoverA(skA, skB); + println("ub : " + ub); + println("est: " + est); + println("lb : " + lb); + skC.reset(); //Now both are empty + est = BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(skA, skC); + lb = BoundsOnRatiosInTupleSketchedSets.getLowerBoundForBoverA(skA, skC); + ub = BoundsOnRatiosInTupleSketchedSets.getUpperBoundForBoverA(skA, skC); + println("ub : " + ub); + println("est: " + est); + println("lb : " + lb); + } + + @Test + public void checkNormalReturns2() { // tuple, theta + final UpdatableSketch skA = tupleBldr.build(); //4K + final UpdateSketch skC = thetaBldr.build(); + final int uA = 10000; + final int uC = 100000; + for (int i = 0; i < uA; i++) { skA.update(i, constSummary); } + for (int i = 0; i < uC; i++) { skC.update(i + (uA / 2)); } + final Intersection inter = new Intersection<>(dsso); + inter.intersect(skA); + inter.intersect(skC, factory.newSummary()); + final Sketch skB = 
inter.getResult(); + + double est = BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(skA, skB); + double lb = BoundsOnRatiosInTupleSketchedSets.getLowerBoundForBoverA(skA, skB); + double ub = BoundsOnRatiosInTupleSketchedSets.getUpperBoundForBoverA(skA, skB); + assertTrue(ub > est); + assertTrue(est > lb); + assertEquals(est, 0.5, .03); + println("ub : " + ub); + println("est: " + est); + println("lb : " + lb); + skA.reset(); //skA is now empty + est = BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(skA, skB); + lb = BoundsOnRatiosInTupleSketchedSets.getLowerBoundForBoverA(skA, skB); + ub = BoundsOnRatiosInTupleSketchedSets.getUpperBoundForBoverA(skA, skB); + println("ub : " + ub); + println("est: " + est); + println("lb : " + lb); + skC.reset(); //Now both are empty + est = BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(skA, skC); + lb = BoundsOnRatiosInTupleSketchedSets.getLowerBoundForBoverA(skA, skC); + ub = BoundsOnRatiosInTupleSketchedSets.getUpperBoundForBoverA(skA, skC); + println("ub : " + ub); + println("est: " + est); + println("lb : " + lb); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkAbnormalReturns1() { // tuple, tuple + final UpdatableSketch skA = tupleBldr.build(); //4K + final UpdatableSketch skC = tupleBldr.build(); + final int uA = 100000; + final int uC = 10000; + for (int i = 0; i < uA; i++) { skA.update(i, constSummary); } + for (int i = 0; i < uC; i++) { skC.update(i + (uA / 2), constSummary); } + BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(skA, skC); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkAbnormalReturns2() { // tuple, theta + final UpdatableSketch skA = tupleBldr.build(); //4K + final UpdateSketch skC = thetaBldr.build(); + final int uA = 100000; + final int uC = 10000; + for (int i = 0; i < uA; i++) { skA.update(i, constSummary); } + for (int i = 0; i < uC; i++) { skC.update(i + (uA / 2)); } + 
BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(skA, skC); + } + + @Test + public void printlnTest() { + println("PRINTING: " + this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(final String s) { + //System.out.println(s); //disable here + } +} diff --git a/src/test/java/org/apache/datasketches/thetacommon2/HashOperationsTest.java b/src/test/java/org/apache/datasketches/thetacommon2/HashOperationsTest.java new file mode 100644 index 000000000..1ff10ef8c --- /dev/null +++ b/src/test/java/org/apache/datasketches/thetacommon2/HashOperationsTest.java @@ -0,0 +1,218 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.thetacommon2; + +import static org.apache.datasketches.hash.MurmurHash3.hash; +import static org.apache.datasketches.thetacommon2.HashOperations.checkHashCorruption; +import static org.apache.datasketches.thetacommon2.HashOperations.checkThetaCorruption; +import static org.apache.datasketches.thetacommon2.HashOperations.continueCondition; +import static org.apache.datasketches.thetacommon2.HashOperations.hashArrayInsert; +import static org.apache.datasketches.thetacommon2.HashOperations.hashInsertOnly; +import static org.apache.datasketches.thetacommon2.HashOperations.hashInsertOnlyMemorySegment; +import static org.apache.datasketches.thetacommon2.HashOperations.hashSearch; +import static org.apache.datasketches.thetacommon2.HashOperations.hashSearchMemorySegment; +import static org.apache.datasketches.thetacommon2.HashOperations.hashSearchOrInsert; +import static org.apache.datasketches.thetacommon2.HashOperations.hashSearchOrInsertMemorySegment; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.SketchesStateException; +import org.testng.annotations.Test; + +public class HashOperationsTest { + + //Not otherwise already covered + + @Test(expectedExceptions = SketchesStateException.class) + public void testThetaCorruption1() { + checkThetaCorruption(0); + } + + @Test(expectedExceptions = SketchesStateException.class) + public void testThetaCorruption2() { + checkThetaCorruption(-1); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void testHashCorruption() { + checkHashCorruption(-1); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkHashSearch() { + hashSearch(new long[4], 2, 0); + } + + 
@Test + public void checkHashArrayInsert() { + final long[] hTable = new long[16]; + final long[] hashIn = new long[1]; + for (int i = 0; i < 8; i++) { + hashIn[0] = i; + final long h = hash(hashIn, 0)[0] >>> 1; + hashInsertOnly(hTable, 4, h); + final int count = hashArrayInsert(hTable, hTable, 4, Long.MAX_VALUE); + assertEquals(count, 0); + } + + } + + @Test + public void testContinueCondtion() { + final long thetaLong = Long.MAX_VALUE / 2; + assertTrue(continueCondition(thetaLong, 0)); + assertTrue(continueCondition(thetaLong, thetaLong)); + assertTrue(continueCondition(thetaLong, thetaLong + 1)); + assertFalse(continueCondition(thetaLong, thetaLong - 1)); + } + + @Test + public void testHashInsertOnlyNoStride() { + final long[] table = new long[32]; + final int index = hashInsertOnly(table, 5, 1); + assertEquals(index, 1); + assertEquals(table[1], 1L); + } + + @Test + public void testHashInsertOnlyWithStride() { + final long[] table = new long[32]; + table[1] = 1; + final int index = hashInsertOnly(table, 5, 1); + assertEquals(index, 2); + assertEquals(table[2], 1L); + } + + @Test + public void testHashInsertOnlyMemorySegmentNoStride() { + final long[] table = new long[32]; + final MemorySegment seg = MemorySegment.ofArray(table); + final int index = hashInsertOnlyMemorySegment(seg, 5, 1, 0); + assertEquals(index, 1); + assertEquals(table[1], 1L); + } + + @Test + public void testHashInsertOnlyMemorySegmentWithStride() { + final long[] table = new long[32]; + table[1] = 1; + final MemorySegment seg = MemorySegment.ofArray(table); + final int index = hashInsertOnlyMemorySegment(seg, 5, 1, 0); + assertEquals(index, 2); + assertEquals(table[2], 1L); + } + + @Test + public void checkFullHeapTableCatchesInfiniteLoop() { + final long[] table = new long[32]; + for (int i = 1; i <= 32; ++i) { + hashInsertOnly(table, 5, i); + } + + // table full; search returns not found, others throw exception + final int retVal = hashSearch(table, 5, 33); + assertEquals(retVal, -1); + + 
try { + hashInsertOnly(table, 5, 33); + fail(); + } catch (final SketchesArgumentException e) { + // expected + } + + try { + hashSearchOrInsert(table, 5, 33); + fail(); + } catch (final SketchesArgumentException e) { + // expected + } + } + + @Test + public void checkFullDirectTableCatchesInfiniteLoop() { + final long[] table = new long[32]; + final MemorySegment seg = MemorySegment.ofArray(table); + for (int i = 1; i <= 32; ++i) { + hashInsertOnlyMemorySegment(seg, 5, i, 0); + } + + // table full; search returns not found, others throw exception + final int retVal = hashSearchMemorySegment(seg, 5, 33, 0); + assertEquals(retVal, -1); + + try { + hashInsertOnlyMemorySegment(seg, 5, 33, 0); + fail(); + } catch (final SketchesArgumentException e) { + // expected + } + + try { + hashSearchOrInsertMemorySegment(seg, 5, 33, 0); + fail(); + } catch (final SketchesArgumentException e) { + // expected + } + } + + @Test + public void checkFullFastDirectTableCatchesInfiniteLoop() { + final long[] table = new long[32]; + final MemorySegment wseg = MemorySegment.ofArray(table); + + for (int i = 1; i <= 32; ++i) { + hashInsertOnlyMemorySegment(wseg, 5, i, 0); + } + + // table full; throws exception + try { + hashInsertOnlyMemorySegment(wseg, 5, 33, 0); + fail(); + } catch (final SketchesArgumentException e) { + // expected + } + + try { + hashSearchOrInsertMemorySegment(wseg, 5, 33, 0); + fail(); + } catch (final SketchesArgumentException e) { + // expected + } + } + + + @Test + public void printlnTest() { + println("PRINTING: " + this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(final String s) { + //System.out.println(s); //disable here + } + +} diff --git a/src/test/java/org/apache/datasketches/thetacommon2/QuickSelectTest.java b/src/test/java/org/apache/datasketches/thetacommon2/QuickSelectTest.java new file mode 100644 index 000000000..e4ca129b1 --- /dev/null +++ 
b/src/test/java/org/apache/datasketches/thetacommon2/QuickSelectTest.java @@ -0,0 +1,446 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.thetacommon2; + +import static java.lang.String.format; +import static org.apache.datasketches.thetacommon.QuickSelect.select; +import static org.apache.datasketches.thetacommon.QuickSelect.selectExcludingZeros; +import static org.apache.datasketches.thetacommon.QuickSelect.selectIncludingZeros; + +import java.util.Random; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.testng.Assert; +import org.testng.annotations.Test; + +/** + * @author Lee Rhodes + */ +public class QuickSelectTest { + private static final String LS = System.getProperty("line.separator"); + private static final Random random = new Random(); // pseudo-random number generator + + //long[] arrays + + @Test + public void checkQuickSelect0Based() { + final int len = 64; + final long[] arr = new long[len]; + for (int i = 0; i < len; i++ ) { + arr[i] = i; + } + for (int pivot = 0; pivot < 64; pivot++ ) { + final long trueVal = pivot; + for (int i = 0; i < 1000; i++ ) { + shuffle(arr); + final long retVal = select(arr, 0, len - 1, pivot); + 
Assert.assertEquals(retVal, trueVal); + } + } + } + + @Test + public void checkQuickSelect1BasedExcludingZeros() { + final int len = 64; + final int nonZeros = (7 * len) / 8; + final long[] arr = new long[len]; + for (int i = 0; i < nonZeros; i++ ) { + arr[i] = i + 1; + } + final int pivot = len / 2; + final long trueVal = arr[pivot - 1]; + shuffle(arr); + final long retVal = selectExcludingZeros(arr, nonZeros, pivot); + Assert.assertEquals(retVal, trueVal); + } + + @Test + public void checkQuickSelect1BasedExcludingZeros2() { + final int len = 64; + final int nonZeros = 16; + final long[] arr = new long[len]; + for (int i = 0; i < nonZeros; i++ ) { + arr[i] = i + 1; + } + shuffle(arr); + final int pivot = len / 2; + final long retVal = selectExcludingZeros(arr, nonZeros, pivot); + Assert.assertEquals(retVal, 0); + } + + @Test + public void checkQuickSelect1BasedIncludingZeros() { + final int len = 64; + final int zeros = len / 8; + final long[] arr = new long[len]; + for (int i = zeros; i < len; i++ ) { + arr[i] = i + 1; + } + final int pivot = len / 2; + final long trueVal = arr[pivot - 1]; + shuffle(arr); + final long retVal = selectIncludingZeros(arr, pivot); + Assert.assertEquals(retVal, trueVal); + } + + //double[] arrays + + @Test + public void checkQuickSelectDbl0Based() { + final int len = 64; + final double[] arr = new double[len]; + for (int i = 0; i < len; i++ ) { + arr[i] = i; + } + for (int pivot = 0; pivot < 64; pivot++ ) { + final double trueVal = pivot; + for (int i = 0; i < 1000; i++ ) { + shuffle(arr); + final double retVal = select(arr, 0, len - 1, pivot); + Assert.assertEquals(retVal, trueVal, 0.0); + } + } + } + + @Test + public void checkQuickSelectDbl1BasedExcludingZeros() { + final int len = 64; + final int nonZeros = (7 * len) / 8; + final double[] arr = new double[len]; + for (int i = 0; i < nonZeros; i++ ) { + arr[i] = i + 1; + } + final int pivot = len / 2; + final double trueVal = arr[pivot - 1]; + shuffle(arr); + final double retVal = 
selectExcludingZeros(arr, nonZeros, pivot); + Assert.assertEquals(retVal, trueVal, 0.0); + } + + @Test + public void checkQuickSelectDbl1BasedExcludingZeros2() { + final int len = 64; + final int nonZeros = 16; + final double[] arr = new double[len]; + for (int i = 0; i < nonZeros; i++ ) { + arr[i] = i + 1; + } + shuffle(arr); + final int pivot = len / 2; + final double retVal = selectExcludingZeros(arr, nonZeros, pivot); + Assert.assertEquals(retVal, 0, 0.0); + } + + @Test + public void checkQuickSelectDbl1BasedIncludingZeros() { + final int len = 64; + final int zeros = len / 8; + final double[] arr = new double[len]; + for (int i = zeros; i < len; i++ ) { + arr[i] = i + 1; + } + final int pivot = len / 2; + final double trueVal = arr[pivot - 1]; + shuffle(arr); + final double retVal = selectIncludingZeros(arr, pivot); + Assert.assertEquals(retVal, trueVal, 0.0); + } + + + /** + * Rearrange the elements of an array in random order. + * @param a long array + */ + public static void shuffle(final long[] a) { + final int N = a.length; + for (int i = 0; i < N; i++ ) { + final int r = i + uniform(N - i); // between i and N-1 + final long temp = a[i]; + a[i] = a[r]; + a[r] = temp; + } + } + + /** + * Rearrange the elements of an array in random order. 
+ * @param a double array + */ + public static void shuffle(final double[] a) { + final int N = a.length; + for (int i = 0; i < N; i++ ) { + final int r = i + uniform(N - i); // between i and N-1 + final double temp = a[i]; + a[i] = a[r]; + a[r] = temp; + } + } + + + /** + * Returns an integer uniformly between 0 (inclusive) and n (exclusive) where {@code n > 0} + * + * @param n the upper exclusive bound + * @return random integer + */ + public static int uniform(final int n) { + if (n <= 0) { + throw new SketchesArgumentException("n must be positive"); + } + return random.nextInt(n); + } + + private static String printArr(final long[] arr) { + final StringBuilder sb = new StringBuilder(); + final int len = arr.length; + sb.append(" Base0").append(" Base1").append(" Value").append(LS); + for (int i = 0; i < len; i++ ) { + sb + .append(format("%6d", i)).append(format("%6d", i + 1)).append(format("%6d", arr[i])) + .append(LS); + } + return sb.toString(); + } + + private static String printArr(final double[] arr) { + final StringBuilder sb = new StringBuilder(); + final int len = arr.length; + sb.append(" Base0").append(" Base1").append(" Value").append(LS); + for (int i = 0; i < len; i++ ) { + sb + .append(format("%6d", i)).append(format("%6d", i + 1)).append(format("%9.3f", arr[i])) + .append(LS); + } + return sb.toString(); + } + + //For console testing + static void test1() { + final int len = 16; + final int nonZeros = (3 * len) / 4; + final int zeros = len - nonZeros; + final long[] arr = new long[len]; + for (int i = 0; i < nonZeros; i++ ) { + arr[i] = i + 1; + } + println("Generated Numbers:"); + println(printArr(arr)); + shuffle(arr); + println("Randomized Ordering:"); + println(printArr(arr)); + final int pivot = len / 2; + println("select(...):"); + println("ArrSize : " + len); + println("NonZeros: " + nonZeros); + println("Zeros : " + zeros); + println("Choose pivot at 1/2 array size, pivot: " + pivot); + final long ret = select(arr, 0, len - 1, pivot); + 
println("Return value of 0-based pivot including zeros:"); + println("select(arr, 0, " + (len - 1) + ", " + pivot + ") => " + ret); + println("0-based index of pivot = pivot = " + (pivot)); + println("Result Array:" + LS); + println(printArr(arr)); + } + + //For console testing + static void test2() { + final int len = 16; + final int nonZeros = (3 * len) / 4; + final int zeros = len - nonZeros; + final long[] arr = new long[len]; + for (int i = 0; i < nonZeros; i++ ) { + arr[i] = i + 1; + } + println("Generated Numbers:"); + println(printArr(arr)); + shuffle(arr); + println("Randomized Ordering:"); + println(printArr(arr)); + final int pivot = len / 2; //= 8 + println("selectDiscountingZeros(...):"); + println("ArrSize : " + len); + println("NonZeros: " + nonZeros); + println("Zeros : " + zeros); + println("Choose pivot at 1/2 array size, pivot= " + pivot); + final long ret = selectExcludingZeros(arr, nonZeros, pivot); + println("Return value of 1-based pivot discounting zeros:"); + println("selectDiscountingZeros(arr, " + nonZeros + ", " + pivot + ") => " + ret); + println("0-based index of pivot= pivot+zeros-1 = " + ((pivot + zeros) - 1)); + println("Result Array:" + LS); + println(printArr(arr)); + } + + //For console testing + static void test3() { + final int len = 16; + final int nonZeros = (3 * len) / 4; + final int zeros = len - nonZeros; + final long[] arr = new long[len]; + for (int i = 0; i < nonZeros; i++ ) { + arr[i] = i + 1; + } + println("Generated Numbers:"); + println(printArr(arr)); + shuffle(arr); + println("Randomized Ordering:"); + println(printArr(arr)); + final int pivot = len / 2; //= 8 + println("selectIncludingZeros(...):"); + println("ArrSize : " + len); + println("NonZeros: " + nonZeros); + println("Zeros : " + zeros); + println("Choose pivot at 1/2 array size, pivot= " + pivot); + final long ret = selectIncludingZeros(arr, pivot); + println("Return value of 1-based pivot including zeros:"); + println("selectIncludingZeros(arr, " + 
pivot + ") => " + ret); + println("0-based index of pivot= pivot-1 = " + (pivot - 1)); + println("Result Array:" + LS); + println(printArr(arr)); + } + + static void testDbl1() { + final int len = 16; + final int nonZeros = (3 * len) / 4; + final int zeros = len - nonZeros; + final double[] arr = new double[len]; + for (int i = 0; i < nonZeros; i++ ) { + arr[i] = i + 1; + } + println("Generated Numbers:"); + println(printArr(arr)); + shuffle(arr); + println("Randomized Ordering:"); + println(printArr(arr)); + final int pivot = len / 2; + println("select(...):"); + println("ArrSize : " + len); + println("NonZeros: " + nonZeros); + println("Zeros : " + zeros); + println("Choose pivot at 1/2 array size, pivot: " + pivot); + final double ret = select(arr, 0, len - 1, pivot); + println("Return value of 0-based pivot including zeros:"); + println("select(arr, 0, " + (len - 1) + ", " + pivot + ") => " + ret); + println("0-based index of pivot = pivot = " + (pivot)); + println("Result Array:" + LS); + println(printArr(arr)); + } + + //For console testing + static void testDbl2() { + final int len = 16; + final int nonZeros = (3 * len) / 4; + final int zeros = len - nonZeros; + final double[] arr = new double[len]; + for (int i = 0; i < nonZeros; i++ ) { + arr[i] = i + 1; + } + println("Generated Numbers:"); + println(printArr(arr)); + shuffle(arr); + println("Randomized Ordering:"); + println(printArr(arr)); + final int pivot = len / 2; //= 8 + println("selectDiscountingZeros(...):"); + println("ArrSize : " + len); + println("NonZeros: " + nonZeros); + println("Zeros : " + zeros); + println("Choose pivot at 1/2 array size, pivot= " + pivot); + final double ret = selectExcludingZeros(arr, nonZeros, pivot); + println("Return value of 1-based pivot discounting zeros:"); + println("selectDiscountingZeros(arr, " + nonZeros + ", " + pivot + ") => " + ret); + println("0-based index of pivot= pivot+zeros-1 = " + ((pivot + zeros) - 1)); + println("Result Array:" + LS); + 
println(printArr(arr)); + } + + //For console testing + static void testDbl3() { + final int len = 16; + final int nonZeros = (3 * len) / 4; + final int zeros = len - nonZeros; + final double[] arr = new double[len]; + for (int i = 0; i < nonZeros; i++ ) { + arr[i] = i + 1; + } + println("Generated Numbers:"); + println(printArr(arr)); + shuffle(arr); + println("Randomized Ordering:"); + println(printArr(arr)); + final int pivot = len / 2; //= 8 + println("selectIncludingZeros(...):"); + println("ArrSize : " + len); + println("NonZeros: " + nonZeros); + println("Zeros : " + zeros); + println("Choose pivot at 1/2 array size, pivot= " + pivot); + final double ret = selectIncludingZeros(arr, pivot); + println("Return value of 1-based pivot including zeros:"); + println("selectIncludingZeros(arr, " + pivot + ") => " + ret); + println("0-based index of pivot= pivot-1 = " + (pivot - 1)); + println("Result Array:" + LS); + println(printArr(arr)); + } + + // public static void main(String[] args) { + // println(LS+"==LONGS 1=========="+LS); + // test1(); + // println(LS+"==LONGS 2=========="+LS); + // test2(); + // println(LS+"==LONGS 3=========="+LS); + // test3(); + // println(LS+"==DOUBLES 1========"+LS); + // testDbl1(); + // println(LS+"==DOUBLES 2========"+LS); + // testDbl2(); + // println(LS+"==DOUBLES 3========"+LS); + // testDbl3(); + // + // + // QuickSelectTest qst = new QuickSelectTest(); + // qst.checkQuickSelect0Based(); + // qst.checkQuickSelect1BasedExcludingZeros(); + // qst.checkQuickSelect1BasedExcludingZeros2(); + // qst.checkQuickSelect1BasedIncludingZeros(); + // qst.checkQuickSelectDbl0Based(); + // qst.checkQuickSelectDbl1BasedExcludingZeros(); + // qst.checkQuickSelectDbl1BasedExcludingZeros2(); + // qst.checkQuickSelectDbl1BasedIncludingZeros(); + // + // } + + @Test + public void printlnTest() { + println("PRINTING: " + this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(final String s) { + 
//System.out.println(s); //disable here + } + + /** + * @param d value to print + */ + static void println(final double d) { + //System.out.println(d); //disable here + } + +} diff --git a/src/test/java/org/apache/datasketches/thetacommon2/ThetaUtilTest.java b/src/test/java/org/apache/datasketches/thetacommon2/ThetaUtilTest.java new file mode 100644 index 000000000..0943e29d7 --- /dev/null +++ b/src/test/java/org/apache/datasketches/thetacommon2/ThetaUtilTest.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.thetacommon2; + +import org.apache.datasketches.quantilescommon.QuantilesUtil; +import org.testng.Assert; +import org.testng.annotations.Test; + +/** + * @author Lee Rhodes + */ +public class ThetaUtilTest { + + @Test + public void checkStartingSubMultiple() { + Assert.assertEquals(ThetaUtil.startingSubMultiple(8, 3, 4), 5); + Assert.assertEquals(ThetaUtil.startingSubMultiple(7, 3, 4), 4); + Assert.assertEquals(ThetaUtil.startingSubMultiple(6, 3, 4), 6); + } + + @Test(expectedExceptions = NullPointerException.class) + public void checkValidateValuesNullException() { + QuantilesUtil.checkDoublesSplitPointsOrder(null); + } + +} + diff --git a/src/test/java/org/apache/datasketches/tuple2/IntegerSummary.java b/src/test/java/org/apache/datasketches/tuple2/IntegerSummary.java index 6c4fa49ec..7fd85554a 100644 --- a/src/test/java/org/apache/datasketches/tuple2/IntegerSummary.java +++ b/src/test/java/org/apache/datasketches/tuple2/IntegerSummary.java @@ -74,7 +74,7 @@ public byte[] toByteArray() { * @return DeserializedResult object, which contains a IntegerSummary object and number of bytes * read from the MemorySegment */ - public static DeserializeResult fromMemory(final MemorySegment seg) { + public static DeserializeResult fromMemorySegment(final MemorySegment seg) { return new DeserializeResult<>(new IntegerSummary(seg.get(JAVA_INT_UNALIGNED, VALUE_INDEX)), SERIALIZED_SIZE_BYTES); } diff --git a/src/test/java/org/apache/datasketches/tuple2/IntegerSummaryDeserializer.java b/src/test/java/org/apache/datasketches/tuple2/IntegerSummaryDeserializer.java index 877fd1d65..885bbf0cf 100644 --- a/src/test/java/org/apache/datasketches/tuple2/IntegerSummaryDeserializer.java +++ b/src/test/java/org/apache/datasketches/tuple2/IntegerSummaryDeserializer.java @@ -25,7 +25,7 @@ public class IntegerSummaryDeserializer implements SummaryDeserializer heapifySummary(final MemorySegment seg) { - return IntegerSummary.fromMemory(seg); + return 
IntegerSummary.fromMemorySegment(seg); } } diff --git a/src/test/java/org/apache/datasketches/tuple2/MiscTest.java b/src/test/java/org/apache/datasketches/tuple2/MiscTest.java index fb353d6a9..6c56c852e 100644 --- a/src/test/java/org/apache/datasketches/tuple2/MiscTest.java +++ b/src/test/java/org/apache/datasketches/tuple2/MiscTest.java @@ -22,7 +22,7 @@ import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; -import org.apache.datasketches.thetacommon.SetOperationCornerCases.CornerCase; +import org.apache.datasketches.thetacommon2.SetOperationCornerCases.CornerCase; import org.apache.datasketches.tuple2.adouble.DoubleSummary; import org.apache.datasketches.tuple2.adouble.DoubleSummary.Mode; import org.apache.datasketches.tuple2.adouble.DoubleSummaryFactory; diff --git a/src/test/java/org/apache/datasketches/tuple2/ReadOnlyMemoryTest.java b/src/test/java/org/apache/datasketches/tuple2/ReadOnlyMemorySegmentTest.java similarity index 82% rename from src/test/java/org/apache/datasketches/tuple2/ReadOnlyMemoryTest.java rename to src/test/java/org/apache/datasketches/tuple2/ReadOnlyMemorySegmentTest.java index eb45779c6..e2eaa3900 100644 --- a/src/test/java/org/apache/datasketches/tuple2/ReadOnlyMemoryTest.java +++ b/src/test/java/org/apache/datasketches/tuple2/ReadOnlyMemorySegmentTest.java @@ -19,25 +19,26 @@ package org.apache.datasketches.tuple2; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.SketchesReadOnlyException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSetOperationBuilder; -import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch; -import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches; -import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUnion; -import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch; -import 
org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder; +import org.apache.datasketches.tuple2.arrayofdoubles.ArrayOfDoublesSetOperationBuilder; +import org.apache.datasketches.tuple2.arrayofdoubles.ArrayOfDoublesSketch; +import org.apache.datasketches.tuple2.arrayofdoubles.ArrayOfDoublesSketches; +import org.apache.datasketches.tuple2.arrayofdoubles.ArrayOfDoublesUnion; +import org.apache.datasketches.tuple2.arrayofdoubles.ArrayOfDoublesUpdatableSketch; +import org.apache.datasketches.tuple2.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder; import org.testng.Assert; import org.testng.annotations.Test; -public class ReadOnlyMemoryTest { +public class ReadOnlyMemorySegmentTest { @Test public void wrapAndTryUpdatingSketch() { final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); sketch1.update(1, new double[] {1}); final ArrayOfDoublesUpdatableSketch sketch2 = (ArrayOfDoublesUpdatableSketch) - ArrayOfDoublesSketches.wrapSketch(Memory.wrap(sketch1.toByteArray())); + ArrayOfDoublesSketches.wrapSketch(MemorySegment.ofArray(sketch1.toByteArray())); Assert.assertEquals(sketch2.getEstimate(), 1.0); sketch2.toByteArray(); boolean thrown = false; @@ -60,7 +61,7 @@ public void heapifyAndUpdateSketch() { sketch1.update(1, new double[] {1}); // downcasting is not recommended, for testing only final ArrayOfDoublesUpdatableSketch sketch2 = (ArrayOfDoublesUpdatableSketch) - ArrayOfDoublesSketches.heapifySketch(Memory.wrap(sketch1.toByteArray())); + ArrayOfDoublesSketches.heapifySketch(MemorySegment.ofArray(sketch1.toByteArray())); sketch2.update(2, new double[] {1}); Assert.assertEquals(sketch2.getEstimate(), 2.0); } @@ -75,7 +76,7 @@ public void wrapAndTryUpdatingUnionEstimationMode() { } final ArrayOfDoublesUnion union1 = new ArrayOfDoublesSetOperationBuilder().buildUnion(); union1.union(sketch1); - final ArrayOfDoublesUnion union2 = ArrayOfDoublesSketches.wrapUnion(Memory.wrap(union1.toByteArray())); + 
final ArrayOfDoublesUnion union2 = ArrayOfDoublesSketches.wrapUnion(MemorySegment.ofArray(union1.toByteArray()).asReadOnly()); final ArrayOfDoublesSketch resultSketch = union2.getResult(); Assert.assertTrue(resultSketch.isEstimationMode()); Assert.assertEquals(resultSketch.getEstimate(), numUniques, numUniques * 0.04); @@ -105,7 +106,7 @@ public void heapifyAndUpdateUnion() { } final ArrayOfDoublesUnion union1 = new ArrayOfDoublesSetOperationBuilder().buildUnion(); union1.union(sketch1); - final ArrayOfDoublesUnion union2 = ArrayOfDoublesSketches.heapifyUnion(Memory.wrap(union1.toByteArray())); + final ArrayOfDoublesUnion union2 = ArrayOfDoublesSketches.heapifyUnion(MemorySegment.ofArray(union1.toByteArray())); final ArrayOfDoublesSketch resultSketch = union2.getResult(); Assert.assertTrue(resultSketch.isEstimationMode()); Assert.assertEquals(resultSketch.getEstimate(), numUniques, numUniques * 0.04); diff --git a/src/test/java/org/apache/datasketches/tuple2/aninteger/CornerCaseTupleSetOperationsTest.java b/src/test/java/org/apache/datasketches/tuple2/aninteger/CornerCaseTupleSetOperationsTest.java index fabb9d03d..fc90e4215 100644 --- a/src/test/java/org/apache/datasketches/tuple2/aninteger/CornerCaseTupleSetOperationsTest.java +++ b/src/test/java/org/apache/datasketches/tuple2/aninteger/CornerCaseTupleSetOperationsTest.java @@ -25,7 +25,7 @@ import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.theta2.UpdateSketch; import org.apache.datasketches.theta2.UpdateSketchBuilder; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; import org.apache.datasketches.tuple2.AnotB; import org.apache.datasketches.tuple2.CompactSketch; import org.apache.datasketches.tuple2.Intersection; diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java 
index a8d072a2e..5aea6c90e 100644 --- a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java +++ b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java @@ -23,7 +23,7 @@ import java.lang.foreign.MemorySegment; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.Assert; import org.testng.annotations.Test; diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java index 92de38bf1..fa9412207 100644 --- a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java +++ b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java @@ -23,7 +23,7 @@ import static org.apache.datasketches.hash.MurmurHash3.hash; import static org.testng.Assert.assertTrue; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; public class CornerCaseArrayOfDoublesSetOperationsTest { diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java index 6c39dd67a..fa2cca8ab 100644 --- a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java +++ b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java @@ -22,8 +22,7 @@ import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.ThetaUtil; -import org.apache.datasketches.tuple2.Util; +import 
org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.Assert; import org.testng.annotations.Test; @@ -69,7 +68,7 @@ public void exactModeFromQuickSelectSketch() { Assert.assertEquals(sketch.getRetainedEntries(), 3); Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); Assert.assertEquals(sketch.getTheta(), 1.0); - Assert.assertEquals(sketch.getSeedHash(), Util.computeSeedHash(ThetaUtil.DEFAULT_UPDATE_SEED)); + Assert.assertEquals(sketch.getSeedHash(), ThetaUtil.computeSeedHash(ThetaUtil.DEFAULT_UPDATE_SEED)); double[][] values = sketch.getValues(); Assert.assertEquals(values.length, 3); for (double[] array: values) { @@ -125,7 +124,7 @@ public void deserializeWithWrongSeed() { } @Test(expectedExceptions = SketchesArgumentException.class) - public void fromQuickSelectSketchNotEnoughMemory() { + public void fromQuickSelectSketchNotEnoughMemorySegment() { ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); us.update(1, new double[] {1.0}); us.compact(MemorySegment.ofArray(new byte[39])); diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchTest.java index 0a4e226aa..67aaffd05 100644 --- a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchTest.java @@ -274,7 +274,7 @@ public void serializeDeserializeSampling() { } @Test(expectedExceptions = SketchesArgumentException.class) - public void notEnoughMemory() { + public void memorySegmentNotLargeEnough() { new ArrayOfDoublesUpdatableSketchBuilder(). 
setNominalEntries(32).build(MemorySegment.ofArray(new byte[1055])); } diff --git a/src/test/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryTest.java b/src/test/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryTest.java index 9fd17aecc..308d154da 100644 --- a/src/test/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryTest.java +++ b/src/test/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryTest.java @@ -49,7 +49,7 @@ public void checkToByteArray() { println(s); } - println("\nfromMemory(seg)"); + println("\nfromMemorySegment(seg)"); DeserializeResult dres = ArrayOfStringsSummaryDeserializer.fromMemorySegment(seg); ArrayOfStringsSummary nsum3 = dres.getObject(); nodesArr = nsum3.getValue(); From a3e4165980743c86ddec562d0906bd0825321fe4 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Thu, 26 Jun 2025 13:20:55 -0700 Subject: [PATCH 18/25] More cleanup. --- .../theta2/DirectQuickSelectSketchR.java | 3 +- .../datasketches/theta2/Intersection.java | 28 +++++++++ .../datasketches/theta2/IntersectionImpl.java | 7 +-- .../datasketches/theta2/SetOperation.java | 11 ++-- .../apache/datasketches/theta2/Sketches.java | 46 +++++++------- .../org/apache/datasketches/theta2/Union.java | 63 ++++++++++++++++++- .../apache/datasketches/theta2/UnionImpl.java | 2 +- .../arrayofdoubles/ArrayOfDoublesSketch.java | 6 +- .../ArrayOfDoublesSketches.java | 14 +++-- .../arrayofdoubles/ArrayOfDoublesUnion.java | 12 ++-- .../ArrayOfDoublesUpdatableSketch.java | 6 +- .../datasketches/theta2/UnionImplTest.java | 4 +- 12 files changed, 150 insertions(+), 52 deletions(-) diff --git a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java index e2ba9f1fe..483cbbed6 100644 --- a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java +++ b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java @@ -106,8 
+106,7 @@ static DirectQuickSelectSketchR fastReadOnlyWrap(final MemorySegment srcSeg, fin final int lgNomLongs = srcSeg.get(JAVA_BYTE, LG_NOM_LONGS_BYTE) & 0XFF; final int lgArrLongs = srcSeg.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF; - final DirectQuickSelectSketchR dqss = - new DirectQuickSelectSketchR(seed, srcSeg); + final DirectQuickSelectSketchR dqss = new DirectQuickSelectSketchR(seed, srcSeg); dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); return dqss; } diff --git a/src/main/java/org/apache/datasketches/theta2/Intersection.java b/src/main/java/org/apache/datasketches/theta2/Intersection.java index 143a5a1fb..30cb910d9 100644 --- a/src/main/java/org/apache/datasketches/theta2/Intersection.java +++ b/src/main/java/org/apache/datasketches/theta2/Intersection.java @@ -19,9 +19,11 @@ package org.apache.datasketches.theta2; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; import static org.apache.datasketches.common.Util.floorPowerOf2; import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK; import static org.apache.datasketches.theta2.PreambleUtil.SER_VER; +import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE; import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount; import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID; import static org.apache.datasketches.theta2.PreambleUtil.extractFlags; @@ -135,6 +137,32 @@ public CompactSketch intersect(final Sketch a, final Sketch b) { public abstract CompactSketch intersect(Sketch a, Sketch b, boolean dstOrdered, MemorySegment dstSeg); + /** + * Factory: Wrap an Intersection target around the given source MemorySegment containing intersection data. + * This method assumes the Default Update Seed. + * If the given source MemorySegment is read-only, the returned object will also be read-only. + * @param srcSeg The source MemorySegment image. 
+ * @return an Intersection that wraps a source MemorySegment that contains an Intersection image + */ + public static Intersection wrap(final MemorySegment srcSeg) { + return wrap(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + } + + /** + * Factory: Wrap an Intersection target around the given source MemorySegment containing intersection data. + * If the given source MemorySegment is read-only, the returned object will also be read-only. + * @param srcSeg The source MemorySegment image. + * @param expectedSeed See seed + * @return an Intersection that wraps a source MemorySegment that contains an Intersection image + */ + public static Intersection wrap(final MemorySegment srcSeg, final long expectedSeed) { + final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE); + if (serVer != 3) { + throw new SketchesArgumentException("SerVer must be 3: " + serVer); + } + return IntersectionImpl.wrapInstance(srcSeg, expectedSeed, srcSeg.isReadOnly() ); + } + // Restricted /** diff --git a/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java b/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java index e16faa6e8..0d7a2bb8f 100644 --- a/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java +++ b/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java @@ -196,8 +196,8 @@ static IntersectionImpl heapifyInstance(final MemorySegment srcSeg, final long s } /** - * Factory: Wrap an Intersection target around the given source MemorySegment containing - * intersection data. + * Factory: Wrap an Intersection target around the given source MemorySegment containing intersection data. + * If the given source MemorySegment is read-only, the returned object will also be read-only. * @param srcSeg The source MemorySegment image. 
* @param seed See seed * @param readOnly True if MemorySegment is to be treated as read only @@ -218,8 +218,7 @@ static IntersectionImpl wrapInstance( } @Override - public CompactSketch intersect(final Sketch a, final Sketch b, final boolean dstOrdered, - final MemorySegment dstSeg) { + public CompactSketch intersect(final Sketch a, final Sketch b, final boolean dstOrdered, final MemorySegment dstSeg) { if (wseg_ != null && readOnly_) { throw new SketchesReadOnlyException(); } hardReset(); intersect(a); diff --git a/src/main/java/org/apache/datasketches/theta2/SetOperation.java b/src/main/java/org/apache/datasketches/theta2/SetOperation.java index 795712dca..227ae54db 100644 --- a/src/main/java/org/apache/datasketches/theta2/SetOperation.java +++ b/src/main/java/org/apache/datasketches/theta2/SetOperation.java @@ -104,11 +104,10 @@ public static SetOperation heapify(final MemorySegment srcSeg, final long expect /** * Wrap takes the SetOperation image in MemorySegment and refers to it directly. * There is no data copying onto the java heap. - * This method assumes the - * Default Update Seed. + * This method assumes the Default Update Seed. + * If the given source MemorySegment is read-only, the returned object will also be read-only. * - *

      <p>Note: Only certain set operators during stateful operations can be serialized and thus - * wrapped.</p> + * <p>Note: Only certain set operators during stateful operations can be serialized and thus wrapped.</p>

      * * @param srcSeg an image of a SetOperation where the image seed hash matches the default seed hash. * @return a SetOperation backed by the given MemorySegment @@ -120,9 +119,9 @@ public static SetOperation wrap(final MemorySegment srcSeg) { /** * Wrap takes the SetOperation image in MemorySegment and refers to it directly. * There is no data copying onto the java heap. + * If the given source MemorySegment is read-only, the returned object will also be read-only. * - *

      <p>Note: Only certain set operators during stateful operations can be serialized and thus - * wrapped.</p> + * <p>Note: Only certain set operators during stateful operations can be serialized and thus wrapped.</p>

      * * @param srcSeg an image of a SetOperation where the hash of the given expectedSeed matches the image seed hash. * @param expectedSeed the seed used to validate the given MemorySegment image. diff --git a/src/main/java/org/apache/datasketches/theta2/Sketches.java b/src/main/java/org/apache/datasketches/theta2/Sketches.java index 377345967..fa813b25f 100644 --- a/src/main/java/org/apache/datasketches/theta2/Sketches.java +++ b/src/main/java/org/apache/datasketches/theta2/Sketches.java @@ -71,7 +71,7 @@ public static double getLowerBound(final int numStdDev, final MemorySegment srcS } /** - * Ref: {@link SetOperation#getMaxAnotBResultBytes(int)}. + * Convenience method, ref: {@link SetOperation#getMaxAnotBResultBytes(int)}. * Returns the maximum number of bytes for the returned CompactSketch, given the maximum * value of nomEntries of the first sketch A of AnotB. * @param maxNomEntries the given value @@ -105,7 +105,7 @@ public static int getCompactSketchMaxBytes(final int lgNomEntries) { } /** - * Ref: {@link SetOperation#getMaxIntersectionBytes(int)} + * Convenience method, ref: {@link SetOperation#getMaxIntersectionBytes(int)} * @param nomEntries Ref: {@link SetOperation#getMaxIntersectionBytes(int)}, {@code nomEntries} * @return Ref: {@link SetOperation#getMaxIntersectionBytes(int)} */ @@ -114,7 +114,7 @@ public static int getMaxIntersectionBytes(final int nomEntries) { } /** - * Ref: {@link SetOperation#getMaxUnionBytes(int)} + * Convenience method, ref: {@link SetOperation#getMaxUnionBytes(int)} * @param nomEntries Ref: {@link SetOperation#getMaxUnionBytes(int)}, {@code nomEntries} * @return Ref: {@link SetOperation#getMaxUnionBytes(int)} */ @@ -123,7 +123,7 @@ public static int getMaxUnionBytes(final int nomEntries) { } /** - * Ref: {@link Sketch#getMaxUpdateSketchBytes(int)} + * Convenience method, ref: {@link Sketch#getMaxUpdateSketchBytes(int)} * @param nomEntries Ref: {@link Sketch#getMaxUpdateSketchBytes(int)}, {@code nomEntries} * @return Ref: 
{@link Sketch#getMaxUpdateSketchBytes(int)} */ @@ -132,7 +132,7 @@ public static int getMaxUpdateSketchBytes(final int nomEntries) { } /** - * Ref: {@link Sketch#getSerializationVersion(MemorySegment)} + * Convenience method, ref: {@link Sketch#getSerializationVersion(MemorySegment)} * @param srcSeg Ref: {@link Sketch#getSerializationVersion(MemorySegment)}, {@code srcSeg} * @return Ref: {@link Sketch#getSerializationVersion(MemorySegment)} */ @@ -157,7 +157,7 @@ public static double getUpperBound(final int numStdDev, final MemorySegment srcS //Heapify Operations /** - * Ref: {@link CompactSketch#heapify(MemorySegment) CompactSketch.heapify(MemorySegment)} + * Convenience method, ref: {@link CompactSketch#heapify(MemorySegment) CompactSketch.heapify(MemorySegment)} * @param srcSeg Ref: {@link CompactSketch#heapify(MemorySegment) CompactSketch.heapify(MemorySegment)}, {@code srcSeg} * @return {@link CompactSketch CompactSketch} */ @@ -166,7 +166,7 @@ public static CompactSketch heapifyCompactSketch(final MemorySegment srcSeg) { } /** - * Ref: {@link CompactSketch#heapify(MemorySegment, long) CompactSketch.heapify(MemorySegment, long)} + * Convenience method, ref: {@link CompactSketch#heapify(MemorySegment, long) CompactSketch.heapify(MemorySegment, long)} * @param srcSeg Ref: {@link CompactSketch#heapify(MemorySegment, long) CompactSketch.heapify(MemorySegment, long)}, {@code srcSeg} * @param expectedSeed Ref: {@link CompactSketch#heapify(MemorySegment, long) CompactSketch.heapify(MemorySegment, long)}, * {@code expectedSeed} @@ -177,7 +177,7 @@ public static CompactSketch heapifyCompactSketch(final MemorySegment srcSeg, fin } /** - * Ref: {@link CompactSketch#wrap(MemorySegment) CompactSketch.wrap(MemorySegment)} + * Convenience method, ref: {@link CompactSketch#wrap(MemorySegment) CompactSketch.wrap(MemorySegment)} * @param srcSeg Ref: {@link CompactSketch#wrap(MemorySegment) CompactSketch.wrap(MemorySegment)}, {@code srcSeg} * @return {@link CompactSketch 
CompactSketch} */ @@ -186,7 +186,7 @@ public static CompactSketch wrapCompactSketch(final MemorySegment srcSeg) { } /** - * Ref: {@link CompactSketch#wrap(MemorySegment, long) CompactSketch.wrap(MemorySegment, long)} + * Convenience method, ref: {@link CompactSketch#wrap(MemorySegment, long) CompactSketch.wrap(MemorySegment, long)} * @param srcSeg Ref: {@link CompactSketch#wrap(MemorySegment, long) CompactSketch.wrap(MemorySegment, long)}, {@code srcSeg} * @param expectedSeed Ref: {@link CompactSketch#wrap(MemorySegment, long) CompactSketch.wrap(MemorySegment, long)}, * {@code expectedSeed} @@ -197,7 +197,7 @@ public static CompactSketch wrapCompactSketch(final MemorySegment srcSeg, final } /** - * Ref: {@link SetOperation#heapify(MemorySegment) SetOperation.heapify(MemorySegment)} + * Convenience method, ref: {@link SetOperation#heapify(MemorySegment) SetOperation.heapify(MemorySegment)} * @param srcSeg Ref: {@link SetOperation#heapify(MemorySegment) SetOperation.heapify(MemorySegment)}, {@code srcSeg} * @return {@link SetOperation SetOperation} */ @@ -206,7 +206,7 @@ public static SetOperation heapifySetOperation(final MemorySegment srcSeg) { } /** - * Ref: {@link SetOperation#heapify(MemorySegment, long) SetOperation.heapify(MemorySegment, long)} + * Convenience method, ref: {@link SetOperation#heapify(MemorySegment, long) SetOperation.heapify(MemorySegment, long)} * @param srcSeg Ref: {@link SetOperation#heapify(MemorySegment, long) SetOperation.heapify(MemorySegment, long)}, * {@code srcSeg} * @param expectedSeed the seed used to validate the given MemorySegment image. 
@@ -219,7 +219,7 @@ public static SetOperation heapifySetOperation(final MemorySegment srcSeg, final } /** - * Ref: {@link Sketch#heapify(MemorySegment) Sketch.heapify(MemorySegment)} + * Convenience method, ref: {@link Sketch#heapify(MemorySegment) Sketch.heapify(MemorySegment)} * @param srcSeg Ref: {@link Sketch#heapify(MemorySegment) Sketch.heapify(MemorySegment)}, {@code srcSeg} * @return {@link Sketch Sketch} */ @@ -228,7 +228,7 @@ public static Sketch heapifySketch(final MemorySegment srcSeg) { } /** - * Ref: {@link Sketch#heapify(MemorySegment, long) Sketch.heapify(MemorySegment, long)} + * Convenience method, ref: {@link Sketch#heapify(MemorySegment, long) Sketch.heapify(MemorySegment, long)} * @param srcSeg Ref: {@link Sketch#heapify(MemorySegment, long) Sketch.heapify(MemorySegment, long)}, {@code srcSeg} * @param expectedSeed the seed used to validate the given MemorySegment image. * Ref: {@link Sketch#heapify(MemorySegment, long) Sketch.heapify(MemorySegment, long)}, {@code expectedSeed} @@ -239,7 +239,7 @@ public static Sketch heapifySketch(final MemorySegment srcSeg, final long expect } /** - * Ref: {@link UpdateSketch#heapify(MemorySegment) UpdateSketch.heapify(MemorySegment)} + * Convenience method, ref: {@link UpdateSketch#heapify(MemorySegment) UpdateSketch.heapify(MemorySegment)} * @param srcSeg Ref: {@link UpdateSketch#heapify(MemorySegment) UpdateSketch.heapify(MemorySegment)}, {@code srcSeg} * @return {@link UpdateSketch UpdateSketch} */ @@ -248,7 +248,7 @@ public static UpdateSketch heapifyUpdateSketch(final MemorySegment srcSeg) { } /** - * Ref: {@link UpdateSketch#heapify(MemorySegment, long) UpdateSketch.heapify(MemorySegment, long)} + * Convenience method, ref: {@link UpdateSketch#heapify(MemorySegment, long) UpdateSketch.heapify(MemorySegment, long)} * @param srcSeg Ref: {@link UpdateSketch#heapify(MemorySegment, long) UpdateSketch.heapify(MemorySegment, long)}, * {@code srcSeg} * @param expectedSeed the seed used to validate the given 
MemorySegment image. @@ -281,7 +281,7 @@ public static UpdateSketchBuilder updateSketchBuilder() { //Wrap operations /** - * Convenience method, calls {@link SetOperation#wrap(MemorySegment)} and casts the result to a Intersection + * Convenience method, ref: {@link SetOperation#wrap(MemorySegment)} * @param srcSeg Ref: {@link SetOperation#wrap(MemorySegment)}, {@code srcSeg} * @return a Intersection backed by the given MemorySegment */ @@ -290,7 +290,7 @@ public static Intersection wrapIntersection(final MemorySegment srcSeg) { } /** - * Ref: {@link SetOperation#wrap(MemorySegment) SetOperation.wrap(MemorySegment)} + * Convenience method, ref: {@link SetOperation#wrap(MemorySegment) SetOperation.wrap(MemorySegment)} * @param srcSeg Ref: {@link SetOperation#wrap(MemorySegment) SetOperation.wrap(MemorySegment)}, {@code srcSeg} * @return {@link SetOperation SetOperation} */ @@ -299,7 +299,7 @@ public static SetOperation wrapSetOperation(final MemorySegment srcSeg) { } /** - * Ref: {@link SetOperation#wrap(MemorySegment, long) SetOperation.wrap(MemorySegment, long)} + * Convenience method, ref: {@link SetOperation#wrap(MemorySegment, long) SetOperation.wrap(MemorySegment, long)} * @param srcSeg Ref: {@link SetOperation#wrap(MemorySegment, long) SetOperation.wrap(MemorySegment, long)}, {@code srcSeg} * @param expectedSeed the seed used to validate the given MemorySegment image. 
* Ref: {@link SetOperation#wrap(MemorySegment, long) SetOperation.wrap(MemorySegment, long)}, {@code expectedSeed} @@ -310,7 +310,7 @@ public static SetOperation wrapSetOperation(final MemorySegment srcSeg, final lo } /** - * Ref: {@link Sketch#wrap(MemorySegment) Sketch.wrap(MemorySegment)} + * Convenience method, ref: {@link Sketch#wrap(MemorySegment) Sketch.wrap(MemorySegment)} * @param srcSeg Ref: {@link Sketch#wrap(MemorySegment) Sketch.wrap(MemorySegment)}, {@code srcSeg} * @return {@link Sketch Sketch} */ @@ -319,7 +319,7 @@ public static Sketch wrapSketch(final MemorySegment srcSeg) { } /** - * Ref: {@link Sketch#wrap(MemorySegment, long) Sketch.wrap(MemorySegment, long)} + * Convenience method, ref: {@link Sketch#wrap(MemorySegment, long) Sketch.wrap(MemorySegment, long)} * @param srcSeg Ref: {@link Sketch#wrap(MemorySegment, long) Sketch.wrap(MemorySegment, long)}, {@code srcSeg} * @param expectedSeed the expectedSeed used to validate the given MemorySegment image. * Ref: {@link Sketch#wrap(MemorySegment, long) Sketch.wrap(MemorySegment, long)}, {@code expectedSeed} @@ -330,7 +330,7 @@ public static Sketch wrapSketch(final MemorySegment srcSeg, final long expectedS } /** - * Convenience method, calls {@link SetOperation#wrap(MemorySegment)} and casts the result to a Union + * Convenience method, ref: {@link SetOperation#wrap(MemorySegment)} and casts the result to a Union * @param srcSeg Ref: {@link SetOperation#wrap(MemorySegment)}, {@code srcSeg} * @return a Union backed by the given MemorySegment. 
*/ @@ -339,7 +339,7 @@ public static Union wrapUnion(final MemorySegment srcSeg) { } /** - * Ref: {@link UpdateSketch#wrap(MemorySegment) UpdateSketch.wrap(MemorySegment)} + * Convenience method, ref: {@link UpdateSketch#wrap(MemorySegment) UpdateSketch.wrap(MemorySegment)} * @param srcSeg Ref: {@link UpdateSketch#wrap(MemorySegment) UpdateSketch.wrap(MemorySegment)}, {@code srcSeg} * @return {@link UpdateSketch UpdateSketch} */ @@ -348,7 +348,7 @@ public static UpdateSketch wrapUpdateSketch(final MemorySegment srcSeg) { } /** - * Ref: {@link UpdateSketch#wrap(MemorySegment, long) UpdateSketch.wrap(MemorySegment, long)} + * Convenience method, ref: {@link UpdateSketch#wrap(MemorySegment, long) UpdateSketch.wrap(MemorySegment, long)} * @param srcSeg Ref: {@link UpdateSketch#wrap(MemorySegment, long) UpdateSketch.wrap(MemorySegment, long)}, {@code srcSeg} * @param expectedSeed the seed used to validate the given MemorySegment image. * Ref: {@link UpdateSketch#wrap(MemorySegment, long) UpdateSketch.wrap(MemorySegment, long)}, {@code expectedSeed} diff --git a/src/main/java/org/apache/datasketches/theta2/Union.java b/src/main/java/org/apache/datasketches/theta2/Union.java index 861857366..fde9e90bd 100644 --- a/src/main/java/org/apache/datasketches/theta2/Union.java +++ b/src/main/java/org/apache/datasketches/theta2/Union.java @@ -19,10 +19,15 @@ package org.apache.datasketches.theta2; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE; + import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.thetacommon.ThetaUtil; /** * Compute the union of two or more theta sketches. @@ -32,6 +37,62 @@ */ public abstract class Union extends SetOperation { + /** + * Wrap a Union object around a Union MemorySegment object containing data. 
+ * This method assumes the Default Update Seed. + * This does NO validity checking of the given MemorySegment. + * If the given source MemorySegment is read-only, the returned Union object will also be read-only. + * @param srcSeg The source MemorySegment object. + * @return this class + */ + public static Union fastWrap(final MemorySegment srcSeg) { + return fastWrap(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + } + + /** + * Wrap a Union object around a Union MemorySegment object containing data. + * This does NO validity checking of the given MemorySegment. + * If the given source MemorySegment is read-only, the returned Union object will also be read-only. + * @param srcSeg The source MemorySegment object. + * @param expectedSeed the seed used to validate the given MemorySegment image. + * See seed + * @return this class + */ + public static Union fastWrap(final MemorySegment srcSeg, final long expectedSeed) { + final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE); + if (serVer != 3) { + throw new SketchesArgumentException("SerVer must be 3: " + serVer); + } + return UnionImpl.fastWrapInstance(srcSeg, expectedSeed); + } + + /** + * Wrap a Union object around a Union MemorySegment object containing data. + * This method assumes the Default Update Seed. + * If the given source MemorySegment is read-only, the returned Union object will also be read-only. + * @param srcSeg The source MemorySegment object. + * @return this class + */ + public static Union wrap(final MemorySegment srcSeg) { + return wrap(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + } + + /** + * Wrap a Union object around a Union MemorySegment object containing data. + * If the given source MemorySegment is read-only, the returned Union object will also be read-only. + * @param srcSeg The source MemorySegment object. + * @param expectedSeed the seed used to validate the given MemorySegment image. 
+ * See seed + * @return this class + */ + public static Union wrap(final MemorySegment srcSeg, final long expectedSeed) { + final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE); + if (serVer != 3) { + throw new SketchesArgumentException("SerVer must be 3: " + serVer); + } + return UnionImpl.wrapInstance(srcSeg, expectedSeed); + } + /** * Returns the number of storage bytes required for this union in its current state. * @@ -67,7 +128,7 @@ public Family getFamily() { * See Destination Ordered * * @param dstSeg destination MemorySegment - * + * * @return the result of this operation as a CompactSketch of the chosen form */ public abstract CompactSketch getResult(boolean dstOrdered, MemorySegment dstSeg); diff --git a/src/main/java/org/apache/datasketches/theta2/UnionImpl.java b/src/main/java/org/apache/datasketches/theta2/UnionImpl.java index 68d72b6e9..9d3aca04b 100644 --- a/src/main/java/org/apache/datasketches/theta2/UnionImpl.java +++ b/src/main/java/org/apache/datasketches/theta2/UnionImpl.java @@ -138,7 +138,7 @@ static UnionImpl heapifyInstance(final MemorySegment srcSeg, final long expected * See seed * @return this class */ - static UnionImpl fastWrap(final MemorySegment srcSeg, final long expectedSeed) { + static UnionImpl fastWrapInstance(final MemorySegment srcSeg, final long expectedSeed) { Family.UNION.checkFamilyID(extractFamilyID(srcSeg)); final UpdateSketch gadget = srcSeg.isReadOnly() ? 
DirectQuickSelectSketchR.fastReadOnlyWrap(srcSeg, expectedSeed) diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketch.java index 49bf36cbb..99a87bbb1 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketch.java @@ -94,7 +94,8 @@ public static ArrayOfDoublesSketch heapify(final MemorySegment seg, final long s } /** - * Wrap the given MemorySegment as an ArrayOfDoublesSketch + * Wrap the given MemorySegment as an ArrayOfDoublesSketch. + * If the given source MemorySegment is read-only, the returned sketch will also be read-only. * @param seg the given MemorySegment * @return an ArrayOfDoublesSketch */ @@ -103,7 +104,8 @@ public static ArrayOfDoublesSketch wrap(final MemorySegment seg) { } /** - * Wrap the given MemorySegment and seed as a ArrayOfDoublesSketch + * Wrap the given MemorySegment and seed as an ArrayOfDoublesSketch. + * If the given source MemorySegment is read-only, the returned sketch will also be read-only. * @param seg the given MemorySegment * @param seed the given seed * @return an ArrayOfDoublesSketch diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketches.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketches.java index a439d4899..8ad121a00 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketches.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketches.java @@ -67,7 +67,8 @@ public static ArrayOfDoublesUpdatableSketch heapifyUpdatableSketch(final MemoryS } /** - * Wrap the given MemorySegment as an ArrayOfDoublesSketch + * Wrap the given MemorySegment as an ArrayOfDoublesSketch. 
+ * If the given source MemorySegment is read-only, the returned sketch will also be read-only. * @param srcSeg the given source MemorySegment * @return an ArrayOfDoublesSketch */ @@ -76,7 +77,8 @@ public static ArrayOfDoublesSketch wrapSketch(final MemorySegment srcSeg) { } /** - * Wrap the given MemorySegment and seed as a ArrayOfDoublesSketch + * Wrap the given MemorySegment and seed as an ArrayOfDoublesSketch. + * If the given source MemorySegment is read-only, the returned sketch will also be read-only. * @param srcSeg the given source MemorySegment * @param seed the given seed * @return an ArrayOfDoublesSketch @@ -86,7 +88,8 @@ public static ArrayOfDoublesSketch wrapSketch(final MemorySegment srcSeg, final } /** - * Wrap the given MemorySegment as an ArrayOfDoublesUpdatableSketch + * Wrap the given MemorySegment as an ArrayOfDoublesUpdatableSketch. + * If the given source MemorySegment is read-only, the returned sketch will also be read-only. * @param srcSeg the given source MemorySegment * @return an ArrayOfDoublesUpdatableSketch */ @@ -95,7 +98,8 @@ public static ArrayOfDoublesUpdatableSketch wrapUpdatableSketch(final MemorySegm } /** - * Wrap the given MemorySegment and seed as a ArrayOfDoublesUpdatableSketch + * Wrap the given MemorySegment and seed as an ArrayOfDoublesUpdatableSketch. + * If the given source MemorySegment is read-only, the returned sketch will also be read-only. * @param srcSeg the given source MemorySegment * @param seed the given seed * @return an ArrayOfDoublesUpdatableSketch @@ -125,6 +129,7 @@ public static ArrayOfDoublesUnion heapifyUnion(final MemorySegment srcSeg, final /** * Wrap the given MemorySegment as an ArrayOfDoublesUnion + * If the given source MemorySegment is read-only, the returned Union object will also be read-only. 
* @param srcSeg the given source MemorySegment * @return an ArrayOfDoublesUnion */ @@ -134,6 +139,7 @@ public static ArrayOfDoublesUnion wrapUnion(final MemorySegment srcSeg) { /** * Wrap the given MemorySegment and seed as an ArrayOfDoublesUnion + * If the given source MemorySegment is read-only, the returned Union object will also be read-only. * @param srcSeg the given source MemorySegment * @param seed the given seed * @return an ArrayOfDoublesUnion diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnion.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnion.java index d00d21f86..1c57b881e 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnion.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnion.java @@ -61,7 +61,7 @@ public abstract class ArrayOfDoublesUnion { } /** - * Heapify the given MemorySegment as an ArrayOfDoublesUnion + * Heapify the given MemorySegment as an ArrayOfDoublesUnion. * @param srcSeg the given source MemorySegment * @return an ArrayOfDoublesUnion */ @@ -70,7 +70,7 @@ public static ArrayOfDoublesUnion heapify(final MemorySegment srcSeg) { } /** - * Heapify the given MemorySegment and seed as an ArrayOfDoublesUnion + * Heapify the given MemorySegment and seed as an ArrayOfDoublesUnion. * @param srcSeg the given source MemorySegment * @param seed the given seed * @return an ArrayOfDoublesUnion @@ -80,7 +80,8 @@ public static ArrayOfDoublesUnion heapify(final MemorySegment srcSeg, final long } /** - * Wrap the given MemorySegment as an ArrayOfDoublesUnion + * Wrap the given MemorySegment as an ArrayOfDoublesUnion. + * If the given source MemorySegment is read-only, the returned Union object will also be read-only. 
* @param srcSeg the given source MemorySegment * @return an ArrayOfDoublesUnion */ @@ -89,7 +90,8 @@ public static ArrayOfDoublesUnion wrap(final MemorySegment srcSeg) { } /** - * Wrap the given MemorySegment and seed as an ArrayOfDoublesUnion + * Wrap the given MemorySegment and seed as an ArrayOfDoublesUnion. + * If the given source MemorySegment is read-only, the returned Union object will also be read-only. * @param srcSeg the given source MemorySegment * @param seed the given seed * @return an ArrayOfDoublesUnion @@ -104,7 +106,7 @@ public static ArrayOfDoublesUnion wrap(final MemorySegment srcSeg, final long se * *

      Nulls and empty sketches are ignored.

      * - * @param tupleSketch sketch to add to the union + * @param tupleSketch sketch to add to the union. */ public void union(final ArrayOfDoublesSketch tupleSketch) { if (tupleSketch == null) { return; } diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java index 1ebece067..98cf6699a 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java @@ -61,7 +61,8 @@ public static ArrayOfDoublesUpdatableSketch heapify(final MemorySegment seg, fin } /** - * Wrap the given MemorySegment as an ArrayOfDoublesUpdatableSketch + * Wrap the given MemorySegment as an ArrayOfDoublesUpdatableSketch. + * If the given source MemorySegment is read-only, the returned sketch will also be read-only. * @param seg the given MemorySegment * @return an ArrayOfDoublesUpdatableSketch */ @@ -70,7 +71,8 @@ public static ArrayOfDoublesUpdatableSketch wrap(final MemorySegment seg) { } /** - * Wrap the given MemorySegment and seed as a ArrayOfDoublesUpdatableSketch + * Wrap the given MemorySegment and seed as an ArrayOfDoublesUpdatableSketch. + * If the given source MemorySegment is read-only, the returned sketch will also be read-only. 
* @param seg the given MemorySegment * @param seed the given seed * @return an ArrayOfDoublesUpdatableSketch diff --git a/src/test/java/org/apache/datasketches/theta2/UnionImplTest.java b/src/test/java/org/apache/datasketches/theta2/UnionImplTest.java index bcd066146..f3a36de5d 100644 --- a/src/test/java/org/apache/datasketches/theta2/UnionImplTest.java +++ b/src/test/java/org/apache/datasketches/theta2/UnionImplTest.java @@ -128,10 +128,10 @@ public void checkFastWrap() { final Union union = Sketches.setOperationBuilder().setNominalEntries(k).buildUnion(srcSeg); for (int i = 0; i < k; i++) { union.update(i); } //exact assertEquals(union.getResult().getEstimate(), k, 0.0); - final Union union2 = UnionImpl.fastWrap(srcSeg, seed); + final Union union2 = UnionImpl.fastWrapInstance(srcSeg, seed); assertEquals(union2.getResult().getEstimate(), k, 0.0); final MemorySegment srcSegR = srcSeg; - final Union union3 = UnionImpl.fastWrap(srcSegR, seed); + final Union union3 = UnionImpl.fastWrapInstance(srcSegR, seed); assertEquals(union3.getResult().getEstimate(), k, 0.0); } From a66d3b00c453f3d71ad1023455508e15c41ea9b7 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 27 Jun 2025 10:27:01 -0700 Subject: [PATCH 19/25] More cleanup --- pom.xml | 1 + .../BoundsOnRatiosInThetaSketchedSets2.java | 121 ----------- .../BoundsOnRatiosInTupleSketchedSets2.java | 204 ------------------ 3 files changed, 1 insertion(+), 325 deletions(-) delete mode 100644 src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2.java delete mode 100644 src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInTupleSketchedSets2.java diff --git a/pom.xml b/pom.xml index 01d0cc65c..760acd58d 100644 --- a/pom.xml +++ b/pom.xml @@ -375,6 +375,7 @@ under the License. 
default-report + test report diff --git a/src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2.java b/src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2.java deleted file mode 100644 index f8199cc4f..000000000 --- a/src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSets2.java +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.thetacommon; - -import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; - -import org.apache.datasketches.common.BoundsOnRatiosInSampledSets; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.theta2.Sketch; - -/** - * This class is used to compute the bounds on the estimate of the ratio B / A, where: - *
        - *
      • A is a Theta Sketch of population PopA.
      • - *
      • B is a Theta Sketch of population PopB that is a subset of A, - * obtained by an intersection of A with some other Theta Sketch C, - * which acts like a predicate or selection clause.
      • - *
      • The estimate of the ratio PopB/PopA is - * BoundsOnRatiosInThetaSketchedSets.getEstimateOfBoverA(A, B).
      • - *
      • The Upper Bound estimate on the ratio PopB/PopA is - * BoundsOnRatiosInThetaSketchedSets.getUpperBoundForBoverA(A, B).
      • - *
      • The Lower Bound estimate on the ratio PopB/PopA is - * BoundsOnRatiosInThetaSketchedSets.getLowerBoundForBoverA(A, B).
      • - *
      - * Note: The theta of A cannot be greater than the theta of B. - * If B is formed as an intersection of A and some other set C, - * then the theta of B is guaranteed to be less than or equal to the theta of B. - * - * @author Kevin Lang - * @author Lee Rhodes - */ -public final class BoundsOnRatiosInThetaSketchedSets2 { - - private BoundsOnRatiosInThetaSketchedSets2() {} - - /** - * Gets the approximate lower bound for B over A based on a 95% confidence interval - * @param sketchA the sketch A - * @param sketchB the sketch B - * @return the approximate lower bound for B over A - */ - public static double getLowerBoundForBoverA(final Sketch sketchA, final Sketch sketchB) { - final long thetaLongA = sketchA.getThetaLong(); - final long thetaLongB = sketchB.getThetaLong(); - checkThetas(thetaLongA, thetaLongB); - - final int countB = sketchB.getRetainedEntries(true); - final int countA = (thetaLongB == thetaLongA) - ? sketchA.getRetainedEntries(true) - : sketchA.getCountLessThanThetaLong(thetaLongB); - - if (countA <= 0) { return 0; } - final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; - return BoundsOnRatiosInSampledSets.getLowerBoundForBoverA(countA, countB, f); - } - - /** - * Gets the approximate upper bound for B over A based on a 95% confidence interval - * @param sketchA the sketch A - * @param sketchB the sketch B - * @return the approximate upper bound for B over A - */ - public static double getUpperBoundForBoverA(final Sketch sketchA, final Sketch sketchB) { - final long thetaLongA = sketchA.getThetaLong(); - final long thetaLongB = sketchB.getThetaLong(); - checkThetas(thetaLongA, thetaLongB); - - final int countB = sketchB.getRetainedEntries(true); - final int countA = (thetaLongB == thetaLongA) - ? 
sketchA.getRetainedEntries(true) - : sketchA.getCountLessThanThetaLong(thetaLongB); - - if (countA <= 0) { return 1.0; } - final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; - return BoundsOnRatiosInSampledSets.getUpperBoundForBoverA(countA, countB, f); - } - - /** - * Gets the estimate for B over A - * @param sketchA the sketch A - * @param sketchB the sketch B - * @return the estimate for B over A - */ - public static double getEstimateOfBoverA(final Sketch sketchA, final Sketch sketchB) { - final long thetaLongA = sketchA.getThetaLong(); - final long thetaLongB = sketchB.getThetaLong(); - checkThetas(thetaLongA, thetaLongB); - - final int countB = sketchB.getRetainedEntries(true); - final int countA = (thetaLongB == thetaLongA) - ? sketchA.getRetainedEntries(true) - : sketchA.getCountLessThanThetaLong(thetaLongB); - - if (countA <= 0) { return 0.5; } - - return (double) countB / (double) countA; - } - - static void checkThetas(final long thetaLongA, final long thetaLongB) { - if (thetaLongB > thetaLongA) { - throw new SketchesArgumentException("ThetaLongB cannot be > ThetaLongA."); - } - } -} diff --git a/src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInTupleSketchedSets2.java b/src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInTupleSketchedSets2.java deleted file mode 100644 index ea8a20828..000000000 --- a/src/main/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInTupleSketchedSets2.java +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.thetacommon; - -import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; - -import org.apache.datasketches.common.BoundsOnRatiosInSampledSets; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.tuple2.Sketch; -import org.apache.datasketches.tuple2.Summary; - -/** - * This class is used to compute the bounds on the estimate of the ratio B / A, where: - *
        - *
      • A is a Tuple Sketch of population PopA.
      • - *
      • B is a Tuple or Theta Sketch of population PopB that is a subset of A, - * obtained by an intersection of A with some other Tuple or Theta Sketch C, - * which acts like a predicate or selection clause.
      • - *
      • The estimate of the ratio PopB/PopA is - * BoundsOnRatiosInThetaSketchedSets.getEstimateOfBoverA(A, B).
      • - *
      • The Upper Bound estimate on the ratio PopB/PopA is - * BoundsOnRatiosInThetaSketchedSets.getUpperBoundForBoverA(A, B).
      • - *
      • The Lower Bound estimate on the ratio PopB/PopA is - * BoundsOnRatiosInThetaSketchedSets.getLowerBoundForBoverA(A, B).
      • - *
      - * Note: The theta of A cannot be greater than the theta of B. - * If B is formed as an intersection of A and some other set C, - * then the theta of B is guaranteed to be less than or equal to the theta of B. - * - * @author Kevin Lang - * @author Lee Rhodes - * @author David Cromberge - */ -public final class BoundsOnRatiosInTupleSketchedSets2 { - - private BoundsOnRatiosInTupleSketchedSets2() {} - - /** - * Gets the approximate lower bound for B over A based on a 95% confidence interval - * @param sketchA the Tuple sketch A with summary type S - * @param sketchB the Tuple sketch B with summary type S - * @param Summary - * @return the approximate lower bound for B over A - */ - public static double getLowerBoundForBoverA( - final Sketch sketchA, - final Sketch sketchB) { - final long thetaLongA = sketchA.getThetaLong(); - final long thetaLongB = sketchB.getThetaLong(); - checkThetas(thetaLongA, thetaLongB); - - final int countB = sketchB.getRetainedEntries(); - final int countA = thetaLongB == thetaLongA - ? sketchA.getRetainedEntries() - : sketchA.getCountLessThanThetaLong(thetaLongB); - - if (countA <= 0) { return 0; } - final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; - return BoundsOnRatiosInSampledSets.getLowerBoundForBoverA(countA, countB, f); - } - - /** - * Gets the approximate lower bound for B over A based on a 95% confidence interval - * @param sketchA the Tuple sketch A with summary type S - * @param sketchB the Theta sketch B - * @param Summary - * @return the approximate lower bound for B over A - */ - public static double getLowerBoundForBoverA( - final Sketch sketchA, - final org.apache.datasketches.theta.Sketch sketchB) { - final long thetaLongA = sketchA.getThetaLong(); - final long thetaLongB = sketchB.getThetaLong(); - checkThetas(thetaLongA, thetaLongB); - - final int countB = sketchB.getRetainedEntries(); - final int countA = thetaLongB == thetaLongA - ? 
sketchA.getRetainedEntries() - : sketchA.getCountLessThanThetaLong(thetaLongB); - - if (countA <= 0) { return 0; } - final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; - return BoundsOnRatiosInSampledSets.getLowerBoundForBoverA(countA, countB, f); - } - - /** - * Gets the approximate upper bound for B over A based on a 95% confidence interval - * @param sketchA the Tuple sketch A with summary type S - * @param sketchB the Tuple sketch B with summary type S - * @param Summary - * @return the approximate upper bound for B over A - */ - public static double getUpperBoundForBoverA( - final Sketch sketchA, - final Sketch sketchB) { - final long thetaLongA = sketchA.getThetaLong(); - final long thetaLongB = sketchB.getThetaLong(); - checkThetas(thetaLongA, thetaLongB); - - final int countB = sketchB.getRetainedEntries(); - final int countA = thetaLongB == thetaLongA - ? sketchA.getRetainedEntries() - : sketchA.getCountLessThanThetaLong(thetaLongB); - - if (countA <= 0) { return 1.0; } - final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; - return BoundsOnRatiosInSampledSets.getUpperBoundForBoverA(countA, countB, f); - } - - /** - * Gets the approximate upper bound for B over A based on a 95% confidence interval - * @param sketchA the Tuple sketch A with summary type S - * @param sketchB the Theta sketch B - * @param Summary - * @return the approximate upper bound for B over A - */ - public static double getUpperBoundForBoverA( - final Sketch sketchA, - final org.apache.datasketches.theta.Sketch sketchB) { - final long thetaLongA = sketchA.getThetaLong(); - final long thetaLongB = sketchB.getThetaLong(); - checkThetas(thetaLongA, thetaLongB); - - final int countB = sketchB.getRetainedEntries(true); - final int countA = thetaLongB == thetaLongA - ? 
sketchA.getRetainedEntries() - : sketchA.getCountLessThanThetaLong(thetaLongB); - - if (countA <= 0) { return 1.0; } - final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; - return BoundsOnRatiosInSampledSets.getUpperBoundForBoverA(countA, countB, f); - } - - /** - * Gets the estimate for B over A - * @param sketchA the Tuple sketch A with summary type S - * @param sketchB the Tuple sketch B with summary type S - * @param Summary - * @return the estimate for B over A - */ - public static double getEstimateOfBoverA( - final Sketch sketchA, - final Sketch sketchB) { - final long thetaLongA = sketchA.getThetaLong(); - final long thetaLongB = sketchB.getThetaLong(); - checkThetas(thetaLongA, thetaLongB); - - final int countB = sketchB.getRetainedEntries(); - final int countA = thetaLongB == thetaLongA - ? sketchA.getRetainedEntries() - : sketchA.getCountLessThanThetaLong(thetaLongB); - - if (countA <= 0) { return 0.5; } - - return (double) countB / (double) countA; - } - - /** - * Gets the estimate for B over A - * @param sketchA the Tuple sketch A with summary type S - * @param sketchB the Theta sketch B - * @param Summary - * @return the estimate for B over A - */ - public static double getEstimateOfBoverA( - final Sketch sketchA, - final org.apache.datasketches.theta.Sketch sketchB) { - final long thetaLongA = sketchA.getThetaLong(); - final long thetaLongB = sketchB.getThetaLong(); - checkThetas(thetaLongA, thetaLongB); - - final int countB = sketchB.getRetainedEntries(true); - final int countA = thetaLongB == thetaLongA - ? 
sketchA.getRetainedEntries() - : sketchA.getCountLessThanThetaLong(thetaLongB); - - if (countA <= 0) { return 0.5; } - - return (double) countB / (double) countA; - } - - static void checkThetas(final long thetaLongA, final long thetaLongB) { - if (thetaLongB > thetaLongA) { - throw new SketchesArgumentException("ThetaLongB cannot be > ThetaLongA."); - } - } -} From 7dccb96e6f668b73dbd47b7434cc35de8002a38e Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 27 Jun 2025 16:07:56 -0700 Subject: [PATCH 20/25] preparing for delete of old theta, tuple, thetacommon, fdt and tests. --- .../{thetacommon => common}/QuickSelect.java | 2 +- .../org/apache/datasketches/common/Util.java | 56 ++++ .../apache/datasketches/cpc/BitMatrix.java | 4 +- .../datasketches/cpc/CompressedState.java | 4 +- .../cpc/CompressionCharacterization.java | 4 +- .../apache/datasketches/cpc/CpcSketch.java | 14 +- .../org/apache/datasketches/cpc/CpcUnion.java | 6 +- .../apache/datasketches/fdt2/FdtSketch.java | 166 ++++++++++++ .../org/apache/datasketches/fdt2/Group.java | 140 ++++++++++ .../datasketches/fdt2/PostProcessor.java | 186 ++++++++++++++ .../datasketches/fdt2/package-info.java | 23 ++ .../frequencies/ReversePurgeItemHashMap.java | 2 +- .../frequencies/ReversePurgeLongHashMap.java | 2 +- .../org/apache/datasketches/hash/XxHash.java | 4 +- .../datasketches/hll/BaseHllSketch.java | 18 +- .../apache/datasketches/theta/AnotBimpl.java | 11 +- .../datasketches/theta/CompactSketch.java | 14 +- .../theta/DirectCompactCompressedSketch.java | 4 +- .../theta/DirectCompactSketch.java | 4 +- .../theta/DirectQuickSelectSketch.java | 3 +- .../theta/HeapQuickSelectSketch.java | 2 +- .../datasketches/theta/HeapUpdateSketch.java | 3 +- .../datasketches/theta/IntersectionImpl.java | 9 +- .../datasketches/theta/PreambleUtil.java | 3 +- .../apache/datasketches/theta/Rebuilder.java | 2 +- .../datasketches/theta/SetOperation.java | 8 +- .../theta/SetOperationBuilder.java | 5 +- 
.../datasketches/theta/SingleItemSketch.java | 22 +- .../org/apache/datasketches/theta/Sketch.java | 5 +- .../apache/datasketches/theta/Sketches.java | 8 +- .../apache/datasketches/theta/UnionImpl.java | 8 +- .../datasketches/theta/UpdateSketch.java | 11 +- .../theta/UpdateSketchBuilder.java | 5 +- .../theta/WrappedCompactCompressedSketch.java | 4 +- .../theta/WrappedCompactSketch.java | 4 +- .../apache/datasketches/theta2/AnotBimpl.java | 11 +- .../datasketches/theta2/CompactSketch.java | 14 +- .../theta2/DirectCompactCompressedSketch.java | 4 +- .../theta2/DirectCompactSketch.java | 3 +- .../theta2/DirectQuickSelectSketch.java | 3 +- .../theta2/HeapQuickSelectSketch.java | 2 +- .../datasketches/theta2/HeapUpdateSketch.java | 3 +- .../datasketches/theta2/Intersection.java | 3 +- .../datasketches/theta2/IntersectionImpl.java | 8 +- .../datasketches/theta2/PreambleUtil.java | 3 +- .../apache/datasketches/theta2/Rebuilder.java | 2 +- .../datasketches/theta2/SetOperation.java | 7 +- .../theta2/SetOperationBuilder.java | 5 +- .../datasketches/theta2/SingleItemSketch.java | 22 +- .../apache/datasketches/theta2/Sketch.java | 5 +- .../apache/datasketches/theta2/Sketches.java | 6 +- .../org/apache/datasketches/theta2/Union.java | 6 +- .../apache/datasketches/theta2/UnionImpl.java | 8 +- .../datasketches/theta2/UpdateSketch.java | 11 +- .../theta2/UpdateSketchBuilder.java | 5 +- .../WrappedCompactCompressedSketch.java | 4 +- .../theta2/WrappedCompactSketch.java | 4 +- .../datasketches/thetacommon/ThetaUtil.java | 56 ---- .../thetacommon2/QuickSelect.java | 242 ------------------ .../datasketches/thetacommon2/ThetaUtil.java | 56 ---- .../datasketches/tuple/QuickSelectSketch.java | 2 +- .../org/apache/datasketches/tuple/Union.java | 2 +- .../datasketches/tuple/UpdatableSketch.java | 11 +- .../org/apache/datasketches/tuple/Util.java | 32 --- .../ArrayOfDoublesAnotBImpl.java | 2 +- .../ArrayOfDoublesIntersection.java | 4 +- .../ArrayOfDoublesQuickSelectSketch.java | 2 +- 
.../ArrayOfDoublesSetOperationBuilder.java | 4 +- .../arrayofdoubles/ArrayOfDoublesSketch.java | 6 +- .../ArrayOfDoublesSketches.java | 16 +- .../arrayofdoubles/ArrayOfDoublesUnion.java | 10 +- .../ArrayOfDoublesUpdatableSketch.java | 9 +- .../ArrayOfDoublesUpdatableSketchBuilder.java | 3 +- .../DirectArrayOfDoublesCompactSketch.java | 4 +- ...DirectArrayOfDoublesQuickSelectSketch.java | 4 +- .../HeapArrayOfDoublesCompactSketch.java | 9 +- .../HeapArrayOfDoublesQuickSelectSketch.java | 4 +- .../tuple2/QuickSelectSketch.java | 2 +- .../org/apache/datasketches/tuple2/Union.java | 2 +- .../datasketches/tuple2/UpdatableSketch.java | 11 +- .../ArrayOfDoublesAnotBImpl.java | 4 +- .../ArrayOfDoublesIntersection.java | 7 +- .../ArrayOfDoublesQuickSelectSketch.java | 2 +- .../ArrayOfDoublesSetOperationBuilder.java | 4 +- .../arrayofdoubles/ArrayOfDoublesSketch.java | 6 +- .../ArrayOfDoublesSketches.java | 14 +- .../arrayofdoubles/ArrayOfDoublesUnion.java | 9 +- .../ArrayOfDoublesUpdatableSketch.java | 8 +- .../ArrayOfDoublesUpdatableSketchBuilder.java | 3 +- .../DirectArrayOfDoublesCompactSketch.java | 7 +- ...DirectArrayOfDoublesQuickSelectSketch.java | 4 +- .../HeapArrayOfDoublesCompactSketch.java | 10 +- .../HeapArrayOfDoublesQuickSelectSketch.java | 4 +- .../apache/datasketches/common/UtilTest.java | 8 +- .../datasketches/cpc/CpcSketchTest.java | 8 +- .../datasketches/cpc/PreambleUtilTest.java | 4 +- .../apache/datasketches/cpc/TestAllTest.java | 4 +- .../datasketches/fdt2/FdtSketchTest.java | 184 +++++++++++++ .../apache/datasketches/fdt2/GroupTest.java | 67 +++++ .../datasketches/hash/MurmurHash3Test.java | 21 +- .../datasketches/theta/AnotBimplTest.java | 6 +- .../theta/BackwardConversions.java | 4 +- ...ConcurrentDirectQuickSelectSketchTest.java | 12 +- .../ConcurrentHeapQuickSelectSketchTest.java | 10 +- .../theta/DirectIntersectionTest.java | 16 +- .../theta/DirectQuickSelectSketchTest.java | 24 +- .../datasketches/theta/DirectUnionTest.java | 14 +- 
.../theta/ForwardCompatibilityTest.java | 14 +- .../theta/HeapAlphaSketchTest.java | 23 +- .../theta/HeapIntersectionTest.java | 4 +- .../theta/HeapQuickSelectSketchTest.java | 25 +- .../datasketches/theta/HeapUnionTest.java | 10 +- .../theta/HeapifyWrapSerVer1and2Test.java | 52 ++-- .../datasketches/theta/PreambleUtilTest.java | 4 +- .../theta/SingleItemSketchTest.java | 12 +- .../apache/datasketches/theta/SketchTest.java | 5 +- .../datasketches/theta/SketchesTest.java | 9 +- .../datasketches/theta/UnionImplTest.java | 10 +- .../datasketches/theta/UpdateSketchTest.java | 7 +- .../datasketches/theta2/AnotBimplTest.java | 6 +- .../theta2/BackwardConversions.java | 3 +- ...ConcurrentDirectQuickSelectSketchTest.java | 11 +- .../ConcurrentHeapQuickSelectSketchTest.java | 9 +- .../theta2/DirectIntersectionTest.java | 15 +- .../theta2/DirectQuickSelectSketchTest.java | 24 +- .../datasketches/theta2/DirectUnionTest.java | 14 +- .../theta2/ForwardCompatibilityTest.java | 14 +- .../theta2/HeapAlphaSketchTest.java | 23 +- .../theta2/HeapIntersectionTest.java | 4 +- .../theta2/HeapQuickSelectSketchTest.java | 24 +- .../datasketches/theta2/HeapUnionTest.java | 10 +- .../theta2/HeapifyWrapSerVer1and2Test.java | 52 ++-- .../datasketches/theta2/PreambleUtilTest.java | 4 +- .../theta2/SingleItemSketchTest.java | 12 +- .../datasketches/theta2/SketchTest.java | 4 +- .../datasketches/theta2/SketchesTest.java | 9 +- .../datasketches/theta2/UnionImplTest.java | 9 +- .../datasketches/theta2/UpdateSketchTest.java | 7 +- .../thetacommon/QuickSelectTest.java | 6 +- .../thetacommon2/QuickSelectTest.java | 6 +- .../tuple/SerializerDeserializerTest.java | 2 +- .../CornerCaseTupleSetOperationsTest.java | 4 +- .../ArrayOfDoublesCompactSketchTest.java | 6 +- ...erCaseArrayOfDoublesSetOperationsTest.java | 6 +- ...DirectArrayOfDoublesCompactSketchTest.java | 8 +- .../tuple2/SerializerDeserializerTest.java | 2 +- .../CornerCaseTupleSetOperationsTest.java | 4 +- 
.../ArrayOfDoublesCompactSketchTest.java | 6 +- ...erCaseArrayOfDoublesSetOperationsTest.java | 6 +- ...DirectArrayOfDoublesCompactSketchTest.java | 4 +- 150 files changed, 1404 insertions(+), 947 deletions(-) rename src/main/java/org/apache/datasketches/{thetacommon => common}/QuickSelect.java (99%) create mode 100644 src/main/java/org/apache/datasketches/fdt2/FdtSketch.java create mode 100644 src/main/java/org/apache/datasketches/fdt2/Group.java create mode 100644 src/main/java/org/apache/datasketches/fdt2/PostProcessor.java create mode 100644 src/main/java/org/apache/datasketches/fdt2/package-info.java delete mode 100644 src/main/java/org/apache/datasketches/thetacommon2/QuickSelect.java create mode 100644 src/test/java/org/apache/datasketches/fdt2/FdtSketchTest.java create mode 100644 src/test/java/org/apache/datasketches/fdt2/GroupTest.java diff --git a/src/main/java/org/apache/datasketches/thetacommon/QuickSelect.java b/src/main/java/org/apache/datasketches/common/QuickSelect.java similarity index 99% rename from src/main/java/org/apache/datasketches/thetacommon/QuickSelect.java rename to src/main/java/org/apache/datasketches/common/QuickSelect.java index e48c9b790..aa32af575 100644 --- a/src/main/java/org/apache/datasketches/thetacommon/QuickSelect.java +++ b/src/main/java/org/apache/datasketches/common/QuickSelect.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.datasketches.thetacommon; +package org.apache.datasketches.common; /** * QuickSelect algorithm improved from Sedgewick. 
Gets the kth order value diff --git a/src/main/java/org/apache/datasketches/common/Util.java b/src/main/java/org/apache/datasketches/common/Util.java index 88b7ad7b7..5a993591f 100644 --- a/src/main/java/org/apache/datasketches/common/Util.java +++ b/src/main/java/org/apache/datasketches/common/Util.java @@ -25,6 +25,7 @@ import static java.lang.Math.pow; import static java.lang.Math.round; import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static org.apache.datasketches.hash.MurmurHash3.hash; import java.lang.foreign.MemorySegment; import java.util.Comparator; @@ -68,6 +69,28 @@ public final class Util { */ public static final double LONG_MAX_VALUE_AS_DOUBLE = Long.MAX_VALUE; + /** + * The seed 9001 used in the sketch update methods is a prime number that + * was chosen very early on in experimental testing. Choosing a seed is somewhat arbitrary, and + * the author cannot prove that this particular seed is somehow superior to other seeds. There + * was some early Internet discussion that a seed of 0 did not produce as clean avalanche diagrams + * as non-zero seeds, but this may have been more related to the MurmurHash2 release, which did + * have some issues. As far as the author can determine, MurmurHash3 does not have these problems. + * + *

      In order to perform set operations on two sketches it is critical that the hash + * function and seed are identical for both sketches, otherwise the assumed 1:1 relationship + * between the original source key value and the hashed bit string would be violated. Once + * you have developed a history of stored sketches you are stuck with it. + * + *

      WARNING: This seed is used internally by library sketches in different + * packages and thus must be declared public. However, this seed value must not be used by library + * users with the MurmurHash3 function. It should be viewed as existing for exclusive, private + * use by the library. + * + *

      See Default Update Seed + */ + public static final long DEFAULT_UPDATE_SEED = 9001L; + private Util() {} //Byte Conversions @@ -945,4 +968,37 @@ public static void setBits(final MemorySegment seg, final long offsetBytes, fina seg.set(JAVA_BYTE, offsetBytes, (byte)(b | bitMask)); } + /** + * Computes and checks the 16-bit seed hash from the given long seed. + * The seed hash may not be zero in order to maintain compatibility with older serialized + * versions that did not have this concept. + * @param seed See Update Hash Seed + * @return the seed hash. + */ + public static short computeSeedHash(final long seed) { + final long[] seedArr = {seed}; + final short seedHash = (short)(hash(seedArr, 0L)[0] & 0xFFFFL); + if (seedHash == 0) { + throw new SketchesArgumentException( + "The given seed: " + seed + " produced a seedHash of zero. " + + "You must choose a different seed."); + } + return seedHash; + } + + /** + * Check if the two seed hashes are equal. If not, throw an SketchesArgumentException. + * @param seedHashA the seedHash A + * @param seedHashB the seedHash B + * @return seedHashA if they are equal + */ + public static short checkSeedHashes(final short seedHashA, final short seedHashB) { + if (seedHashA != seedHashB) { + throw new SketchesArgumentException( + "Incompatible Seed Hashes. " + Integer.toHexString(seedHashA & 0XFFFF) + + ", " + Integer.toHexString(seedHashB & 0XFFFF)); + } + return seedHashA; + } + } diff --git a/src/main/java/org/apache/datasketches/cpc/BitMatrix.java b/src/main/java/org/apache/datasketches/cpc/BitMatrix.java index 419640a4f..ecf27b70f 100644 --- a/src/main/java/org/apache/datasketches/cpc/BitMatrix.java +++ b/src/main/java/org/apache/datasketches/cpc/BitMatrix.java @@ -23,7 +23,7 @@ import java.util.Arrays; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.common.Util; /** * Used only in test. 
@@ -38,7 +38,7 @@ class BitMatrix { private boolean numCouponsInvalid; //only used if we allowed merges BitMatrix(final int lgK) { - this(lgK, ThetaUtil.DEFAULT_UPDATE_SEED); + this(lgK, Util.DEFAULT_UPDATE_SEED); } BitMatrix(final int lgK, final long seed) { diff --git a/src/main/java/org/apache/datasketches/cpc/CompressedState.java b/src/main/java/org/apache/datasketches/cpc/CompressedState.java index 6ea6fde4b..b88a71812 100644 --- a/src/main/java/org/apache/datasketches/cpc/CompressedState.java +++ b/src/main/java/org/apache/datasketches/cpc/CompressedState.java @@ -47,9 +47,9 @@ import static org.apache.datasketches.cpc.PreambleUtil.putSparseHybridMerged; import static org.apache.datasketches.cpc.RuntimeAsserts.rtAssert; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; /** * @author Lee Rhodes @@ -82,7 +82,7 @@ private CompressedState(final int lgK, final short seedHash) { } static CompressedState compress(final CpcSketch source) { - final short seedHash = ThetaUtil.computeSeedHash(source.seed); + final short seedHash = Util.computeSeedHash(source.seed); final CompressedState target = new CompressedState(source.lgK, seedHash); target.fiCol = source.fiCol; target.mergeFlag = source.mergeFlag; diff --git a/src/main/java/org/apache/datasketches/cpc/CompressionCharacterization.java b/src/main/java/org/apache/datasketches/cpc/CompressionCharacterization.java index 4f0a93352..f34054d48 100644 --- a/src/main/java/org/apache/datasketches/cpc/CompressionCharacterization.java +++ b/src/main/java/org/apache/datasketches/cpc/CompressionCharacterization.java @@ -31,9 +31,9 @@ import java.io.PrintWriter; import org.apache.datasketches.common.SuppressFBWarnings; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; -import 
org.apache.datasketches.thetacommon.ThetaUtil; /** * This code is used both by unit tests, for short running tests, @@ -222,7 +222,7 @@ private void doTrialsAtLgKAtN(final int lgK, final long n, final int totalTrials for (int trial = 0; trial < trialsPerWave; trial++) { final CompressedState state = compressedStates2[trial]; CpcSketch uncSk = null; - uncSk = CpcSketch.uncompress(state, ThetaUtil.DEFAULT_UPDATE_SEED); + uncSk = CpcSketch.uncompress(state, Util.DEFAULT_UPDATE_SEED); unCompressedSketches[trial] = uncSk; } diff --git a/src/main/java/org/apache/datasketches/cpc/CpcSketch.java b/src/main/java/org/apache/datasketches/cpc/CpcSketch.java index 4ed89dd19..a87fd1448 100644 --- a/src/main/java/org/apache/datasketches/cpc/CpcSketch.java +++ b/src/main/java/org/apache/datasketches/cpc/CpcSketch.java @@ -34,9 +34,9 @@ import java.util.Arrays; import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; /** * This is a unique-counting sketch that implements the @@ -89,7 +89,7 @@ public final class CpcSketch { * Constructor with default log_base2 of k */ public CpcSketch() { - this(DEFAULT_LG_K, ThetaUtil.DEFAULT_UPDATE_SEED); + this(DEFAULT_LG_K, Util.DEFAULT_UPDATE_SEED); } /** @@ -97,7 +97,7 @@ public CpcSketch() { * @param lgK the given log_base2 of k */ public CpcSketch(final int lgK) { - this(lgK, ThetaUtil.DEFAULT_UPDATE_SEED); + this(lgK, Util.DEFAULT_UPDATE_SEED); } /** @@ -233,7 +233,7 @@ public double getUpperBound(final int kappa) { * @return the given Memory as a CpcSketch on the Java heap. 
*/ public static CpcSketch heapify(final Memory mem) { - return heapify(mem, ThetaUtil.DEFAULT_UPDATE_SEED); + return heapify(mem, Util.DEFAULT_UPDATE_SEED); } /** @@ -242,7 +242,7 @@ public static CpcSketch heapify(final Memory mem) { * @return the given byte array as a CpcSketch on the Java heap. */ public static CpcSketch heapify(final byte[] byteArray) { - return heapify(byteArray, ThetaUtil.DEFAULT_UPDATE_SEED); + return heapify(byteArray, Util.DEFAULT_UPDATE_SEED); } /** @@ -662,7 +662,7 @@ else if (col < (sketch.windowOffset + 8)) { // track the 8 bits inside the windo //also used in test static CpcSketch uncompress(final CompressedState source, final long seed) { - ThetaUtil.checkSeedHashes(ThetaUtil.computeSeedHash(seed), source.seedHash); + Util.checkSeedHashes(Util.computeSeedHash(seed), source.seedHash); final CpcSketch sketch = new CpcSketch(source.lgK, seed); sketch.numCoupons = source.numCoupons; sketch.windowOffset = source.getWindowOffset(); @@ -723,7 +723,7 @@ public String toString() { */ public String toString(final boolean detail) { final int numPairs = (pairTable == null) ? 0 : pairTable.getNumPairs(); - final int seedHash = Short.toUnsignedInt(ThetaUtil.computeSeedHash(seed)); + final int seedHash = Short.toUnsignedInt(Util.computeSeedHash(seed)); final double errConst = mergeFlag ? 
log(2) : sqrt(log(2) / 2.0); final double rse = errConst / Math.sqrt(1 << lgK); final StringBuilder sb = new StringBuilder(); diff --git a/src/main/java/org/apache/datasketches/cpc/CpcUnion.java b/src/main/java/org/apache/datasketches/cpc/CpcUnion.java index 4b944c00f..8aeb48bbf 100644 --- a/src/main/java/org/apache/datasketches/cpc/CpcUnion.java +++ b/src/main/java/org/apache/datasketches/cpc/CpcUnion.java @@ -27,7 +27,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.common.Util; /* * The merging logic is somewhat involved, so it will be summarized here. @@ -102,7 +102,7 @@ public class CpcUnion { * Construct this unioning object with the default LgK and the default update seed. */ public CpcUnion() { - this(CpcSketch.DEFAULT_LG_K, ThetaUtil.DEFAULT_UPDATE_SEED); + this(CpcSketch.DEFAULT_LG_K, Util.DEFAULT_UPDATE_SEED); } /** @@ -110,7 +110,7 @@ public CpcUnion() { * @param lgK The given log2 of K. */ public CpcUnion(final int lgK) { - this(lgK, ThetaUtil.DEFAULT_UPDATE_SEED); + this(lgK, Util.DEFAULT_UPDATE_SEED); } /** diff --git a/src/main/java/org/apache/datasketches/fdt2/FdtSketch.java b/src/main/java/org/apache/datasketches/fdt2/FdtSketch.java new file mode 100644 index 000000000..a4ecab83d --- /dev/null +++ b/src/main/java/org/apache/datasketches/fdt2/FdtSketch.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.fdt2; + +import java.lang.foreign.MemorySegment; +import java.util.List; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.thetacommon2.ThetaUtil; +import org.apache.datasketches.tuple2.strings.ArrayOfStringsSketch; + +/** + * A Frequent Distinct Tuples sketch. + * + *

      Suppose our data is a stream of pairs {IP address, User ID} and we want to identify the + * IP addresses that have the most distinct User IDs. Or conversely, we would like to identify + * the User IDs that have the most distinct IP addresses. This is a common challenge in the + * analysis of big data and the FDT sketch helps solve this problem using probabilistic techniques. + * + *

      More generally, given a multiset of tuples with dimensions {d1, d2, d3, ..., dN}, + * and a primary subset of dimensions M < N, our task is to identify the combinations of + * M subset dimensions that have the most frequent number of distinct combinations of + * the N-M non-primary dimensions. + * + *

      Please refer to the web page + * + * https://datasketches.apache.org/docs/Frequency/FrequentDistinctTuplesSketch.html for a more + * complete discussion about this sketch. + * + * @author Lee Rhodes + */ +public final class FdtSketch extends ArrayOfStringsSketch { + + /** + * Create new instance of Frequent Distinct Tuples sketch with the given + * Log-base2 of required nominal entries. + * @param lgK Log-base2 of required nominal entries. + */ + public FdtSketch(final int lgK) { + super(lgK); + } + + /** + * Used by deserialization. + * @param seg the image of a FdtSketch + * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated. + * This capability will be removed in a future release. + * Heapifying a CompactSketch is not deprecated. + */ + @Deprecated + FdtSketch(final MemorySegment seg) { + super(seg); + } + + /** + * Create a new instance of Frequent Distinct Tuples sketch with a size determined by the given + * threshold and rse. + * @param threshold : the fraction, between zero and 1.0, of the total distinct stream length + * that defines a "Frequent" (or heavy) item. + * @param rse the maximum Relative Standard Error for the estimate of the distinct population of a + * reported tuple (selected with a primary key) at the threshold. + */ + public FdtSketch(final double threshold, final double rse) { + super(computeLgK(threshold, rse)); + } + + /** + * Copy Constructor + * @param sketch the sketch to copy + */ + public FdtSketch(final FdtSketch sketch) { + super(sketch); + } + + /** + * @return a deep copy of this sketch + */ + @Override + public FdtSketch copy() { + return new FdtSketch(this); + } + + /** + * Update the sketch with the given string array tuple. + * @param tuple the given string array tuple. 
+ */ + public void update(final String[] tuple) { + super.update(tuple, tuple); + } + + /** + * Returns an ordered List of Groups of the most frequent distinct population of subset tuples + * represented by the count of entries of each group. + * @param priKeyIndices these indices define the dimensions used for the Primary Keys. + * @param limit the maximum number of groups to return. If this value is ≤ 0, all + * groups will be returned. + * @param numStdDev the number of standard deviations for the upper and lower error bounds, + * this value is an integer and must be one of 1, 2, or 3. + * See Number of Standard Deviations + * @param sep the separator character + * @return an ordered List of Groups of the most frequent distinct population of subset tuples + * represented by the count of entries of each group. + */ + public List getResult(final int[] priKeyIndices, final int limit, final int numStdDev, + final char sep) { + final PostProcessor proc = new PostProcessor(this, new Group(), sep); + return proc.getGroupList(priKeyIndices, numStdDev, limit); + } + + /** + * Returns the PostProcessor that enables multiple queries against the sketch results. + * This assumes the default Group and the default separator character '|'. + * @return the PostProcessor + */ + public PostProcessor getPostProcessor() { + return getPostProcessor(new Group(), '|'); + } + + /** + * Returns the PostProcessor that enables multiple queries against the sketch results. + * @param group the Group class to use during post processing. + * @param sep the separator character. + * @return the PostProcessor + */ + public PostProcessor getPostProcessor(final Group group, final char sep) { + return new PostProcessor(this, group, sep); + } + + // Restricted + + /** + * Computes LgK given the threshold and RSE. + * @param threshold the fraction, between zero and 1.0, of the total stream length that defines + * a "Frequent" (or heavy) tuple. 
+ * @param rse the maximum Relative Standard Error for the estimate of the distinct population of a + * reported tuple (selected with a primary key) at the threshold. + * @return LgK + */ + static int computeLgK(final double threshold, final double rse) { + final double v = Math.ceil(1.0 / (threshold * rse * rse)); + final int lgK = (int) Math.ceil(Math.log(v) / Math.log(2)); + if (lgK > ThetaUtil.MAX_LG_NOM_LONGS) { + throw new SketchesArgumentException("Requested Sketch (LgK = " + lgK + " > 2^26), " + + "either increase the threshold, the rse or both."); + } + return lgK; + } + +} diff --git a/src/main/java/org/apache/datasketches/fdt2/Group.java b/src/main/java/org/apache/datasketches/fdt2/Group.java new file mode 100644 index 000000000..7ddded1db --- /dev/null +++ b/src/main/java/org/apache/datasketches/fdt2/Group.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.fdt2; + +/** + * Defines a Group from a Frequent Distinct Tuple query. This class is called internally during + * post processing and is not intended to be called by the user. 
+ * @author Lee Rhodes + */ +public class Group implements Comparable { + private int count = 0; + private double est = 0; + private double ub = 0; + private double lb = 0; + private double fraction = 0; + private double rse = 0; + private String priKey = null; + private final static String fmt = + "%,12d" + "%,15.2f" + "%,15.2f" + "%,15.2f" + "%12.6f" + "%12.6f" + " %s"; + private final static String hfmt = + "%12s" + "%15s" + "%15s" + "%15s" + "%12s" + "%12s" + " %s"; + + /** + * Construct an empty Group + */ + public Group() { } + + /** + * Specifies the parameters to be listed as columns + * @param priKey the primary key of the FDT sketch + * @param count the number of retained rows associated with this group + * @param estimate the estimate of the original population associated with this group + * @param ub the upper bound of the estimate + * @param lb the lower bound of the estimate + * @param fraction the fraction of all retained rows of the sketch associated with this group + * @param rse the estimated Relative Standard Error for this group. 
+ * @return return this + */ + public Group init(final String priKey, final int count, final double estimate, final double ub, + final double lb, final double fraction, final double rse) { + this.count = count; + est = estimate; + this.ub = ub; + this.lb = lb; + this.fraction = fraction; + this.rse = rse; + this.priKey = priKey; + return this; + } + + /** + * Gets the primary key of type String + * @return priKey of type String + */ + public String getPrimaryKey() { return priKey; } + + /** + * Returns the count + * @return the count + */ + public int getCount() { return count; } + + /** + * Returns the estimate + * @return the estimate + */ + public double getEstimate() { return est; } + + /** + * Returns the upper bound + * @return the upper bound + */ + public double getUpperBound() { return ub; } + + /** + * Returns the lower bound + * @return the lower bound + */ + public double getLowerBound() { return lb; } + + /** + * Returns the fraction for this group + * @return the fraction for this group + */ + public double getFraction() { return fraction; } + + /** + * Returns the RSE + * @return the RSE + */ + public double getRse() { return rse; } + + /** + * Returns the descriptive header + * @return the descriptive header + */ + public String getHeader() { + return String.format(hfmt,"Count", "Est", "UB", "LB", "Fraction", "RSE", "PriKey"); + } + + @Override + public String toString() { + return String.format(fmt, count, est, ub, lb, fraction, rse, priKey); + } + + @Override + public int compareTo(final Group that) { + return that.count - count; //decreasing + } + + @Override + public boolean equals(final Object that) { + if (this == that) { return true; } + if (!(that instanceof Group)) { return false; } + return ((Group)that).count == count; + } + + @Override + public int hashCode() { + return Integer.MAX_VALUE - count; //MAX_VALUE is a Double Mersenne Prime = 2^31 - 1 = M_M_5 + } + +} diff --git a/src/main/java/org/apache/datasketches/fdt2/PostProcessor.java 
b/src/main/java/org/apache/datasketches/fdt2/PostProcessor.java new file mode 100644 index 000000000..cde4eaa21 --- /dev/null +++ b/src/main/java/org/apache/datasketches/fdt2/PostProcessor.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.fdt2; + +import static org.apache.datasketches.common.Util.ceilingPowerOf2; +import static org.apache.datasketches.thetacommon2.HashOperations.hashSearchOrInsert; +import static org.apache.datasketches.tuple2.Util.stringHash; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; + +import org.apache.datasketches.tuple2.TupleSketchIterator; +import org.apache.datasketches.tuple2.strings.ArrayOfStringsSummary; + +/** + * This processes the contents of a FDT sketch to extract the + * primary keys with the most frequent unique combinations of the non-primary dimensions. + * The source sketch is not modified. 
+ * + * @author Lee Rhodes + */ +public class PostProcessor { + private final FdtSketch sketch; + private final char sep; + private int groupCount; + @SuppressWarnings("unused") + private Group group; //uninitialized + + //simple hash-map + private boolean mapValid; + private final int mapArrSize; + private final long[] hashArr; + private final String[] priKeyArr; + private final int[] counterArr; + + /** + * Construct with a populated FdtSketch + * @param sketch the given sketch to query. + * @param group the Group + * @param sep the separator character + */ + public PostProcessor(final FdtSketch sketch, final Group group, final char sep) { + Objects.requireNonNull(sketch, "sketch must be non-null"); + Objects.requireNonNull(group, "group must be non-null"); + this.sketch = sketch.copy(); + this.sep = sep; + final int numEntries = sketch.getRetainedEntries(); + mapArrSize = ceilingPowerOf2((int)(numEntries / 0.75)); + hashArr = new long[mapArrSize]; + priKeyArr = new String[mapArrSize]; + counterArr = new int[mapArrSize]; + mapValid = false; + this.group = group; + } + + /** + * Returns the number of groups in the final sketch. + * @return the number of groups in the final sketch. + */ + public int getGroupCount() { + return groupCount; + } + + /** + * Return the most frequent Groups associated with Primary Keys based on the size of the groups. + * @param priKeyIndices the indices of the primary dimensions + * @param numStdDev the number of standard deviations for the error bounds, this value is an + * integer and must be one of 1, 2, or 3. + * See Number of Standard Deviations + * @param limit the maximum number of rows to return. If ≤ 0, all rows will be returned. + * @return the most frequent Groups associated with Primary Keys based on the size of the groups. 
+ */ + public List getGroupList(final int[] priKeyIndices, final int numStdDev, + final int limit) { + //allows subsequent queries with different priKeyIndices without rebuilding the map + if (!mapValid) { populateMap(priKeyIndices); } + return populateList(numStdDev, limit); + } + + /** + * Scan each entry in the sketch. Count the number of duplicate occurrences of each + * primary key in a hash map. + * @param priKeyIndices identifies the primary key indices + */ + private void populateMap(final int[] priKeyIndices) { + final TupleSketchIterator it = sketch.iterator(); + Arrays.fill(hashArr, 0L); + Arrays.fill(priKeyArr, null); + Arrays.fill(counterArr, 0); + groupCount = 0; + final int lgMapArrSize = Integer.numberOfTrailingZeros(mapArrSize); + + while (it.next()) { + //getSummary() is not a copy, but getValue() is + final String[] arr = it.getSummary().getValue(); + final String priKey = getPrimaryKey(arr, priKeyIndices, sep); + final long hash = stringHash(priKey); + final int index = hashSearchOrInsert(hashArr, lgMapArrSize, hash); + if (index < 0) { //was empty, hash inserted + final int idx = -(index + 1); //actual index + counterArr[idx] = 1; + groupCount++; + priKeyArr[idx] = priKey; + } else { //found, duplicate + counterArr[index]++; //increment + } + } + mapValid = true; + } + + /** + * Create the list of groups along with the error statistics + * @param numStdDev number of standard deviations + * @param limit the maximum size of the list to return + * @return the list of groups along with the error statistics + */ + private List populateList(final int numStdDev, final int limit) { + final List list = new ArrayList<>(); + for (int i = 0; i < mapArrSize; i++) { + if (hashArr[i] != 0) { + final String priKey = priKeyArr[i]; + final int count = counterArr[i]; + final double est = sketch.getEstimate(count); + final double ub = sketch.getUpperBound(numStdDev, count); + final double lb = sketch.getLowerBound(numStdDev, count); + final double thresh = 
(double) count / sketch.getRetainedEntries(); + final double rse = (sketch.getUpperBound(1, count) / est) - 1.0; + final Group gp = new Group(); + gp.init(priKey, count, est, ub, lb, thresh, rse); + list.add(gp); + } + } + list.sort(null); //Comparable implemented in Group + final int totLen = list.size(); + + final List returnList; + if ((limit > 0) && (limit < totLen)) { + returnList = list.subList(0, limit); + } else { + returnList = list; + } + return returnList; + } + + /** + * Extract simple string Primary Key defined by the priKeyIndices from the given tuple. + * @param tuple the given tuple containing the Primary Key + * @param priKeyIndices the indices indicating the ordering and selection of dimensions defining + * the Primary Key + * @param sep the separator character + * @return a simple string Primary Key defined by the priKeyIndices from the given tuple. + */ + //also used by test + private static String getPrimaryKey(final String[] tuple, final int[] priKeyIndices, + final char sep) { + assert priKeyIndices.length < tuple.length; + final StringBuilder sb = new StringBuilder(); + final int keys = priKeyIndices.length; + for (int i = 0; i < keys; i++) { + final int idx = priKeyIndices[i]; + sb.append(tuple[idx]); + if ((i + 1) < keys) { sb.append(sep); } + } + return sb.toString(); + } + +} diff --git a/src/main/java/org/apache/datasketches/fdt2/package-info.java b/src/main/java/org/apache/datasketches/fdt2/package-info.java new file mode 100644 index 000000000..85a9d7028 --- /dev/null +++ b/src/main/java/org/apache/datasketches/fdt2/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/** + * Frequent Distinct Tuples Sketch + */ +package org.apache.datasketches.fdt2; diff --git a/src/main/java/org/apache/datasketches/frequencies/ReversePurgeItemHashMap.java b/src/main/java/org/apache/datasketches/frequencies/ReversePurgeItemHashMap.java index 355724e2e..67e946b01 100644 --- a/src/main/java/org/apache/datasketches/frequencies/ReversePurgeItemHashMap.java +++ b/src/main/java/org/apache/datasketches/frequencies/ReversePurgeItemHashMap.java @@ -25,7 +25,7 @@ import java.lang.reflect.Array; -import org.apache.datasketches.thetacommon.QuickSelect; +import org.apache.datasketches.common.QuickSelect; /** * Implements a linear-probing based hash map of (key, value) pairs and is distinguished by a diff --git a/src/main/java/org/apache/datasketches/frequencies/ReversePurgeLongHashMap.java b/src/main/java/org/apache/datasketches/frequencies/ReversePurgeLongHashMap.java index dd33589d4..358f85bce 100644 --- a/src/main/java/org/apache/datasketches/frequencies/ReversePurgeLongHashMap.java +++ b/src/main/java/org/apache/datasketches/frequencies/ReversePurgeLongHashMap.java @@ -24,8 +24,8 @@ import static org.apache.datasketches.common.Util.INVERSE_GOLDEN; import static org.apache.datasketches.frequencies.Util.hash; +import org.apache.datasketches.common.QuickSelect; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.QuickSelect; /** * Implements a linear-probing based hash map of (key, value) pairs and is distinguished by a diff --git 
a/src/main/java/org/apache/datasketches/hash/XxHash.java b/src/main/java/org/apache/datasketches/hash/XxHash.java index e0d6947d5..2185471a8 100644 --- a/src/main/java/org/apache/datasketches/hash/XxHash.java +++ b/src/main/java/org/apache/datasketches/hash/XxHash.java @@ -28,6 +28,8 @@ import static org.apache.datasketches.memory.internal.XxHash64.hashLongs; import static org.apache.datasketches.memory.internal.XxHash64.hashShorts; +import org.apache.datasketches.memory.internal.XxHash64; + /** * The XxHash is a fast, non-cryptographic, 64-bit hash function that has * excellent avalanche and 2-way bit independence properties. @@ -196,7 +198,7 @@ public static long hashString( final int offsetChars, final int lengthChars, final long seed) { - return org.apache.datasketches.memory.internal.XxHash64.hashString(str, offsetChars, lengthChars, seed); + return XxHash64.hashString(str, offsetChars, lengthChars, seed); } } diff --git a/src/main/java/org/apache/datasketches/hll/BaseHllSketch.java b/src/main/java/org/apache/datasketches/hll/BaseHllSketch.java index c602948a8..ad21fc5ae 100644 --- a/src/main/java/org/apache/datasketches/hll/BaseHllSketch.java +++ b/src/main/java/org/apache/datasketches/hll/BaseHllSketch.java @@ -28,8 +28,8 @@ import java.nio.ByteBuffer; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.thetacommon.ThetaUtil; /** * Although this class is package-private, it provides a single place to define and document @@ -299,7 +299,7 @@ public abstract String toString(boolean summary, boolean detail, boolean auxDeta */ public void update(final long datum) { final long[] data = { datum }; - couponUpdate(coupon(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED))); + couponUpdate(coupon(hash(data, Util.DEFAULT_UPDATE_SEED))); } /** @@ -314,7 +314,7 @@ public void update(final long datum) { public void update(final double datum) { final double d = (datum == 0.0) ? 
0.0 : datum; // canonicalize -0.0, 0.0 final long[] data = { Double.doubleToLongBits(d) };// canonicalize all NaN & +/- infinity forms - couponUpdate(coupon(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED))); + couponUpdate(coupon(hash(data, Util.DEFAULT_UPDATE_SEED))); } /** @@ -334,7 +334,7 @@ public void update(final double datum) { public void update(final String datum) { if ((datum == null) || datum.isEmpty()) { return; } final byte[] data = datum.getBytes(UTF_8); - couponUpdate(coupon(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED))); + couponUpdate(coupon(hash(data, Util.DEFAULT_UPDATE_SEED))); } /** @@ -351,7 +351,7 @@ public void update(final String datum) { */ public void update(final ByteBuffer data) { if ((data == null) || (data.remaining() == 0)) { return; } - couponUpdate(coupon(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED))); + couponUpdate(coupon(hash(data, Util.DEFAULT_UPDATE_SEED))); } /** @@ -362,7 +362,7 @@ public void update(final ByteBuffer data) { */ public void update(final byte[] data) { if ((data == null) || (data.length == 0)) { return; } - couponUpdate(coupon(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED))); + couponUpdate(coupon(hash(data, Util.DEFAULT_UPDATE_SEED))); } /** @@ -376,7 +376,7 @@ public void update(final byte[] data) { */ public void update(final char[] data) { if ((data == null) || (data.length == 0)) { return; } - couponUpdate(coupon(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED))); + couponUpdate(coupon(hash(data, Util.DEFAULT_UPDATE_SEED))); } /** @@ -387,7 +387,7 @@ public void update(final char[] data) { */ public void update(final int[] data) { if ((data == null) || (data.length == 0)) { return; } - couponUpdate(coupon(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED))); + couponUpdate(coupon(hash(data, Util.DEFAULT_UPDATE_SEED))); } /** @@ -398,7 +398,7 @@ public void update(final int[] data) { */ public void update(final long[] data) { if ((data == null) || (data.length == 0)) { return; } - couponUpdate(coupon(hash(data, 
ThetaUtil.DEFAULT_UPDATE_SEED))); + couponUpdate(coupon(hash(data, Util.DEFAULT_UPDATE_SEED))); } private static final int coupon(final long[] hash) { diff --git a/src/main/java/org/apache/datasketches/theta/AnotBimpl.java b/src/main/java/org/apache/datasketches/theta/AnotBimpl.java index cc076fd85..d3075bb13 100644 --- a/src/main/java/org/apache/datasketches/theta/AnotBimpl.java +++ b/src/main/java/org/apache/datasketches/theta/AnotBimpl.java @@ -29,6 +29,7 @@ import java.util.Arrays; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.ThetaUtil; @@ -50,7 +51,7 @@ final class AnotBimpl extends AnotB { * @param seed See seed */ AnotBimpl(final long seed) { - this(ThetaUtil.computeSeedHash(seed)); + this(Util.computeSeedHash(seed)); } /** @@ -74,7 +75,7 @@ public void setA(final Sketch skA) { return; } //skA is not empty - ThetaUtil.checkSeedHashes(seedHash_, skA.getSeedHash()); + Util.checkSeedHashes(seedHash_, skA.getSeedHash()); //process A hashArr_ = getHashArrA(skA); @@ -87,7 +88,7 @@ public void setA(final Sketch skA) { public void notB(final Sketch skB) { if (empty_ || skB == null || skB.isEmpty()) { return; } //local and skB is not empty - ThetaUtil.checkSeedHashes(seedHash_, skB.getSeedHash()); + Util.checkSeedHashes(seedHash_, skB.getSeedHash()); thetaLong_ = Math.min(thetaLong_, skB.getThetaLong()); @@ -123,12 +124,12 @@ public CompactSketch aNotB(final Sketch skA, final Sketch skB, final boolean dst if (skA.isEmpty()) { return skA.compact(dstOrdered, dstMem); } //A is not Empty - ThetaUtil.checkSeedHashes(skA.getSeedHash(), seedHash_); + Util.checkSeedHashes(skA.getSeedHash(), seedHash_); if (skB.isEmpty()) { return skA.compact(dstOrdered, dstMem); } - ThetaUtil.checkSeedHashes(skB.getSeedHash(), seedHash_); + Util.checkSeedHashes(skB.getSeedHash(), seedHash_); //Both skA & skB are not empty //process 
A diff --git a/src/main/java/org/apache/datasketches/theta/CompactSketch.java b/src/main/java/org/apache/datasketches/theta/CompactSketch.java index 630b2e135..4079f22f2 100644 --- a/src/main/java/org/apache/datasketches/theta/CompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta/CompactSketch.java @@ -41,9 +41,9 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; /** * The parent class of all the CompactSketches. CompactSketches are never created directly. @@ -76,7 +76,7 @@ public abstract class CompactSketch extends Sketch { * @return a CompactSketch on the heap. */ public static CompactSketch heapify(final Memory srcMem) { - return heapify(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED, false); + return heapify(srcMem, Util.DEFAULT_UPDATE_SEED, false); } /** @@ -117,7 +117,7 @@ private static CompactSketch heapify(final Memory srcMem, final long seed, final return CompactOperations.memoryToCompact(srcMem, srcOrdered, null); } //not SerVer 3, assume compact stored form - final short seedHash = ThetaUtil.computeSeedHash(seed); + final short seedHash = Util.computeSeedHash(seed); if (serVer == 1) { return ForwardCompatibility.heapify1to3(srcMem, seedHash); } @@ -151,7 +151,7 @@ private static CompactSketch heapify(final Memory srcMem, final long seed, final * @return a CompactSketch backed by the given Memory except as above. 
*/ public static CompactSketch wrap(final Memory srcMem) { - return wrap(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED, false); + return wrap(srcMem, Util.DEFAULT_UPDATE_SEED, false); } /** @@ -189,7 +189,7 @@ private static CompactSketch wrap(final Memory srcMem, final long seed, final bo if (family != Family.COMPACT) { throw new IllegalArgumentException("Corrupted: " + family + " is not Compact!"); } - final short seedHash = ThetaUtil.computeSeedHash(seed); + final short seedHash = Util.computeSeedHash(seed); if (serVer == 4) { return DirectCompactCompressedSketch.wrapInstance(srcMem, @@ -252,7 +252,7 @@ else if (serVer == 2) { * @return a CompactSketch backed by the given Memory except as above. */ public static CompactSketch wrap(final byte[] bytes) { - return wrap(bytes, ThetaUtil.DEFAULT_UPDATE_SEED, false); + return wrap(bytes, Util.DEFAULT_UPDATE_SEED, false); } /** @@ -290,7 +290,7 @@ private static CompactSketch wrap(final byte[] bytes, final long seed, final boo if (family != Family.COMPACT) { throw new IllegalArgumentException("Corrupted: " + family + " is not Compact!"); } - final short seedHash = ThetaUtil.computeSeedHash(seed); + final short seedHash = Util.computeSeedHash(seed); if (serVer == 4) { return WrappedCompactCompressedSketch.wrapInstance(bytes, seedHash); } else if (serVer == 3) { diff --git a/src/main/java/org/apache/datasketches/theta/DirectCompactCompressedSketch.java b/src/main/java/org/apache/datasketches/theta/DirectCompactCompressedSketch.java index 64c0fafd4..60c38afb2 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectCompactCompressedSketch.java +++ b/src/main/java/org/apache/datasketches/theta/DirectCompactCompressedSketch.java @@ -26,9 +26,9 @@ import static org.apache.datasketches.theta.PreambleUtil.extractThetaLongV4; import static org.apache.datasketches.theta.PreambleUtil.wholeBytesToHoldBits; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import 
org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; /** * An off-heap (Direct), compact, compressed, read-only sketch. It is not empty, not a single item and ordered. @@ -57,7 +57,7 @@ class DirectCompactCompressedSketch extends DirectCompactSketch { * @return this sketch */ static DirectCompactCompressedSketch wrapInstance(final Memory srcMem, final short seedHash) { - ThetaUtil.checkSeedHashes((short) extractSeedHash(srcMem), seedHash); + Util.checkSeedHashes((short) extractSeedHash(srcMem), seedHash); return new DirectCompactCompressedSketch(srcMem); } diff --git a/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java b/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java index 15b03311b..e2f3efc86 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java @@ -29,9 +29,9 @@ import static org.apache.datasketches.theta.PreambleUtil.extractThetaLong; import static org.apache.datasketches.theta.SingleItemSketch.otherCheckForSingleItem; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; /** * An off-heap (Direct), compact, read-only sketch. 
The internal hash array can be either ordered @@ -65,7 +65,7 @@ class DirectCompactSketch extends CompactSketch { * @return this sketch */ static DirectCompactSketch wrapInstance(final Memory srcMem, final short seedHash) { - ThetaUtil.checkSeedHashes((short) extractSeedHash(srcMem), seedHash); + Util.checkSeedHashes((short) extractSeedHash(srcMem), seedHash); return new DirectCompactSketch(srcMem); } diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java index af073a5ee..cd18a093d 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java @@ -56,6 +56,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; @@ -156,7 +157,7 @@ private DirectQuickSelectSketch( insertLgArrLongs(dstMem, lgArrLongs); //byte 4 //flags: bigEndian = readOnly = compact = ordered = false; empty = true : 00100 = 4 insertFlags(dstMem, EMPTY_FLAG_MASK); //byte 5 - insertSeedHash(dstMem, ThetaUtil.computeSeedHash(seed)); //bytes 6,7 + insertSeedHash(dstMem, Util.computeSeedHash(seed)); //bytes 6,7 insertCurCount(dstMem, 0); //bytes 8-11 insertP(dstMem, p); //bytes 12-15 final long thetaLong = (long)(p * LONG_MAX_VALUE_AS_DOUBLE); diff --git a/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java index b9d4dc9e1..cf7249ed8 100644 --- a/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java @@ -21,6 
+21,7 @@ import static java.lang.Math.max; import static java.lang.Math.min; +import static org.apache.datasketches.common.QuickSelect.selectExcludingZeros; import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; import static org.apache.datasketches.theta.PreambleUtil.extractCurCount; import static org.apache.datasketches.theta.PreambleUtil.extractFamilyID; @@ -35,7 +36,6 @@ import static org.apache.datasketches.theta.UpdateReturnState.InsertedCountIncrementedResized; import static org.apache.datasketches.theta.UpdateReturnState.RejectedDuplicate; import static org.apache.datasketches.theta.UpdateReturnState.RejectedOverTheta; -import static org.apache.datasketches.thetacommon.QuickSelect.selectExcludingZeros; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; diff --git a/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java b/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java index 49734a9e8..b17af35db 100644 --- a/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java +++ b/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java @@ -36,6 +36,7 @@ import static org.apache.datasketches.theta.PreambleUtil.insertThetaLong; import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.ThetaUtil; @@ -92,7 +93,7 @@ long getSeed() { @Override short getSeedHash() { - return ThetaUtil.computeSeedHash(getSeed()); + return Util.computeSeedHash(getSeed()); } //Used by HeapAlphaSketch and HeapQuickSelectSketch / Theta UpdateSketch diff --git a/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java b/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java index 772480fea..ce63bd03b 100644 --- a/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java +++ 
b/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java @@ -57,6 +57,7 @@ import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesReadOnlyException; import org.apache.datasketches.common.SketchesStateException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.ThetaUtil; @@ -100,17 +101,17 @@ protected IntersectionImpl(final WritableMemory wmem, final long seed, final boo if (dstMemFlag) { //DstMem: compute & store seedHash, no seedhash checking checkMinSizeMemory(wmem); maxLgArrLongs_ = !readOnly ? getMaxLgArrLongs(wmem) : 0; //Only Off Heap - seedHash_ = ThetaUtil.computeSeedHash(seed); + seedHash_ = Util.computeSeedHash(seed); wmem_.putShort(SEED_HASH_SHORT, seedHash_); } else { //SrcMem:gets and stores the seedHash, checks mem_seedHash against the seed seedHash_ = wmem_.getShort(SEED_HASH_SHORT); - ThetaUtil.checkSeedHashes(seedHash_, ThetaUtil.computeSeedHash(seed)); //check for seed hash conflict + Util.checkSeedHashes(seedHash_, Util.computeSeedHash(seed)); //check for seed hash conflict maxLgArrLongs_ = 0; } } else { //compute & store seedHash wmem_ = null; maxLgArrLongs_ = 0; - seedHash_ = ThetaUtil.computeSeedHash(seed); + seedHash_ = Util.computeSeedHash(seed); } } @@ -237,7 +238,7 @@ public void intersect(final Sketch sketchIn) { resetToEmpty(); return; } - ThetaUtil.checkSeedHashes(seedHash_, sketchIn.getSeedHash()); + Util.checkSeedHashes(seedHash_, sketchIn.getSeedHash()); //Set minTheta thetaLong_ = min(thetaLong_, sketchIn.getThetaLong()); //Theta rule empty_ = false; diff --git a/src/main/java/org/apache/datasketches/theta/PreambleUtil.java b/src/main/java/org/apache/datasketches/theta/PreambleUtil.java index ec0bc1268..de8d8dca9 100644 --- a/src/main/java/org/apache/datasketches/theta/PreambleUtil.java +++ 
b/src/main/java/org/apache/datasketches/theta/PreambleUtil.java @@ -30,7 +30,6 @@ import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; //@formatter:off @@ -514,7 +513,7 @@ static int getAndCheckPreLongs(final Memory mem) { static final short checkMemorySeedHash(final Memory mem, final long seed) { final short seedHashMem = (short) extractSeedHash(mem); - ThetaUtil.checkSeedHashes(seedHashMem, ThetaUtil.computeSeedHash(seed)); //throws if bad seedHash + Util.checkSeedHashes(seedHashMem, Util.computeSeedHash(seed)); //throws if bad seedHash return seedHashMem; } diff --git a/src/main/java/org/apache/datasketches/theta/Rebuilder.java b/src/main/java/org/apache/datasketches/theta/Rebuilder.java index b6e3de342..c2e78edab 100644 --- a/src/main/java/org/apache/datasketches/theta/Rebuilder.java +++ b/src/main/java/org/apache/datasketches/theta/Rebuilder.java @@ -19,6 +19,7 @@ package org.apache.datasketches.theta; +import static org.apache.datasketches.common.QuickSelect.selectExcludingZeros; import static org.apache.datasketches.theta.PreambleUtil.LG_ARR_LONGS_BYTE; import static org.apache.datasketches.theta.PreambleUtil.extractCurCount; import static org.apache.datasketches.theta.PreambleUtil.extractLgArrLongs; @@ -26,7 +27,6 @@ import static org.apache.datasketches.theta.PreambleUtil.insertCurCount; import static org.apache.datasketches.theta.PreambleUtil.insertLgArrLongs; import static org.apache.datasketches.theta.PreambleUtil.insertThetaLong; -import static org.apache.datasketches.thetacommon.QuickSelect.selectExcludingZeros; import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; diff --git a/src/main/java/org/apache/datasketches/theta/SetOperation.java b/src/main/java/org/apache/datasketches/theta/SetOperation.java index c198dceb7..54797dc88 100644 --- 
a/src/main/java/org/apache/datasketches/theta/SetOperation.java +++ b/src/main/java/org/apache/datasketches/theta/SetOperation.java @@ -27,9 +27,9 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.MemoryStatus; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; /** * The parent API for all Set Operations @@ -67,7 +67,7 @@ public static final SetOperationBuilder builder() { * @return a Heap-based SetOperation from the given Memory */ public static SetOperation heapify(final Memory srcMem) { - return heapify(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + return heapify(srcMem, Util.DEFAULT_UPDATE_SEED); } /** @@ -115,7 +115,7 @@ public static SetOperation heapify(final Memory srcMem, final long expectedSeed) * @return a SetOperation backed by the given Memory */ public static SetOperation wrap(final Memory srcMem) { - return wrap(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrap(srcMem, Util.DEFAULT_UPDATE_SEED); } /** @@ -164,7 +164,7 @@ public static SetOperation wrap(final Memory srcMem, final long expectedSeed) { * @return a SetOperation backed by the given Memory */ public static SetOperation wrap(final WritableMemory srcMem) { - return wrap(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrap(srcMem, Util.DEFAULT_UPDATE_SEED); } /** diff --git a/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java b/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java index 4a35cf67d..a978e3c7b 100644 --- a/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java +++ b/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java @@ -26,6 +26,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import 
org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; @@ -48,7 +49,7 @@ public class SetOperationBuilder { *

        *
      • Max Nominal Entries (max K): * {@value org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_NOMINAL_ENTRIES}
      • - *
      • Seed: {@value org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_UPDATE_SEED}
      • + *
      • Seed: {@value org.apache.datasketches.common.Util#DEFAULT_UPDATE_SEED}
      • *
      • {@link ResizeFactor#X8}
      • *
      • Input Sampling Probability: 1.0
      • *
      • Memory: null
      • @@ -56,7 +57,7 @@ public class SetOperationBuilder { */ public SetOperationBuilder() { bLgNomLongs = Integer.numberOfTrailingZeros(ThetaUtil.DEFAULT_NOMINAL_ENTRIES); - bSeed = ThetaUtil.DEFAULT_UPDATE_SEED; + bSeed = Util.DEFAULT_UPDATE_SEED; bP = (float) 1.0; bRF = ResizeFactor.X8; bMemReqSvr = new DefaultMemoryRequestServer(); diff --git a/src/main/java/org/apache/datasketches/theta/SingleItemSketch.java b/src/main/java/org/apache/datasketches/theta/SingleItemSketch.java index 3cfc13b1e..5bef18392 100644 --- a/src/main/java/org/apache/datasketches/theta/SingleItemSketch.java +++ b/src/main/java/org/apache/datasketches/theta/SingleItemSketch.java @@ -31,9 +31,9 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; /** * A CompactSketch that holds only one item hash. @@ -41,7 +41,7 @@ * @author Lee Rhodes */ final class SingleItemSketch extends CompactSketch { - private static final long DEFAULT_SEED_HASH = ThetaUtil.computeSeedHash(ThetaUtil.DEFAULT_UPDATE_SEED) & 0xFFFFL; + private static final long DEFAULT_SEED_HASH = Util.computeSeedHash(Util.DEFAULT_UPDATE_SEED) & 0xFFFFL; // For backward compatibility, a candidate pre0_ long must have: // Flags (byte 5): Ordered, Compact, NOT Empty, Read Only, LittleEndian = 11010 = 0x1A. 
@@ -62,7 +62,7 @@ private SingleItemSketch(final long hash) { //All checking & hashing has been done, given the relevant seed SingleItemSketch(final long hash, final long seed) { - final long seedHash = ThetaUtil.computeSeedHash(seed) & 0xFFFFL; + final long seedHash = Util.computeSeedHash(seed) & 0xFFFFL; pre0_ = (seedHash << 48) | PRE0_LO6_SI; hash_ = hash; } @@ -82,7 +82,7 @@ private SingleItemSketch(final long hash) { * @return a SingleItemSketch */ //does not override Sketch static SingleItemSketch heapify(final Memory srcMem, final short expectedSeedHash) { - ThetaUtil.checkSeedHashes((short) extractSeedHash(srcMem), expectedSeedHash); + Util.checkSeedHashes((short) extractSeedHash(srcMem), expectedSeedHash); final boolean singleItem = otherCheckForSingleItem(srcMem); if (singleItem) { return new SingleItemSketch(srcMem.getLong(8), expectedSeedHash); } throw new SketchesArgumentException("Input Memory is not a SingleItemSketch."); @@ -108,7 +108,7 @@ public CompactSketch compact(final boolean dstOrdered, final WritableMemory dstM */ static SingleItemSketch create(final long datum) { final long[] data = { datum }; - return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1); + return new SingleItemSketch(hash(data, Util.DEFAULT_UPDATE_SEED)[0] >>> 1); } /** @@ -124,7 +124,7 @@ static SingleItemSketch create(final long datum) { static SingleItemSketch create(final double datum) { final double d = (datum == 0.0) ? 
0.0 : datum; // canonicalize -0.0, 0.0 final long[] data = { Double.doubleToLongBits(d) };// canonicalize all NaN forms - return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1); + return new SingleItemSketch(hash(data, Util.DEFAULT_UPDATE_SEED)[0] >>> 1); } /** @@ -142,7 +142,7 @@ static SingleItemSketch create(final double datum) { static SingleItemSketch create(final String datum) { if ((datum == null) || datum.isEmpty()) { return null; } final byte[] data = datum.getBytes(UTF_8); - return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1); + return new SingleItemSketch(hash(data, Util.DEFAULT_UPDATE_SEED)[0] >>> 1); } /** @@ -154,7 +154,7 @@ static SingleItemSketch create(final String datum) { */ static SingleItemSketch create(final byte[] data) { if ((data == null) || (data.length == 0)) { return null; } - return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1); + return new SingleItemSketch(hash(data, Util.DEFAULT_UPDATE_SEED)[0] >>> 1); } /** @@ -169,7 +169,7 @@ static SingleItemSketch create(final byte[] data) { */ static SingleItemSketch create(final char[] data) { if ((data == null) || (data.length == 0)) { return null; } - return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1); + return new SingleItemSketch(hash(data, Util.DEFAULT_UPDATE_SEED)[0] >>> 1); } /** @@ -181,7 +181,7 @@ static SingleItemSketch create(final char[] data) { */ static SingleItemSketch create(final int[] data) { if ((data == null) || (data.length == 0)) { return null; } - return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1); + return new SingleItemSketch(hash(data, Util.DEFAULT_UPDATE_SEED)[0] >>> 1); } /** @@ -193,7 +193,7 @@ static SingleItemSketch create(final int[] data) { */ static SingleItemSketch create(final long[] data) { if ((data == null) || (data.length == 0)) { return null; } - return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] 
>>> 1); + return new SingleItemSketch(hash(data, Util.DEFAULT_UPDATE_SEED)[0] >>> 1); } //Updates with a user specified seed diff --git a/src/main/java/org/apache/datasketches/theta/Sketch.java b/src/main/java/org/apache/datasketches/theta/Sketch.java index cb202a189..8370b295c 100644 --- a/src/main/java/org/apache/datasketches/theta/Sketch.java +++ b/src/main/java/org/apache/datasketches/theta/Sketch.java @@ -34,6 +34,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.MemoryStatus; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.BinomialBoundsN; @@ -74,7 +75,7 @@ public static Sketch heapify(final Memory srcMem) { if (family == Family.COMPACT) { return CompactSketch.heapify(srcMem); } - return heapifyUpdateFromMemory(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + return heapifyUpdateFromMemory(srcMem, Util.DEFAULT_UPDATE_SEED); } /** @@ -134,7 +135,7 @@ public static Sketch wrap(final Memory srcMem) { final Family family = Family.idToFamily(familyID); if (family == Family.QUICKSELECT) { if (serVer == 3 && preLongs == 3) { - return DirectQuickSelectSketchR.readOnlyWrap(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + return DirectQuickSelectSketchR.readOnlyWrap(srcMem, Util.DEFAULT_UPDATE_SEED); } else { throw new SketchesArgumentException( "Corrupted: " + family + " family image: must have SerVer = 3 and preLongs = 3"); diff --git a/src/main/java/org/apache/datasketches/theta/Sketches.java b/src/main/java/org/apache/datasketches/theta/Sketches.java index 2e7fa0915..359a3eb87 100644 --- a/src/main/java/org/apache/datasketches/theta/Sketches.java +++ b/src/main/java/org/apache/datasketches/theta/Sketches.java @@ -29,9 +29,9 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; 
+import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; /** * This class brings together the common sketch and set operation creation methods and @@ -302,7 +302,7 @@ public static Intersection wrapIntersection(final WritableMemory srcMem) { * @return {@link SetOperation SetOperation} */ public static SetOperation wrapSetOperation(final Memory srcMem) { - return wrapSetOperation(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrapSetOperation(srcMem, Util.DEFAULT_UPDATE_SEED); } /** @@ -324,7 +324,7 @@ public static SetOperation wrapSetOperation(final Memory srcMem, final long expe * @return {@link SetOperation SetOperation} */ public static SetOperation wrapSetOperation(final WritableMemory srcMem) { - return wrapSetOperation(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrapSetOperation(srcMem, Util.DEFAULT_UPDATE_SEED); } /** @@ -384,7 +384,7 @@ public static Union wrapUnion(final WritableMemory srcMem) { * @return {@link UpdateSketch UpdateSketch} */ public static UpdateSketch wrapUpdateSketch(final WritableMemory srcMem) { - return wrapUpdateSketch(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrapUpdateSketch(srcMem, Util.DEFAULT_UPDATE_SEED); } /** diff --git a/src/main/java/org/apache/datasketches/theta/UnionImpl.java b/src/main/java/org/apache/datasketches/theta/UnionImpl.java index 8c5b2f8f0..5cbae1dd3 100644 --- a/src/main/java/org/apache/datasketches/theta/UnionImpl.java +++ b/src/main/java/org/apache/datasketches/theta/UnionImpl.java @@ -20,22 +20,22 @@ package org.apache.datasketches.theta; import static java.lang.Math.min; +import static org.apache.datasketches.common.QuickSelect.selectExcludingZeros; import static org.apache.datasketches.theta.PreambleUtil.UNION_THETA_LONG; import static org.apache.datasketches.theta.PreambleUtil.clearEmpty; import static 
org.apache.datasketches.theta.PreambleUtil.extractFamilyID; import static org.apache.datasketches.theta.PreambleUtil.extractUnionThetaLong; import static org.apache.datasketches.theta.PreambleUtil.insertUnionThetaLong; -import static org.apache.datasketches.thetacommon.QuickSelect.selectExcludingZeros; import java.nio.ByteBuffer; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.HashOperations; -import org.apache.datasketches.thetacommon.ThetaUtil; /** * Shared code for the HeapUnion and DirectUnion implementations. @@ -61,7 +61,7 @@ final class UnionImpl extends Union { private UnionImpl(final UpdateSketch gadget, final long seed) { gadget_ = gadget; - expectedSeedHash_ = ThetaUtil.computeSeedHash(seed); + expectedSeedHash_ = Util.computeSeedHash(seed); } /** @@ -307,7 +307,7 @@ public void union(final Sketch sketchIn) { return; } //sketchIn is valid and not empty - ThetaUtil.checkSeedHashes(expectedSeedHash_, sketchIn.getSeedHash()); + Util.checkSeedHashes(expectedSeedHash_, sketchIn.getSeedHash()); if (sketchIn instanceof SingleItemSketch) { gadget_.hashUpdate(sketchIn.getCache()[0]); return; diff --git a/src/main/java/org/apache/datasketches/theta/UpdateSketch.java b/src/main/java/org/apache/datasketches/theta/UpdateSketch.java index cb6854b02..011e4db9c 100644 --- a/src/main/java/org/apache/datasketches/theta/UpdateSketch.java +++ b/src/main/java/org/apache/datasketches/theta/UpdateSketch.java @@ -48,6 +48,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import 
org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.ThetaUtil; @@ -67,7 +68,7 @@ public abstract class UpdateSketch extends Sketch { * Wrap takes the sketch image in Memory and refers to it directly. There is no data copying onto * the java heap. Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have * been explicitly stored as direct objects can be wrapped. This method assumes the - * {@link org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_UPDATE_SEED}. + * {@link org.apache.datasketches.common.Util#DEFAULT_UPDATE_SEED}. * Default Update Seed. * @param srcMem an image of a Sketch where the image seed hash matches the default seed hash. * It must have a size of at least 24 bytes. @@ -75,7 +76,7 @@ public abstract class UpdateSketch extends Sketch { * @return a Sketch backed by the given Memory */ public static UpdateSketch wrap(final WritableMemory srcMem) { - return wrap(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrap(srcMem, Util.DEFAULT_UPDATE_SEED); } /** @@ -113,13 +114,13 @@ public static UpdateSketch wrap(final WritableMemory srcMem, final long expected /** * Instantiates an on-heap UpdateSketch from Memory. This method assumes the - * {@link org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_UPDATE_SEED}. + * {@link org.apache.datasketches.common.Util#DEFAULT_UPDATE_SEED}. * @param srcMem See Memory * It must have a size of at least 24 bytes. 
* @return an UpdateSketch */ public static UpdateSketch heapify(final Memory srcMem) { - return heapify(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + return heapify(srcMem, Util.DEFAULT_UPDATE_SEED); } /** @@ -428,7 +429,7 @@ static void checkMemIntegrity(final Memory srcMem, final long expectedSeed, fina //Check seed hashes final short seedHash = checkMemorySeedHash(srcMem, expectedSeed); //byte 6,7 - ThetaUtil.checkSeedHashes(seedHash, ThetaUtil.computeSeedHash(expectedSeed)); + Util.checkSeedHashes(seedHash, Util.computeSeedHash(expectedSeed)); //Check mem capacity, lgArrLongs final long curCapBytes = srcMem.getCapacity(); diff --git a/src/main/java/org/apache/datasketches/theta/UpdateSketchBuilder.java b/src/main/java/org/apache/datasketches/theta/UpdateSketchBuilder.java index eec91e881..789a35bc7 100644 --- a/src/main/java/org/apache/datasketches/theta/UpdateSketchBuilder.java +++ b/src/main/java/org/apache/datasketches/theta/UpdateSketchBuilder.java @@ -28,6 +28,7 @@ import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; import org.apache.datasketches.common.SuppressFBWarnings; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; @@ -57,7 +58,7 @@ public class UpdateSketchBuilder { * Constructor for building a new UpdateSketch. The default configuration is *
          *
        • Nominal Entries: {@value org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_NOMINAL_ENTRIES}
        • - *
        • Seed: {@value org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_UPDATE_SEED}
        • + *
        • Seed: {@value org.apache.datasketches.common.Util#DEFAULT_UPDATE_SEED}
        • *
        • Input Sampling Probability: 1.0
        • *
        • Family: {@link org.apache.datasketches.common.Family#QUICKSELECT}
        • *
        • Resize Factor: The default for sketches on the Java heap is {@link ResizeFactor#X8}. @@ -76,7 +77,7 @@ public class UpdateSketchBuilder { */ public UpdateSketchBuilder() { bLgNomLongs = Integer.numberOfTrailingZeros(ThetaUtil.DEFAULT_NOMINAL_ENTRIES); - bSeed = ThetaUtil.DEFAULT_UPDATE_SEED; + bSeed = Util.DEFAULT_UPDATE_SEED; bP = (float) 1.0; bRF = ResizeFactor.X8; bFam = Family.QUICKSELECT; diff --git a/src/main/java/org/apache/datasketches/theta/WrappedCompactCompressedSketch.java b/src/main/java/org/apache/datasketches/theta/WrappedCompactCompressedSketch.java index 170c98cad..c9c38ca61 100644 --- a/src/main/java/org/apache/datasketches/theta/WrappedCompactCompressedSketch.java +++ b/src/main/java/org/apache/datasketches/theta/WrappedCompactCompressedSketch.java @@ -25,7 +25,7 @@ import static org.apache.datasketches.theta.PreambleUtil.PREAMBLE_LONGS_BYTE; import org.apache.datasketches.common.ByteArrayUtil; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.common.Util; /** * Wrapper around a serialized compact compressed read-only sketch. It is not empty, not a single item. 
@@ -50,7 +50,7 @@ class WrappedCompactCompressedSketch extends WrappedCompactSketch { * @return this sketch */ static WrappedCompactCompressedSketch wrapInstance(final byte[] bytes, final short seedHash) { - ThetaUtil.checkSeedHashes(ByteArrayUtil.getShortLE(bytes, PreambleUtil.SEED_HASH_SHORT), seedHash); + Util.checkSeedHashes(ByteArrayUtil.getShortLE(bytes, PreambleUtil.SEED_HASH_SHORT), seedHash); return new WrappedCompactCompressedSketch(bytes); } diff --git a/src/main/java/org/apache/datasketches/theta/WrappedCompactSketch.java b/src/main/java/org/apache/datasketches/theta/WrappedCompactSketch.java index 519857d21..633bcecce 100644 --- a/src/main/java/org/apache/datasketches/theta/WrappedCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta/WrappedCompactSketch.java @@ -33,9 +33,9 @@ import java.util.Arrays; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; /** * Wrapper around a serialized compact read-only sketch. It is not empty, not a single item. 
@@ -61,7 +61,7 @@ class WrappedCompactSketch extends CompactSketch { * @return this sketch */ static WrappedCompactSketch wrapInstance(final byte[] bytes, final short seedHash) { - ThetaUtil.checkSeedHashes(getShortLE(bytes, SEED_HASH_SHORT), seedHash); + Util.checkSeedHashes(getShortLE(bytes, SEED_HASH_SHORT), seedHash); return new WrappedCompactSketch(bytes); } diff --git a/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java b/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java index 3374c5992..85db27af6 100644 --- a/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java +++ b/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java @@ -30,6 +30,7 @@ import java.util.Arrays; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon2.ThetaUtil; /** @@ -50,7 +51,7 @@ final class AnotBimpl extends AnotB { * @param seed See seed */ AnotBimpl(final long seed) { - this(ThetaUtil.computeSeedHash(seed)); + this(Util.computeSeedHash(seed)); } /** @@ -74,7 +75,7 @@ public void setA(final Sketch skA) { return; } //skA is not empty - ThetaUtil.checkSeedHashes(seedHash_, skA.getSeedHash()); + Util.checkSeedHashes(seedHash_, skA.getSeedHash()); //process A hashArr_ = getHashArrA(skA); @@ -87,7 +88,7 @@ public void setA(final Sketch skA) { public void notB(final Sketch skB) { if (empty_ || skB == null || skB.isEmpty()) { return; } //local and skB is not empty - ThetaUtil.checkSeedHashes(seedHash_, skB.getSeedHash()); + Util.checkSeedHashes(seedHash_, skB.getSeedHash()); thetaLong_ = Math.min(thetaLong_, skB.getThetaLong()); @@ -123,12 +124,12 @@ public CompactSketch aNotB(final Sketch skA, final Sketch skB, final boolean dst if (skA.isEmpty()) { return skA.compact(dstOrdered, dstSeg); } //A is not Empty - ThetaUtil.checkSeedHashes(skA.getSeedHash(), seedHash_); + Util.checkSeedHashes(skA.getSeedHash(), seedHash_); if (skB.isEmpty()) { return 
skA.compact(dstOrdered, dstSeg); } - ThetaUtil.checkSeedHashes(skB.getSeedHash(), seedHash_); + Util.checkSeedHashes(skB.getSeedHash(), seedHash_); //Both skA & skB are not empty //process A diff --git a/src/main/java/org/apache/datasketches/theta2/CompactSketch.java b/src/main/java/org/apache/datasketches/theta2/CompactSketch.java index 594b5076f..38fdd29cc 100644 --- a/src/main/java/org/apache/datasketches/theta2/CompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/CompactSketch.java @@ -46,7 +46,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon2.ThetaUtil; +import org.apache.datasketches.common.Util; /** * The parent class of all the CompactSketches. CompactSketches are never created directly. @@ -78,7 +78,7 @@ public abstract class CompactSketch extends Sketch { * @return a CompactSketch on the heap. */ public static CompactSketch heapify(final MemorySegment srcSeg) { - return heapify(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED, false); + return heapify(srcSeg, Util.DEFAULT_UPDATE_SEED, false); } /** @@ -118,7 +118,7 @@ private static CompactSketch heapify(final MemorySegment srcSeg, final long seed return CompactOperations.segmentToCompact(srcSeg, srcOrdered, null); } //not SerVer 3, assume compact stored form - final short seedHash = ThetaUtil.computeSeedHash(seed); + final short seedHash = Util.computeSeedHash(seed); if (serVer == 1) { return ForwardCompatibility.heapify1to3(srcSeg, seedHash); } @@ -151,7 +151,7 @@ private static CompactSketch heapify(final MemorySegment srcSeg, final long seed * @return a CompactSketch backed by the given MemorySegment except as above. 
*/ public static CompactSketch wrap(final MemorySegment srcSeg) { - return wrap(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED, false); + return wrap(srcSeg, Util.DEFAULT_UPDATE_SEED, false); } /** @@ -188,7 +188,7 @@ private static CompactSketch wrap(final MemorySegment srcSeg, final long seed, f if (family != Family.COMPACT) { throw new IllegalArgumentException("Corrupted: " + family + " is not Compact!"); } - final short seedHash = ThetaUtil.computeSeedHash(seed); + final short seedHash = Util.computeSeedHash(seed); if (serVer == 4) { return DirectCompactCompressedSketch.wrapInstance(srcSeg, @@ -250,7 +250,7 @@ else if (serVer == 2) { * @return a CompactSketch backed by the given MemorySegment except as above. */ public static CompactSketch wrap(final byte[] bytes) { - return wrap(bytes, ThetaUtil.DEFAULT_UPDATE_SEED, false); + return wrap(bytes, Util.DEFAULT_UPDATE_SEED, false); } /** @@ -287,7 +287,7 @@ private static CompactSketch wrap(final byte[] bytes, final long seed, final boo if (family != Family.COMPACT) { throw new IllegalArgumentException("Corrupted: " + family + " is not Compact!"); } - final short seedHash = ThetaUtil.computeSeedHash(seed); + final short seedHash = Util.computeSeedHash(seed); if (serVer == 4) { return WrappedCompactCompressedSketch.wrapInstance(bytes, seedHash); } else if (serVer == 3) { diff --git a/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java b/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java index 521334c22..288f5ba42 100644 --- a/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java @@ -29,7 +29,7 @@ import java.lang.foreign.MemorySegment; -import org.apache.datasketches.thetacommon2.ThetaUtil; +import org.apache.datasketches.common.Util; /** * An off-heap (Direct), compact, compressed, read-only sketch. It is not empty, not a single item and ordered. 
@@ -58,7 +58,7 @@ final class DirectCompactCompressedSketch extends DirectCompactSketch { * @return this sketch */ static DirectCompactCompressedSketch wrapInstance(final MemorySegment srcSeg, final short seedHash) { - ThetaUtil.checkSeedHashes((short) extractSeedHash(srcSeg), seedHash); + Util.checkSeedHashes((short) extractSeedHash(srcSeg), seedHash); return new DirectCompactCompressedSketch(srcSeg); } diff --git a/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java index f6eda994f..689166a06 100644 --- a/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java @@ -34,7 +34,6 @@ import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.ThetaUtil; /** * An off-heap (Direct), compact, read-only sketch. The internal hash array can be either ordered @@ -68,7 +67,7 @@ class DirectCompactSketch extends CompactSketch { * @return this sketch */ static DirectCompactSketch wrapInstance(final MemorySegment srcSeg, final short seedHash) { - ThetaUtil.checkSeedHashes((short) extractSeedHash(srcSeg), seedHash); + Util.checkSeedHashes((short) extractSeedHash(srcSeg), seedHash); return new DirectCompactSketch(srcSeg); } diff --git a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java index f4a9e72f6..29e60a180 100644 --- a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java @@ -62,6 +62,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import 
org.apache.datasketches.thetacommon2.HashOperations; import org.apache.datasketches.thetacommon2.ThetaUtil; @@ -152,7 +153,7 @@ private DirectQuickSelectSketch( insertLgArrLongs(dstSeg, lgArrLongs); //byte 4 //flags: bigEndian = readOnly = compact = ordered = false; empty = true : 00100 = 4 insertFlags(dstSeg, EMPTY_FLAG_MASK); //byte 5 - insertSeedHash(dstSeg, ThetaUtil.computeSeedHash(seed)); //bytes 6,7 + insertSeedHash(dstSeg, Util.computeSeedHash(seed)); //bytes 6,7 insertCurCount(dstSeg, 0); //bytes 8-11 insertP(dstSeg, p); //bytes 12-15 final long thetaLong = (long)(p * LONG_MAX_VALUE_AS_DOUBLE); diff --git a/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java index abba02515..1da4521b0 100644 --- a/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java @@ -22,6 +22,7 @@ import static java.lang.Math.max; import static java.lang.Math.min; import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static org.apache.datasketches.common.QuickSelect.selectExcludingZeros; import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount; import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID; @@ -36,7 +37,6 @@ import static org.apache.datasketches.theta2.UpdateReturnState.InsertedCountIncrementedResized; import static org.apache.datasketches.theta2.UpdateReturnState.RejectedDuplicate; import static org.apache.datasketches.theta2.UpdateReturnState.RejectedOverTheta; -import static org.apache.datasketches.thetacommon2.QuickSelect.selectExcludingZeros; import java.lang.foreign.MemorySegment; diff --git a/src/main/java/org/apache/datasketches/theta2/HeapUpdateSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapUpdateSketch.java index b50c25af0..5e2840ac6 
100644 --- a/src/main/java/org/apache/datasketches/theta2/HeapUpdateSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/HeapUpdateSketch.java @@ -39,6 +39,7 @@ import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon2.ThetaUtil; /** @@ -94,7 +95,7 @@ long getSeed() { @Override short getSeedHash() { - return ThetaUtil.computeSeedHash(getSeed()); + return Util.computeSeedHash(getSeed()); } //Used by HeapAlphaSketch and HeapQuickSelectSketch / Theta UpdateSketch diff --git a/src/main/java/org/apache/datasketches/theta2/Intersection.java b/src/main/java/org/apache/datasketches/theta2/Intersection.java index 30cb910d9..91f0e470b 100644 --- a/src/main/java/org/apache/datasketches/theta2/Intersection.java +++ b/src/main/java/org/apache/datasketches/theta2/Intersection.java @@ -35,6 +35,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon2.ThetaUtil; /** @@ -145,7 +146,7 @@ public abstract CompactSketch intersect(Sketch a, Sketch b, boolean dstOrdered, * @return an Intersection that wraps a source MemorySegment that contains an Intersection image */ public static Intersection wrap(final MemorySegment srcSeg) { - return wrap(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrap(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** diff --git a/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java b/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java index 0d7a2bb8f..be1c94707 100644 --- a/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java +++ b/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java @@ -107,17 +107,17 @@ protected IntersectionImpl(final MemorySegment wseg, final long seed, final bool if (dstSegFlag) { //DstSeg: compute & store seedHash, 
no seedHash checking checkMinSizeMemorySegment(wseg); maxLgArrLongs_ = !readOnly ? getMaxLgArrLongs(wseg) : 0; //Only Off Heap - seedHash_ = ThetaUtil.computeSeedHash(seed); + seedHash_ = Util.computeSeedHash(seed); wseg_.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, seedHash_); } else { //SrcSeg:gets and stores the seedHash, checks seg_seedHash against the seed seedHash_ = wseg_.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT); - ThetaUtil.checkSeedHashes(seedHash_, ThetaUtil.computeSeedHash(seed)); //check for seed hash conflict + Util.checkSeedHashes(seedHash_, Util.computeSeedHash(seed)); //check for seed hash conflict maxLgArrLongs_ = 0; } } else { //compute & store seedHash wseg_ = null; maxLgArrLongs_ = 0; - seedHash_ = ThetaUtil.computeSeedHash(seed); + seedHash_ = Util.computeSeedHash(seed); } } @@ -240,7 +240,7 @@ public void intersect(final Sketch sketchIn) { resetToEmpty(); return; } - ThetaUtil.checkSeedHashes(seedHash_, sketchIn.getSeedHash()); + Util.checkSeedHashes(seedHash_, sketchIn.getSeedHash()); //Set minTheta thetaLong_ = min(thetaLong_, sketchIn.getThetaLong()); //Theta rule empty_ = false; diff --git a/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java b/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java index 403ba704c..ab5e588bc 100644 --- a/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java +++ b/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java @@ -34,7 +34,6 @@ import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.ThetaUtil; //@formatter:off @@ -517,7 +516,7 @@ static int getAndCheckPreLongs(final MemorySegment seg) { static final short checkSegmentSeedHash(final MemorySegment seg, final long seed) { final short seedHashSeg = (short) extractSeedHash(seg); - ThetaUtil.checkSeedHashes(seedHashSeg, ThetaUtil.computeSeedHash(seed)); //throws if bad 
seedHash + Util.checkSeedHashes(seedHashSeg, Util.computeSeedHash(seed)); //throws if bad seedHash return seedHashSeg; } diff --git a/src/main/java/org/apache/datasketches/theta2/Rebuilder.java b/src/main/java/org/apache/datasketches/theta2/Rebuilder.java index b436a4e19..aced2c645 100644 --- a/src/main/java/org/apache/datasketches/theta2/Rebuilder.java +++ b/src/main/java/org/apache/datasketches/theta2/Rebuilder.java @@ -21,6 +21,7 @@ import static java.lang.foreign.ValueLayout.JAVA_BYTE; import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static org.apache.datasketches.common.QuickSelect.selectExcludingZeros; import static org.apache.datasketches.theta2.PreambleUtil.LG_ARR_LONGS_BYTE; import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount; import static org.apache.datasketches.theta2.PreambleUtil.extractLgArrLongs; @@ -28,7 +29,6 @@ import static org.apache.datasketches.theta2.PreambleUtil.insertCurCount; import static org.apache.datasketches.theta2.PreambleUtil.insertLgArrLongs; import static org.apache.datasketches.theta2.PreambleUtil.insertThetaLong; -import static org.apache.datasketches.thetacommon2.QuickSelect.selectExcludingZeros; import java.lang.foreign.MemorySegment; diff --git a/src/main/java/org/apache/datasketches/theta2/SetOperation.java b/src/main/java/org/apache/datasketches/theta2/SetOperation.java index 227ae54db..e8ed24fd1 100644 --- a/src/main/java/org/apache/datasketches/theta2/SetOperation.java +++ b/src/main/java/org/apache/datasketches/theta2/SetOperation.java @@ -30,8 +30,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.MemorySegmentStatus; import org.apache.datasketches.common.SketchesArgumentException; -//import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.ThetaUtil; +import org.apache.datasketches.common.Util; /** * The parent API for all Set Operations @@ -68,7 +67,7 @@ public static final SetOperationBuilder builder() 
{ * @return a Heap-based SetOperation from the given MemorySegment */ public static SetOperation heapify(final MemorySegment srcSeg) { - return heapify(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + return heapify(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** @@ -113,7 +112,7 @@ public static SetOperation heapify(final MemorySegment srcSeg, final long expect * @return a SetOperation backed by the given MemorySegment */ public static SetOperation wrap(final MemorySegment srcSeg) { - return wrap(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrap(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** diff --git a/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java b/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java index c57eaddc6..f56163951 100644 --- a/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java +++ b/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java @@ -28,6 +28,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon2.ThetaUtil; /** @@ -46,7 +47,7 @@ public final class SetOperationBuilder { *
            *
          • Max Nominal Entries (max K): * {@value org.apache.datasketches.thetacommon2.ThetaUtil#DEFAULT_NOMINAL_ENTRIES}
          • - *
          • Seed: {@value org.apache.datasketches.thetacommon2.ThetaUtil#DEFAULT_UPDATE_SEED}
          • + *
          • Seed: {@value org.apache.datasketches.common.Util#DEFAULT_UPDATE_SEED}
          • *
          • {@link ResizeFactor#X8}
          • *
          • Input Sampling Probability: 1.0
          • *
          • MemorySegment: null
          • @@ -54,7 +55,7 @@ public final class SetOperationBuilder { */ public SetOperationBuilder() { bLgNomLongs = Integer.numberOfTrailingZeros(ThetaUtil.DEFAULT_NOMINAL_ENTRIES); - bSeed = ThetaUtil.DEFAULT_UPDATE_SEED; + bSeed = Util.DEFAULT_UPDATE_SEED; bP = (float) 1.0; bRF = ResizeFactor.X8; } diff --git a/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java b/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java index 8c92766d1..7ec932b1f 100644 --- a/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java @@ -34,7 +34,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon2.ThetaUtil; +import org.apache.datasketches.common.Util; /** * A CompactSketch that holds only one item hash. @@ -42,7 +42,7 @@ * @author Lee Rhodes */ final class SingleItemSketch extends CompactSketch { - private static final long DEFAULT_SEED_HASH = ThetaUtil.computeSeedHash(ThetaUtil.DEFAULT_UPDATE_SEED) & 0xFFFFL; + private static final long DEFAULT_SEED_HASH = Util.computeSeedHash(Util.DEFAULT_UPDATE_SEED) & 0xFFFFL; // For backward compatibility, a candidate pre0_ long must have: // Flags (byte 5): Ordered, Compact, NOT Empty, Read Only, LittleEndian = 11010 = 0x1A. 
@@ -63,7 +63,7 @@ private SingleItemSketch(final long hash) { //All checking & hashing has been done, given the relevant seed SingleItemSketch(final long hash, final long seed) { - final long seedHash = ThetaUtil.computeSeedHash(seed) & 0xFFFFL; + final long seedHash = Util.computeSeedHash(seed) & 0xFFFFL; pre0_ = (seedHash << 48) | PRE0_LO6_SI; hash_ = hash; } @@ -83,7 +83,7 @@ private SingleItemSketch(final long hash) { * @return a SingleItemSketch */ //does not override Sketch static SingleItemSketch heapify(final MemorySegment srcSeg, final short expectedSeedHash) { - ThetaUtil.checkSeedHashes((short) extractSeedHash(srcSeg), expectedSeedHash); + Util.checkSeedHashes((short) extractSeedHash(srcSeg), expectedSeedHash); final boolean singleItem = otherCheckForSingleItem(srcSeg); if (singleItem) { return new SingleItemSketch(srcSeg.get(JAVA_LONG_UNALIGNED, 8), expectedSeedHash); } throw new SketchesArgumentException("Input MemorySegment is not a SingleItemSketch."); @@ -109,7 +109,7 @@ public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSe */ static SingleItemSketch create(final long datum) { final long[] data = { datum }; - return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1); + return new SingleItemSketch(hash(data, Util.DEFAULT_UPDATE_SEED)[0] >>> 1); } /** @@ -125,7 +125,7 @@ static SingleItemSketch create(final long datum) { static SingleItemSketch create(final double datum) { final double d = (datum == 0.0) ? 
0.0 : datum; // canonicalize -0.0, 0.0 final long[] data = { Double.doubleToLongBits(d) };// canonicalize all NaN forms - return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1); + return new SingleItemSketch(hash(data, Util.DEFAULT_UPDATE_SEED)[0] >>> 1); } /** @@ -143,7 +143,7 @@ static SingleItemSketch create(final double datum) { static SingleItemSketch create(final String datum) { if ((datum == null) || datum.isEmpty()) { return null; } final byte[] data = datum.getBytes(UTF_8); - return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1); + return new SingleItemSketch(hash(data, Util.DEFAULT_UPDATE_SEED)[0] >>> 1); } /** @@ -155,7 +155,7 @@ static SingleItemSketch create(final String datum) { */ static SingleItemSketch create(final byte[] data) { if ((data == null) || (data.length == 0)) { return null; } - return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1); + return new SingleItemSketch(hash(data, Util.DEFAULT_UPDATE_SEED)[0] >>> 1); } /** @@ -170,7 +170,7 @@ static SingleItemSketch create(final byte[] data) { */ static SingleItemSketch create(final char[] data) { if ((data == null) || (data.length == 0)) { return null; } - return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1); + return new SingleItemSketch(hash(data, Util.DEFAULT_UPDATE_SEED)[0] >>> 1); } /** @@ -182,7 +182,7 @@ static SingleItemSketch create(final char[] data) { */ static SingleItemSketch create(final int[] data) { if ((data == null) || (data.length == 0)) { return null; } - return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1); + return new SingleItemSketch(hash(data, Util.DEFAULT_UPDATE_SEED)[0] >>> 1); } /** @@ -194,7 +194,7 @@ static SingleItemSketch create(final int[] data) { */ static SingleItemSketch create(final long[] data) { if ((data == null) || (data.length == 0)) { return null; } - return new SingleItemSketch(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)[0] 
>>> 1); + return new SingleItemSketch(hash(data, Util.DEFAULT_UPDATE_SEED)[0] >>> 1); } //Updates with a user specified seed diff --git a/src/main/java/org/apache/datasketches/theta2/Sketch.java b/src/main/java/org/apache/datasketches/theta2/Sketch.java index b915ec6ba..7789a2a08 100644 --- a/src/main/java/org/apache/datasketches/theta2/Sketch.java +++ b/src/main/java/org/apache/datasketches/theta2/Sketch.java @@ -37,6 +37,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.MemorySegmentStatus; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon2.BinomialBoundsN; import org.apache.datasketches.thetacommon2.ThetaUtil; @@ -74,7 +75,7 @@ public static Sketch heapify(final MemorySegment srcSeg) { if (family == Family.COMPACT) { return CompactSketch.heapify(srcSeg); } - return heapifyUpdateFromMemorySegment(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + return heapifyUpdateFromMemorySegment(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** @@ -132,7 +133,7 @@ public static Sketch wrap(final MemorySegment srcSeg) { final Family family = Family.idToFamily(familyID); if (family == Family.QUICKSELECT) { if (serVer == 3 && preLongs == 3) { - return DirectQuickSelectSketchR.readOnlyWrap(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + return DirectQuickSelectSketchR.readOnlyWrap(srcSeg, Util.DEFAULT_UPDATE_SEED); } else { throw new SketchesArgumentException( "Corrupted: " + family + " family image: must have SerVer = 3 and preLongs = 3"); diff --git a/src/main/java/org/apache/datasketches/theta2/Sketches.java b/src/main/java/org/apache/datasketches/theta2/Sketches.java index fa813b25f..0a3904511 100644 --- a/src/main/java/org/apache/datasketches/theta2/Sketches.java +++ b/src/main/java/org/apache/datasketches/theta2/Sketches.java @@ -34,7 +34,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; 
-import org.apache.datasketches.thetacommon2.ThetaUtil; +import org.apache.datasketches.common.Util; /** * This class brings together the common sketch and set operation creation methods and @@ -295,7 +295,7 @@ public static Intersection wrapIntersection(final MemorySegment srcSeg) { * @return {@link SetOperation SetOperation} */ public static SetOperation wrapSetOperation(final MemorySegment srcSeg) { - return wrapSetOperation(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrapSetOperation(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** @@ -344,7 +344,7 @@ public static Union wrapUnion(final MemorySegment srcSeg) { * @return {@link UpdateSketch UpdateSketch} */ public static UpdateSketch wrapUpdateSketch(final MemorySegment srcSeg) { - return wrapUpdateSketch(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrapUpdateSketch(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** diff --git a/src/main/java/org/apache/datasketches/theta2/Union.java b/src/main/java/org/apache/datasketches/theta2/Union.java index fde9e90bd..5c0261782 100644 --- a/src/main/java/org/apache/datasketches/theta2/Union.java +++ b/src/main/java/org/apache/datasketches/theta2/Union.java @@ -27,7 +27,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.common.Util; /** * Compute the union of two or more theta sketches. 
@@ -46,7 +46,7 @@ public abstract class Union extends SetOperation { * @return this class */ public static Union fastWrap(final MemorySegment srcSeg) { - return fastWrap(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + return fastWrap(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** @@ -74,7 +74,7 @@ public static Union fastWrap(final MemorySegment srcSeg, final long expectedSeed * @return this class */ public static Union wrap(final MemorySegment srcSeg) { - return wrap(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrap(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** diff --git a/src/main/java/org/apache/datasketches/theta2/UnionImpl.java b/src/main/java/org/apache/datasketches/theta2/UnionImpl.java index 9d3aca04b..1f8f19180 100644 --- a/src/main/java/org/apache/datasketches/theta2/UnionImpl.java +++ b/src/main/java/org/apache/datasketches/theta2/UnionImpl.java @@ -21,12 +21,12 @@ import static java.lang.Math.min; import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static org.apache.datasketches.common.QuickSelect.selectExcludingZeros; import static org.apache.datasketches.theta2.PreambleUtil.UNION_THETA_LONG; import static org.apache.datasketches.theta2.PreambleUtil.clearEmpty; import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID; import static org.apache.datasketches.theta2.PreambleUtil.extractUnionThetaLong; import static org.apache.datasketches.theta2.PreambleUtil.insertUnionThetaLong; -import static org.apache.datasketches.thetacommon2.QuickSelect.selectExcludingZeros; import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; @@ -34,8 +34,8 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon2.HashOperations; -import org.apache.datasketches.thetacommon2.ThetaUtil; /** * Shared code for the HeapUnion and DirectUnion implementations. 
@@ -61,7 +61,7 @@ final class UnionImpl extends Union { private UnionImpl(final UpdateSketch gadget, final long seed) { gadget_ = gadget; - expectedSeedHash_ = ThetaUtil.computeSeedHash(seed); + expectedSeedHash_ = Util.computeSeedHash(seed); } /** @@ -274,7 +274,7 @@ public void union(final Sketch sketchIn) { return; } //sketchIn is valid and not empty - ThetaUtil.checkSeedHashes(expectedSeedHash_, sketchIn.getSeedHash()); + Util.checkSeedHashes(expectedSeedHash_, sketchIn.getSeedHash()); if (sketchIn instanceof SingleItemSketch) { gadget_.hashUpdate(sketchIn.getCache()[0]); return; diff --git a/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java b/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java index 9d7b7a1a5..e58a80bf5 100644 --- a/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java @@ -50,6 +50,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon2.ThetaUtil; /** @@ -67,14 +68,14 @@ public abstract class UpdateSketch extends Sketch { * Wrap takes the writable sketch image in MemorySegment and refers to it directly. There is no data copying onto * the java heap. Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have * been explicitly stored as writable, direct objects can be wrapped. This method assumes the - * {@link org.apache.datasketches.thetacommon2.ThetaUtil#DEFAULT_UPDATE_SEED}. + * {@link org.apache.datasketches.common.Util#DEFAULT_UPDATE_SEED}. * Default Update Seed. * @param srcWSeg an image of a writable sketch where the image seed hash matches the default seed hash. * It must have a size of at least 24 bytes. 
* @return an UpdateSketch backed by the given MemorySegment */ public static UpdateSketch wrap(final MemorySegment srcWSeg) { - return wrap(srcWSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrap(srcWSeg, Util.DEFAULT_UPDATE_SEED); } /** @@ -111,13 +112,13 @@ public static UpdateSketch wrap(final MemorySegment srcWSeg, final long expected /** * Instantiates an on-heap UpdateSketch from a MemorySegment. This method assumes the - * {@link org.apache.datasketches.thetacommon2.ThetaUtil#DEFAULT_UPDATE_SEED}. + * {@link org.apache.datasketches.common.Util#DEFAULT_UPDATE_SEED}. * @param srcSeg the given MemorySegment with a sketch image. * It must have a size of at least 24 bytes. * @return an UpdateSketch */ public static UpdateSketch heapify(final MemorySegment srcSeg) { - return heapify(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + return heapify(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** @@ -441,7 +442,7 @@ static void checkSegIntegrity(final MemorySegment srcSeg, final long expectedSee //Check seed hashes final short seedHash = checkSegmentSeedHash(srcSeg, expectedSeed); //byte 6,7 - ThetaUtil.checkSeedHashes(seedHash, ThetaUtil.computeSeedHash(expectedSeed)); + Util.checkSeedHashes(seedHash, Util.computeSeedHash(expectedSeed)); //Check seg capacity, lgArrLongs final long curCapBytes = srcSeg.byteSize(); diff --git a/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java b/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java index 7580f59b4..19abe7b13 100644 --- a/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java +++ b/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java @@ -30,6 +30,7 @@ import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; import org.apache.datasketches.common.SuppressFBWarnings; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon2.ThetaUtil; /** @@ -55,7 +56,7 @@ public final 
class UpdateSketchBuilder { * Constructor for building a new UpdateSketch. The default configuration is *
              *
            • Nominal Entries: {@value org.apache.datasketches.thetacommon2.ThetaUtil#DEFAULT_NOMINAL_ENTRIES}
            • - *
            • Seed: {@value org.apache.datasketches.thetacommon2.ThetaUtil#DEFAULT_UPDATE_SEED}
            • + *
            • Seed: {@value org.apache.datasketches.common.Util#DEFAULT_UPDATE_SEED}
            • *
            • Input Sampling Probability: 1.0
            • *
            • Family: {@link org.apache.datasketches.common.Family#QUICKSELECT}
            • *
            • Resize Factor: The default for sketches on the Java heap is {@link ResizeFactor#X8}. @@ -73,7 +74,7 @@ public final class UpdateSketchBuilder { */ public UpdateSketchBuilder() { bLgNomLongs = Integer.numberOfTrailingZeros(ThetaUtil.DEFAULT_NOMINAL_ENTRIES); - bSeed = ThetaUtil.DEFAULT_UPDATE_SEED; + bSeed = Util.DEFAULT_UPDATE_SEED; bP = (float) 1.0; bRF = ResizeFactor.X8; bFam = Family.QUICKSELECT; diff --git a/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java b/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java index b2d3a8d4b..8973d76ff 100644 --- a/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java @@ -25,7 +25,7 @@ import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; import org.apache.datasketches.common.ByteArrayUtil; -import org.apache.datasketches.thetacommon2.ThetaUtil; +import org.apache.datasketches.common.Util; /** * Wrapper around a serialized compact compressed read-only sketch. It is not empty, not a single item. 
@@ -50,7 +50,7 @@ final class WrappedCompactCompressedSketch extends WrappedCompactSketch { * @return this sketch */ static WrappedCompactCompressedSketch wrapInstance(final byte[] bytes, final short seedHash) { - ThetaUtil.checkSeedHashes(ByteArrayUtil.getShortLE(bytes, PreambleUtil.SEED_HASH_SHORT), seedHash); + Util.checkSeedHashes(ByteArrayUtil.getShortLE(bytes, PreambleUtil.SEED_HASH_SHORT), seedHash); return new WrappedCompactCompressedSketch(bytes); } diff --git a/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java index 0da98d5fc..a1f65b3e2 100644 --- a/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java @@ -34,7 +34,7 @@ import java.lang.foreign.MemorySegment; import java.util.Arrays; -import org.apache.datasketches.thetacommon2.ThetaUtil; +import org.apache.datasketches.common.Util; /** * Wrapper around a serialized compact read-only sketch. It is not empty, not a single item. 
@@ -60,7 +60,7 @@ class WrappedCompactSketch extends CompactSketch { * @return this sketch */ static WrappedCompactSketch wrapInstance(final byte[] bytes, final short seedHash) { - ThetaUtil.checkSeedHashes(getShortLE(bytes, SEED_HASH_SHORT), seedHash); + Util.checkSeedHashes(getShortLE(bytes, SEED_HASH_SHORT), seedHash); return new WrappedCompactSketch(bytes); } diff --git a/src/main/java/org/apache/datasketches/thetacommon/ThetaUtil.java b/src/main/java/org/apache/datasketches/thetacommon/ThetaUtil.java index be209ece1..4012cb412 100644 --- a/src/main/java/org/apache/datasketches/thetacommon/ThetaUtil.java +++ b/src/main/java/org/apache/datasketches/thetacommon/ThetaUtil.java @@ -19,8 +19,6 @@ package org.apache.datasketches.thetacommon; -import static org.apache.datasketches.hash.MurmurHash3.hash; - import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; @@ -55,27 +53,6 @@ public final class ThetaUtil { * See Default Nominal Entries */ public static final int DEFAULT_NOMINAL_ENTRIES = 4096; - /** - * The seed 9001 used in the sketch update methods is a prime number that - * was chosen very early on in experimental testing. Choosing a seed is somewhat arbitrary, and - * the author cannot prove that this particular seed is somehow superior to other seeds. There - * was some early Internet discussion that a seed of 0 did not produce as clean avalanche diagrams - * as non-zero seeds, but this may have been more related to the MurmurHash2 release, which did - * have some issues. As far as the author can determine, MurmurHash3 does not have these problems. - * - *

              In order to perform set operations on two sketches it is critical that the same hash - * function and seed are identical for both sketches, otherwise the assumed 1:1 relationship - * between the original source key value and the hashed bit string would be violated. Once - * you have developed a history of stored sketches you are stuck with it. - * - *

              WARNING: This seed is used internally by library sketches in different - * packages and thus must be declared public. However, this seed value must not be used by library - * users with the MurmurHash3 function. It should be viewed as existing for exclusive, private - * use by the library. - * - *

              See Default Update Seed - */ - public static final long DEFAULT_UPDATE_SEED = 9001L; private ThetaUtil() {} @@ -84,39 +61,6 @@ private ThetaUtil() {} */ public static final int MIN_LG_ARR_LONGS = 5; - /** - * Check if the two seed hashes are equal. If not, throw an SketchesArgumentException. - * @param seedHashA the seedHash A - * @param seedHashB the seedHash B - * @return seedHashA if they are equal - */ - public static short checkSeedHashes(final short seedHashA, final short seedHashB) { - if (seedHashA != seedHashB) { - throw new SketchesArgumentException( - "Incompatible Seed Hashes. " + Integer.toHexString(seedHashA & 0XFFFF) - + ", " + Integer.toHexString(seedHashB & 0XFFFF)); - } - return seedHashA; - } - - /** - * Computes and checks the 16-bit seed hash from the given long seed. - * The seed hash may not be zero in order to maintain compatibility with older serialized - * versions that did not have this concept. - * @param seed See Update Hash Seed - * @return the seed hash. - */ - public static short computeSeedHash(final long seed) { - final long[] seedArr = {seed}; - final short seedHash = (short)(hash(seedArr, 0L)[0] & 0xFFFFL); - if (seedHash == 0) { - throw new SketchesArgumentException( - "The given seed: " + seed + " produced a seedHash of zero. " - + "You must choose a different seed."); - } - return seedHash; - } - /** * Gets the smallest allowed exponent of 2 that it is a sub-multiple of the target by zero, * one or more resize factors. diff --git a/src/main/java/org/apache/datasketches/thetacommon2/QuickSelect.java b/src/main/java/org/apache/datasketches/thetacommon2/QuickSelect.java deleted file mode 100644 index e2e80e0a7..000000000 --- a/src/main/java/org/apache/datasketches/thetacommon2/QuickSelect.java +++ /dev/null @@ -1,242 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.thetacommon2; - -/** - * QuickSelect algorithm improved from Sedgewick. Gets the kth order value - * (1-based or 0-based) from the array. - * Warning! This changes the ordering of elements in the given array!
              - * Also see:
              - * blog.teamleadnet.com/2012/07/quick-select-algorithm-find-kth-element.html
              - * See QuickSelectTest for examples and testNG tests. - * - * @author Lee Rhodes - */ -public final class QuickSelect { - - private QuickSelect() {} - - /** - * Gets the 0-based kth order statistic from the array. Warning! This changes the ordering - * of elements in the given array! - * - * @param arr The array to be re-arranged. - * @param lo The lowest 0-based index to be considered. - * @param hi The highest 0-based index to be considered. - * @param pivot The 0-based index of the value to pivot on. - * @return The value of the smallest (n)th element where n is 0-based. - */ - public static long select(final long[] arr, int lo, int hi, final int pivot) { - while (hi > lo) { - final int j = partition(arr, lo, hi); - if (j == pivot) { - return arr[pivot]; - } - if (j > pivot) { - hi = j - 1; - } - else { - lo = j + 1; - } - } - return arr[pivot]; - } - - /** - * Gets the 1-based kth order statistic from the array including any zero values in the - * array. Warning! This changes the ordering of elements in the given array! - * - * @param arr The hash array. - * @param pivot The 1-based index of the value that is chosen as the pivot for the array. - * After the operation all values below this 1-based index will be less than this value - * and all values above this index will be greater. The 0-based index of the pivot will be - * pivot-1. - * @return The value of the smallest (N)th element including zeros, where N is 1-based. - */ - public static long selectIncludingZeros(final long[] arr, final int pivot) { - final int arrSize = arr.length; - final int adj = pivot - 1; - return select(arr, 0, arrSize - 1, adj); - } - - /** - * Gets the 1-based kth order statistic from the array excluding any zero values in the - * array. Warning! This changes the ordering of elements in the given array! - * - * @param arr The hash array. - * @param nonZeros The number of non-zero values in the array. 
- * @param pivot The 1-based index of the value that is chosen as the pivot for the array. - * After the operation all values below this 1-based index will be less than this value - * and all values above this index will be greater. The 0-based index of the pivot will be - * pivot+arr.length-nonZeros-1. - * @return The value of the smallest (N)th element excluding zeros, where N is 1-based. - */ - public static long selectExcludingZeros(final long[] arr, final int nonZeros, final int pivot) { - if (pivot > nonZeros) { - return 0L; - } - final int arrSize = arr.length; - final int zeros = arrSize - nonZeros; - final int adjK = (pivot + zeros) - 1; - return select(arr, 0, arrSize - 1, adjK); - } - - /** - * Partition arr[] into arr[lo .. i-1], arr[i], arr[i+1,hi] - * - * @param arr The given array to partition - * @param lo the low index - * @param hi the high index - * @return the next partition value. Ultimately, the desired pivot. - */ - private static int partition(final long[] arr, final int lo, final int hi) { - int i = lo, j = hi + 1; //left and right scan indices - final long v = arr[lo]; //partitioning item value - while (true) { - //Scan right, scan left, check for scan complete, and exchange - while (arr[ ++i] < v) { - if (i == hi) { - break; - } - } - while (v < arr[ --j]) { - if (j == lo) { - break; - } - } - if (i >= j) { - break; - } - final long x = arr[i]; - arr[i] = arr[j]; - arr[j] = x; - } - //put v=arr[j] into position with a[lo .. j-1] <= a[j] <= a[j+1 .. hi] - final long x = arr[lo]; - arr[lo] = arr[j]; - arr[j] = x; - return j; - } - - //For double arrays - - /** - * Gets the 0-based kth order statistic from the array. Warning! This changes the ordering - * of elements in the given array! - * - * @param arr The array to be re-arranged. - * @param lo The lowest 0-based index to be considered. - * @param hi The highest 0-based index to be considered. - * @param pivot The 0-based smallest value to pivot on. 
- * @return The value of the smallest (n)th element where n is 0-based. - */ - public static double select(final double[] arr, int lo, int hi, final int pivot) { - while (hi > lo) { - final int j = partition(arr, lo, hi); - if (j == pivot) { - return arr[pivot]; - } - if (j > pivot) { - hi = j - 1; - } - else { - lo = j + 1; - } - } - return arr[pivot]; - } - - /** - * Gets the 1-based kth order statistic from the array including any zero values in the - * array. Warning! This changes the ordering of elements in the given array! - * - * @param arr The hash array. - * @param pivot The 1-based index of the value that is chosen as the pivot for the array. - * After the operation all values below this 1-based index will be less than this value - * and all values above this index will be greater. The 0-based index of the pivot will be - * pivot-1. - * @return The value of the smallest (N)th element including zeros, where N is 1-based. - */ - public static double selectIncludingZeros(final double[] arr, final int pivot) { - final int arrSize = arr.length; - final int adj = pivot - 1; - return select(arr, 0, arrSize - 1, adj); - } - - /** - * Gets the 1-based kth order statistic from the array excluding any zero values in the - * array. Warning! This changes the ordering of elements in the given array! - * - * @param arr The hash array. - * @param nonZeros The number of non-zero values in the array. - * @param pivot The 1-based index of the value that is chosen as the pivot for the array. - * After the operation all values below this 1-based index will be less than this value - * and all values above this index will be greater. The 0-based index of the pivot will be - * pivot+arr.length-nonZeros-1. - * @return The value of the smallest (N)th element excluding zeros, where N is 1-based. 
- */ - public static double selectExcludingZeros(final double[] arr, final int nonZeros, final int pivot) { - if (pivot > nonZeros) { - return 0L; - } - final int arrSize = arr.length; - final int zeros = arrSize - nonZeros; - final int adjK = (pivot + zeros) - 1; - return select(arr, 0, arrSize - 1, adjK); - } - - /** - * Partition arr[] into arr[lo .. i-1], arr[i], arr[i+1,hi] - * - * @param arr The given array to partition - * @param lo the low index - * @param hi the high index - * @return the next partition value. Ultimately, the desired pivot. - */ - private static int partition(final double[] arr, final int lo, final int hi) { - int i = lo, j = hi + 1; //left and right scan indices - final double v = arr[lo]; //partitioning item value - while (true) { - //Scan right, scan left, check for scan complete, and exchange - while (arr[ ++i] < v) { - if (i == hi) { - break; - } - } - while (v < arr[ --j]) { - if (j == lo) { - break; - } - } - if (i >= j) { - break; - } - final double x = arr[i]; - arr[i] = arr[j]; - arr[j] = x; - } - //put v=arr[j] into position with a[lo .. j-1] <= a[j] <= a[j+1 .. 
hi] - final double x = arr[lo]; - arr[lo] = arr[j]; - arr[j] = x; - return j; - } - -} diff --git a/src/main/java/org/apache/datasketches/thetacommon2/ThetaUtil.java b/src/main/java/org/apache/datasketches/thetacommon2/ThetaUtil.java index 52dd5c331..585efec6f 100644 --- a/src/main/java/org/apache/datasketches/thetacommon2/ThetaUtil.java +++ b/src/main/java/org/apache/datasketches/thetacommon2/ThetaUtil.java @@ -19,8 +19,6 @@ package org.apache.datasketches.thetacommon2; -import static org.apache.datasketches.hash.MurmurHash3.hash; - import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; @@ -55,27 +53,6 @@ public final class ThetaUtil { * See Default Nominal Entries */ public static final int DEFAULT_NOMINAL_ENTRIES = 4096; - /** - * The seed 9001 used in the sketch update methods is a prime number that - * was chosen very early on in experimental testing. Choosing a seed is somewhat arbitrary, and - * the author cannot prove that this particular seed is somehow superior to other seeds. There - * was some early Internet discussion that a seed of 0 did not produce as clean avalanche diagrams - * as non-zero seeds, but this may have been more related to the MurmurHash2 release, which did - * have some issues. As far as the author can determine, MurmurHash3 does not have these problems. - * - *

              In order to perform set operations on two sketches it is critical that the same hash - * function and seed are identical for both sketches, otherwise the assumed 1:1 relationship - * between the original source key value and the hashed bit string would be violated. Once - * you have developed a history of stored sketches you are stuck with it. - * - *

              WARNING: This seed is used internally by library sketches in different - * packages and thus must be declared public. However, this seed value must not be used by library - * users with the MurmurHash3 function. It should be viewed as existing for exclusive, private - * use by the library. - * - *

              See Default Update Seed - */ - public static final long DEFAULT_UPDATE_SEED = 9001L; private ThetaUtil() {} @@ -84,39 +61,6 @@ private ThetaUtil() {} */ public static final int MIN_LG_ARR_LONGS = 5; - /** - * Check if the two seed hashes are equal. If not, throw an SketchesArgumentException. - * @param seedHashA the seedHash A - * @param seedHashB the seedHash B - * @return seedHashA if they are equal - */ - public static short checkSeedHashes(final short seedHashA, final short seedHashB) { - if (seedHashA != seedHashB) { - throw new SketchesArgumentException( - "Incompatible Seed Hashes. " + Integer.toHexString(seedHashA & 0XFFFF) - + ", " + Integer.toHexString(seedHashB & 0XFFFF)); - } - return seedHashA; - } - - /** - * Computes and checks the 16-bit seed hash from the given long seed. - * The seed hash may not be zero in order to maintain compatibility with older serialized - * versions that did not have this concept. - * @param seed See Update Hash Seed - * @return the seed hash. - */ - public static short computeSeedHash(final long seed) { - final long[] seedArr = {seed}; - final short seedHash = (short)(hash(seedArr, 0L)[0] & 0xFFFFL); - if (seedHash == 0) { - throw new SketchesArgumentException( - "The given seed: " + seed + " produced a seedHash of zero. " - + "You must choose a different seed."); - } - return seedHash; - } - /** * Gets the smallest allowed exponent of 2 that it is a sub-multiple of the target by zero, * one or more resize factors. 
diff --git a/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java index 4b9afb1a1..756e99e8b 100644 --- a/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java @@ -30,11 +30,11 @@ import org.apache.datasketches.common.ByteArrayUtil; import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.QuickSelect; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.thetacommon.HashOperations; -import org.apache.datasketches.thetacommon.QuickSelect; import org.apache.datasketches.thetacommon.ThetaUtil; /** diff --git a/src/main/java/org/apache/datasketches/tuple/Union.java b/src/main/java/org/apache/datasketches/tuple/Union.java index acefa2ab5..f67626d1b 100644 --- a/src/main/java/org/apache/datasketches/tuple/Union.java +++ b/src/main/java/org/apache/datasketches/tuple/Union.java @@ -21,8 +21,8 @@ import static java.lang.Math.min; +import org.apache.datasketches.common.QuickSelect; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon.QuickSelect; import org.apache.datasketches.thetacommon.ThetaUtil; /** diff --git a/src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java b/src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java index 36743618b..f7a552add 100644 --- a/src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java @@ -19,11 +19,12 @@ package org.apache.datasketches.tuple; +import static org.apache.datasketches.common.Util.DEFAULT_UPDATE_SEED; + import java.nio.ByteBuffer; import org.apache.datasketches.hash.MurmurHash3; import org.apache.datasketches.memory.Memory; -import 
org.apache.datasketches.thetacommon.ThetaUtil; /** * An extension of QuickSelectSketch<S>, which can be updated with many types of keys. @@ -136,7 +137,7 @@ public void update(final String key, final U value) { */ public void update(final byte[] key, final U value) { if ((key == null) || (key.length == 0)) { return; } - insertOrIgnore(MurmurHash3.hash(key, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1, value); + insertOrIgnore(MurmurHash3.hash(key, DEFAULT_UPDATE_SEED)[0] >>> 1, value); } /** @@ -148,7 +149,7 @@ public void update(final byte[] key, final U value) { */ public void update(final ByteBuffer buffer, final U value) { if (buffer == null || buffer.hasRemaining() == false) { return; } - insertOrIgnore(MurmurHash3.hash(buffer, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1, value); + insertOrIgnore(MurmurHash3.hash(buffer, DEFAULT_UPDATE_SEED)[0] >>> 1, value); } /** @@ -160,7 +161,7 @@ public void update(final ByteBuffer buffer, final U value) { */ public void update(final int[] key, final U value) { if ((key == null) || (key.length == 0)) { return; } - insertOrIgnore(MurmurHash3.hash(key, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1, value); + insertOrIgnore(MurmurHash3.hash(key, DEFAULT_UPDATE_SEED)[0] >>> 1, value); } /** @@ -172,7 +173,7 @@ public void update(final int[] key, final U value) { */ public void update(final long[] key, final U value) { if ((key == null) || (key.length == 0)) { return; } - insertOrIgnore(MurmurHash3.hash(key, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1, value); + insertOrIgnore(MurmurHash3.hash(key, DEFAULT_UPDATE_SEED)[0] >>> 1, value); } void insertOrIgnore(final long hash, final U value) { diff --git a/src/main/java/org/apache/datasketches/tuple/Util.java b/src/main/java/org/apache/datasketches/tuple/Util.java index bda6e7c25..46f069724 100644 --- a/src/main/java/org/apache/datasketches/tuple/Util.java +++ b/src/main/java/org/apache/datasketches/tuple/Util.java @@ -21,13 +21,11 @@ import static java.nio.charset.StandardCharsets.UTF_8; import 
static org.apache.datasketches.common.Util.ceilingPowerOf2; -import static org.apache.datasketches.hash.MurmurHash3.hash; import static org.apache.datasketches.hash.XxHash.hashCharArr; import static org.apache.datasketches.hash.XxHash.hashString; import java.lang.reflect.Array; -import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.thetacommon.ThetaUtil; /** @@ -58,36 +56,6 @@ public static final byte[] stringToByteArray(final String value) { return value.getBytes(UTF_8); } - /** - * Computes and checks the 16-bit seed hash from the given long seed. - * The seed hash may not be zero in order to maintain compatibility with older serialized - * versions that did not have this concept. - * @param seed See Update Hash Seed - * @return the seed hash. - */ - public static short computeSeedHash(final long seed) { - final long[] seedArr = {seed}; - final short seedHash = (short)((hash(seedArr, 0L)[0]) & 0xFFFFL); - if (seedHash == 0) { - throw new SketchesArgumentException( - "The given seed: " + seed + " produced a seedHash of zero. " - + "You must choose a different seed."); - } - return seedHash; - } - - /** - * Checks the two given seed hashes. If they are not equal, this method throws an Exception. - * @param seedHashA given seed hash A - * @param seedHashB given seed hash B - */ - public static final void checkSeedHashes(final short seedHashA, final short seedHashB) { - if (seedHashA != seedHashB) { - throw new SketchesArgumentException("Incompatible Seed Hashes. " + seedHashA + ", " - + seedHashB); - } - } - /** * Gets the starting capacity of a new sketch given the Nominal Entries and the log Resize Factor. 
* @param nomEntries the given Nominal Entries diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesAnotBImpl.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesAnotBImpl.java index 44c0ed9b8..4bd241768 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesAnotBImpl.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesAnotBImpl.java @@ -60,7 +60,7 @@ public class ArrayOfDoublesAnotBImpl extends ArrayOfDoublesAnotB { ArrayOfDoublesAnotBImpl(final int numValues, final long seed) { numValues_ = numValues; - seedHash_ = Util.computeSeedHash(seed); + seedHash_ = org.apache.datasketches.common.Util.computeSeedHash(seed); } @Override diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesIntersection.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesIntersection.java index 60d6d61d4..386b70d3b 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesIntersection.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesIntersection.java @@ -48,7 +48,7 @@ public abstract class ArrayOfDoublesIntersection { * @param seed the hash function update seed. 
*/ ArrayOfDoublesIntersection(final int numValues, final long seed) { - seedHash_ = Util.computeSeedHash(seed); + seedHash_ = org.apache.datasketches.common.Util.computeSeedHash(seed); numValues_ = numValues; hashTables_ = null; empty_ = false; @@ -64,7 +64,7 @@ public abstract class ArrayOfDoublesIntersection { */ public void intersect(final ArrayOfDoublesSketch tupleSketch, final ArrayOfDoublesCombiner combiner) { if (tupleSketch == null) { throw new SketchesArgumentException("Sketch must not be null"); } - Util.checkSeedHashes(seedHash_, tupleSketch.getSeedHash()); + org.apache.datasketches.common.Util.checkSeedHashes(seedHash_, tupleSketch.getSeedHash()); if (tupleSketch.numValues_ != numValues_) { throw new SketchesArgumentException( "Input tupleSketch cannot have different numValues from the internal numValues."); diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java index a6c536816..a1dba3512 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java @@ -21,9 +21,9 @@ import static org.apache.datasketches.common.Util.ceilingPowerOf2; +import org.apache.datasketches.common.QuickSelect; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.QuickSelect; import org.apache.datasketches.thetacommon.ThetaUtil; /** diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java index bb6cc3741..e866a3191 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java 
+++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java @@ -19,8 +19,8 @@ package org.apache.datasketches.tuple.arrayofdoubles; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; /** * Builds set operations object for tuple sketches of type ArrayOfDoubles. @@ -47,7 +47,7 @@ public class ArrayOfDoublesSetOperationBuilder { public ArrayOfDoublesSetOperationBuilder() { nomEntries_ = DEFAULT_NOMINAL_ENTRIES; numValues_ = DEFAULT_NUMBER_OF_VALUES; - seed_ = ThetaUtil.DEFAULT_UPDATE_SEED; + seed_ = Util.DEFAULT_UPDATE_SEED; } /** diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSketch.java index 111427b7c..5dd0548f4 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSketch.java @@ -21,10 +21,10 @@ import static org.apache.datasketches.common.Util.LS; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.BinomialBoundsN; -import org.apache.datasketches.thetacommon.ThetaUtil; import org.apache.datasketches.tuple.SerializerDeserializer; /** @@ -76,7 +76,7 @@ static enum Flags { IS_BIG_ENDIAN, IS_IN_SAMPLING_MODE, IS_EMPTY, HAS_ENTRIES } * @return an ArrayOfDoublesSketch */ public static ArrayOfDoublesSketch heapify(final Memory mem) { - return heapify(mem, ThetaUtil.DEFAULT_UPDATE_SEED); + return heapify(mem, Util.DEFAULT_UPDATE_SEED); } /** @@ -99,7 +99,7 @@ public static ArrayOfDoublesSketch heapify(final Memory mem, final long seed) { * @return an ArrayOfDoublesSketch */ public static ArrayOfDoublesSketch wrap(final Memory mem) { - return 
wrap(mem, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrap(mem, Util.DEFAULT_UPDATE_SEED); } /** diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSketches.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSketches.java index 3e9abe6a3..575d1d7d7 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSketches.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSketches.java @@ -19,9 +19,9 @@ package org.apache.datasketches.tuple.arrayofdoubles; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; /** * Convenient static methods to instantiate tuple sketches of type ArrayOfDoubles. @@ -34,7 +34,7 @@ public final class ArrayOfDoublesSketches { * @return an ArrayOfDoublesSketch */ public static ArrayOfDoublesSketch heapifySketch(final Memory srcMem) { - return heapifySketch(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + return heapifySketch(srcMem, Util.DEFAULT_UPDATE_SEED); } /** @@ -53,7 +53,7 @@ public static ArrayOfDoublesSketch heapifySketch(final Memory srcMem, final long * @return an ArrayOfDoublesUpdatableSketch */ public static ArrayOfDoublesUpdatableSketch heapifyUpdatableSketch(final Memory srcMem) { - return heapifyUpdatableSketch(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + return heapifyUpdatableSketch(srcMem, Util.DEFAULT_UPDATE_SEED); } /** @@ -72,7 +72,7 @@ public static ArrayOfDoublesUpdatableSketch heapifyUpdatableSketch(final Memory * @return an ArrayOfDoublesSketch */ public static ArrayOfDoublesSketch wrapSketch(final Memory srcMem) { - return wrapSketch(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrapSketch(srcMem, Util.DEFAULT_UPDATE_SEED); } /** @@ -91,7 +91,7 @@ public static ArrayOfDoublesSketch wrapSketch(final Memory srcMem, final long se * @return an 
ArrayOfDoublesUpdatableSketch */ public static ArrayOfDoublesUpdatableSketch wrapUpdatableSketch(final WritableMemory srcMem) { - return wrapUpdatableSketch(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrapUpdatableSketch(srcMem, Util.DEFAULT_UPDATE_SEED); } /** @@ -110,7 +110,7 @@ public static ArrayOfDoublesUpdatableSketch wrapUpdatableSketch(final WritableMe * @return an ArrayOfDoublesUnion */ public static ArrayOfDoublesUnion heapifyUnion(final Memory srcMem) { - return heapifyUnion(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + return heapifyUnion(srcMem, Util.DEFAULT_UPDATE_SEED); } /** @@ -129,7 +129,7 @@ public static ArrayOfDoublesUnion heapifyUnion(final Memory srcMem, final long s * @return an ArrayOfDoublesUnion */ public static ArrayOfDoublesUnion wrapUnion(final Memory srcMem) { - return wrapUnion(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrapUnion(srcMem, Util.DEFAULT_UPDATE_SEED); } /** @@ -148,7 +148,7 @@ public static ArrayOfDoublesUnion wrapUnion(final Memory srcMem, final long seed * @return an ArrayOfDoublesUnion */ public static ArrayOfDoublesUnion wrapUnion(final WritableMemory srcMem) { - return wrapUnion(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrapUnion(srcMem, Util.DEFAULT_UPDATE_SEED); } /** diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUnion.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUnion.java index 9e1db0ada..d76754b85 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUnion.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUnion.java @@ -20,12 +20,12 @@ package org.apache.datasketches.tuple.arrayofdoubles; import static java.lang.Math.min; +import static org.apache.datasketches.common.Util.DEFAULT_UPDATE_SEED; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.memory.Memory; import 
org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; import org.apache.datasketches.tuple.SerializerDeserializer; import org.apache.datasketches.tuple.Util; @@ -64,7 +64,7 @@ public abstract class ArrayOfDoublesUnion { * @return an ArrayOfDoublesUnion */ public static ArrayOfDoublesUnion heapify(final Memory srcMem) { - return heapify(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + return heapify(srcMem, DEFAULT_UPDATE_SEED); } /** @@ -83,7 +83,7 @@ public static ArrayOfDoublesUnion heapify(final Memory srcMem, final long seed) * @return an ArrayOfDoublesUnion */ public static ArrayOfDoublesUnion wrap(final Memory srcMem) { - return wrap(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrap(srcMem, DEFAULT_UPDATE_SEED); } /** @@ -102,7 +102,7 @@ public static ArrayOfDoublesUnion wrap(final Memory srcMem, final long seed) { * @return an ArrayOfDoublesUnion */ public static ArrayOfDoublesUnion wrap(final WritableMemory srcMem) { - return wrap(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrap(srcMem, DEFAULT_UPDATE_SEED); } /** @@ -125,7 +125,7 @@ public static ArrayOfDoublesUnion wrap(final WritableMemory srcMem, final long s */ public void union(final ArrayOfDoublesSketch tupleSketch) { if (tupleSketch == null) { return; } - Util.checkSeedHashes(gadget_.getSeedHash(), tupleSketch.getSeedHash()); + org.apache.datasketches.common.Util.checkSeedHashes(gadget_.getSeedHash(), tupleSketch.getSeedHash()); if (gadget_.getNumValues() != tupleSketch.getNumValues()) { throw new SketchesArgumentException("Incompatible sketches: number of values mismatch " + gadget_.getNumValues() + " and " + tupleSketch.getNumValues()); diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java index cd3c1d2e7..9521f85d6 100644 --- 
a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java @@ -19,13 +19,14 @@ package org.apache.datasketches.tuple.arrayofdoubles; +import static org.apache.datasketches.common.Util.DEFAULT_UPDATE_SEED; + import java.nio.ByteBuffer; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.hash.MurmurHash3; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; import org.apache.datasketches.tuple.Util; /** @@ -46,7 +47,7 @@ public abstract class ArrayOfDoublesUpdatableSketch extends ArrayOfDoublesSketch * @return an ArrayOfDoublesUpdatableSketch */ public static ArrayOfDoublesUpdatableSketch heapify(final Memory mem) { - return heapify(mem, ThetaUtil.DEFAULT_UPDATE_SEED); + return heapify(mem, DEFAULT_UPDATE_SEED); } /** @@ -65,7 +66,7 @@ public static ArrayOfDoublesUpdatableSketch heapify(final Memory mem, final long * @return an ArrayOfDoublesUpdatableSketch */ public static ArrayOfDoublesUpdatableSketch wrap(final WritableMemory mem) { - return wrap(mem, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrap(mem, DEFAULT_UPDATE_SEED); } /** @@ -217,7 +218,7 @@ long getSeed() { @Override short getSeedHash() { - return Util.computeSeedHash(seed_); + return org.apache.datasketches.common.Util.computeSeedHash(seed_); } /** diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java index e9fd12d25..3ce9bac7b 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java @@ -21,6 +21,7 @@ import 
org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.ThetaUtil; @@ -47,7 +48,7 @@ public ArrayOfDoublesUpdatableSketchBuilder() { resizeFactor_ = DEFAULT_RESIZE_FACTOR; numValues_ = DEFAULT_NUMBER_OF_VALUES; samplingProbability_ = DEFAULT_SAMPLING_PROBABILITY; - seed_ = ThetaUtil.DEFAULT_UPDATE_SEED; + seed_ = Util.DEFAULT_UPDATE_SEED; } /** diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java index 52f827149..f3497709c 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java @@ -76,7 +76,7 @@ final class DirectArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketc | (count > 0 ? 
1 << Flags.HAS_ENTRIES.ordinal() : 0) )); dstMem.putByte(NUM_VALUES_BYTE, (byte) numValues_); - dstMem.putShort(SEED_HASH_SHORT, Util.computeSeedHash(sketch.getSeed())); + dstMem.putShort(SEED_HASH_SHORT, org.apache.datasketches.common.Util.computeSeedHash(sketch.getSeed())); thetaLong_ = Math.min(sketch.getThetaLong(), thetaLong); dstMem.putLong(THETA_LONG, thetaLong_); if (count > 0) { @@ -178,7 +178,7 @@ final class DirectArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketc if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { throw new SketchesArgumentException("Byte order mismatch"); } - Util.checkSeedHashes(mem.getShort(SEED_HASH_SHORT), Util.computeSeedHash(seed)); + org.apache.datasketches.common.Util.checkSeedHashes(mem.getShort(SEED_HASH_SHORT), org.apache.datasketches.common.Util.computeSeedHash(seed)); isEmpty_ = (mem_.getByte(FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; thetaLong_ = mem_.getLong(THETA_LONG); } diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java index ae1aa3dc0..8073a1b38 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java @@ -103,7 +103,7 @@ private DirectArrayOfDoublesQuickSelectSketch( | (1 << Flags.IS_EMPTY.ordinal()) )); mem_.putByte(NUM_VALUES_BYTE, (byte) numValues); - mem_.putShort(SEED_HASH_SHORT, Util.computeSeedHash(seed)); + mem_.putShort(SEED_HASH_SHORT, org.apache.datasketches.common.Util.computeSeedHash(seed)); thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability); mem_.putLong(THETA_LONG, thetaLong_); mem_.putByte(LG_NOM_ENTRIES_BYTE, (byte) Integer.numberOfTrailingZeros(nomEntries)); @@ -152,7 +152,7 @@ private 
DirectArrayOfDoublesQuickSelectSketch( SerializerDeserializer.validateType(mem_.getByte(SKETCH_TYPE_BYTE), SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch); - Util.checkSeedHashes(mem.getShort(SEED_HASH_SHORT), Util.computeSeedHash(seed)); + org.apache.datasketches.common.Util.checkSeedHashes(mem.getShort(SEED_HASH_SHORT), org.apache.datasketches.common.Util.computeSeedHash(seed)); keysOffset_ = ENTRIES_START; valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * getCurrentCapacity()); // to do: make parent take care of its own parts diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java index 3277d4a2c..f69babc97 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java @@ -19,6 +19,8 @@ package org.apache.datasketches.tuple.arrayofdoubles; +import static org.apache.datasketches.common.Util.DEFAULT_UPDATE_SEED; + import java.nio.ByteOrder; import java.util.Arrays; @@ -26,7 +28,6 @@ import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; import org.apache.datasketches.tuple.SerializerDeserializer; import org.apache.datasketches.tuple.Util; @@ -57,7 +58,7 @@ final class HeapArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketch super(sketch.getNumValues()); isEmpty_ = sketch.isEmpty(); thetaLong_ = Math.min(sketch.getThetaLong(), thetaLong); - seedHash_ = Util.computeSeedHash(sketch.getSeed()); + seedHash_ = org.apache.datasketches.common.Util.computeSeedHash(sketch.getSeed()); final int count = sketch.getRetainedEntries(); if (count > 0) { keys_ = new long[count]; @@ -103,7 +104,7 
@@ final class HeapArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketch * @param mem See Memory */ HeapArrayOfDoublesCompactSketch(final Memory mem) { - this(mem, ThetaUtil.DEFAULT_UPDATE_SEED); + this(mem, DEFAULT_UPDATE_SEED); } /** @@ -128,7 +129,7 @@ final class HeapArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketch if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { throw new SketchesArgumentException("Byte order mismatch"); } - Util.checkSeedHashes(seedHash_, Util.computeSeedHash(seed)); + org.apache.datasketches.common.Util.checkSeedHashes(seedHash_, org.apache.datasketches.common.Util.computeSeedHash(seed)); isEmpty_ = (mem.getByte(FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; thetaLong_ = mem.getLong(THETA_LONG); final boolean hasEntries = diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java index 9409a5828..ab955a0ee 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java @@ -98,7 +98,7 @@ final class HeapArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelec if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { throw new SketchesArgumentException("Byte order mismatch"); } - Util.checkSeedHashes(mem.getShort(SEED_HASH_SHORT), Util.computeSeedHash(seed)); + org.apache.datasketches.common.Util.checkSeedHashes(mem.getShort(SEED_HASH_SHORT), org.apache.datasketches.common.Util.computeSeedHash(seed)); isEmpty_ = (flags & (1 << Flags.IS_EMPTY.ordinal())) > 0; lgNomEntries_ = mem.getByte(LG_NOM_ENTRIES_BYTE); thetaLong_ = mem.getLong(THETA_LONG); @@ -238,7 +238,7 @@ void serializeInto(final WritableMemory mem) { | (count_ > 0 ? 
1 << Flags.HAS_ENTRIES.ordinal() : 0) )); mem.putByte(NUM_VALUES_BYTE, (byte) numValues_); - mem.putShort(SEED_HASH_SHORT, Util.computeSeedHash(seed_)); + mem.putShort(SEED_HASH_SHORT, org.apache.datasketches.common.Util.computeSeedHash(seed_)); mem.putLong(THETA_LONG, thetaLong_); mem.putByte(LG_NOM_ENTRIES_BYTE, (byte) lgNomEntries_); mem.putByte(LG_CUR_CAPACITY_BYTE, (byte) Integer.numberOfTrailingZeros(keys_.length)); diff --git a/src/main/java/org/apache/datasketches/tuple2/QuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple2/QuickSelectSketch.java index bdd376405..1babf6ab2 100644 --- a/src/main/java/org/apache/datasketches/tuple2/QuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple2/QuickSelectSketch.java @@ -35,10 +35,10 @@ import org.apache.datasketches.common.ByteArrayUtil; import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.QuickSelect; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.thetacommon2.HashOperations; -import org.apache.datasketches.thetacommon2.QuickSelect; import org.apache.datasketches.thetacommon2.ThetaUtil; /** diff --git a/src/main/java/org/apache/datasketches/tuple2/Union.java b/src/main/java/org/apache/datasketches/tuple2/Union.java index 8bb25b8b1..0beb8a29e 100644 --- a/src/main/java/org/apache/datasketches/tuple2/Union.java +++ b/src/main/java/org/apache/datasketches/tuple2/Union.java @@ -21,8 +21,8 @@ import static java.lang.Math.min; +import org.apache.datasketches.common.QuickSelect; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon2.QuickSelect; import org.apache.datasketches.thetacommon2.ThetaUtil; /** diff --git a/src/main/java/org/apache/datasketches/tuple2/UpdatableSketch.java b/src/main/java/org/apache/datasketches/tuple2/UpdatableSketch.java index 9bede1dc4..b9a01c084 100644 --- 
a/src/main/java/org/apache/datasketches/tuple2/UpdatableSketch.java +++ b/src/main/java/org/apache/datasketches/tuple2/UpdatableSketch.java @@ -19,11 +19,12 @@ package org.apache.datasketches.tuple2; +import static org.apache.datasketches.common.Util.DEFAULT_UPDATE_SEED; + import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; import org.apache.datasketches.hash.MurmurHash3; -import org.apache.datasketches.thetacommon2.ThetaUtil; /** * An extension of QuickSelectSketch<S>, which can be updated with many types of keys. @@ -136,7 +137,7 @@ public void update(final String key, final U value) { */ public void update(final byte[] key, final U value) { if ((key == null) || (key.length == 0)) { return; } - insertOrIgnore(MurmurHash3.hash(key, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1, value); + insertOrIgnore(MurmurHash3.hash(key, DEFAULT_UPDATE_SEED)[0] >>> 1, value); } /** @@ -148,7 +149,7 @@ public void update(final byte[] key, final U value) { */ public void update(final ByteBuffer buffer, final U value) { if (buffer == null || buffer.hasRemaining() == false) { return; } - insertOrIgnore(MurmurHash3.hash(buffer, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1, value); + insertOrIgnore(MurmurHash3.hash(buffer, DEFAULT_UPDATE_SEED)[0] >>> 1, value); } /** @@ -160,7 +161,7 @@ public void update(final ByteBuffer buffer, final U value) { */ public void update(final int[] key, final U value) { if ((key == null) || (key.length == 0)) { return; } - insertOrIgnore(MurmurHash3.hash(key, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1, value); + insertOrIgnore(MurmurHash3.hash(key, DEFAULT_UPDATE_SEED)[0] >>> 1, value); } /** @@ -172,7 +173,7 @@ public void update(final int[] key, final U value) { */ public void update(final long[] key, final U value) { if ((key == null) || (key.length == 0)) { return; } - insertOrIgnore(MurmurHash3.hash(key, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1, value); + insertOrIgnore(MurmurHash3.hash(key, DEFAULT_UPDATE_SEED)[0] >>> 1, value); } void 
insertOrIgnore(final long hash, final U value) { diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBImpl.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBImpl.java index a0f5044c3..f2a940528 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBImpl.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBImpl.java @@ -25,13 +25,13 @@ import static org.apache.datasketches.thetacommon2.HashOperations.convertToHashTable; import static org.apache.datasketches.thetacommon2.HashOperations.count; import static org.apache.datasketches.thetacommon2.HashOperations.hashSearch; -import static org.apache.datasketches.thetacommon2.ThetaUtil.computeSeedHash; import java.lang.foreign.MemorySegment; import java.util.Arrays; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SuppressFBWarnings; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon2.SetOperationCornerCases; import org.apache.datasketches.thetacommon2.SetOperationCornerCases.AnotbAction; import org.apache.datasketches.thetacommon2.SetOperationCornerCases.CornerCase; @@ -60,7 +60,7 @@ public class ArrayOfDoublesAnotBImpl extends ArrayOfDoublesAnotB { ArrayOfDoublesAnotBImpl(final int numValues, final long seed) { numValues_ = numValues; - seedHash_ = computeSeedHash(seed); + seedHash_ = Util.computeSeedHash(seed); } @Override diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersection.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersection.java index def239c71..d1bbbf810 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersection.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersection.java @@ -20,13 +20,12 @@ package 
org.apache.datasketches.tuple2.arrayofdoubles; import static java.lang.Math.min; -import static org.apache.datasketches.thetacommon2.ThetaUtil.checkSeedHashes; -import static org.apache.datasketches.thetacommon2.ThetaUtil.computeSeedHash; import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; +import org.apache.datasketches.common.Util; /** * Computes the intersection of two or more tuple sketches of type ArrayOfDoubles. @@ -50,7 +49,7 @@ public abstract class ArrayOfDoublesIntersection { * @param seed the hash function update seed. */ ArrayOfDoublesIntersection(final int numValues, final long seed) { - seedHash_ = computeSeedHash(seed); + seedHash_ = Util.computeSeedHash(seed); numValues_ = numValues; hashTables_ = null; empty_ = false; @@ -66,7 +65,7 @@ public abstract class ArrayOfDoublesIntersection { */ public void intersect(final ArrayOfDoublesSketch tupleSketch, final ArrayOfDoublesCombiner combiner) { if (tupleSketch == null) { throw new SketchesArgumentException("Sketch must not be null"); } - checkSeedHashes(seedHash_, tupleSketch.getSeedHash()); + Util.checkSeedHashes(seedHash_, tupleSketch.getSeedHash()); if (tupleSketch.numValues_ != numValues_) { throw new SketchesArgumentException( "Input tupleSketch cannot have different numValues from the internal numValues."); diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java index 043fefe07..f691f153b 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java @@ -23,8 +23,8 @@ import java.lang.foreign.MemorySegment; +import org.apache.datasketches.common.QuickSelect; import 
org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon2.QuickSelect; import org.apache.datasketches.thetacommon2.ThetaUtil; /** diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java index 43355a634..dd19792c2 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java @@ -21,7 +21,7 @@ import java.lang.foreign.MemorySegment; -import org.apache.datasketches.thetacommon2.ThetaUtil; +import org.apache.datasketches.common.Util; /** * Builds set operations object for tuple sketches of type ArrayOfDoubles. @@ -48,7 +48,7 @@ public class ArrayOfDoublesSetOperationBuilder { public ArrayOfDoublesSetOperationBuilder() { nomEntries_ = DEFAULT_NOMINAL_ENTRIES; numValues_ = DEFAULT_NUMBER_OF_VALUES; - seed_ = ThetaUtil.DEFAULT_UPDATE_SEED; + seed_ = Util.DEFAULT_UPDATE_SEED; } /** diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketch.java index 99a87bbb1..145458419 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketch.java @@ -23,8 +23,8 @@ import java.lang.foreign.MemorySegment; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon2.BinomialBoundsN; -import org.apache.datasketches.thetacommon2.ThetaUtil; import org.apache.datasketches.tuple2.SerializerDeserializer; /** @@ -76,7 +76,7 @@ static enum Flags { IS_BIG_ENDIAN, IS_IN_SAMPLING_MODE, IS_EMPTY, HAS_ENTRIES } * @return an ArrayOfDoublesSketch */ public static 
ArrayOfDoublesSketch heapify(final MemorySegment seg) { - return heapify(seg, ThetaUtil.DEFAULT_UPDATE_SEED); + return heapify(seg, Util.DEFAULT_UPDATE_SEED); } /** @@ -100,7 +100,7 @@ public static ArrayOfDoublesSketch heapify(final MemorySegment seg, final long s * @return an ArrayOfDoublesSketch */ public static ArrayOfDoublesSketch wrap(final MemorySegment seg) { - return wrap(seg, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrap(seg, Util.DEFAULT_UPDATE_SEED); } /** diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketches.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketches.java index 8ad121a00..b75c72f89 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketches.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketches.java @@ -21,7 +21,7 @@ import java.lang.foreign.MemorySegment; -import org.apache.datasketches.thetacommon2.ThetaUtil; +import org.apache.datasketches.common.Util; /** * Convenient static methods to instantiate tuple sketches of type ArrayOfDoubles. 
@@ -34,7 +34,7 @@ public final class ArrayOfDoublesSketches { * @return an ArrayOfDoublesSketch */ public static ArrayOfDoublesSketch heapifySketch(final MemorySegment srcSeg) { - return heapifySketch(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + return heapifySketch(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** @@ -53,7 +53,7 @@ public static ArrayOfDoublesSketch heapifySketch(final MemorySegment srcSeg, fin * @return an ArrayOfDoublesUpdatableSketch */ public static ArrayOfDoublesUpdatableSketch heapifyUpdatableSketch(final MemorySegment srcSeg) { - return heapifyUpdatableSketch(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + return heapifyUpdatableSketch(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** @@ -73,7 +73,7 @@ public static ArrayOfDoublesUpdatableSketch heapifyUpdatableSketch(final MemoryS * @return an ArrayOfDoublesSketch */ public static ArrayOfDoublesSketch wrapSketch(final MemorySegment srcSeg) { - return wrapSketch(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrapSketch(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** @@ -94,7 +94,7 @@ public static ArrayOfDoublesSketch wrapSketch(final MemorySegment srcSeg, final * @return an ArrayOfDoublesUpdatableSketch */ public static ArrayOfDoublesUpdatableSketch wrapUpdatableSketch(final MemorySegment srcSeg) { - return wrapUpdatableSketch(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrapUpdatableSketch(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** @@ -114,7 +114,7 @@ public static ArrayOfDoublesUpdatableSketch wrapUpdatableSketch(final MemorySegm * @return an ArrayOfDoublesUnion */ public static ArrayOfDoublesUnion heapifyUnion(final MemorySegment srcSeg) { - return heapifyUnion(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + return heapifyUnion(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** @@ -134,7 +134,7 @@ public static ArrayOfDoublesUnion heapifyUnion(final MemorySegment srcSeg, final * @return an ArrayOfDoublesUnion */ public static ArrayOfDoublesUnion wrapUnion(final MemorySegment srcSeg) { - return wrapUnion(srcSeg, 
ThetaUtil.DEFAULT_UPDATE_SEED); + return wrapUnion(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnion.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnion.java index 1c57b881e..aea9204b2 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnion.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnion.java @@ -21,14 +21,13 @@ import static java.lang.foreign.ValueLayout.JAVA_BYTE; import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static org.apache.datasketches.thetacommon2.ThetaUtil.checkSeedHashes; import static java.lang.Math.min; import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon2.ThetaUtil; +import org.apache.datasketches.common.Util; import org.apache.datasketches.tuple2.SerializerDeserializer; /** @@ -66,7 +65,7 @@ public abstract class ArrayOfDoublesUnion { * @return an ArrayOfDoublesUnion */ public static ArrayOfDoublesUnion heapify(final MemorySegment srcSeg) { - return heapify(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + return heapify(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** @@ -86,7 +85,7 @@ public static ArrayOfDoublesUnion heapify(final MemorySegment srcSeg, final long * @return an ArrayOfDoublesUnion */ public static ArrayOfDoublesUnion wrap(final MemorySegment srcSeg) { - return wrap(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrap(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** @@ -110,7 +109,7 @@ public static ArrayOfDoublesUnion wrap(final MemorySegment srcSeg, final long se */ public void union(final ArrayOfDoublesSketch tupleSketch) { if (tupleSketch == null) { return; } - checkSeedHashes(gadget_.getSeedHash(), tupleSketch.getSeedHash()); + Util.checkSeedHashes(gadget_.getSeedHash(), 
tupleSketch.getSeedHash()); if (gadget_.getNumValues() != tupleSketch.getNumValues()) { throw new SketchesArgumentException("Incompatible sketches: number of values mismatch " + gadget_.getNumValues() + " and " + tupleSketch.getNumValues()); diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java index 98cf6699a..d1384e9d2 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java @@ -19,14 +19,14 @@ package org.apache.datasketches.tuple2.arrayofdoubles; -import static org.apache.datasketches.thetacommon2.ThetaUtil.computeSeedHash; +import static org.apache.datasketches.common.Util.computeSeedHash; +import static org.apache.datasketches.common.Util.DEFAULT_UPDATE_SEED; import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.hash.MurmurHash3; -import org.apache.datasketches.thetacommon2.ThetaUtil; import org.apache.datasketches.tuple2.Util; /** @@ -47,7 +47,7 @@ public abstract class ArrayOfDoublesUpdatableSketch extends ArrayOfDoublesSketch * @return an ArrayOfDoublesUpdatableSketch */ public static ArrayOfDoublesUpdatableSketch heapify(final MemorySegment seg) { - return heapify(seg, ThetaUtil.DEFAULT_UPDATE_SEED); + return heapify(seg, DEFAULT_UPDATE_SEED); } /** @@ -67,7 +67,7 @@ public static ArrayOfDoublesUpdatableSketch heapify(final MemorySegment seg, fin * @return an ArrayOfDoublesUpdatableSketch */ public static ArrayOfDoublesUpdatableSketch wrap(final MemorySegment seg) { - return wrap(seg, ThetaUtil.DEFAULT_UPDATE_SEED); + return wrap(seg, DEFAULT_UPDATE_SEED); } /** diff --git 
a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java index 9f2b98a42..99723ec6f 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java @@ -23,6 +23,7 @@ import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon2.ThetaUtil; /** @@ -48,7 +49,7 @@ public ArrayOfDoublesUpdatableSketchBuilder() { resizeFactor_ = DEFAULT_RESIZE_FACTOR; numValues_ = DEFAULT_NUMBER_OF_VALUES; samplingProbability_ = DEFAULT_SAMPLING_PROBABILITY; - seed_ = ThetaUtil.DEFAULT_UPDATE_SEED; + seed_ = Util.DEFAULT_UPDATE_SEED; } /** diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java index c6bce07cd..f41f35991 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java @@ -24,14 +24,13 @@ import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; -import static org.apache.datasketches.thetacommon2.ThetaUtil.checkSeedHashes; -import static org.apache.datasketches.thetacommon2.ThetaUtil.computeSeedHash; import java.lang.foreign.MemorySegment; import java.nio.ByteOrder; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; +import 
org.apache.datasketches.common.Util; import org.apache.datasketches.tuple2.SerializerDeserializer; /** @@ -82,7 +81,7 @@ final class DirectArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketc | (count > 0 ? 1 << Flags.HAS_ENTRIES.ordinal() : 0) )); dstSeg.set(JAVA_BYTE, NUM_VALUES_BYTE, (byte) numValues_); - dstSeg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, computeSeedHash(sketch.getSeed())); + dstSeg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, Util.computeSeedHash(sketch.getSeed())); thetaLong_ = Math.min(sketch.getThetaLong(), thetaLong); dstSeg.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); if (count > 0) { @@ -183,7 +182,7 @@ final class DirectArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketc if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { throw new SketchesArgumentException("Byte order mismatch"); } - checkSeedHashes(seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT), computeSeedHash(seed)); + Util.checkSeedHashes(seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT), Util.computeSeedHash(seed)); isEmpty_ = (seg_.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, THETA_LONG); } diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java index d2fbbdfd8..0b8de6a71 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java @@ -25,10 +25,10 @@ import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; -import static org.apache.datasketches.thetacommon2.ThetaUtil.checkSeedHashes; -import static 
org.apache.datasketches.thetacommon2.ThetaUtil.computeSeedHash; +import static org.apache.datasketches.common.Util.checkSeedHashes; import static org.apache.datasketches.common.Util.clear; import static org.apache.datasketches.common.Util.clearBits; +import static org.apache.datasketches.common.Util.computeSeedHash; import static org.apache.datasketches.common.Util.setBits; import java.lang.foreign.MemorySegment; diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java index 0e118d5d5..8741fc639 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java @@ -24,8 +24,6 @@ import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; -import static org.apache.datasketches.thetacommon2.ThetaUtil.checkSeedHashes; -import static org.apache.datasketches.thetacommon2.ThetaUtil.computeSeedHash; import java.lang.foreign.MemorySegment; import java.nio.ByteOrder; @@ -33,7 +31,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon2.ThetaUtil; +import org.apache.datasketches.common.Util; import org.apache.datasketches.tuple2.SerializerDeserializer; /** @@ -63,7 +61,7 @@ final class HeapArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketch super(sketch.getNumValues()); isEmpty_ = sketch.isEmpty(); thetaLong_ = Math.min(sketch.getThetaLong(), thetaLong); - seedHash_ = computeSeedHash(sketch.getSeed()); + seedHash_ = Util.computeSeedHash(sketch.getSeed()); final int count = sketch.getRetainedEntries(); if (count > 0) { keys_ = new 
long[count]; @@ -109,7 +107,7 @@ final class HeapArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketch * @param seg the destination segment */ HeapArrayOfDoublesCompactSketch(final MemorySegment seg) { - this(seg, ThetaUtil.DEFAULT_UPDATE_SEED); + this(seg, Util.DEFAULT_UPDATE_SEED); } /** @@ -134,7 +132,7 @@ final class HeapArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketch if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { throw new SketchesArgumentException("Byte order mismatch"); } - checkSeedHashes(seedHash_, computeSeedHash(seed)); + Util.checkSeedHashes(seedHash_, Util.computeSeedHash(seed)); isEmpty_ = (seg.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, THETA_LONG); final boolean hasEntries = diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java index 081750b5a..5d8744754 100644 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java @@ -26,9 +26,9 @@ import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; import static org.apache.datasketches.common.Util.ceilingPowerOf2; +import static org.apache.datasketches.common.Util.checkSeedHashes; +import static org.apache.datasketches.common.Util.computeSeedHash; import static org.apache.datasketches.common.Util.exactLog2OfLong; -import static org.apache.datasketches.thetacommon2.ThetaUtil.checkSeedHashes; -import static org.apache.datasketches.thetacommon2.ThetaUtil.computeSeedHash; import java.lang.foreign.MemorySegment; import java.nio.ByteOrder; diff --git a/src/test/java/org/apache/datasketches/common/UtilTest.java 
b/src/test/java/org/apache/datasketches/common/UtilTest.java index ca83dadd6..fac2dea1c 100644 --- a/src/test/java/org/apache/datasketches/common/UtilTest.java +++ b/src/test/java/org/apache/datasketches/common/UtilTest.java @@ -416,15 +416,15 @@ static void checkConvertToLongArray() { byte[] arr = {1,2,3,4,5,6,7,8,9,10,11,12}; long[] out = convertToLongArray(arr, false); - String s = org.apache.datasketches.common.Util.zeroPad(Long.toHexString(out[0]), 16); + String s = zeroPad(Long.toHexString(out[0]), 16); assertEquals(s, "0807060504030201"); - s = org.apache.datasketches.common.Util.zeroPad(Long.toHexString(out[1]), 16); + s = zeroPad(Long.toHexString(out[1]), 16); assertEquals(s, "000000000c0b0a09"); out = convertToLongArray(arr, true); - s = org.apache.datasketches.common.Util.zeroPad(Long.toHexString(out[0]), 16); + s = zeroPad(Long.toHexString(out[0]), 16); assertEquals(s, "0102030405060708"); - s = org.apache.datasketches.common.Util.zeroPad(Long.toHexString(out[1]), 16); + s = zeroPad(Long.toHexString(out[1]), 16); assertEquals(s, "00000000090a0b0c"); } diff --git a/src/test/java/org/apache/datasketches/cpc/CpcSketchTest.java b/src/test/java/org/apache/datasketches/cpc/CpcSketchTest.java index 1c03fb4bb..3782bdc34 100644 --- a/src/test/java/org/apache/datasketches/cpc/CpcSketchTest.java +++ b/src/test/java/org/apache/datasketches/cpc/CpcSketchTest.java @@ -29,8 +29,8 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.thetacommon.ThetaUtil; import org.testng.annotations.Test; /** @@ -189,10 +189,10 @@ public void checkIconHipUBLBLg15() { @Test public void checkHeapify() { int lgK = 10; - CpcSketch sk = new CpcSketch(lgK, ThetaUtil.DEFAULT_UPDATE_SEED); + CpcSketch sk = new CpcSketch(lgK, Util.DEFAULT_UPDATE_SEED); assertTrue(sk.isEmpty()); byte[] byteArray = 
sk.toByteArray(); - CpcSketch sk2 = CpcSketch.heapify(byteArray, ThetaUtil.DEFAULT_UPDATE_SEED); + CpcSketch sk2 = CpcSketch.heapify(byteArray, Util.DEFAULT_UPDATE_SEED); assertTrue(specialEquals(sk2, sk, false, false)); } @@ -210,7 +210,7 @@ public void checkHeapify2() { @Test public void checkRowColUpdate() { int lgK = 10; - CpcSketch sk = new CpcSketch(lgK, ThetaUtil.DEFAULT_UPDATE_SEED); + CpcSketch sk = new CpcSketch(lgK, Util.DEFAULT_UPDATE_SEED); sk.rowColUpdate(0); assertEquals(sk.getFlavor(), Flavor.SPARSE); } diff --git a/src/test/java/org/apache/datasketches/cpc/PreambleUtilTest.java b/src/test/java/org/apache/datasketches/cpc/PreambleUtilTest.java index d09653196..8577b8a3d 100644 --- a/src/test/java/org/apache/datasketches/cpc/PreambleUtilTest.java +++ b/src/test/java/org/apache/datasketches/cpc/PreambleUtilTest.java @@ -54,17 +54,17 @@ import org.testng.annotations.Test; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.cpc.PreambleUtil.HiField; /** * @author Lee Rhodes */ public class PreambleUtilTest { - static final short defaultSeedHash = ThetaUtil.computeSeedHash(ThetaUtil.DEFAULT_UPDATE_SEED) ; + static final short defaultSeedHash = Util.computeSeedHash(Util.DEFAULT_UPDATE_SEED) ; private static void checkFirst8(WritableMemory wmem, Format format, int lgK, int fiCol) { assertEquals(getFormat(wmem), format); diff --git a/src/test/java/org/apache/datasketches/cpc/TestAllTest.java b/src/test/java/org/apache/datasketches/cpc/TestAllTest.java index ce847fc36..4f94fce47 100644 --- a/src/test/java/org/apache/datasketches/cpc/TestAllTest.java +++ b/src/test/java/org/apache/datasketches/cpc/TestAllTest.java @@ -25,7 +25,7 @@ import java.io.PrintWriter; 
import java.util.Arrays; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.common.Util; import org.testng.annotations.Test; /** @@ -89,7 +89,7 @@ public void singleRowColCheck() { CompressedState state = CompressedState.compress(srcSketch); ps.println(CompressedState.toString(state, true)); - CpcSketch uncSketch = CpcSketch.uncompress(state, ThetaUtil.DEFAULT_UPDATE_SEED); + CpcSketch uncSketch = CpcSketch.uncompress(state, Util.DEFAULT_UPDATE_SEED); ps.println(uncSketch.toString(true)); } diff --git a/src/test/java/org/apache/datasketches/fdt2/FdtSketchTest.java b/src/test/java/org/apache/datasketches/fdt2/FdtSketchTest.java new file mode 100644 index 000000000..59f7e6565 --- /dev/null +++ b/src/test/java/org/apache/datasketches/fdt2/FdtSketchTest.java @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.fdt2; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import java.lang.foreign.MemorySegment; +import java.util.List; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.tuple2.TupleSketchIterator; +import org.apache.datasketches.tuple2.strings.ArrayOfStringsSummary; +import org.testng.annotations.Test; + +/** + * @author Lee Rhodes + */ +public class FdtSketchTest { + private static final String LS = System.getProperty("line.separator"); + private static final char sep = '|'; //string separator + + @SuppressWarnings("deprecation") + @Test + public void checkFdtSketch() { + final int lgK = 14; + final FdtSketch sketch = new FdtSketch(lgK); + + final String[] nodesArr = {"abc", "def" }; + sketch.update(nodesArr); + + final TupleSketchIterator it = sketch.iterator(); + int count = 0; + while (it.next()) { + final String[] nodesArr2 = it.getSummary().getValue(); + assertEquals(nodesArr2, nodesArr); + count++; + } + assertEquals(count, 1); + + //serialize + final byte[] byteArr = sketch.toByteArray(); + //deserialize + MemorySegment seg = MemorySegment.ofArray(byteArr); + FdtSketch sketch2 = new FdtSketch(seg); + + //check output + final TupleSketchIterator it2 = sketch2.iterator(); + int count2 = 0; + while (it2.next()) { + final String[] nodesArr2 = it2.getSummary().getValue(); + assertEquals(nodesArr2, nodesArr); + count2++; + } + assertEquals(count, count2); + assertEquals(sketch2.getEstimate(), sketch.getEstimate()); + assertEquals(sketch2.getLowerBound(2), sketch.getLowerBound(2)); + assertEquals(sketch2.getUpperBound(2), sketch.getUpperBound(2)); + } + + @Test + public void checkAlternateLgK() { + int lgK = FdtSketch.computeLgK(.01, .01); + assertEquals(lgK, 20); + lgK = FdtSketch.computeLgK(.02, .05); + assertEquals(lgK, 15); + try { + lgK = FdtSketch.computeLgK(.01, .001); + fail(); + } 
catch (SketchesArgumentException e) { + //ok + } + } + + @Test + public void checkFdtSketchWithThreshold() { + FdtSketch sk = new FdtSketch(.02, .05); //thresh, RSE + assertEquals(sk.getLgK(), 15); + println("LgK: " + sk.getLgK()); + } + + @Test + public void simpleCheckPostProcessing() { + FdtSketch sk = new FdtSketch(8); + int[] priKeyIndices = {0,2}; + String[] arr1 = {"a", "1", "c"}; + String[] arr2 = {"a", "2", "c"}; + String[] arr3 = {"a", "3", "c"}; + String[] arr4 = {"a", "4", "c"}; + String[] arr5 = {"a", "1", "d"}; + String[] arr6 = {"a", "2", "d"}; + sk.update(arr1); + sk.update(arr2); + sk.update(arr3); + sk.update(arr4); + sk.update(arr5); + sk.update(arr6); + //get results from PostProcessor directly + Group gp = new Group(); //uninitialized + PostProcessor post = new PostProcessor(sk, gp, sep); + post = sk.getPostProcessor(gp, sep); + post = sk.getPostProcessor(); //equivalent + List list = post.getGroupList(priKeyIndices, 2, 0); + assertEquals(list.size(), 2); + assertEquals(post.getGroupCount(), 2); + println(gp.getHeader()); + for (int i = 0; i < list.size(); i++) { + println(list.get(i).toString()); + } + list = post.getGroupList(priKeyIndices, 2, 1); + assertEquals(list.size(), 1); + + //get results from sketch directly + list = sk.getResult(priKeyIndices, 0, 2, sep); + assertEquals(list.size(), 2); + } + + @Test + public void checkEstimatingPostProcessing() { + FdtSketch sk = new FdtSketch(4); + int[] priKeyIndices = {0}; + for (int i = 0; i < 32; i++) { + String[] arr = {"a", Integer.toHexString(i)}; + sk.update(arr); + } + assertTrue(sk.isEstimationMode()); + List list = sk.getResult(priKeyIndices, 0, 2, sep); + assertEquals(list.size(), 1); + println(new Group().getHeader()); + for (int i = 0; i < list.size(); i++) { + println(list.get(i).toString()); + } + } + + @Test + public void checkCopyCtor() { + final int lgK = 14; + final FdtSketch sk = new FdtSketch(lgK); + + final String[] nodesArr = {"abc", "def" }; + sk.update(nodesArr); + 
assertEquals(sk.getRetainedEntries(), 1); + final FdtSketch sk2 = sk.copy(); + assertEquals(sk2.getRetainedEntries(), 1); + } + + @Test + public void printlnTest() { + println("PRINTING: "+this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(String s) { + print(s + LS); + } + + /** + * @param s value to print + */ + static void print(String s) { + //System.out.print(s); //disable here + } + +} diff --git a/src/test/java/org/apache/datasketches/fdt2/GroupTest.java b/src/test/java/org/apache/datasketches/fdt2/GroupTest.java new file mode 100644 index 000000000..929e6b4c6 --- /dev/null +++ b/src/test/java/org/apache/datasketches/fdt2/GroupTest.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.fdt2; + +import static org.testng.Assert.assertEquals; + +import org.testng.annotations.Test; + +/** + * @author Lee Rhodes + */ +public class GroupTest { + private static final String LS = System.getProperty("line.separator"); + + @Test + public void checkToString() { //check visually + Group gp = new Group(); + gp.init("AAAAAAAA,BBBBBBBBBB", 100_000_000, 1E8, 1.2E8, 8E7, 0.1, 0.01); + assertEquals(gp.getPrimaryKey(), "AAAAAAAA,BBBBBBBBBB"); + assertEquals(gp.getCount(), 100_000_000); + assertEquals(gp.getEstimate(), 1E8); + assertEquals(gp.getUpperBound(), 1.2E8); + assertEquals(gp.getLowerBound(), 8E7); + assertEquals(gp.getFraction(), 0.1); + assertEquals(gp.getRse(), 0.01); + + println(gp.getHeader()); + println(gp.toString()); + } + + @Test + public void printlnTest() { + println("PRINTING: "+this.getClass().getName()); + } + + /** + * @param s value to print + */ + static void println(String s) { + print(s + LS); + } + + /** + * @param s value to print + */ + static void print(String s) { + //System.out.print(s); //disable here + } + +} diff --git a/src/test/java/org/apache/datasketches/hash/MurmurHash3Test.java b/src/test/java/org/apache/datasketches/hash/MurmurHash3Test.java index 9caf42e1d..21ae20783 100644 --- a/src/test/java/org/apache/datasketches/hash/MurmurHash3Test.java +++ b/src/test/java/org/apache/datasketches/hash/MurmurHash3Test.java @@ -20,6 +20,7 @@ package org.apache.datasketches.hash; import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.datasketches.common.Util.longToHexBytes; import static org.apache.datasketches.hash.MurmurHash3.hash; import java.nio.ByteBuffer; @@ -265,37 +266,37 @@ public void checkCrossTypeHashConsistency() { println("Bytes"); byte[] bArr = {1,2,3,4,5,6,7,8, 9,10,11,12,13,14,15,16, 17,18,19,20,21,22,23,24}; long[] out1 = hash(bArr, 0L); - println(org.apache.datasketches.common.Util.longToHexBytes(out1[0])); - 
println(org.apache.datasketches.common.Util.longToHexBytes(out1[1])); + println(longToHexBytes(out1[0])); + println(longToHexBytes(out1[1])); println("ByteBuffer"); ByteBuffer bBuf = ByteBuffer.wrap(bArr); out = hash(bBuf, 0L); Assert.assertEquals(out, out1); - println(org.apache.datasketches.common.Util.longToHexBytes(out1[0])); - println(org.apache.datasketches.common.Util.longToHexBytes(out1[1])); + println(longToHexBytes(out1[0])); + println(longToHexBytes(out1[1])); println("Chars"); char[] cArr = {0X0201, 0X0403, 0X0605, 0X0807, 0X0a09, 0X0c0b, 0X0e0d, 0X100f, 0X1211, 0X1413, 0X1615, 0X1817}; out = hash(cArr, 0L); Assert.assertEquals(out, out1); - println(org.apache.datasketches.common.Util.longToHexBytes(out[0])); - println(org.apache.datasketches.common.Util.longToHexBytes(out[1])); + println(longToHexBytes(out[0])); + println(longToHexBytes(out[1])); println("Ints"); int[] iArr = {0X04030201, 0X08070605, 0X0c0b0a09, 0X100f0e0d, 0X14131211, 0X18171615}; out = hash(iArr, 0L); Assert.assertEquals(out, out1); - println(org.apache.datasketches.common.Util.longToHexBytes(out[0])); - println(org.apache.datasketches.common.Util.longToHexBytes(out[1])); + println(longToHexBytes(out[0])); + println(longToHexBytes(out[1])); println("Longs"); long[] lArr = {0X0807060504030201L, 0X100f0e0d0c0b0a09L, 0X1817161514131211L}; out = hash(lArr, 0L); Assert.assertEquals(out, out1); - println(org.apache.datasketches.common.Util.longToHexBytes(out[0])); - println(org.apache.datasketches.common.Util.longToHexBytes(out[1])); + println(longToHexBytes(out[0])); + println(longToHexBytes(out[1])); } diff --git a/src/test/java/org/apache/datasketches/theta/AnotBimplTest.java b/src/test/java/org/apache/datasketches/theta/AnotBimplTest.java index 722a96e83..48c9fc2fb 100644 --- a/src/test/java/org/apache/datasketches/theta/AnotBimplTest.java +++ b/src/test/java/org/apache/datasketches/theta/AnotBimplTest.java @@ -26,8 +26,8 @@ import org.apache.datasketches.common.Family; import 
org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; import org.testng.annotations.Test; /** @@ -53,7 +53,7 @@ public void checkExactAnotB_AvalidNoOverlap() { assertTrue(aNb.isEmpty()); //only applies to stateful assertTrue(aNb.getCache().length == 0); //only applies to stateful assertEquals(aNb.getThetaLong(), Long.MAX_VALUE); //only applies to stateful - assertEquals(aNb.getSeedHash(), ThetaUtil.computeSeedHash(ThetaUtil.DEFAULT_UPDATE_SEED)); + assertEquals(aNb.getSeedHash(), Util.computeSeedHash(Util.DEFAULT_UPDATE_SEED)); aNb.setA(usk1); aNb.notB(usk2); @@ -308,7 +308,7 @@ public void checkGetResult() { @Test public void checkGetFamily() { //cheap trick - final AnotBimpl anotb = new AnotBimpl(ThetaUtil.DEFAULT_UPDATE_SEED); + final AnotBimpl anotb = new AnotBimpl(Util.DEFAULT_UPDATE_SEED); assertEquals(anotb.getFamily(), Family.A_NOT_B); } diff --git a/src/test/java/org/apache/datasketches/theta/BackwardConversions.java b/src/test/java/org/apache/datasketches/theta/BackwardConversions.java index 18cd6e865..edd327021 100644 --- a/src/test/java/org/apache/datasketches/theta/BackwardConversions.java +++ b/src/test/java/org/apache/datasketches/theta/BackwardConversions.java @@ -20,9 +20,9 @@ package org.apache.datasketches.theta; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; /** * This class converts current compact sketches into prior SerVer 1 and SerVer 2 format for testing. @@ -182,7 +182,7 @@ public static Memory convertSerVer3toSerVer1(final CompactSketch skV3) { * @return a SerVer2 SetSketch as Memory object. 
*/ public static Memory convertSerVer3toSerVer2(final CompactSketch skV3, final long seed) { - final short seedHash = ThetaUtil.computeSeedHash(seed); + final short seedHash = Util.computeSeedHash(seed); WritableMemory wmem = null; if (skV3 instanceof EmptyCompactSketch) { diff --git a/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java index fe2b138ca..5b9e28afe 100644 --- a/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java @@ -29,18 +29,18 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.theta.ConcurrentHeapQuickSelectSketchTest.SharedLocal; import org.apache.datasketches.thetacommon.HashOperations; -import org.apache.datasketches.thetacommon.ThetaUtil; import org.testng.annotations.Test; /** * @author eshcar */ public class ConcurrentDirectQuickSelectSketchTest { - private static final long SEED = ThetaUtil.DEFAULT_UPDATE_SEED; + private static final long SEED = Util.DEFAULT_UPDATE_SEED; @Test public void checkDirectCompactConversion() { @@ -566,7 +566,7 @@ public void checkBadLgNomLongs() { boolean useMem = true; SharedLocal sl = new SharedLocal(lgK, lgK, useMem); sl.wmem.putByte(LG_NOM_LONGS_BYTE, (byte) 3); //Corrupt LgNomLongs byte - DirectQuickSelectSketch.writableWrap(sl.wmem, ThetaUtil.DEFAULT_UPDATE_SEED); + DirectQuickSelectSketch.writableWrap(sl.wmem, Util.DEFAULT_UPDATE_SEED); } @Test @@ -648,14 +648,14 @@ public void checkWrapIllegalFamilyID_direct() { sl.wmem.putByte(FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte //try to wrap the corrupted mem - 
DirectQuickSelectSketch.writableWrap(sl.wmem, ThetaUtil.DEFAULT_UPDATE_SEED); + DirectQuickSelectSketch.writableWrap(sl.wmem, Util.DEFAULT_UPDATE_SEED); } @Test(expectedExceptions = SketchesArgumentException.class) public void checkHeapifySeedConflict() { int lgK = 9; long seed1 = 1021; - long seed2 = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed2 = Util.DEFAULT_UPDATE_SEED; boolean useMem = true; SharedLocal sl = new SharedLocal(lgK, lgK, seed1, useMem, true, 1); UpdateSketch shared = sl.shared; @@ -671,7 +671,7 @@ public void checkCorruptLgNomLongs() { SharedLocal sl = new SharedLocal(lgK, lgK, useMem); sl.wmem.putByte(LG_NOM_LONGS_BYTE, (byte)2); //corrupt - Sketch.heapify(sl.wmem, ThetaUtil.DEFAULT_UPDATE_SEED); + Sketch.heapify(sl.wmem, Util.DEFAULT_UPDATE_SEED); } @Test(expectedExceptions = UnsupportedOperationException.class) diff --git a/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java index 84ddcb80e..f642bcae5 100644 --- a/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java @@ -31,9 +31,9 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; import org.testng.annotations.Test; /** @@ -122,7 +122,7 @@ public void checkIllegalSketchID_UpdateSketch() { public void checkHeapifySeedConflict() { int lgK = 9; long seed = 1021; - long seed2 = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed2 = Util.DEFAULT_UPDATE_SEED; SharedLocal sl = new SharedLocal(lgK, lgK, seed); byte[] byteArray = sl.shared.toByteArray(); Memory srcMem = Memory.wrap(byteArray); @@ -136,7 +136,7 @@ 
public void checkHeapifyCorruptLgNomLongs() { byte[] serArr = sl.shared.toByteArray(); WritableMemory srcMem = WritableMemory.writableWrap(serArr); srcMem.putByte(LG_NOM_LONGS_BYTE, (byte)2); //corrupt - Sketch.heapify(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + Sketch.heapify(srcMem, Util.DEFAULT_UPDATE_SEED); } @Test(expectedExceptions = UnsupportedOperationException.class) @@ -240,7 +240,7 @@ public void checkHeapifyMemoryEstimating() { byte[] serArr = shared.toByteArray(); Memory srcMem = Memory.wrap(serArr); - UpdateSketch recoveredShared = UpdateSketch.heapify(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + UpdateSketch recoveredShared = UpdateSketch.heapify(srcMem, Util.DEFAULT_UPDATE_SEED); final int bytes = Sketch.getMaxUpdateSketchBytes(k); final WritableMemory wmem = WritableMemory.allocate(bytes); @@ -675,7 +675,7 @@ static void println(String s) { } static class SharedLocal { - static final long DefaultSeed = ThetaUtil.DEFAULT_UPDATE_SEED; + static final long DefaultSeed = Util.DEFAULT_UPDATE_SEED; final UpdateSketch shared; final ConcurrentSharedThetaSketch sharedIf; final UpdateSketch local; diff --git a/src/test/java/org/apache/datasketches/theta/DirectIntersectionTest.java b/src/test/java/org/apache/datasketches/theta/DirectIntersectionTest.java index 4c9b1eccc..77f4ab5b6 100644 --- a/src/test/java/org/apache/datasketches/theta/DirectIntersectionTest.java +++ b/src/test/java/org/apache/datasketches/theta/DirectIntersectionTest.java @@ -32,9 +32,9 @@ import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesReadOnlyException; import org.apache.datasketches.common.SketchesStateException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; import org.testng.annotations.Test; /** @@ -661,14 +661,14 @@ public void checkWrap() { public void checkDefaultMinSize() { 
final int k = 32; final WritableMemory mem = WritableMemory.writableWrap(new byte[k*8 + PREBYTES]); - IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, mem); + IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, mem); } @Test(expectedExceptions = SketchesArgumentException.class) public void checkExceptionMinSize() { final int k = 16; final WritableMemory mem = WritableMemory.writableWrap(new byte[k*8 + PREBYTES]); - IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, mem); + IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, mem); } @Test @@ -691,7 +691,7 @@ public void checkFamily() { //cheap trick final int k = 16; final WritableMemory mem = WritableMemory.writableWrap(new byte[k*16 + PREBYTES]); - final IntersectionImpl impl = IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, mem); + final IntersectionImpl impl = IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, mem); assertEquals(impl.getFamily(), Family.INTERSECTION); } @@ -699,22 +699,22 @@ public void checkFamily() { public void checkExceptions1() { final int k = 16; final WritableMemory mem = WritableMemory.writableWrap(new byte[k*16 + PREBYTES]); - IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, mem); + IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, mem); //corrupt SerVer mem.putByte(PreambleUtil.SER_VER_BYTE, (byte) 2); - IntersectionImpl.wrapInstance(mem, ThetaUtil.DEFAULT_UPDATE_SEED, false); + IntersectionImpl.wrapInstance(mem, Util.DEFAULT_UPDATE_SEED, false); } @Test(expectedExceptions = SketchesArgumentException.class) public void checkExceptions2() { final int k = 16; final WritableMemory mem = WritableMemory.writableWrap(new byte[k*16 + PREBYTES]); - IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, mem); + IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, mem); //mem now has non-empty intersection //corrupt empty and 
CurCount mem.setBits(PreambleUtil.FLAGS_BYTE, (byte) PreambleUtil.EMPTY_FLAG_MASK); mem.putInt(PreambleUtil.RETAINED_ENTRIES_INT, 2); - IntersectionImpl.wrapInstance(mem, ThetaUtil.DEFAULT_UPDATE_SEED, false); + IntersectionImpl.wrapInstance(mem, Util.DEFAULT_UPDATE_SEED, false); } //Check Alex's bug intersecting 2 direct full sketches with only overlap of 2 diff --git a/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java index 5f608113a..59b9bd96d 100644 --- a/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java @@ -46,11 +46,11 @@ import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesReadOnlyException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.HashOperations; -import org.apache.datasketches.thetacommon.ThetaUtil; import org.testng.annotations.Test; /** @@ -188,14 +188,14 @@ public void checkWrapIllegalFamilyID_direct() { mem.putByte(FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte //try to wrap the corrupted mem - DirectQuickSelectSketch.writableWrap(mem, ThetaUtil.DEFAULT_UPDATE_SEED); + DirectQuickSelectSketch.writableWrap(mem, Util.DEFAULT_UPDATE_SEED); } @Test public void checkHeapifySeedConflict() { int k = 512; long seed1 = 1021; - long seed2 = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed2 = Util.DEFAULT_UPDATE_SEED; try (Arena arena = Arena.ofConfined()) { WritableMemory wmem = makeNativeMemory(k, arena); UpdateSketch usk = UpdateSketch.builder().setSeed(seed1).setNominalEntries(k).build(wmem); @@ -216,7 +216,7 @@ public void 
checkCorruptLgNomLongs() { WritableMemory wmem = makeNativeMemory(k, arena); UpdateSketch.builder().setNominalEntries(k).build(wmem); wmem.putByte(LG_NOM_LONGS_BYTE, (byte)2); //corrupt - Sketch.heapify(wmem, ThetaUtil.DEFAULT_UPDATE_SEED); + Sketch.heapify(wmem, Util.DEFAULT_UPDATE_SEED); } catch (final Exception e) { if (e instanceof SketchesArgumentException) {} else { throw new RuntimeException(e); } @@ -768,7 +768,7 @@ public void checkConstructorSrcMemCorruptions() { UpdateSketch usk2; mem1.putByte(FAMILY_BYTE, (byte) 3); //corrupt Family by setting to Compact try { - usk2 = DirectQuickSelectSketch.writableWrap(mem1, ThetaUtil.DEFAULT_UPDATE_SEED); + usk2 = DirectQuickSelectSketch.writableWrap(mem1, Util.DEFAULT_UPDATE_SEED); fail("Expected SketchesArgumentException"); } catch (SketchesArgumentException e) { //Pass @@ -776,7 +776,7 @@ public void checkConstructorSrcMemCorruptions() { mem1.putByte(FAMILY_BYTE, (byte) 2); //fix Family mem1.putByte(PREAMBLE_LONGS_BYTE, (byte) 1); //corrupt preLongs try { - usk2 = DirectQuickSelectSketch.writableWrap(mem1, ThetaUtil.DEFAULT_UPDATE_SEED); + usk2 = DirectQuickSelectSketch.writableWrap(mem1, Util.DEFAULT_UPDATE_SEED); fail("Expected SketchesArgumentException"); } catch (SketchesArgumentException e) { //pass @@ -784,7 +784,7 @@ public void checkConstructorSrcMemCorruptions() { mem1.putByte(PREAMBLE_LONGS_BYTE, (byte) 3); //fix preLongs mem1.putByte(SER_VER_BYTE, (byte) 2); //corrupt serVer try { - usk2 = DirectQuickSelectSketch.writableWrap(mem1, ThetaUtil.DEFAULT_UPDATE_SEED); + usk2 = DirectQuickSelectSketch.writableWrap(mem1, Util.DEFAULT_UPDATE_SEED); fail("Expected SketchesArgumentException"); } catch (SketchesArgumentException e) { //pass @@ -794,7 +794,7 @@ public void checkConstructorSrcMemCorruptions() { mem1.putLong(THETA_LONG, Long.MAX_VALUE >>> 1); //corrupt theta and mem1.putByte(LG_ARR_LONGS_BYTE, (byte) 10); //corrupt lgArrLongs try { - usk2 = DirectQuickSelectSketch.writableWrap(mem1, 
ThetaUtil.DEFAULT_UPDATE_SEED); + usk2 = DirectQuickSelectSketch.writableWrap(mem1, Util.DEFAULT_UPDATE_SEED); fail("Expected SketchesArgumentException"); } catch (SketchesArgumentException e) { //pass @@ -804,7 +804,7 @@ public void checkConstructorSrcMemCorruptions() { byte badFlags = (byte) (BIG_ENDIAN_FLAG_MASK | COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | ORDERED_FLAG_MASK); mem1.putByte(FLAGS_BYTE, badFlags); try { - usk2 = DirectQuickSelectSketch.writableWrap(mem1, ThetaUtil.DEFAULT_UPDATE_SEED); + usk2 = DirectQuickSelectSketch.writableWrap(mem1, Util.DEFAULT_UPDATE_SEED); fail("Expected SketchesArgumentException"); } catch (SketchesArgumentException e) { //pass @@ -813,7 +813,7 @@ public void checkConstructorSrcMemCorruptions() { byte[] arr2 = Arrays.copyOfRange(arr1, 0, bytes-1); //corrupt length WritableMemory mem2 = WritableMemory.writableWrap(arr2); try { - usk2 = DirectQuickSelectSketch.writableWrap(mem2, ThetaUtil.DEFAULT_UPDATE_SEED); + usk2 = DirectQuickSelectSketch.writableWrap(mem2, Util.DEFAULT_UPDATE_SEED); fail("Expected SketchesArgumentException"); } catch (SketchesArgumentException e) { //pass @@ -832,7 +832,7 @@ public void checkCorruptRFWithInsufficientArray() { usk.update(0); insertLgResizeFactor(mem, 0); // corrupt RF: X1 - UpdateSketch dqss = DirectQuickSelectSketch.writableWrap(mem, ThetaUtil.DEFAULT_UPDATE_SEED); + UpdateSketch dqss = DirectQuickSelectSketch.writableWrap(mem, Util.DEFAULT_UPDATE_SEED); assertEquals(dqss.getResizeFactor(), ResizeFactor.X2); // force-promote to X2 } @@ -861,7 +861,7 @@ public void checkBadLgNomLongs() { WritableMemory mem = WritableMemory.writableWrap(new byte[(k*16) +24]); Sketches.updateSketchBuilder().setNominalEntries(k).build(mem); mem.putByte(LG_NOM_LONGS_BYTE, (byte) 3); //Corrupt LgNomLongs byte - DirectQuickSelectSketch.writableWrap(mem, ThetaUtil.DEFAULT_UPDATE_SEED); + DirectQuickSelectSketch.writableWrap(mem, Util.DEFAULT_UPDATE_SEED); } @Test diff --git 
a/src/test/java/org/apache/datasketches/theta/DirectUnionTest.java b/src/test/java/org/apache/datasketches/theta/DirectUnionTest.java index 5506090c7..61a7d5911 100644 --- a/src/test/java/org/apache/datasketches/theta/DirectUnionTest.java +++ b/src/test/java/org/apache/datasketches/theta/DirectUnionTest.java @@ -34,9 +34,9 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; import org.testng.annotations.Test; /** @@ -500,8 +500,8 @@ public void checkSerVer2Handling() { usk2.update(i); //2*k + 1024 no overlap } - final Memory v2mem1 = convertSerVer3toSerVer2(usk1.compact(true, null), ThetaUtil.DEFAULT_UPDATE_SEED); - final Memory v2mem2 = convertSerVer3toSerVer2(usk2.compact(true, null), ThetaUtil.DEFAULT_UPDATE_SEED); + final Memory v2mem1 = convertSerVer3toSerVer2(usk1.compact(true, null), Util.DEFAULT_UPDATE_SEED); + final Memory v2mem2 = convertSerVer3toSerVer2(usk2.compact(true, null), Util.DEFAULT_UPDATE_SEED); final WritableMemory uMem = WritableMemory.writableWrap(new byte[getMaxUnionBytes(k)]); //union memory final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); @@ -530,7 +530,7 @@ public void checkUpdateMemorySpecialCases() { CompactSketch cOut = union.getResult(true, null); assertEquals(cOut.getEstimate(), 0.0, 0.0); - final Memory v2mem1 = convertSerVer3toSerVer2(usk1c, ThetaUtil.DEFAULT_UPDATE_SEED); + final Memory v2mem1 = convertSerVer3toSerVer2(usk1c, Util.DEFAULT_UPDATE_SEED); uMem = WritableMemory.writableWrap(new byte[getMaxUnionBytes(k)]); //union memory union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); @@ -604,7 +604,7 @@ public void checkEmptySerVer2and3() { Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); 
union.union(v3mem1); - final Memory v2mem1 = convertSerVer3toSerVer2(usk1c, ThetaUtil.DEFAULT_UPDATE_SEED); + final Memory v2mem1 = convertSerVer3toSerVer2(usk1c, Util.DEFAULT_UPDATE_SEED); final WritableMemory v2mem2 = WritableMemory.writableWrap(new byte[16]); v2mem1.copyTo(0, v2mem2, 0, 8); @@ -748,7 +748,7 @@ public void checkPreambleLongsCorruption() { assertEquals(familyID, Family.UNION.getID()); assertEquals(preLongs, Family.UNION.getMaxPreLongs()); PreambleUtil.insertPreLongs(mem, 3); //Corrupt with 3; correct value is 4 - DirectQuickSelectSketch.writableWrap(mem, ThetaUtil.DEFAULT_UPDATE_SEED); + DirectQuickSelectSketch.writableWrap(mem, Util.DEFAULT_UPDATE_SEED); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -759,7 +759,7 @@ public void checkSizeTooSmall() { println(setOp.toString()); final WritableMemory mem2 = WritableMemory.writableWrap(new byte[32]); //for just preamble mem.copyTo(0, mem2, 0, 32); //too small - DirectQuickSelectSketch.writableWrap(mem2, ThetaUtil.DEFAULT_UPDATE_SEED); + DirectQuickSelectSketch.writableWrap(mem2, Util.DEFAULT_UPDATE_SEED); } @Test diff --git a/src/test/java/org/apache/datasketches/theta/ForwardCompatibilityTest.java b/src/test/java/org/apache/datasketches/theta/ForwardCompatibilityTest.java index 76dbfc7ba..30325a293 100644 --- a/src/test/java/org/apache/datasketches/theta/ForwardCompatibilityTest.java +++ b/src/test/java/org/apache/datasketches/theta/ForwardCompatibilityTest.java @@ -25,9 +25,9 @@ import static org.testng.Assert.assertTrue; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; import org.testng.annotations.Test; /** @@ -90,7 +90,7 @@ public void checkSerVer1_1Value() { @Test public void checkSerVer2_1PreLong_Empty() { CompactSketch csk = EmptyCompactSketch.getInstance(); - 
Memory srcMem = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED); + Memory srcMem = convertSerVer3toSerVer2(csk, Util.DEFAULT_UPDATE_SEED); Sketch sketch = Sketch.heapify(srcMem); assertEquals(sketch.isEmpty(), true); assertEquals(sketch.isEstimationMode(), false); @@ -106,7 +106,7 @@ public void checkSerVer2_2PreLongs_Empty() { UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build(); for (int i = 0; i < 2; i++) { usk.update(i); } //exact mode CompactSketch csk = usk.compact(true, null); - Memory srcMem = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED); + Memory srcMem = convertSerVer3toSerVer2(csk, Util.DEFAULT_UPDATE_SEED); WritableMemory srcMemW = WritableMemory.allocate(16); srcMem.copyTo(0, srcMemW, 0, 16); @@ -123,7 +123,7 @@ public void checkSerVer2_3PreLongs_Empty() { UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build(); for (int i = 0; i < 32; i++) { usk.update(i); } //est mode CompactSketch csk = usk.compact(true, null); - Memory srcMem = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED); + Memory srcMem = convertSerVer3toSerVer2(csk, Util.DEFAULT_UPDATE_SEED); WritableMemory srcMemW = WritableMemory.allocate(24); srcMem.copyTo(0, srcMemW, 0, 24); @@ -141,7 +141,7 @@ public void checkSerVer2_2PreLongs_1Value() { UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build(); usk.update(1); //exact mode CompactSketch csk = usk.compact(true, null); - Memory srcMem = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED); + Memory srcMem = convertSerVer3toSerVer2(csk, Util.DEFAULT_UPDATE_SEED); Sketch sketch = Sketch.heapify(srcMem); assertEquals(sketch.isEmpty(), false); @@ -158,7 +158,7 @@ public void checkSerVer2_3PreLongs_1Value() { UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build(); for (int i = 0; i < 32; i++) { usk.update(i); } //est mode CompactSketch csk = usk.compact(true, null); - Memory srcMem = 
convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED); + Memory srcMem = convertSerVer3toSerVer2(csk, Util.DEFAULT_UPDATE_SEED); WritableMemory srcMemW = WritableMemory.allocate(32); srcMem.copyTo(0, srcMemW, 0, 32); @@ -182,7 +182,7 @@ public void checkSerVer2_3PreLongs_1Value_ThLessthan1() { UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build(); for (int i = 0; i < 32; i++) { usk.update(i); } //est mode CompactSketch csk = usk.compact(true, null); - Memory srcMem = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED); + Memory srcMem = convertSerVer3toSerVer2(csk, Util.DEFAULT_UPDATE_SEED); WritableMemory srcMemW = WritableMemory.allocate(32); srcMem.copyTo(0, srcMemW, 0, 32); diff --git a/src/test/java/org/apache/datasketches/theta/HeapAlphaSketchTest.java b/src/test/java/org/apache/datasketches/theta/HeapAlphaSketchTest.java index 8682020ef..3ab30702b 100644 --- a/src/test/java/org/apache/datasketches/theta/HeapAlphaSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/HeapAlphaSketchTest.java @@ -40,6 +40,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.ThetaUtil; @@ -55,7 +56,7 @@ public class HeapAlphaSketchTest { public void checkBadSerVer() { int k = 512; int u = k; - long seed = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed = Util.DEFAULT_UPDATE_SEED; UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed) .setNominalEntries(k).build(); HeapAlphaSketch sk1 = (HeapAlphaSketch)usk; //for internal checks @@ -93,7 +94,7 @@ public void checkAlphaIncompatibleWithMem() { public void checkIllegalSketchID_UpdateSketch() { int k = 512; int u = k; - long seed = ThetaUtil.DEFAULT_UPDATE_SEED; + long 
seed = Util.DEFAULT_UPDATE_SEED; UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed) .setNominalEntries(k).build(); HeapAlphaSketch sk1 = (HeapAlphaSketch)usk; //for internal checks @@ -118,7 +119,7 @@ public void checkIllegalSketchID_UpdateSketch() { public void checkHeapifySeedConflict() { int k = 512; long seed1 = 1021; - long seed2 = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed2 = Util.DEFAULT_UPDATE_SEED; UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed1) .setNominalEntries(k).build(); byte[] byteArray = usk.toByteArray(); @@ -130,7 +131,7 @@ public void checkHeapifySeedConflict() { public void checkHeapifyByteArrayExact() { int k = 512; int u = k; - long seed = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed = Util.DEFAULT_UPDATE_SEED; UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed) .setNominalEntries(k).build(); @@ -157,7 +158,7 @@ public void checkHeapifyByteArrayExact() { public void checkHeapifyByteArrayEstimating() { int k = 4096; int u = 2*k; - long seed = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed = Util.DEFAULT_UPDATE_SEED; UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed) .setNominalEntries(k).build(); @@ -186,7 +187,7 @@ public void checkHeapifyByteArrayEstimating() { public void checkHeapifyMemoryEstimating() { int k = 512; int u = 2*k; //thus estimating - long seed = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed = Util.DEFAULT_UPDATE_SEED; //int maxBytes = (k << 4) + (Family.ALPHA.getLowPreLongs()); UpdateSketch sk1 = UpdateSketch.builder().setFamily(fam_).setSeed(seed) @@ -204,7 +205,7 @@ public void checkHeapifyMemoryEstimating() { byte[] byteArray = sk1.toByteArray(); Memory mem = Memory.wrap(byteArray); - UpdateSketch sk2 = (UpdateSketch)Sketch.heapify(mem, ThetaUtil.DEFAULT_UPDATE_SEED); + UpdateSketch sk2 = (UpdateSketch)Sketch.heapify(mem, Util.DEFAULT_UPDATE_SEED); assertEquals(sk2.getEstimate(), sk1est); assertEquals(sk2.getLowerBound(2), sk1lb); @@ -608,7 
+609,7 @@ public void checkMemDeSerExceptions() { final long origThetaLong = mem.getLong(THETA_LONG); try { mem.putLong(THETA_LONG, Long.MAX_VALUE / 2); //Corrupt the theta value - HeapAlphaSketch.heapifyInstance(mem, ThetaUtil.DEFAULT_UPDATE_SEED); + HeapAlphaSketch.heapifyInstance(mem, Util.DEFAULT_UPDATE_SEED); fail(); } catch (SketchesArgumentException e) { //expected @@ -618,7 +619,7 @@ public void checkMemDeSerExceptions() { WritableMemory mem2 = WritableMemory.writableWrap(byteArray2); mem.copyTo(0, mem2, 0, mem2.getCapacity()); try { - HeapAlphaSketch.heapifyInstance(mem2, ThetaUtil.DEFAULT_UPDATE_SEED); + HeapAlphaSketch.heapifyInstance(mem2, Util.DEFAULT_UPDATE_SEED); fail(); } catch (SketchesArgumentException e) { //expected @@ -626,7 +627,7 @@ public void checkMemDeSerExceptions() { // force ResizeFactor.X1, and allocated capacity too small insertLgResizeFactor(mem, ResizeFactor.X1.lg()); - UpdateSketch usk = HeapAlphaSketch.heapifyInstance(mem, ThetaUtil.DEFAULT_UPDATE_SEED); + UpdateSketch usk = HeapAlphaSketch.heapifyInstance(mem, Util.DEFAULT_UPDATE_SEED); ResizeFactor rf = usk.getResizeFactor(); assertEquals(rf, ResizeFactor.X2);//ResizeFactor recovered to X2, which always works. 
} @@ -634,7 +635,7 @@ public void checkMemDeSerExceptions() { private static void tryBadMem(WritableMemory mem, int byteOffset, int byteValue) { try { mem.putByte(byteOffset, (byte) byteValue); //Corrupt - HeapAlphaSketch.heapifyInstance(mem, ThetaUtil.DEFAULT_UPDATE_SEED); + HeapAlphaSketch.heapifyInstance(mem, Util.DEFAULT_UPDATE_SEED); fail(); } catch (SketchesArgumentException e) { //expected diff --git a/src/test/java/org/apache/datasketches/theta/HeapIntersectionTest.java b/src/test/java/org/apache/datasketches/theta/HeapIntersectionTest.java index d094e3402..b087d3d54 100644 --- a/src/test/java/org/apache/datasketches/theta/HeapIntersectionTest.java +++ b/src/test/java/org/apache/datasketches/theta/HeapIntersectionTest.java @@ -30,9 +30,9 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; import org.testng.annotations.Test; /** @@ -505,7 +505,7 @@ public void checkGetResult() { @Test public void checkFamily() { - final IntersectionImpl impl = IntersectionImpl.initNewHeapInstance(ThetaUtil.DEFAULT_UPDATE_SEED); + final IntersectionImpl impl = IntersectionImpl.initNewHeapInstance(Util.DEFAULT_UPDATE_SEED); assertEquals(impl.getFamily(), Family.INTERSECTION); } diff --git a/src/test/java/org/apache/datasketches/theta/HeapQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/HeapQuickSelectSketchTest.java index 6cf4c365b..759910f97 100644 --- a/src/test/java/org/apache/datasketches/theta/HeapQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/HeapQuickSelectSketchTest.java @@ -41,6 +41,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import 
org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.ThetaUtil; @@ -56,7 +57,7 @@ public class HeapQuickSelectSketchTest { public void checkBadSerVer() { int k = 512; int u = k; - long seed = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed = Util.DEFAULT_UPDATE_SEED; UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed).setNominalEntries(k).build(); HeapQuickSelectSketch sk1 = (HeapQuickSelectSketch)usk; //for internal checks @@ -81,7 +82,7 @@ public void checkBadSerVer() { public void checkIllegalSketchID_UpdateSketch() { int k = 512; int u = k; - long seed = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed = Util.DEFAULT_UPDATE_SEED; UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed).setNominalEntries(k).build(); HeapQuickSelectSketch sk1 = (HeapQuickSelectSketch)usk; //for internal checks assertTrue(usk.isEmpty()); @@ -105,7 +106,7 @@ public void checkIllegalSketchID_UpdateSketch() { public void checkHeapifySeedConflict() { int k = 512; long seed1 = 1021; - long seed2 = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed2 = Util.DEFAULT_UPDATE_SEED; UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed1).setNominalEntries(k).build(); byte[] byteArray = usk.toByteArray(); Memory srcMem = Memory.wrap(byteArray); @@ -117,14 +118,14 @@ public void checkHeapifyCorruptLgNomLongs() { UpdateSketch usk = UpdateSketch.builder().setNominalEntries(16).build(); WritableMemory srcMem = WritableMemory.writableWrap(usk.toByteArray()); srcMem.putByte(LG_NOM_LONGS_BYTE, (byte)2); //corrupt - Sketch.heapify(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + Sketch.heapify(srcMem, Util.DEFAULT_UPDATE_SEED); } @Test public void checkHeapifyByteArrayExact() { int k = 512; int u = k; - long seed = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed = Util.DEFAULT_UPDATE_SEED; 
UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed).setNominalEntries(k).build(); for (int i=0; i>> 1; + long h = hash(data, Util.DEFAULT_UPDATE_SEED)[0] >>> 1; SingleItemSketch sis = SingleItemSketch.create(i); long halfMax = Long.MAX_VALUE >> 1; int count = sis.getCountLessThanThetaLong(halfMax); @@ -148,7 +148,7 @@ public void checkSerDe() { SingleItemSketch sis = SingleItemSketch.create(1); byte[] byteArr = sis.toByteArray(); Memory mem = Memory.wrap(byteArr); - final short defaultSeedHash = ThetaUtil.computeSeedHash(ThetaUtil.DEFAULT_UPDATE_SEED); + final short defaultSeedHash = Util.computeSeedHash(Util.DEFAULT_UPDATE_SEED); SingleItemSketch sis2 = SingleItemSketch.heapify(mem, defaultSeedHash); assertEquals(sis2.getEstimate(), 1.0); @@ -304,7 +304,7 @@ public void checkHeapifyInstance() { @Test public void checkSingleItemBadFlags() { - final short defaultSeedHash = ThetaUtil.computeSeedHash(ThetaUtil.DEFAULT_UPDATE_SEED); + final short defaultSeedHash = Util.computeSeedHash(Util.DEFAULT_UPDATE_SEED); UpdateSketch sk1 = new UpdateSketchBuilder().build(); sk1.update(1); WritableMemory wmem = WritableMemory.allocate(16); diff --git a/src/test/java/org/apache/datasketches/theta/SketchTest.java b/src/test/java/org/apache/datasketches/theta/SketchTest.java index 341d8eb20..8d3d72c75 100644 --- a/src/test/java/org/apache/datasketches/theta/SketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/SketchTest.java @@ -42,6 +42,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.ThetaUtil; @@ -138,7 +139,7 @@ public void checkBuilder() { nameS1 = sk1.getClass().getSimpleName(); assertEquals(nameS1, "HeapQuickSelectSketch"); 
assertEquals(sk1.getLgNomLongs(), Integer.numberOfTrailingZeros(ThetaUtil.DEFAULT_NOMINAL_ENTRIES)); - assertEquals(sk1.getSeed(), ThetaUtil.DEFAULT_UPDATE_SEED); + assertEquals(sk1.getSeed(), Util.DEFAULT_UPDATE_SEED); assertEquals(sk1.getP(), (float)1.0); assertEquals(sk1.getResizeFactor(), ResizeFactor.X8); } @@ -310,7 +311,7 @@ public void checkWrapToHeapifyConversion1() { assertFalse(csk2.hasMemory()); assertEquals(uest1, csk2.getEstimate(), 0.0); - Memory v2mem = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED); + Memory v2mem = convertSerVer3toSerVer2(csk, Util.DEFAULT_UPDATE_SEED); csk2 = Sketch.wrap(v2mem); assertFalse(csk2.isDirect()); assertFalse(csk2.hasMemory()); diff --git a/src/test/java/org/apache/datasketches/theta/SketchesTest.java b/src/test/java/org/apache/datasketches/theta/SketchesTest.java index 33b24e289..842bc3450 100644 --- a/src/test/java/org/apache/datasketches/theta/SketchesTest.java +++ b/src/test/java/org/apache/datasketches/theta/SketchesTest.java @@ -38,6 +38,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.ThetaUtil; @@ -81,13 +82,13 @@ public void checkSketchMethods() { CompactSketch csk2 = (CompactSketch)heapifySketch(mem); assertEquals((int)csk2.getEstimate(), k); - csk2 = (CompactSketch)heapifySketch(mem, ThetaUtil.DEFAULT_UPDATE_SEED); + csk2 = (CompactSketch)heapifySketch(mem, Util.DEFAULT_UPDATE_SEED); assertEquals((int)csk2.getEstimate(), k); csk2 = (CompactSketch)wrapSketch(mem); assertEquals((int)csk2.getEstimate(), k); - csk2 = (CompactSketch)wrapSketch(mem, ThetaUtil.DEFAULT_UPDATE_SEED); + csk2 = (CompactSketch)wrapSketch(mem, Util.DEFAULT_UPDATE_SEED); assertEquals((int)csk2.getEstimate(), k); } @@ -114,7 +115,7 @@ public void checkSetOpMethods() { cSk = 
union2.getResult(true, null); assertEquals((int)cSk.getEstimate(), 3*k/2); - union2 = (Union)heapifySetOperation(uMem, ThetaUtil.DEFAULT_UPDATE_SEED); + union2 = (Union)heapifySetOperation(uMem, Util.DEFAULT_UPDATE_SEED); cSk = union2.getResult(true, null); assertEquals((int)cSk.getEstimate(), 3*k/2); @@ -122,7 +123,7 @@ public void checkSetOpMethods() { cSk = union2.getResult(true, null); assertEquals((int)cSk.getEstimate(), 3*k/2); - union2 = (Union)wrapSetOperation(uMem, ThetaUtil.DEFAULT_UPDATE_SEED); + union2 = (Union)wrapSetOperation(uMem, Util.DEFAULT_UPDATE_SEED); cSk = union2.getResult(true, null); assertEquals((int)cSk.getEstimate(), 3*k/2); diff --git a/src/test/java/org/apache/datasketches/theta/UnionImplTest.java b/src/test/java/org/apache/datasketches/theta/UnionImplTest.java index f7ec2d2eb..5259554fb 100644 --- a/src/test/java/org/apache/datasketches/theta/UnionImplTest.java +++ b/src/test/java/org/apache/datasketches/theta/UnionImplTest.java @@ -29,10 +29,10 @@ import java.nio.ByteOrder; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; import org.testng.annotations.Test; public class UnionImplTest { @@ -120,7 +120,7 @@ public void checkUpdateWithMemV4Exact() { @Test public void checkFastWrap() { final int k = 16; - final long seed = ThetaUtil.DEFAULT_UPDATE_SEED; + final long seed = Util.DEFAULT_UPDATE_SEED; final int unionSize = Sketches.getMaxUnionBytes(k); final WritableMemory srcMem = WritableMemory.writableWrap(new byte[unionSize]); final Union union = Sketches.setOperationBuilder().setNominalEntries(k).buildUnion(srcMem); @@ -157,7 +157,7 @@ public void checkVer2FamilyException() { sketch.update(i); } final CompactSketch csk = sketch.compact(true, null); - final 
WritableMemory v2mem = (WritableMemory) convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED); + final WritableMemory v2mem = (WritableMemory) convertSerVer3toSerVer2(csk, Util.DEFAULT_UPDATE_SEED); v2mem.putByte(PreambleUtil.FAMILY_BYTE, (byte)0); //corrupt family @@ -185,7 +185,7 @@ public void checkVer1FamilyException() { public void checkVer2EmptyHandling() { final int k = 16; final UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(); - final Memory mem = convertSerVer3toSerVer2(sketch.compact(), ThetaUtil.DEFAULT_UPDATE_SEED); + final Memory mem = convertSerVer3toSerVer2(sketch.compact(), Util.DEFAULT_UPDATE_SEED); final Union union = Sketches.setOperationBuilder().setNominalEntries(k).buildUnion(); union.union(mem); } @@ -221,7 +221,7 @@ public void checkRestricted() { final Union union = Sketches.setOperationBuilder().buildUnion(); assertTrue(union.isEmpty()); assertEquals(union.getThetaLong(), Long.MAX_VALUE); - assertEquals(union.getSeedHash(), ThetaUtil.computeSeedHash(ThetaUtil.DEFAULT_UPDATE_SEED)); + assertEquals(union.getSeedHash(), Util.computeSeedHash(Util.DEFAULT_UPDATE_SEED)); assertEquals(union.getRetainedEntries(), 0); assertEquals(union.getCache().length, 128); //only applies to stateful } diff --git a/src/test/java/org/apache/datasketches/theta/UpdateSketchTest.java b/src/test/java/org/apache/datasketches/theta/UpdateSketchTest.java index 90177027e..a41e22340 100644 --- a/src/test/java/org/apache/datasketches/theta/UpdateSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/UpdateSketchTest.java @@ -35,6 +35,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; @@ 
-162,7 +163,7 @@ public void checkIncompatibleFamily() { UpdateSketch sk = Sketches.updateSketchBuilder().build(); sk.update(1); WritableMemory wmem = WritableMemory.writableWrap(sk.compact().toByteArray()); - UpdateSketch.wrap(wmem, ThetaUtil.DEFAULT_UPDATE_SEED); + UpdateSketch.wrap(wmem, Util.DEFAULT_UPDATE_SEED); } @Test @@ -172,13 +173,13 @@ public void checkCorruption() { WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); try { wmem.putByte(SER_VER_BYTE, (byte) 2); - UpdateSketch.wrap(wmem, ThetaUtil.DEFAULT_UPDATE_SEED); + UpdateSketch.wrap(wmem, Util.DEFAULT_UPDATE_SEED); fail(); } catch (SketchesArgumentException e) { } try { wmem.putByte(SER_VER_BYTE, (byte) 3); wmem.putByte(PREAMBLE_LONGS_BYTE, (byte) 2); - UpdateSketch.wrap(wmem, ThetaUtil.DEFAULT_UPDATE_SEED); + UpdateSketch.wrap(wmem, Util.DEFAULT_UPDATE_SEED); fail(); } catch (SketchesArgumentException e) { } } diff --git a/src/test/java/org/apache/datasketches/theta2/AnotBimplTest.java b/src/test/java/org/apache/datasketches/theta2/AnotBimplTest.java index ae1730684..b78acc4ca 100644 --- a/src/test/java/org/apache/datasketches/theta2/AnotBimplTest.java +++ b/src/test/java/org/apache/datasketches/theta2/AnotBimplTest.java @@ -27,7 +27,7 @@ import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon2.ThetaUtil; +import org.apache.datasketches.common.Util; import org.testng.annotations.Test; /** @@ -53,7 +53,7 @@ public void checkExactAnotB_AvalidNoOverlap() { assertTrue(aNb.isEmpty()); //only applies to stateful assertTrue(aNb.getCache().length == 0); //only applies to stateful assertEquals(aNb.getThetaLong(), Long.MAX_VALUE); //only applies to stateful - assertEquals(aNb.getSeedHash(), ThetaUtil.computeSeedHash(ThetaUtil.DEFAULT_UPDATE_SEED)); + assertEquals(aNb.getSeedHash(), Util.computeSeedHash(Util.DEFAULT_UPDATE_SEED)); aNb.setA(usk1); 
aNb.notB(usk2); @@ -308,7 +308,7 @@ public void checkGetResult() { @Test public void checkGetFamily() { //cheap trick - final AnotBimpl anotb = new AnotBimpl(ThetaUtil.DEFAULT_UPDATE_SEED); + final AnotBimpl anotb = new AnotBimpl(Util.DEFAULT_UPDATE_SEED); assertEquals(anotb.getFamily(), Family.A_NOT_B); } diff --git a/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java b/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java index 1addaa3b4..c63a639fc 100644 --- a/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java +++ b/src/test/java/org/apache/datasketches/theta2/BackwardConversions.java @@ -27,7 +27,6 @@ import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.ThetaUtil; /** * This class converts current compact sketches into prior SerVer 1 and SerVer 2 format for testing. @@ -188,7 +187,7 @@ public static MemorySegment convertSerVer3toSerVer1(final CompactSketch skV3) { * @return a SerVer2 SetSketch as MemorySegment object. 
*/ public static MemorySegment convertSerVer3toSerVer2(final CompactSketch skV3, final long seed) { - final short seedHash = ThetaUtil.computeSeedHash(seed); + final short seedHash = Util.computeSeedHash(seed); MemorySegment wseg = null; if (skV3 instanceof EmptyCompactSketch) { diff --git a/src/test/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketchTest.java index 2ac9885cf..a0d21664f 100644 --- a/src/test/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketchTest.java @@ -35,14 +35,13 @@ import org.apache.datasketches.common.Util; import org.apache.datasketches.theta2.ConcurrentHeapQuickSelectSketchTest.SharedLocal; import org.apache.datasketches.thetacommon2.HashOperations; -import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; /** * @author eshcar */ public class ConcurrentDirectQuickSelectSketchTest { - private static final long SEED = ThetaUtil.DEFAULT_UPDATE_SEED; + private static final long SEED = Util.DEFAULT_UPDATE_SEED; @Test public void checkDirectCompactConversion() { @@ -568,7 +567,7 @@ public void checkBadLgNomLongs() { boolean useSeg = true; SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); sl.wseg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte) 3); //Corrupt LgNomLongs byte - DirectQuickSelectSketch.writableWrap(sl.wseg, ThetaUtil.DEFAULT_UPDATE_SEED); + DirectQuickSelectSketch.writableWrap(sl.wseg, Util.DEFAULT_UPDATE_SEED); } @Test @@ -650,14 +649,14 @@ public void checkWrapIllegalFamilyID_direct() { sl.wseg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte //try to wrap the corrupted seg - DirectQuickSelectSketch.writableWrap(sl.wseg, ThetaUtil.DEFAULT_UPDATE_SEED); + DirectQuickSelectSketch.writableWrap(sl.wseg, Util.DEFAULT_UPDATE_SEED); } @Test(expectedExceptions = 
SketchesArgumentException.class) public void checkHeapifySeedConflict() { int lgK = 9; long seed1 = 1021; - long seed2 = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed2 = Util.DEFAULT_UPDATE_SEED; boolean useSeg = true; SharedLocal sl = new SharedLocal(lgK, lgK, seed1, useSeg, true, 1); UpdateSketch shared = sl.shared; @@ -673,7 +672,7 @@ public void checkCorruptLgNomLongs() { SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); sl.wseg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte)2); //corrupt - Sketch.heapify(sl.wseg, ThetaUtil.DEFAULT_UPDATE_SEED); + Sketch.heapify(sl.wseg, Util.DEFAULT_UPDATE_SEED); } @Test(expectedExceptions = UnsupportedOperationException.class) diff --git a/src/test/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketchTest.java index 76734dfce..25b08e9e4 100644 --- a/src/test/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketchTest.java @@ -34,7 +34,6 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; /** @@ -123,7 +122,7 @@ public void checkIllegalSketchID_UpdateSketch() { public void checkHeapifySeedConflict() { int lgK = 9; long seed = 1021; - long seed2 = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed2 = Util.DEFAULT_UPDATE_SEED; SharedLocal sl = new SharedLocal(lgK, lgK, seed); byte[] byteArray = sl.shared.toByteArray(); MemorySegment srcSeg = MemorySegment.ofArray(byteArray); @@ -137,7 +136,7 @@ public void checkHeapifyCorruptLgNomLongs() { byte[] serArr = sl.shared.toByteArray(); MemorySegment srcSeg = MemorySegment.ofArray(serArr); srcSeg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte)2); //corrupt - Sketch.heapify(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); 
+ Sketch.heapify(srcSeg, Util.DEFAULT_UPDATE_SEED); } @Test(expectedExceptions = UnsupportedOperationException.class) @@ -241,7 +240,7 @@ public void checkHeapifyMemorySegmentEstimating() { byte[] serArr = shared.toByteArray(); MemorySegment srcSeg = MemorySegment.ofArray(serArr).asReadOnly(); - UpdateSketch recoveredShared = UpdateSketch.heapify(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + UpdateSketch recoveredShared = UpdateSketch.heapify(srcSeg, Util.DEFAULT_UPDATE_SEED); final int bytes = Sketch.getMaxUpdateSketchBytes(k); final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); @@ -676,7 +675,7 @@ static void println(String s) { } static class SharedLocal { - static final long DefaultSeed = ThetaUtil.DEFAULT_UPDATE_SEED; + static final long DefaultSeed = Util.DEFAULT_UPDATE_SEED; final UpdateSketch shared; final ConcurrentSharedThetaSketch sharedIf; final UpdateSketch local; diff --git a/src/test/java/org/apache/datasketches/theta2/DirectIntersectionTest.java b/src/test/java/org/apache/datasketches/theta2/DirectIntersectionTest.java index 0418d2ddd..a706efba4 100644 --- a/src/test/java/org/apache/datasketches/theta2/DirectIntersectionTest.java +++ b/src/test/java/org/apache/datasketches/theta2/DirectIntersectionTest.java @@ -36,7 +36,6 @@ import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesReadOnlyException; import org.apache.datasketches.common.SketchesStateException; -import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; /** @@ -663,14 +662,14 @@ public void checkWrap() { public void checkDefaultMinSize() { final int k = 32; final MemorySegment seg = MemorySegment.ofArray(new byte[k*8 + PREBYTES]); - IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, seg); + IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, seg); } @Test(expectedExceptions = SketchesArgumentException.class) public void checkExceptionMinSize() { final int k 
= 16; final MemorySegment seg = MemorySegment.ofArray(new byte[k*8 + PREBYTES]); - IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, seg); + IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, seg); } @Test @@ -693,7 +692,7 @@ public void checkFamily() { //cheap trick final int k = 16; final MemorySegment seg = MemorySegment.ofArray(new byte[k*16 + PREBYTES]); - final IntersectionImpl impl = IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, seg); + final IntersectionImpl impl = IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, seg); assertEquals(impl.getFamily(), Family.INTERSECTION); } @@ -701,22 +700,22 @@ public void checkFamily() { public void checkExceptions1() { final int k = 16; final MemorySegment seg = MemorySegment.ofArray(new byte[k*16 + PREBYTES]); - IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, seg); + IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, seg); //corrupt SerVer seg.set(JAVA_BYTE, PreambleUtil.SER_VER_BYTE, (byte) 2); - IntersectionImpl.wrapInstance(seg, ThetaUtil.DEFAULT_UPDATE_SEED, false); + IntersectionImpl.wrapInstance(seg, Util.DEFAULT_UPDATE_SEED, false); } @Test(expectedExceptions = SketchesArgumentException.class) public void checkExceptions2() { final int k = 16; final MemorySegment seg = MemorySegment.ofArray(new byte[k*16 + PREBYTES]); - IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, seg); + IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, seg); //seg now has non-empty intersection //corrupt empty and CurCount Util.setBits(seg, PreambleUtil.FLAGS_BYTE, (byte) PreambleUtil.EMPTY_FLAG_MASK); seg.set(JAVA_INT_UNALIGNED, PreambleUtil.RETAINED_ENTRIES_INT, 2); - IntersectionImpl.wrapInstance(seg, ThetaUtil.DEFAULT_UPDATE_SEED, false); + IntersectionImpl.wrapInstance(seg, Util.DEFAULT_UPDATE_SEED, false); } //Check Alex's bug intersecting 2 direct full sketches with only overlap of 2 diff 
--git a/src/test/java/org/apache/datasketches/theta2/DirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta2/DirectQuickSelectSketchTest.java index 1eced4555..1db963456 100644 --- a/src/test/java/org/apache/datasketches/theta2/DirectQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta2/DirectQuickSelectSketchTest.java @@ -50,8 +50,8 @@ import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesReadOnlyException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon2.HashOperations; -import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; /** @@ -189,14 +189,14 @@ public void checkWrapIllegalFamilyID_direct() { seg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte //try to wrap the corrupted seg - DirectQuickSelectSketch.writableWrap(seg, ThetaUtil.DEFAULT_UPDATE_SEED); + DirectQuickSelectSketch.writableWrap(seg, Util.DEFAULT_UPDATE_SEED); } @Test public void checkHeapifySeedConflict() { int k = 512; long seed1 = 1021; - long seed2 = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed2 = Util.DEFAULT_UPDATE_SEED; try (Arena arena = Arena.ofConfined()) { MemorySegment wseg = makeNativeMemorySegment(k, arena); UpdateSketch usk = UpdateSketch.builder().setSeed(seed1).setNominalEntries(k).build(wseg); @@ -217,7 +217,7 @@ public void checkCorruptLgNomLongs() { MemorySegment wseg = makeNativeMemorySegment(k, arena); UpdateSketch.builder().setNominalEntries(k).build(wseg); wseg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte)2); //corrupt - Sketch.heapify(wseg, ThetaUtil.DEFAULT_UPDATE_SEED); + Sketch.heapify(wseg, Util.DEFAULT_UPDATE_SEED); } catch (final Exception e) { if (e instanceof SketchesArgumentException) {} else { throw new RuntimeException(e); } @@ -769,7 +769,7 @@ public void checkConstructorSrcSegCorruptions() { UpdateSketch usk2; 
seg1.set(JAVA_BYTE, FAMILY_BYTE, (byte) 3); //corrupt Family by setting to Compact try { - usk2 = DirectQuickSelectSketch.writableWrap(seg1, ThetaUtil.DEFAULT_UPDATE_SEED); + usk2 = DirectQuickSelectSketch.writableWrap(seg1, Util.DEFAULT_UPDATE_SEED); fail("Expected SketchesArgumentException"); } catch (SketchesArgumentException e) { //Pass @@ -777,7 +777,7 @@ public void checkConstructorSrcSegCorruptions() { seg1.set(JAVA_BYTE, FAMILY_BYTE, (byte) 2); //fix Family seg1.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1); //corrupt preLongs try { - usk2 = DirectQuickSelectSketch.writableWrap(seg1, ThetaUtil.DEFAULT_UPDATE_SEED); + usk2 = DirectQuickSelectSketch.writableWrap(seg1, Util.DEFAULT_UPDATE_SEED); fail("Expected SketchesArgumentException"); } catch (SketchesArgumentException e) { //pass @@ -785,7 +785,7 @@ public void checkConstructorSrcSegCorruptions() { seg1.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 3); //fix preLongs seg1.set(JAVA_BYTE, SER_VER_BYTE, (byte) 2); //corrupt serVer try { - usk2 = DirectQuickSelectSketch.writableWrap(seg1, ThetaUtil.DEFAULT_UPDATE_SEED); + usk2 = DirectQuickSelectSketch.writableWrap(seg1, Util.DEFAULT_UPDATE_SEED); fail("Expected SketchesArgumentException"); } catch (SketchesArgumentException e) { //pass @@ -795,7 +795,7 @@ public void checkConstructorSrcSegCorruptions() { seg1.set(JAVA_LONG_UNALIGNED, THETA_LONG, Long.MAX_VALUE >>> 1); //corrupt theta and seg1.set(JAVA_BYTE, LG_ARR_LONGS_BYTE, (byte) 10); //corrupt lgArrLongs try { - usk2 = DirectQuickSelectSketch.writableWrap(seg1, ThetaUtil.DEFAULT_UPDATE_SEED); + usk2 = DirectQuickSelectSketch.writableWrap(seg1, Util.DEFAULT_UPDATE_SEED); fail("Expected SketchesArgumentException"); } catch (SketchesArgumentException e) { //pass @@ -805,7 +805,7 @@ public void checkConstructorSrcSegCorruptions() { byte badFlags = (byte) (BIG_ENDIAN_FLAG_MASK | COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | ORDERED_FLAG_MASK); seg1.set(JAVA_BYTE, FLAGS_BYTE, badFlags); try { - usk2 = 
DirectQuickSelectSketch.writableWrap(seg1, ThetaUtil.DEFAULT_UPDATE_SEED); + usk2 = DirectQuickSelectSketch.writableWrap(seg1, Util.DEFAULT_UPDATE_SEED); fail("Expected SketchesArgumentException"); } catch (SketchesArgumentException e) { //pass @@ -814,7 +814,7 @@ public void checkConstructorSrcSegCorruptions() { byte[] arr2 = Arrays.copyOfRange(arr1, 0, bytes-1); //corrupt length MemorySegment seg2 = MemorySegment.ofArray(arr2); try { - usk2 = DirectQuickSelectSketch.writableWrap(seg2, ThetaUtil.DEFAULT_UPDATE_SEED); + usk2 = DirectQuickSelectSketch.writableWrap(seg2, Util.DEFAULT_UPDATE_SEED); fail("Expected SketchesArgumentException"); } catch (SketchesArgumentException e) { //pass @@ -833,7 +833,7 @@ public void checkCorruptRFWithInsufficientArray() { usk.update(0); insertLgResizeFactor(seg, 0); // corrupt RF: X1 - UpdateSketch dqss = DirectQuickSelectSketch.writableWrap(seg, ThetaUtil.DEFAULT_UPDATE_SEED); + UpdateSketch dqss = DirectQuickSelectSketch.writableWrap(seg, Util.DEFAULT_UPDATE_SEED); assertEquals(dqss.getResizeFactor(), ResizeFactor.X2); // force-promote to X2 } @@ -862,7 +862,7 @@ public void checkBadLgNomLongs() { MemorySegment seg = MemorySegment.ofArray(new byte[(k*16) +24]); Sketches.updateSketchBuilder().setNominalEntries(k).build(seg); seg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte) 3); //Corrupt LgNomLongs byte - DirectQuickSelectSketch.writableWrap(seg, ThetaUtil.DEFAULT_UPDATE_SEED); + DirectQuickSelectSketch.writableWrap(seg, Util.DEFAULT_UPDATE_SEED); } @Test diff --git a/src/test/java/org/apache/datasketches/theta2/DirectUnionTest.java b/src/test/java/org/apache/datasketches/theta2/DirectUnionTest.java index 7194f70a7..d587172e0 100644 --- a/src/test/java/org/apache/datasketches/theta2/DirectUnionTest.java +++ b/src/test/java/org/apache/datasketches/theta2/DirectUnionTest.java @@ -36,7 +36,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import 
org.apache.datasketches.thetacommon2.ThetaUtil; +import org.apache.datasketches.common.Util; import org.testng.annotations.Test; /** @@ -500,8 +500,8 @@ public void checkSerVer2Handling() { usk2.update(i); //2*k + 1024 no overlap } - final MemorySegment v2seg1 = convertSerVer3toSerVer2(usk1.compact(true, null), ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly(); - final MemorySegment v2seg2 = convertSerVer3toSerVer2(usk2.compact(true, null), ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly(); + final MemorySegment v2seg1 = convertSerVer3toSerVer2(usk1.compact(true, null), Util.DEFAULT_UPDATE_SEED).asReadOnly(); + final MemorySegment v2seg2 = convertSerVer3toSerVer2(usk2.compact(true, null), Util.DEFAULT_UPDATE_SEED).asReadOnly(); final MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union segment final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); @@ -530,7 +530,7 @@ public void checkUpdateSegmentSpecialCases() { CompactSketch cOut = union.getResult(true, null); assertEquals(cOut.getEstimate(), 0.0, 0.0); - final MemorySegment v2seg1 = convertSerVer3toSerVer2(usk1c, ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly(); + final MemorySegment v2seg1 = convertSerVer3toSerVer2(usk1c, Util.DEFAULT_UPDATE_SEED).asReadOnly(); uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); //union segment union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); @@ -602,7 +602,7 @@ public void checkEmptySerVer2and3() { Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); union.union(v3seg1); - final MemorySegment v2seg1 = convertSerVer3toSerVer2(usk1c, ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly(); + final MemorySegment v2seg1 = convertSerVer3toSerVer2(usk1c, Util.DEFAULT_UPDATE_SEED).asReadOnly(); final MemorySegment v2seg2 = MemorySegment.ofArray(new byte[16]); MemorySegment.copy(v2seg1, 0, v2seg2, 0, 8); @@ -746,7 +746,7 @@ public void checkPreambleLongsCorruption() { assertEquals(familyID, 
Family.UNION.getID()); assertEquals(preLongs, Family.UNION.getMaxPreLongs()); PreambleUtil.insertPreLongs(seg, 3); //Corrupt with 3; correct value is 4 - DirectQuickSelectSketch.writableWrap(seg, ThetaUtil.DEFAULT_UPDATE_SEED); + DirectQuickSelectSketch.writableWrap(seg, Util.DEFAULT_UPDATE_SEED); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -757,7 +757,7 @@ public void checkSizeTooSmall() { println(setOp.toString()); final MemorySegment seg2 = MemorySegment.ofArray(new byte[32]); //for just preamble MemorySegment.copy(seg, 0, seg2, 0, 32); //too small - DirectQuickSelectSketch.writableWrap(seg2, ThetaUtil.DEFAULT_UPDATE_SEED); + DirectQuickSelectSketch.writableWrap(seg2, Util.DEFAULT_UPDATE_SEED); } @Test diff --git a/src/test/java/org/apache/datasketches/theta2/ForwardCompatibilityTest.java b/src/test/java/org/apache/datasketches/theta2/ForwardCompatibilityTest.java index 09d7cc88d..a90703a68 100644 --- a/src/test/java/org/apache/datasketches/theta2/ForwardCompatibilityTest.java +++ b/src/test/java/org/apache/datasketches/theta2/ForwardCompatibilityTest.java @@ -29,7 +29,7 @@ import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon2.ThetaUtil; +import org.apache.datasketches.common.Util; import org.testng.annotations.Test; /** @@ -93,7 +93,7 @@ public void checkSerVer1_1Value() { @Test public void checkSerVer2_1PreLong_Empty() { CompactSketch csk = EmptyCompactSketch.getInstance(); - MemorySegment srcSeg = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly(); + MemorySegment srcSeg = convertSerVer3toSerVer2(csk, Util.DEFAULT_UPDATE_SEED).asReadOnly(); Sketch sketch = Sketch.heapify(srcSeg); assertEquals(sketch.isEmpty(), true); assertEquals(sketch.isEstimationMode(), false); @@ -109,7 +109,7 @@ public void checkSerVer2_2PreLongs_Empty() { UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build(); for (int i = 
0; i < 2; i++) { usk.update(i); } //exact mode CompactSketch csk = usk.compact(true, null); - MemorySegment srcSeg = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly(); + MemorySegment srcSeg = convertSerVer3toSerVer2(csk, Util.DEFAULT_UPDATE_SEED).asReadOnly(); MemorySegment srcWseg = MemorySegment.ofArray(new byte[16]); MemorySegment.copy(srcSeg, 0, srcWseg, 0, 16); @@ -126,7 +126,7 @@ public void checkSerVer2_3PreLongs_Empty() { UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build(); for (int i = 0; i < 32; i++) { usk.update(i); } //est mode CompactSketch csk = usk.compact(true, null); - MemorySegment srcSeg = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly(); + MemorySegment srcSeg = convertSerVer3toSerVer2(csk, Util.DEFAULT_UPDATE_SEED).asReadOnly(); MemorySegment srcWseg = MemorySegment.ofArray(new byte[24]); MemorySegment.copy(srcSeg, 0, srcWseg, 0, 24); @@ -144,7 +144,7 @@ public void checkSerVer2_2PreLongs_1Value() { UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build(); usk.update(1); //exact mode CompactSketch csk = usk.compact(true, null); - MemorySegment srcSeg = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly(); + MemorySegment srcSeg = convertSerVer3toSerVer2(csk, Util.DEFAULT_UPDATE_SEED).asReadOnly(); Sketch sketch = Sketch.heapify(srcSeg); assertEquals(sketch.isEmpty(), false); @@ -161,7 +161,7 @@ public void checkSerVer2_3PreLongs_1Value() { UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build(); for (int i = 0; i < 32; i++) { usk.update(i); } //est mode CompactSketch csk = usk.compact(true, null); - MemorySegment srcSeg = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly(); + MemorySegment srcSeg = convertSerVer3toSerVer2(csk, Util.DEFAULT_UPDATE_SEED).asReadOnly(); MemorySegment srcWseg = MemorySegment.ofArray(new byte[32]); MemorySegment.copy(srcSeg, 0, srcWseg, 0, 32); @@ 
-185,7 +185,7 @@ public void checkSerVer2_3PreLongs_1Value_ThLessthan1() { UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build(); for (int i = 0; i < 32; i++) { usk.update(i); } //est mode CompactSketch csk = usk.compact(true, null); - MemorySegment srcSeg = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED).asReadOnly(); + MemorySegment srcSeg = convertSerVer3toSerVer2(csk, Util.DEFAULT_UPDATE_SEED).asReadOnly(); MemorySegment srcWseg = MemorySegment.ofArray(new byte[32]); MemorySegment.copy(srcSeg, 0, srcWseg, 0, 32); diff --git a/src/test/java/org/apache/datasketches/theta2/HeapAlphaSketchTest.java b/src/test/java/org/apache/datasketches/theta2/HeapAlphaSketchTest.java index fbc80b091..5a6b828f7 100644 --- a/src/test/java/org/apache/datasketches/theta2/HeapAlphaSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta2/HeapAlphaSketchTest.java @@ -44,6 +44,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; @@ -57,7 +58,7 @@ public class HeapAlphaSketchTest { public void checkBadSerVer() { int k = 512; int u = k; - long seed = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed = Util.DEFAULT_UPDATE_SEED; UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed) .setNominalEntries(k).build(); HeapAlphaSketch sk1 = (HeapAlphaSketch)usk; //for internal checks @@ -95,7 +96,7 @@ public void checkAlphaIncompatibleWithSeg() { public void checkIllegalSketchID_UpdateSketch() { int k = 512; int u = k; - long seed = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed = Util.DEFAULT_UPDATE_SEED; UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed) .setNominalEntries(k).build(); HeapAlphaSketch sk1 = (HeapAlphaSketch)usk; //for internal checks @@ -120,7 +121,7 @@ 
public void checkIllegalSketchID_UpdateSketch() { public void checkHeapifySeedConflict() { int k = 512; long seed1 = 1021; - long seed2 = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed2 = Util.DEFAULT_UPDATE_SEED; UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed1) .setNominalEntries(k).build(); byte[] byteArray = usk.toByteArray(); @@ -132,7 +133,7 @@ public void checkHeapifySeedConflict() { public void checkHeapifyByteArrayExact() { int k = 512; int u = k; - long seed = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed = Util.DEFAULT_UPDATE_SEED; UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed) .setNominalEntries(k).build(); @@ -159,7 +160,7 @@ public void checkHeapifyByteArrayExact() { public void checkHeapifyByteArrayEstimating() { int k = 4096; int u = 2*k; - long seed = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed = Util.DEFAULT_UPDATE_SEED; UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed) .setNominalEntries(k).build(); @@ -188,7 +189,7 @@ public void checkHeapifyByteArrayEstimating() { public void checkHeapifyMemorySegmentEstimating() { int k = 512; int u = 2*k; //thus estimating - long seed = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed = Util.DEFAULT_UPDATE_SEED; //int maxBytes = (k << 4) + (Family.ALPHA.getLowPreLongs()); UpdateSketch sk1 = UpdateSketch.builder().setFamily(fam_).setSeed(seed) @@ -206,7 +207,7 @@ public void checkHeapifyMemorySegmentEstimating() { byte[] byteArray = sk1.toByteArray(); MemorySegment seg = MemorySegment.ofArray(byteArray).asReadOnly(); - UpdateSketch sk2 = (UpdateSketch)Sketch.heapify(seg, ThetaUtil.DEFAULT_UPDATE_SEED); + UpdateSketch sk2 = (UpdateSketch)Sketch.heapify(seg, Util.DEFAULT_UPDATE_SEED); assertEquals(sk2.getEstimate(), sk1est); assertEquals(sk2.getLowerBound(2), sk1lb); @@ -610,7 +611,7 @@ public void checkSegDeSerExceptions() { final long origThetaLong = seg.get(JAVA_LONG_UNALIGNED, THETA_LONG); try { seg.set(JAVA_LONG_UNALIGNED, THETA_LONG, Long.MAX_VALUE / 
2); //Corrupt the theta value - HeapAlphaSketch.heapifyInstance(seg, ThetaUtil.DEFAULT_UPDATE_SEED); + HeapAlphaSketch.heapifyInstance(seg, Util.DEFAULT_UPDATE_SEED); fail(); } catch (SketchesArgumentException e) { //expected @@ -620,7 +621,7 @@ public void checkSegDeSerExceptions() { MemorySegment seg2 = MemorySegment.ofArray(byteArray2); MemorySegment.copy(seg, 0, seg2, 0, seg2.byteSize()); try { - HeapAlphaSketch.heapifyInstance(seg2, ThetaUtil.DEFAULT_UPDATE_SEED); + HeapAlphaSketch.heapifyInstance(seg2, Util.DEFAULT_UPDATE_SEED); fail(); } catch (SketchesArgumentException e) { //expected @@ -628,7 +629,7 @@ public void checkSegDeSerExceptions() { // force ResizeFactor.X1, and allocated capacity too small insertLgResizeFactor(seg, ResizeFactor.X1.lg()); - UpdateSketch usk = HeapAlphaSketch.heapifyInstance(seg, ThetaUtil.DEFAULT_UPDATE_SEED); + UpdateSketch usk = HeapAlphaSketch.heapifyInstance(seg, Util.DEFAULT_UPDATE_SEED); ResizeFactor rf = usk.getResizeFactor(); assertEquals(rf, ResizeFactor.X2);//ResizeFactor recovered to X2, which always works. 
} @@ -636,7 +637,7 @@ public void checkSegDeSerExceptions() { private static void tryBadSeg(MemorySegment seg, int byteOffset, int byteValue) { try { seg.set(JAVA_BYTE, byteOffset, (byte) byteValue); //Corrupt - HeapAlphaSketch.heapifyInstance(seg, ThetaUtil.DEFAULT_UPDATE_SEED); + HeapAlphaSketch.heapifyInstance(seg, Util.DEFAULT_UPDATE_SEED); fail(); } catch (SketchesArgumentException e) { //expected diff --git a/src/test/java/org/apache/datasketches/theta2/HeapIntersectionTest.java b/src/test/java/org/apache/datasketches/theta2/HeapIntersectionTest.java index 4dcbe934c..033c3c4f4 100644 --- a/src/test/java/org/apache/datasketches/theta2/HeapIntersectionTest.java +++ b/src/test/java/org/apache/datasketches/theta2/HeapIntersectionTest.java @@ -33,7 +33,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; -import org.apache.datasketches.thetacommon2.ThetaUtil; +import org.apache.datasketches.common.Util; import org.testng.annotations.Test; /** @@ -506,7 +506,7 @@ public void checkGetResult() { @Test public void checkFamily() { - final IntersectionImpl impl = IntersectionImpl.initNewHeapInstance(ThetaUtil.DEFAULT_UPDATE_SEED); + final IntersectionImpl impl = IntersectionImpl.initNewHeapInstance(Util.DEFAULT_UPDATE_SEED); assertEquals(impl.getFamily(), Family.INTERSECTION); } diff --git a/src/test/java/org/apache/datasketches/theta2/HeapQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta2/HeapQuickSelectSketchTest.java index bbce7862e..fd9129d81 100644 --- a/src/test/java/org/apache/datasketches/theta2/HeapQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta2/HeapQuickSelectSketchTest.java @@ -58,7 +58,7 @@ public class HeapQuickSelectSketchTest { public void checkBadSerVer() { int k = 512; int u = k; - long seed = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed = Util.DEFAULT_UPDATE_SEED; UpdateSketch 
usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed).setNominalEntries(k).build(); HeapQuickSelectSketch sk1 = (HeapQuickSelectSketch)usk; //for internal checks @@ -83,7 +83,7 @@ public void checkBadSerVer() { public void checkIllegalSketchID_UpdateSketch() { int k = 512; int u = k; - long seed = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed = Util.DEFAULT_UPDATE_SEED; UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed).setNominalEntries(k).build(); HeapQuickSelectSketch sk1 = (HeapQuickSelectSketch)usk; //for internal checks assertTrue(usk.isEmpty()); @@ -107,7 +107,7 @@ public void checkIllegalSketchID_UpdateSketch() { public void checkHeapifySeedConflict() { int k = 512; long seed1 = 1021; - long seed2 = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed2 = Util.DEFAULT_UPDATE_SEED; UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed1).setNominalEntries(k).build(); byte[] byteArray = usk.toByteArray(); MemorySegment srcSeg = MemorySegment.ofArray(byteArray).asReadOnly(); @@ -119,14 +119,14 @@ public void checkHeapifyCorruptLgNomLongs() { UpdateSketch usk = UpdateSketch.builder().setNominalEntries(16).build(); MemorySegment srcSeg = MemorySegment.ofArray(usk.toByteArray()); srcSeg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte)2); //corrupt - Sketch.heapify(srcSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + Sketch.heapify(srcSeg, Util.DEFAULT_UPDATE_SEED); } @Test public void checkHeapifyByteArrayExact() { int k = 512; int u = k; - long seed = ThetaUtil.DEFAULT_UPDATE_SEED; + long seed = Util.DEFAULT_UPDATE_SEED; UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed).setNominalEntries(k).build(); for (int i=0; i>> 1; + long h = hash(data, Util.DEFAULT_UPDATE_SEED)[0] >>> 1; SingleItemSketch sis = SingleItemSketch.create(i); long halfMax = Long.MAX_VALUE >> 1; int count = sis.getCountLessThanThetaLong(halfMax); @@ -149,7 +149,7 @@ public void checkSerDe() { SingleItemSketch sis = SingleItemSketch.create(1); byte[] byteArr = 
sis.toByteArray(); MemorySegment seg = MemorySegment.ofArray(byteArr); - final short defaultSeedHash = ThetaUtil.computeSeedHash(ThetaUtil.DEFAULT_UPDATE_SEED); + final short defaultSeedHash = Util.computeSeedHash(Util.DEFAULT_UPDATE_SEED); SingleItemSketch sis2 = SingleItemSketch.heapify(seg, defaultSeedHash); assertEquals(sis2.getEstimate(), 1.0); @@ -305,7 +305,7 @@ public void checkHeapifyInstance() { @Test public void checkSingleItemBadFlags() { - final short defaultSeedHash = ThetaUtil.computeSeedHash(ThetaUtil.DEFAULT_UPDATE_SEED); + final short defaultSeedHash = Util.computeSeedHash(Util.DEFAULT_UPDATE_SEED); UpdateSketch sk1 = new UpdateSketchBuilder().build(); sk1.update(1); MemorySegment wseg = MemorySegment.ofArray(new byte[16]); diff --git a/src/test/java/org/apache/datasketches/theta2/SketchTest.java b/src/test/java/org/apache/datasketches/theta2/SketchTest.java index fca348c45..77a8246e3 100644 --- a/src/test/java/org/apache/datasketches/theta2/SketchTest.java +++ b/src/test/java/org/apache/datasketches/theta2/SketchTest.java @@ -139,7 +139,7 @@ public void checkBuilder() { nameS1 = sk1.getClass().getSimpleName(); assertEquals(nameS1, "HeapQuickSelectSketch"); assertEquals(sk1.getLgNomLongs(), Integer.numberOfTrailingZeros(ThetaUtil.DEFAULT_NOMINAL_ENTRIES)); - assertEquals(sk1.getSeed(), ThetaUtil.DEFAULT_UPDATE_SEED); + assertEquals(sk1.getSeed(), Util.DEFAULT_UPDATE_SEED); assertEquals(sk1.getP(), (float)1.0); assertEquals(sk1.getResizeFactor(), ResizeFactor.X8); } @@ -312,7 +312,7 @@ public void checkWrapToHeapifyConversion1() { assertFalse(csk2.hasMemorySegment()); assertEquals(uest1, csk2.getEstimate(), 0.0); - MemorySegment v2seg = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED); + MemorySegment v2seg = convertSerVer3toSerVer2(csk, Util.DEFAULT_UPDATE_SEED); csk2 = Sketch.wrap(v2seg); assertFalse(csk2.isDirect()); assertFalse(csk2.hasMemorySegment()); diff --git a/src/test/java/org/apache/datasketches/theta2/SketchesTest.java 
b/src/test/java/org/apache/datasketches/theta2/SketchesTest.java index 074a19459..c9cdef434 100644 --- a/src/test/java/org/apache/datasketches/theta2/SketchesTest.java +++ b/src/test/java/org/apache/datasketches/theta2/SketchesTest.java @@ -40,6 +40,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; @@ -81,13 +82,13 @@ public void checkSketchMethods() { CompactSketch csk2 = (CompactSketch)heapifySketch(seg); assertEquals((int)csk2.getEstimate(), k); - csk2 = (CompactSketch)heapifySketch(seg, ThetaUtil.DEFAULT_UPDATE_SEED); + csk2 = (CompactSketch)heapifySketch(seg, Util.DEFAULT_UPDATE_SEED); assertEquals((int)csk2.getEstimate(), k); csk2 = (CompactSketch)wrapSketch(seg); assertEquals((int)csk2.getEstimate(), k); - csk2 = (CompactSketch)wrapSketch(seg, ThetaUtil.DEFAULT_UPDATE_SEED); + csk2 = (CompactSketch)wrapSketch(seg, Util.DEFAULT_UPDATE_SEED); assertEquals((int)csk2.getEstimate(), k); } @@ -114,7 +115,7 @@ public void checkSetOpMethods() { cSk = union2.getResult(true, null); assertEquals((int)cSk.getEstimate(), 3*k/2); - union2 = (Union)heapifySetOperation(uSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + union2 = (Union)heapifySetOperation(uSeg, Util.DEFAULT_UPDATE_SEED); cSk = union2.getResult(true, null); assertEquals((int)cSk.getEstimate(), 3*k/2); @@ -122,7 +123,7 @@ public void checkSetOpMethods() { cSk = union2.getResult(true, null); assertEquals((int)cSk.getEstimate(), 3*k/2); - union2 = (Union)wrapSetOperation(uSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + union2 = (Union)wrapSetOperation(uSeg, Util.DEFAULT_UPDATE_SEED); cSk = union2.getResult(true, null); assertEquals((int)cSk.getEstimate(), 3*k/2); diff --git a/src/test/java/org/apache/datasketches/theta2/UnionImplTest.java b/src/test/java/org/apache/datasketches/theta2/UnionImplTest.java index f3a36de5d..0a088f235 
100644 --- a/src/test/java/org/apache/datasketches/theta2/UnionImplTest.java +++ b/src/test/java/org/apache/datasketches/theta2/UnionImplTest.java @@ -34,7 +34,6 @@ import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; public class UnionImplTest { @@ -122,7 +121,7 @@ public void checkUpdateWithSegV4Exact() { @Test public void checkFastWrap() { final int k = 16; - final long seed = ThetaUtil.DEFAULT_UPDATE_SEED; + final long seed = Util.DEFAULT_UPDATE_SEED; final int unionSize = Sketches.getMaxUnionBytes(k); final MemorySegment srcSeg = MemorySegment.ofArray(new byte[unionSize]); final Union union = Sketches.setOperationBuilder().setNominalEntries(k).buildUnion(srcSeg); @@ -159,7 +158,7 @@ public void checkVer2FamilyException() { sketch.update(i); } final CompactSketch csk = sketch.compact(true, null); - final MemorySegment v2seg = convertSerVer3toSerVer2(csk, ThetaUtil.DEFAULT_UPDATE_SEED); + final MemorySegment v2seg = convertSerVer3toSerVer2(csk, Util.DEFAULT_UPDATE_SEED); v2seg.set(JAVA_BYTE, PreambleUtil.FAMILY_BYTE, (byte)0); //corrupt family @@ -187,7 +186,7 @@ public void checkVer1FamilyException() { public void checkVer2EmptyHandling() { final int k = 16; final UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(); - final MemorySegment seg = convertSerVer3toSerVer2(sketch.compact(), ThetaUtil.DEFAULT_UPDATE_SEED); + final MemorySegment seg = convertSerVer3toSerVer2(sketch.compact(), Util.DEFAULT_UPDATE_SEED); final Union union = Sketches.setOperationBuilder().setNominalEntries(k).buildUnion(); union.union(seg); } @@ -227,7 +226,7 @@ public void checkRestricted() { final Union union = Sketches.setOperationBuilder().buildUnion(); assertTrue(union.isEmpty()); assertEquals(union.getThetaLong(), Long.MAX_VALUE); - assertEquals(union.getSeedHash(), 
ThetaUtil.computeSeedHash(ThetaUtil.DEFAULT_UPDATE_SEED)); + assertEquals(union.getSeedHash(), Util.computeSeedHash(Util.DEFAULT_UPDATE_SEED)); assertEquals(union.getRetainedEntries(), 0); assertEquals(union.getCache().length, 128); //only applies to stateful } diff --git a/src/test/java/org/apache/datasketches/theta2/UpdateSketchTest.java b/src/test/java/org/apache/datasketches/theta2/UpdateSketchTest.java index b0b4f5d0f..280e0d75e 100644 --- a/src/test/java/org/apache/datasketches/theta2/UpdateSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta2/UpdateSketchTest.java @@ -38,6 +38,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon2.ThetaUtil; import org.testng.annotations.Test; @@ -158,7 +159,7 @@ public void checkIncompatibleFamily() { UpdateSketch sk = Sketches.updateSketchBuilder().build(); sk.update(1); MemorySegment wseg = MemorySegment.ofArray(sk.compact().toByteArray()); - UpdateSketch.wrap(wseg, ThetaUtil.DEFAULT_UPDATE_SEED); + UpdateSketch.wrap(wseg, Util.DEFAULT_UPDATE_SEED); } @Test @@ -168,13 +169,13 @@ public void checkCorruption() { MemorySegment wseg = MemorySegment.ofArray(sk.toByteArray()); try { wseg.set(JAVA_BYTE, SER_VER_BYTE, (byte) 2); - UpdateSketch.wrap(wseg, ThetaUtil.DEFAULT_UPDATE_SEED); + UpdateSketch.wrap(wseg, Util.DEFAULT_UPDATE_SEED); fail(); } catch (SketchesArgumentException e) { } try { wseg.set(JAVA_BYTE, SER_VER_BYTE, (byte) 3); wseg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 2); - UpdateSketch.wrap(wseg, ThetaUtil.DEFAULT_UPDATE_SEED); + UpdateSketch.wrap(wseg, Util.DEFAULT_UPDATE_SEED); fail(); } catch (SketchesArgumentException e) { } } diff --git a/src/test/java/org/apache/datasketches/thetacommon/QuickSelectTest.java b/src/test/java/org/apache/datasketches/thetacommon/QuickSelectTest.java index 
2d7e3c77e..f812a7509 100644 --- a/src/test/java/org/apache/datasketches/thetacommon/QuickSelectTest.java +++ b/src/test/java/org/apache/datasketches/thetacommon/QuickSelectTest.java @@ -20,9 +20,9 @@ package org.apache.datasketches.thetacommon; import static java.lang.String.format; -import static org.apache.datasketches.thetacommon.QuickSelect.select; -import static org.apache.datasketches.thetacommon.QuickSelect.selectExcludingZeros; -import static org.apache.datasketches.thetacommon.QuickSelect.selectIncludingZeros; +import static org.apache.datasketches.common.QuickSelect.select; +import static org.apache.datasketches.common.QuickSelect.selectExcludingZeros; +import static org.apache.datasketches.common.QuickSelect.selectIncludingZeros; import java.util.Random; diff --git a/src/test/java/org/apache/datasketches/thetacommon2/QuickSelectTest.java b/src/test/java/org/apache/datasketches/thetacommon2/QuickSelectTest.java index e4ca129b1..71afa354b 100644 --- a/src/test/java/org/apache/datasketches/thetacommon2/QuickSelectTest.java +++ b/src/test/java/org/apache/datasketches/thetacommon2/QuickSelectTest.java @@ -20,9 +20,9 @@ package org.apache.datasketches.thetacommon2; import static java.lang.String.format; -import static org.apache.datasketches.thetacommon.QuickSelect.select; -import static org.apache.datasketches.thetacommon.QuickSelect.selectExcludingZeros; -import static org.apache.datasketches.thetacommon.QuickSelect.selectIncludingZeros; +import static org.apache.datasketches.common.QuickSelect.select; +import static org.apache.datasketches.common.QuickSelect.selectExcludingZeros; +import static org.apache.datasketches.common.QuickSelect.selectIncludingZeros; import java.util.Random; diff --git a/src/test/java/org/apache/datasketches/tuple/SerializerDeserializerTest.java b/src/test/java/org/apache/datasketches/tuple/SerializerDeserializerTest.java index 2a905694a..5a896ba86 100644 --- 
a/src/test/java/org/apache/datasketches/tuple/SerializerDeserializerTest.java +++ b/src/test/java/org/apache/datasketches/tuple/SerializerDeserializerTest.java @@ -59,6 +59,6 @@ public void validateFamilyWrongPreambleLength() { @Test(expectedExceptions = SketchesArgumentException.class) public void checkBadSeedHash() { - org.apache.datasketches.tuple.Util.computeSeedHash(50541); + org.apache.datasketches.common.Util.computeSeedHash(50541); } } diff --git a/src/test/java/org/apache/datasketches/tuple/aninteger/CornerCaseTupleSetOperationsTest.java b/src/test/java/org/apache/datasketches/tuple/aninteger/CornerCaseTupleSetOperationsTest.java index f9818bb38..e05574595 100644 --- a/src/test/java/org/apache/datasketches/tuple/aninteger/CornerCaseTupleSetOperationsTest.java +++ b/src/test/java/org/apache/datasketches/tuple/aninteger/CornerCaseTupleSetOperationsTest.java @@ -23,9 +23,9 @@ import static org.testng.Assert.assertTrue; import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.common.Util; import org.apache.datasketches.theta.UpdateSketch; import org.apache.datasketches.theta.UpdateSketchBuilder; -import org.apache.datasketches.thetacommon.ThetaUtil; import org.apache.datasketches.tuple.AnotB; import org.apache.datasketches.tuple.CompactSketch; import org.apache.datasketches.tuple.Intersection; @@ -625,6 +625,6 @@ private static void checkInvalidUpdate(float p, long updateKey) { } static long getLongHash(long v) { - return (hash(v, ThetaUtil.DEFAULT_UPDATE_SEED)[0]) >>> 1; + return (hash(v, Util.DEFAULT_UPDATE_SEED)[0]) >>> 1; } } diff --git a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java index 43e281c2c..f4a7b765d 100644 --- a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java +++ 
b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java @@ -21,9 +21,9 @@ import static org.testng.Assert.assertEquals; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; import org.testng.Assert; import org.testng.annotations.Test; @@ -116,7 +116,7 @@ public void checkGetValuesAndKeysMethods() { Memory hqsskMem = Memory.wrap(hqssk.toByteArray()); DirectArrayOfDoublesQuickSelectSketchR dqssk = - (DirectArrayOfDoublesQuickSelectSketchR)ArrayOfDoublesSketch.wrap(hqsskMem, ThetaUtil.DEFAULT_UPDATE_SEED); + (DirectArrayOfDoublesQuickSelectSketchR)ArrayOfDoublesSketch.wrap(hqsskMem, Util.DEFAULT_UPDATE_SEED); final double[][] values3 = dqssk.getValues(); final double[] values1d3 = dqssk.getValuesAsOneDimension(); final long[] keys3 = dqssk.getKeys(); @@ -127,7 +127,7 @@ public void checkGetValuesAndKeysMethods() { Memory hcskMem = Memory.wrap(hcsk.toByteArray()); DirectArrayOfDoublesCompactSketch dcsk2 = - (DirectArrayOfDoublesCompactSketch)ArrayOfDoublesSketch.wrap(hcskMem, ThetaUtil.DEFAULT_UPDATE_SEED); + (DirectArrayOfDoublesCompactSketch)ArrayOfDoublesSketch.wrap(hcskMem, Util.DEFAULT_UPDATE_SEED); final double[][] values4 = dqssk.getValues(); final double[] values1d4 = dqssk.getValuesAsOneDimension(); final long[] keys4 = dqssk.getKeys(); diff --git a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java index 635a41c64..5d647716a 100644 --- a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java +++ b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java @@ -23,7 +23,7 @@ import static 
org.apache.datasketches.hash.MurmurHash3.hash; import static org.testng.Assert.assertTrue; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.common.Util; import org.testng.annotations.Test; public class CornerCaseArrayOfDoublesSetOperationsTest { @@ -551,13 +551,13 @@ public void printTable() { println(LS + "Longs"); for (long v = 1L; v < 10; v++) { - long hash = (hash(v, ThetaUtil.DEFAULT_UPDATE_SEED)[0]) >>> 1; + long hash = (hash(v, Util.DEFAULT_UPDATE_SEED)[0]) >>> 1; printLong(v, hash); } } static long getLongHash(long v) { - return (hash(v, ThetaUtil.DEFAULT_UPDATE_SEED)[0]) >>> 1; + return (hash(v, Util.DEFAULT_UPDATE_SEED)[0]) >>> 1; } static void printLong(long v, long hash) { diff --git a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java index b86a6839f..34160a41d 100644 --- a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java +++ b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java @@ -19,10 +19,12 @@ package org.apache.datasketches.tuple.arrayofdoubles; +import static org.apache.datasketches.common.Util.computeSeedHash; +import static org.apache.datasketches.common.Util.DEFAULT_UPDATE_SEED; + import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; -import org.apache.datasketches.tuple.Util; +//import org.apache.datasketches.tuple.Util; import org.testng.Assert; import org.testng.annotations.Test; @@ -68,7 +70,7 @@ public void exactModeFromQuickSelectSketch() { Assert.assertEquals(sketch.getRetainedEntries(), 3); Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); Assert.assertEquals(sketch.getTheta(), 1.0); - 
Assert.assertEquals(sketch.getSeedHash(), Util.computeSeedHash(ThetaUtil.DEFAULT_UPDATE_SEED)); + Assert.assertEquals(sketch.getSeedHash(), computeSeedHash(DEFAULT_UPDATE_SEED)); double[][] values = sketch.getValues(); Assert.assertEquals(values.length, 3); for (double[] array: values) { diff --git a/src/test/java/org/apache/datasketches/tuple2/SerializerDeserializerTest.java b/src/test/java/org/apache/datasketches/tuple2/SerializerDeserializerTest.java index 65311b17c..b43863d12 100644 --- a/src/test/java/org/apache/datasketches/tuple2/SerializerDeserializerTest.java +++ b/src/test/java/org/apache/datasketches/tuple2/SerializerDeserializerTest.java @@ -54,6 +54,6 @@ public void validateFamilyWrongPreambleLength() { @Test(expectedExceptions = SketchesArgumentException.class) public void checkBadSeedHash() { - org.apache.datasketches.tuple.Util.computeSeedHash(50541); + org.apache.datasketches.common.Util.computeSeedHash(50541); } } diff --git a/src/test/java/org/apache/datasketches/tuple2/aninteger/CornerCaseTupleSetOperationsTest.java b/src/test/java/org/apache/datasketches/tuple2/aninteger/CornerCaseTupleSetOperationsTest.java index fc90e4215..635fe19b2 100644 --- a/src/test/java/org/apache/datasketches/tuple2/aninteger/CornerCaseTupleSetOperationsTest.java +++ b/src/test/java/org/apache/datasketches/tuple2/aninteger/CornerCaseTupleSetOperationsTest.java @@ -23,9 +23,9 @@ import static org.testng.Assert.assertTrue; import org.apache.datasketches.common.ResizeFactor; +import org.apache.datasketches.common.Util; import org.apache.datasketches.theta2.UpdateSketch; import org.apache.datasketches.theta2.UpdateSketchBuilder; -import org.apache.datasketches.thetacommon2.ThetaUtil; import org.apache.datasketches.tuple2.AnotB; import org.apache.datasketches.tuple2.CompactSketch; import org.apache.datasketches.tuple2.Intersection; @@ -625,6 +625,6 @@ private static void checkInvalidUpdate(float p, long updateKey) { } static long getLongHash(long v) { - return (hash(v, 
ThetaUtil.DEFAULT_UPDATE_SEED)[0]) >>> 1; + return (hash(v, Util.DEFAULT_UPDATE_SEED)[0]) >>> 1; } } diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java index 5aea6c90e..49699311d 100644 --- a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java +++ b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java @@ -23,7 +23,7 @@ import java.lang.foreign.MemorySegment; -import org.apache.datasketches.thetacommon2.ThetaUtil; +import org.apache.datasketches.common.Util; import org.testng.Assert; import org.testng.annotations.Test; @@ -116,7 +116,7 @@ public void checkGetValuesAndKeysMethods() { MemorySegment hqsskSeg = MemorySegment.ofArray(hqssk.toByteArray()); DirectArrayOfDoublesQuickSelectSketchR dqssk = - (DirectArrayOfDoublesQuickSelectSketchR)ArrayOfDoublesSketch.wrap(hqsskSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + (DirectArrayOfDoublesQuickSelectSketchR)ArrayOfDoublesSketch.wrap(hqsskSeg, Util.DEFAULT_UPDATE_SEED); final double[][] values3 = dqssk.getValues(); final double[] values1d3 = dqssk.getValuesAsOneDimension(); final long[] keys3 = dqssk.getKeys(); @@ -127,7 +127,7 @@ public void checkGetValuesAndKeysMethods() { MemorySegment hcskSeg = MemorySegment.ofArray(hcsk.toByteArray()); DirectArrayOfDoublesCompactSketch dcsk2 = - (DirectArrayOfDoublesCompactSketch)ArrayOfDoublesSketch.wrap(hcskSeg, ThetaUtil.DEFAULT_UPDATE_SEED); + (DirectArrayOfDoublesCompactSketch)ArrayOfDoublesSketch.wrap(hcskSeg, Util.DEFAULT_UPDATE_SEED); final double[][] values4 = dqssk.getValues(); final double[] values1d4 = dqssk.getValuesAsOneDimension(); final long[] keys4 = dqssk.getKeys(); diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java 
b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java index fa9412207..d03e9aa52 100644 --- a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java +++ b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java @@ -23,7 +23,7 @@ import static org.apache.datasketches.hash.MurmurHash3.hash; import static org.testng.Assert.assertTrue; -import org.apache.datasketches.thetacommon2.ThetaUtil; +import org.apache.datasketches.common.Util; import org.testng.annotations.Test; public class CornerCaseArrayOfDoublesSetOperationsTest { @@ -551,13 +551,13 @@ public void printTable() { println(LS + "Longs"); for (long v = 1L; v < 10; v++) { - long hash = (hash(v, ThetaUtil.DEFAULT_UPDATE_SEED)[0]) >>> 1; + long hash = (hash(v, Util.DEFAULT_UPDATE_SEED)[0]) >>> 1; printLong(v, hash); } } static long getLongHash(long v) { - return (hash(v, ThetaUtil.DEFAULT_UPDATE_SEED)[0]) >>> 1; + return (hash(v, Util.DEFAULT_UPDATE_SEED)[0]) >>> 1; } static void printLong(long v, long hash) { diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java index fa2cca8ab..39a584fec 100644 --- a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java +++ b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java @@ -22,7 +22,7 @@ import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon2.ThetaUtil; +import org.apache.datasketches.common.Util; import org.testng.Assert; import org.testng.annotations.Test; @@ -68,7 +68,7 @@ public void exactModeFromQuickSelectSketch() { 
Assert.assertEquals(sketch.getRetainedEntries(), 3); Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); Assert.assertEquals(sketch.getTheta(), 1.0); - Assert.assertEquals(sketch.getSeedHash(), ThetaUtil.computeSeedHash(ThetaUtil.DEFAULT_UPDATE_SEED)); + Assert.assertEquals(sketch.getSeedHash(), Util.computeSeedHash(Util.DEFAULT_UPDATE_SEED)); double[][] values = sketch.getValues(); Assert.assertEquals(values.length, 3); for (double[] array: values) { From 5f199a35903b99e8707584054204ca1f1f4cd8f4 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 27 Jun 2025 16:40:06 -0700 Subject: [PATCH 21/25] More prep before deletions. --- .../tuple/arrayofdoubles/ArrayOfDoublesAnotBImpl.java | 1 - .../tuple/arrayofdoubles/ArrayOfDoublesIntersection.java | 1 - .../tuple/arrayofdoubles/ArrayOfDoublesUnion.java | 1 - .../arrayofdoubles/DirectArrayOfDoublesCompactSketch.java | 6 ++++-- .../DirectArrayOfDoublesQuickSelectSketch.java | 5 ++++- .../arrayofdoubles/HeapArrayOfDoublesCompactSketch.java | 1 - .../arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java | 4 +++- 7 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesAnotBImpl.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesAnotBImpl.java index 4bd241768..b4552cc73 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesAnotBImpl.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesAnotBImpl.java @@ -35,7 +35,6 @@ import org.apache.datasketches.thetacommon.SetOperationCornerCases.AnotbAction; import org.apache.datasketches.thetacommon.SetOperationCornerCases.CornerCase; import org.apache.datasketches.thetacommon.ThetaUtil; -import org.apache.datasketches.tuple.Util; /** * Computes a set difference, A-AND-NOT-B, of two ArrayOfDoublesSketches. 
diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesIntersection.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesIntersection.java index 386b70d3b..26f0f2275 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesIntersection.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesIntersection.java @@ -24,7 +24,6 @@ import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.tuple.Util; /** * Computes the intersection of two or more tuple sketches of type ArrayOfDoubles. diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUnion.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUnion.java index d76754b85..b0a95eecf 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUnion.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUnion.java @@ -27,7 +27,6 @@ import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.tuple.SerializerDeserializer; -import org.apache.datasketches.tuple.Util; /** * The base class for unions of tuple sketches of type ArrayOfDoubles. 
diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java index f3497709c..2e0dc3628 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java @@ -19,6 +19,9 @@ package org.apache.datasketches.tuple.arrayofdoubles; +import static org.apache.datasketches.common.Util.checkSeedHashes; +import static org.apache.datasketches.common.Util.computeSeedHash; + import java.nio.ByteOrder; import org.apache.datasketches.common.Family; @@ -26,7 +29,6 @@ import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.tuple.SerializerDeserializer; -import org.apache.datasketches.tuple.Util; /** * Direct Compact Sketch of type ArrayOfDoubles. 
@@ -178,7 +180,7 @@ final class DirectArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketc if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { throw new SketchesArgumentException("Byte order mismatch"); } - org.apache.datasketches.common.Util.checkSeedHashes(mem.getShort(SEED_HASH_SHORT), org.apache.datasketches.common.Util.computeSeedHash(seed)); + checkSeedHashes(mem.getShort(SEED_HASH_SHORT), computeSeedHash(seed)); isEmpty_ = (mem_.getByte(FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; thetaLong_ = mem_.getLong(THETA_LONG); } diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java index 8073a1b38..2730dc391 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java @@ -19,6 +19,9 @@ package org.apache.datasketches.tuple.arrayofdoubles; +import static org.apache.datasketches.common.Util.checkSeedHashes; +import static org.apache.datasketches.common.Util.computeSeedHash; + import java.nio.ByteOrder; import java.util.Arrays; @@ -152,7 +155,7 @@ private DirectArrayOfDoublesQuickSelectSketch( SerializerDeserializer.validateType(mem_.getByte(SKETCH_TYPE_BYTE), SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch); - org.apache.datasketches.common.Util.checkSeedHashes(mem.getShort(SEED_HASH_SHORT), org.apache.datasketches.common.Util.computeSeedHash(seed)); + checkSeedHashes(mem.getShort(SEED_HASH_SHORT), computeSeedHash(seed)); keysOffset_ = ENTRIES_START; valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * getCurrentCapacity()); // to do: make parent take care of its own parts diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java 
b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java index f69babc97..e57dd9552 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java @@ -29,7 +29,6 @@ import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.tuple.SerializerDeserializer; -import org.apache.datasketches.tuple.Util; /** * The on-heap implementation of tuple Compact Sketch of type ArrayOfDoubles. diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java index ab955a0ee..0aad041ec 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java @@ -20,6 +20,8 @@ package org.apache.datasketches.tuple.arrayofdoubles; import static org.apache.datasketches.common.Util.ceilingPowerOf2; +import static org.apache.datasketches.common.Util.checkSeedHashes; +import static org.apache.datasketches.common.Util.computeSeedHash; import static org.apache.datasketches.common.Util.exactLog2OfLong; import java.nio.ByteOrder; @@ -98,7 +100,7 @@ final class HeapArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelec if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { throw new SketchesArgumentException("Byte order mismatch"); } - org.apache.datasketches.common.Util.checkSeedHashes(mem.getShort(SEED_HASH_SHORT), org.apache.datasketches.common.Util.computeSeedHash(seed)); + checkSeedHashes(mem.getShort(SEED_HASH_SHORT), computeSeedHash(seed)); isEmpty_ = (flags & (1 << Flags.IS_EMPTY.ordinal())) > 0; lgNomEntries_ = 
mem.getByte(LG_NOM_ENTRIES_BYTE); thetaLong_ = mem.getLong(THETA_LONG); From e5669653bb9ccfeca6d40840b125005d40b9e1d1 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 27 Jun 2025 17:15:51 -0700 Subject: [PATCH 22/25] more cleanup --- .../datasketches/tuple2/SerializerDeserializerTest.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/test/java/org/apache/datasketches/tuple2/SerializerDeserializerTest.java b/src/test/java/org/apache/datasketches/tuple2/SerializerDeserializerTest.java index b43863d12..7fab43ac4 100644 --- a/src/test/java/org/apache/datasketches/tuple2/SerializerDeserializerTest.java +++ b/src/test/java/org/apache/datasketches/tuple2/SerializerDeserializerTest.java @@ -19,6 +19,8 @@ package org.apache.datasketches.tuple2; +import static org.apache.datasketches.common.Util.computeSeedHash; + import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.Family; @@ -54,6 +56,6 @@ public void validateFamilyWrongPreambleLength() { @Test(expectedExceptions = SketchesArgumentException.class) public void checkBadSeedHash() { - org.apache.datasketches.common.Util.computeSeedHash(50541); + computeSeedHash(50541); } } From 357a8a7122d029c69973c6f74c77054e878ba9d3 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 27 Jun 2025 17:32:57 -0700 Subject: [PATCH 23/25] Prepare for merge --- .../apache/datasketches/fdt/FdtSketch.java | 8 +- .../apache/datasketches/fdt2/FdtSketch.java | 166 - .../org/apache/datasketches/fdt2/Group.java | 140 - .../datasketches/fdt2/PostProcessor.java | 186 - .../datasketches/fdt2/package-info.java | 23 - .../org/apache/datasketches/theta/AnotB.java | 19 +- .../apache/datasketches/theta/AnotBimpl.java | 23 +- .../apache/datasketches/theta/BitPacking.java | 4 +- .../BytesCompactCompressedHashIterator.java | 2 +- .../theta/BytesCompactHashIterator.java | 2 +- .../datasketches/theta/CompactOperations.java | 110 +- .../datasketches/theta/CompactSketch.java | 202 +- 
.../ConcurrentBackgroundThetaPropagation.java | 2 +- .../ConcurrentDirectQuickSelectSketch.java | 18 +- .../theta/ConcurrentHeapThetaBuffer.java | 10 +- .../theta/ConcurrentSharedThetaSketch.java | 8 +- .../theta/DirectCompactCompressedSketch.java | 67 +- .../theta/DirectCompactSketch.java | 91 +- .../theta/DirectQuickSelectSketch.java | 177 +- .../theta/DirectQuickSelectSketchR.java | 118 +- .../theta/EmptyCompactSketch.java | 29 +- .../theta/ForwardCompatibility.java | 75 +- .../datasketches/theta/HeapAlphaSketch.java | 56 +- .../theta/HeapCompactHashIterator.java | 2 +- .../datasketches/theta/HeapCompactSketch.java | 26 +- .../datasketches/theta/HeapHashIterator.java | 2 +- .../theta/HeapQuickSelectSketch.java | 53 +- .../datasketches/theta/HeapUpdateSketch.java | 34 +- .../datasketches/theta/Intersection.java | 78 +- .../datasketches/theta/IntersectionImpl.java | 257 +- .../datasketches/theta/JaccardSimilarity.java | 2 + .../MemoryCompactCompressedHashIterator.java | 107 - .../theta/MemoryHashIterator.java | 60 - ...ySegmentCompactCompressedHashIterator.java | 4 +- .../MemorySegmentHashIterator.java | 2 +- .../datasketches/theta/PreambleUtil.java | 221 +- .../apache/datasketches/theta/Rebuilder.java | 120 +- .../datasketches/theta/SetOperation.java | 147 +- .../theta/SetOperationBuilder.java | 63 +- .../datasketches/theta/SingleItemSketch.java | 44 +- .../org/apache/datasketches/theta/Sketch.java | 172 +- .../apache/datasketches/theta/Sketches.java | 312 +- .../org/apache/datasketches/theta/Union.java | 79 +- .../apache/datasketches/theta/UnionImpl.java | 166 +- .../datasketches/theta/UpdateSketch.java | 129 +- .../theta/UpdateSketchBuilder.java | 82 +- .../theta/WrappedCompactCompressedSketch.java | 8 +- .../theta/WrappedCompactSketch.java | 37 +- .../org/apache/datasketches/theta2/AnotB.java | 203 - .../apache/datasketches/theta2/AnotBimpl.java | 251 - .../datasketches/theta2/BitPacking.java | 6294 ----------------- .../BytesCompactCompressedHashIterator.java 
| 93 - .../theta2/BytesCompactHashIterator.java | 53 - .../theta2/CompactOperations.java | 388 - .../datasketches/theta2/CompactSketch.java | 492 -- .../ConcurrentBackgroundThetaPropagation.java | 110 - .../ConcurrentDirectQuickSelectSketch.java | 270 - .../ConcurrentHeapQuickSelectSketch.java | 266 - .../theta2/ConcurrentHeapThetaBuffer.java | 229 - .../theta2/ConcurrentPropagationService.java | 72 - .../theta2/ConcurrentSharedThetaSketch.java | 159 - .../theta2/DirectCompactCompressedSketch.java | 142 - .../theta2/DirectCompactSketch.java | 180 - .../theta2/DirectQuickSelectSketch.java | 329 - .../theta2/DirectQuickSelectSketchR.java | 288 - .../theta2/EmptyCompactSketch.java | 143 - .../theta2/ForwardCompatibility.java | 166 - .../datasketches/theta2/HashIterator.java | 40 - .../datasketches/theta2/HeapAlphaSketch.java | 596 -- .../theta2/HeapCompactHashIterator.java | 41 - .../theta2/HeapCompactSketch.java | 153 - .../datasketches/theta2/HeapHashIterator.java | 54 - .../theta2/HeapQuickSelectSketch.java | 321 - .../datasketches/theta2/HeapUpdateSketch.java | 140 - .../datasketches/theta2/Intersection.java | 247 - .../datasketches/theta2/IntersectionImpl.java | 569 -- .../theta2/JaccardSimilarity.java | 181 - .../datasketches/theta2/PreambleUtil.java | 532 -- .../apache/datasketches/theta2/Rebuilder.java | 175 - .../datasketches/theta2/SetOperation.java | 246 - .../theta2/SetOperationBuilder.java | 276 - .../datasketches/theta2/SingleItemSketch.java | 408 -- .../apache/datasketches/theta2/Sketch.java | 668 -- .../apache/datasketches/theta2/Sketches.java | 406 -- .../org/apache/datasketches/theta2/Union.java | 292 - .../apache/datasketches/theta2/UnionImpl.java | 377 - .../theta2/UpdateReturnState.java | 79 - .../datasketches/theta2/UpdateSketch.java | 485 -- .../theta2/UpdateSketchBuilder.java | 493 -- .../WrappedCompactCompressedSketch.java | 111 - .../theta2/WrappedCompactSketch.java | 144 - .../datasketches/theta2/package-info.java | 27 - 
.../thetacommon/HashOperations.java | 49 +- .../thetacommon2/BinomialBoundsN.java | 284 - .../BoundsOnRatiosInThetaSketchedSets.java | 121 - .../BoundsOnRatiosInTupleSketchedSets.java | 204 - .../thetacommon2/EquivTables.java | 292 - .../thetacommon2/HashOperations.java | 401 -- .../thetacommon2/SetOperationCornerCases.java | 313 - .../datasketches/thetacommon2/ThetaUtil.java | 96 - .../thetacommon2/package-info.java | 24 - .../org/apache/datasketches/tuple/AnotB.java | 2 +- .../datasketches/tuple/CompactSketch.java | 45 +- .../datasketches/tuple/QuickSelectSketch.java | 54 +- .../tuple/SerializerDeserializer.java | 13 +- .../org/apache/datasketches/tuple/Sketch.java | 2 +- .../apache/datasketches/tuple/Sketches.java | 26 +- .../tuple/SummaryDeserializer.java | 14 +- .../datasketches/tuple/UpdatableSketch.java | 8 +- .../tuple/adouble/DoubleSketch.java | 11 +- .../tuple/adouble/DoubleSummary.java | 16 +- .../adouble/DoubleSummaryDeserializer.java | 7 +- .../tuple/aninteger/IntegerSketch.java | 11 +- .../tuple/aninteger/IntegerSummary.java | 16 +- .../aninteger/IntegerSummaryDeserializer.java | 7 +- .../arrayofdoubles/ArrayOfDoublesAnotB.java | 8 +- .../ArrayOfDoublesAnotBImpl.java | 11 +- .../ArrayOfDoublesIntersection.java | 16 +- .../ArrayOfDoublesQuickSelectSketch.java | 5 +- .../ArrayOfDoublesSetOperationBuilder.java | 27 +- .../arrayofdoubles/ArrayOfDoublesSketch.java | 62 +- .../ArrayOfDoublesSketches.java | 125 +- .../arrayofdoubles/ArrayOfDoublesUnion.java | 89 +- .../ArrayOfDoublesUpdatableSketch.java | 52 +- .../ArrayOfDoublesUpdatableSketchBuilder.java | 9 +- .../DirectArrayOfDoublesCompactSketch.java | 176 +- .../DirectArrayOfDoublesIntersection.java | 16 +- ...DirectArrayOfDoublesQuickSelectSketch.java | 204 +- ...irectArrayOfDoublesQuickSelectSketchR.java | 8 +- .../DirectArrayOfDoublesSketchIterator.java | 31 +- .../DirectArrayOfDoublesUnion.java | 59 +- .../DirectArrayOfDoublesUnionR.java | 11 +- .../HeapArrayOfDoublesCompactSketch.java | 87 +- 
.../HeapArrayOfDoublesIntersection.java | 3 +- .../HeapArrayOfDoublesQuickSelectSketch.java | 87 +- .../HeapArrayOfDoublesUnion.java | 22 +- .../tuple/strings/ArrayOfStringsSketch.java | 11 +- .../tuple/strings/ArrayOfStringsSummary.java | 112 +- .../ArrayOfStringsSummaryDeserializer.java | 17 +- .../org/apache/datasketches/tuple2/AnotB.java | 636 -- .../datasketches/tuple2/CompactSketch.java | 256 - .../tuple2/DeserializeResult.java | 55 - .../apache/datasketches/tuple2/Filter.java | 76 - .../datasketches/tuple2/HashTables.java | 169 - .../datasketches/tuple2/Intersection.java | 254 - .../tuple2/JaccardSimilarity.java | 370 - .../tuple2/QuickSelectSketch.java | 621 -- .../tuple2/SerializerDeserializer.java | 99 - .../apache/datasketches/tuple2/Sketch.java | 224 - .../apache/datasketches/tuple2/Sketches.java | 72 - .../apache/datasketches/tuple2/Summary.java | 46 - .../tuple2/SummaryDeserializer.java | 42 - .../datasketches/tuple2/SummaryFactory.java | 34 - .../tuple2/SummarySetOperations.java | 56 - .../tuple2/TupleSketchIterator.java | 75 - .../org/apache/datasketches/tuple2/Union.java | 225 - .../datasketches/tuple2/UpdatableSketch.java | 191 - .../tuple2/UpdatableSketchBuilder.java | 107 - .../datasketches/tuple2/UpdatableSummary.java | 36 - .../org/apache/datasketches/tuple2/Util.java | 140 - .../tuple2/adouble/DoubleSketch.java | 85 - .../tuple2/adouble/DoubleSummary.java | 162 - .../adouble/DoubleSummaryDeserializer.java | 38 - .../tuple2/adouble/DoubleSummaryFactory.java | 46 - .../adouble/DoubleSummarySetOperations.java | 83 - .../tuple2/adouble/package-info.java | 23 - .../tuple2/aninteger/IntegerSketch.java | 86 - .../tuple2/aninteger/IntegerSummary.java | 162 - .../aninteger/IntegerSummaryDeserializer.java | 38 - .../aninteger/IntegerSummaryFactory.java | 46 - .../IntegerSummarySetOperations.java | 67 - .../tuple2/aninteger/package-info.java | 23 - .../arrayofdoubles/ArrayOfDoublesAnotB.java | 57 - .../ArrayOfDoublesAnotBImpl.java | 237 - 
.../ArrayOfDoublesCombiner.java | 35 - .../ArrayOfDoublesCompactSketch.java | 64 - .../ArrayOfDoublesIntersection.java | 184 - .../ArrayOfDoublesQuickSelectSketch.java | 196 - .../ArrayOfDoublesSetOperationBuilder.java | 135 - .../arrayofdoubles/ArrayOfDoublesSketch.java | 292 - .../ArrayOfDoublesSketchIterator.java | 50 - .../ArrayOfDoublesSketches.java | 151 - .../arrayofdoubles/ArrayOfDoublesUnion.java | 208 - .../ArrayOfDoublesUpdatableSketch.java | 233 - .../ArrayOfDoublesUpdatableSketchBuilder.java | 132 - .../DirectArrayOfDoublesCompactSketch.java | 288 - .../DirectArrayOfDoublesIntersection.java | 52 - ...DirectArrayOfDoublesQuickSelectSketch.java | 435 -- ...irectArrayOfDoublesQuickSelectSketchR.java | 42 - .../DirectArrayOfDoublesSketchIterator.java | 83 - .../DirectArrayOfDoublesUnion.java | 92 - .../DirectArrayOfDoublesUnionR.java | 47 - .../tuple2/arrayofdoubles/HashTables.java | 130 - .../HeapArrayOfDoublesCompactSketch.java | 232 - .../HeapArrayOfDoublesIntersection.java | 42 - .../HeapArrayOfDoublesQuickSelectSketch.java | 365 - .../HeapArrayOfDoublesSketchIterator.java | 65 - .../HeapArrayOfDoublesUnion.java | 73 - .../tuple2/arrayofdoubles/package-info.java | 24 - .../datasketches/tuple2/package-info.java | 25 - .../tuple2/strings/ArrayOfStringsSketch.java | 103 - .../tuple2/strings/ArrayOfStringsSummary.java | 185 - .../ArrayOfStringsSummaryDeserializer.java | 51 - .../strings/ArrayOfStringsSummaryFactory.java | 35 - .../ArrayOfStringsSummarySetOperations.java | 40 - .../tuple2/strings/package-info.java | 24 - .../datasketches/fdt/FdtSketchTest.java | 9 +- .../apache/datasketches/fdt/GroupTest.java | 1 + .../datasketches/fdt2/FdtSketchTest.java | 184 - .../apache/datasketches/fdt2/GroupTest.java | 67 - .../datasketches/theta/AnotBimplTest.java | 41 +- .../theta/BackwardConversions.java | 100 +- .../datasketches/theta/BitPackingTest.java | 1 + .../datasketches/theta/CompactSketchTest.java | 217 +- ...ConcurrentDirectQuickSelectSketchTest.java | 
238 +- .../ConcurrentHeapQuickSelectSketchTest.java | 129 +- .../CornerCaseThetaSetOperationsTest.java | 8 + .../theta/DirectIntersectionTest.java | 281 +- .../theta/DirectQuickSelectSketchTest.java | 349 +- .../datasketches/theta/DirectUnionTest.java | 291 +- .../apache/datasketches/theta/EmptyTest.java | 41 +- .../datasketches/theta/ExamplesTest.java | 7 + .../theta/ForwardCompatibilityTest.java | 114 +- .../theta/HeapAlphaSketchTest.java | 127 +- .../theta/HeapIntersectionTest.java | 66 +- .../theta/HeapQuickSelectSketchTest.java | 131 +- .../datasketches/theta/HeapUnionTest.java | 123 +- .../theta/HeapifyWrapSerVer1and2Test.java | 494 +- .../datasketches/theta/IteratorTest.java | 14 +- .../theta/JaccardSimilarityTest.java | 2 + .../theta/PairwiseSetOperationsTest.java | 8 + .../datasketches/theta/PreambleUtilTest.java | 112 +- .../ReadOnlyMemorySegmentTest.java | 8 +- .../theta/ReadOnlyMemoryTest.java | 200 - .../datasketches/theta/SetOperationTest.java | 131 +- .../theta/SetOpsCornerCasesTest.java | 88 +- .../theta/SingleItemSketchTest.java | 83 +- .../apache/datasketches/theta/SketchTest.java | 153 +- .../datasketches/theta/SketchesTest.java | 77 +- .../theta/ThetaSketchCrossLanguageTest.java | 19 +- .../datasketches/theta/UnionImplTest.java | 145 +- .../datasketches/theta/UpdateSketchTest.java | 65 +- .../datasketches/theta2/AnotBimplTest.java | 333 - .../theta2/BackwardConversions.java | 237 - .../datasketches/theta2/BitPackingTest.java | 166 - .../theta2/CompactSketchTest.java | 674 -- ...ConcurrentDirectQuickSelectSketchTest.java | 717 -- .../ConcurrentHeapQuickSelectSketchTest.java | 744 -- .../CornerCaseThetaSetOperationsTest.java | 518 -- .../theta2/DirectIntersectionTest.java | 768 -- .../theta2/DirectQuickSelectSketchTest.java | 936 --- .../datasketches/theta2/DirectUnionTest.java | 827 --- .../apache/datasketches/theta2/EmptyTest.java | 169 - .../datasketches/theta2/ExamplesTest.java | 124 - .../theta2/ForwardCompatibilityTest.java | 218 - 
.../theta2/HeapAlphaSketchTest.java | 697 -- .../theta2/HeapIntersectionTest.java | 534 -- .../theta2/HeapQuickSelectSketchTest.java | 642 -- .../datasketches/theta2/HeapUnionTest.java | 669 -- .../theta2/HeapifyWrapSerVer1and2Test.java | 607 -- .../datasketches/theta2/IteratorTest.java | 133 - .../theta2/JaccardSimilarityTest.java | 248 - .../theta2/PairwiseSetOperationsTest.java | 410 -- .../datasketches/theta2/PreambleUtilTest.java | 231 - .../datasketches/theta2/SetOperationTest.java | 438 -- .../theta2/SetOpsCornerCasesTest.java | 501 -- .../theta2/SingleItemSketchTest.java | 377 - .../datasketches/theta2/SketchTest.java | 439 -- .../datasketches/theta2/SketchesTest.java | 203 - .../theta2/ThetaSketchCrossLanguageTest.java | 121 - .../datasketches/theta2/UnionImplTest.java | 319 - .../datasketches/theta2/UpdateSketchTest.java | 237 - .../thetacommon/BinomialBoundsNTest.java | 1 + ...BoundsOnRatiosInThetaSketchedSetsTest.java | 1 + ...BoundsOnRatiosInTupleSketchedSetsTest.java | 3 +- .../thetacommon/HashOperationsTest.java | 39 +- .../thetacommon/ThetaUtilTest.java | 1 + .../thetacommon2/BinomialBoundsNTest.java | 181 - ...BoundsOnRatiosInThetaSketchedSetsTest.java | 94 - ...BoundsOnRatiosInTupleSketchedSetsTest.java | 159 - .../thetacommon2/HashOperationsTest.java | 218 - .../thetacommon2/QuickSelectTest.java | 446 -- .../thetacommon2/ThetaUtilTest.java | 44 - .../CompactSketchWithDoubleSummaryTest.java | 17 +- .../datasketches/tuple/IntegerSummary.java | 15 +- .../tuple/IntegerSummaryDeserializer.java | 9 +- .../tuple/IntegerSummaryFactory.java | 2 + .../tuple/JaccardSimilarityTest.java | 10 +- .../apache/datasketches/tuple/MiscTest.java | 5 +- ...st.java => ReadOnlyMemorySegmentTest.java} | 13 +- .../tuple/SerializerDeserializerTest.java | 18 +- .../tuple/TupleCrossLanguageTest.java | 22 +- .../tuple/TupleExamples2Test.java | 8 +- .../datasketches/tuple/TupleExamplesTest.java | 8 +- .../tuple/adouble/AdoubleAnotBTest.java | 2 + 
.../adouble/AdoubleIntersectionTest.java | 5 +- .../tuple/adouble/AdoubleTest.java | 16 +- .../tuple/adouble/AdoubleUnionTest.java | 3 + .../tuple/adouble/FilterTest.java | 2 + .../CornerCaseTupleSetOperationsTest.java | 3 + .../tuple/aninteger/EngagementTest.java | 3 + .../tuple/aninteger/IntegerSketchTest.java | 10 +- .../tuple/aninteger/MikhailsBugTupleTest.java | 3 + .../tuple/aninteger/ParameterLeakageTest.java | 4 +- .../AodSketchCrossLanguageTest.java | 20 +- .../ArrayOfDoublesAnotBTest.java | 14 +- .../ArrayOfDoublesCompactSketchTest.java | 28 +- .../ArrayOfDoublesIntersectionTest.java | 33 +- .../ArrayOfDoublesQuickSelectSketchTest.java | 19 +- .../ArrayOfDoublesUnionTest.java | 95 +- ...erCaseArrayOfDoublesSetOperationsTest.java | 8 + ...DirectArrayOfDoublesCompactSketchTest.java | 46 +- ...ctArrayOfDoublesQuickSelectSketchTest.java | 37 +- .../HeapArrayOfDoublesCompactSketchTest.java | 24 +- ...apArrayOfDoublesQuickSelectSketchTest.java | 14 +- .../strings/ArrayOfStringsSketchTest.java | 10 +- .../strings/ArrayOfStringsSummaryTest.java | 17 +- .../CompactSketchWithDoubleSummaryTest.java | 189 - .../datasketches/tuple2/IntegerSummary.java | 81 - .../tuple2/IntegerSummaryDeserializer.java | 31 - .../tuple2/IntegerSummaryFactory.java | 32 - .../tuple2/JaccardSimilarityTest.java | 457 -- .../apache/datasketches/tuple2/MiscTest.java | 95 - .../tuple2/ReadOnlyMemorySegmentTest.java | 122 - .../tuple2/SerializerDeserializerTest.java | 61 - .../tuple2/TupleCrossLanguageTest.java | 126 - .../tuple2/TupleExamples2Test.java | 287 - .../tuple2/TupleExamplesTest.java | 191 - .../tuple2/adouble/AdoubleAnotBTest.java | 299 - .../adouble/AdoubleIntersectionTest.java | 305 - .../tuple2/adouble/AdoubleTest.java | 421 -- .../tuple2/adouble/AdoubleUnionTest.java | 173 - .../tuple2/adouble/FilterTest.java | 152 - .../CornerCaseTupleSetOperationsTest.java | 630 -- .../tuple2/aninteger/EngagementTest.java | 143 - .../tuple2/aninteger/IntegerSketchTest.java | 140 - 
.../aninteger/MikhailsBugTupleTest.java | 74 - .../aninteger/ParameterLeakageTest.java | 180 - .../AodSketchCrossLanguageTest.java | 118 - .../ArrayOfDoublesAnotBTest.java | 323 - .../ArrayOfDoublesCompactSketchTest.java | 139 - .../ArrayOfDoublesIntersectionTest.java | 311 - .../ArrayOfDoublesQuickSelectSketchTest.java | 164 - .../ArrayOfDoublesUnionTest.java | 513 -- ...erCaseArrayOfDoublesSetOperationsTest.java | 581 -- ...DirectArrayOfDoublesCompactSketchTest.java | 133 - ...ctArrayOfDoublesQuickSelectSketchTest.java | 281 - .../HeapArrayOfDoublesCompactSketchTest.java | 137 - ...apArrayOfDoublesQuickSelectSketchTest.java | 244 - .../strings/ArrayOfStringsSketchTest.java | 134 - .../strings/ArrayOfStringsSummaryTest.java | 98 - 351 files changed, 4620 insertions(+), 55082 deletions(-) delete mode 100644 src/main/java/org/apache/datasketches/fdt2/FdtSketch.java delete mode 100644 src/main/java/org/apache/datasketches/fdt2/Group.java delete mode 100644 src/main/java/org/apache/datasketches/fdt2/PostProcessor.java delete mode 100644 src/main/java/org/apache/datasketches/fdt2/package-info.java delete mode 100644 src/main/java/org/apache/datasketches/theta/MemoryCompactCompressedHashIterator.java delete mode 100644 src/main/java/org/apache/datasketches/theta/MemoryHashIterator.java rename src/main/java/org/apache/datasketches/{theta2 => theta}/MemorySegmentCompactCompressedHashIterator.java (96%) rename src/main/java/org/apache/datasketches/{theta2 => theta}/MemorySegmentHashIterator.java (97%) delete mode 100644 src/main/java/org/apache/datasketches/theta2/AnotB.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/AnotBimpl.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/BitPacking.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/BytesCompactCompressedHashIterator.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/BytesCompactHashIterator.java delete mode 100644 
src/main/java/org/apache/datasketches/theta2/CompactOperations.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/CompactSketch.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/ConcurrentBackgroundThetaPropagation.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketch.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketch.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/ConcurrentPropagationService.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/EmptyCompactSketch.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/HashIterator.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/HeapCompactHashIterator.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/HeapHashIterator.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/HeapUpdateSketch.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/Intersection.java 
delete mode 100644 src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/PreambleUtil.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/Rebuilder.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/SetOperation.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/Sketch.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/Sketches.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/Union.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/UnionImpl.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/UpdateReturnState.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/UpdateSketch.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java delete mode 100644 src/main/java/org/apache/datasketches/theta2/package-info.java delete mode 100644 src/main/java/org/apache/datasketches/thetacommon2/BinomialBoundsN.java delete mode 100644 src/main/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInThetaSketchedSets.java delete mode 100644 src/main/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInTupleSketchedSets.java delete mode 100644 src/main/java/org/apache/datasketches/thetacommon2/EquivTables.java delete mode 100644 src/main/java/org/apache/datasketches/thetacommon2/HashOperations.java delete mode 100644 
src/main/java/org/apache/datasketches/thetacommon2/SetOperationCornerCases.java delete mode 100644 src/main/java/org/apache/datasketches/thetacommon2/ThetaUtil.java delete mode 100644 src/main/java/org/apache/datasketches/thetacommon2/package-info.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/AnotB.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/CompactSketch.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/DeserializeResult.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/Filter.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/HashTables.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/Intersection.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/JaccardSimilarity.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/QuickSelectSketch.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/SerializerDeserializer.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/Sketch.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/Sketches.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/Summary.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/SummaryDeserializer.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/SummaryFactory.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/SummarySetOperations.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/TupleSketchIterator.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/Union.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/UpdatableSketch.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/UpdatableSketchBuilder.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/UpdatableSummary.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/Util.java 
delete mode 100644 src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSketch.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummary.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryDeserializer.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryFactory.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummarySetOperations.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/adouble/package-info.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSketch.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummary.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryDeserializer.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryFactory.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummarySetOperations.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/aninteger/package-info.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotB.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBImpl.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCombiner.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketch.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersection.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java delete mode 100644 
src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketch.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketchIterator.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketches.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnion.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesIntersection.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchR.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnion.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnionR.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HashTables.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesIntersection.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java delete mode 100644 
src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesSketchIterator.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesUnion.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/package-info.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/package-info.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSketch.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummary.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryDeserializer.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryFactory.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummarySetOperations.java delete mode 100644 src/main/java/org/apache/datasketches/tuple2/strings/package-info.java delete mode 100644 src/test/java/org/apache/datasketches/fdt2/FdtSketchTest.java delete mode 100644 src/test/java/org/apache/datasketches/fdt2/GroupTest.java rename src/test/java/org/apache/datasketches/{theta2 => theta}/ReadOnlyMemorySegmentTest.java (95%) delete mode 100644 src/test/java/org/apache/datasketches/theta/ReadOnlyMemoryTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/AnotBimplTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/BackwardConversions.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/BitPackingTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/CompactSketchTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketchTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketchTest.java delete mode 100644 
src/test/java/org/apache/datasketches/theta2/CornerCaseThetaSetOperationsTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/DirectIntersectionTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/DirectQuickSelectSketchTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/DirectUnionTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/EmptyTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/ExamplesTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/ForwardCompatibilityTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/HeapAlphaSketchTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/HeapIntersectionTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/HeapQuickSelectSketchTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/HeapUnionTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/HeapifyWrapSerVer1and2Test.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/IteratorTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/JaccardSimilarityTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/PairwiseSetOperationsTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/PreambleUtilTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/SetOperationTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/SetOpsCornerCasesTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/SingleItemSketchTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/SketchTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/SketchesTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/ThetaSketchCrossLanguageTest.java delete mode 100644 
src/test/java/org/apache/datasketches/theta2/UnionImplTest.java delete mode 100644 src/test/java/org/apache/datasketches/theta2/UpdateSketchTest.java delete mode 100644 src/test/java/org/apache/datasketches/thetacommon2/BinomialBoundsNTest.java delete mode 100644 src/test/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInThetaSketchedSetsTest.java delete mode 100644 src/test/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInTupleSketchedSetsTest.java delete mode 100644 src/test/java/org/apache/datasketches/thetacommon2/HashOperationsTest.java delete mode 100644 src/test/java/org/apache/datasketches/thetacommon2/QuickSelectTest.java delete mode 100644 src/test/java/org/apache/datasketches/thetacommon2/ThetaUtilTest.java rename src/test/java/org/apache/datasketches/tuple/{ReadOnlyMemoryTest.java => ReadOnlyMemorySegmentTest.java} (92%) delete mode 100644 src/test/java/org/apache/datasketches/tuple2/CompactSketchWithDoubleSummaryTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/IntegerSummary.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/IntegerSummaryDeserializer.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/IntegerSummaryFactory.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/JaccardSimilarityTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/MiscTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/ReadOnlyMemorySegmentTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/SerializerDeserializerTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/TupleCrossLanguageTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/TupleExamples2Test.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/TupleExamplesTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleAnotBTest.java delete mode 100644 
src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleIntersectionTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleUnionTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/adouble/FilterTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/aninteger/CornerCaseTupleSetOperationsTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/aninteger/EngagementTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/aninteger/IntegerSketchTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/aninteger/MikhailsBugTupleTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/aninteger/ParameterLeakageTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/AodSketchCrossLanguageTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersectionTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketchTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnionTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchTest.java delete mode 100644 
src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketchTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketchTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSketchTest.java delete mode 100644 src/test/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryTest.java diff --git a/src/main/java/org/apache/datasketches/fdt/FdtSketch.java b/src/main/java/org/apache/datasketches/fdt/FdtSketch.java index e4cac32a1..9dc6aecca 100644 --- a/src/main/java/org/apache/datasketches/fdt/FdtSketch.java +++ b/src/main/java/org/apache/datasketches/fdt/FdtSketch.java @@ -19,10 +19,10 @@ package org.apache.datasketches.fdt; +import java.lang.foreign.MemorySegment; import java.util.List; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; import org.apache.datasketches.thetacommon.ThetaUtil; import org.apache.datasketches.tuple.strings.ArrayOfStringsSketch; @@ -59,14 +59,14 @@ public FdtSketch(final int lgK) { /** * Used by deserialization. - * @param mem the image of a FdtSketch + * @param seg the image of a FdtSketch * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated. * This capability will be removed in a future release. * Heapifying a CompactSketch is not deprecated. */ @Deprecated - FdtSketch(final Memory mem) { - super(mem); + FdtSketch(final MemorySegment seg) { + super(seg); } /** diff --git a/src/main/java/org/apache/datasketches/fdt2/FdtSketch.java b/src/main/java/org/apache/datasketches/fdt2/FdtSketch.java deleted file mode 100644 index a4ecab83d..000000000 --- a/src/main/java/org/apache/datasketches/fdt2/FdtSketch.java +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.fdt2; - -import java.lang.foreign.MemorySegment; -import java.util.List; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon2.ThetaUtil; -import org.apache.datasketches.tuple2.strings.ArrayOfStringsSketch; - -/** - * A Frequent Distinct Tuples sketch. - * - *

              Suppose our data is a stream of pairs {IP address, User ID} and we want to identify the - * IP addresses that have the most distinct User IDs. Or conversely, we would like to identify - * the User IDs that have the most distinct IP addresses. This is a common challenge in the - * analysis of big data and the FDT sketch helps solve this problem using probabilistic techniques. - * - *

              More generally, given a multiset of tuples with dimensions {d1,d2, d3, ..., dN}, - * and a primary subset of dimensions M < N, our task is to identify the combinations of - * M subset dimensions that have the most frequent number of distinct combinations of - * the N-M non-primary dimensions. - * - *

              Please refer to the web page - * - * https://datasketches.apache.org/docs/Frequency/FrequentDistinctTuplesSketch.html for a more - * complete discussion about this sketch. - * - * @author Lee Rhodes - */ -public final class FdtSketch extends ArrayOfStringsSketch { - - /** - * Create new instance of Frequent Distinct Tuples sketch with the given - * Log-base2 of required nominal entries. - * @param lgK Log-base2 of required nominal entries. - */ - public FdtSketch(final int lgK) { - super(lgK); - } - - /** - * Used by deserialization. - * @param seg the image of a FdtSketch - * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated. - * This capability will be removed in a future release. - * Heapifying a CompactSketch is not deprecated. - */ - @Deprecated - FdtSketch(final MemorySegment seg) { - super(seg); - } - - /** - * Create a new instance of Frequent Distinct Tuples sketch with a size determined by the given - * threshold and rse. - * @param threshold : the fraction, between zero and 1.0, of the total distinct stream length - * that defines a "Frequent" (or heavy) item. - * @param rse the maximum Relative Standard Error for the estimate of the distinct population of a - * reported tuple (selected with a primary key) at the threshold. - */ - public FdtSketch(final double threshold, final double rse) { - super(computeLgK(threshold, rse)); - } - - /** - * Copy Constructor - * @param sketch the sketch to copy - */ - public FdtSketch(final FdtSketch sketch) { - super(sketch); - } - - /** - * @return a deep copy of this sketch - */ - @Override - public FdtSketch copy() { - return new FdtSketch(this); - } - - /** - * Update the sketch with the given string array tuple. - * @param tuple the given string array tuple. 
- */ - public void update(final String[] tuple) { - super.update(tuple, tuple); - } - - /** - * Returns an ordered List of Groups of the most frequent distinct population of subset tuples - * represented by the count of entries of each group. - * @param priKeyIndices these indices define the dimensions used for the Primary Keys. - * @param limit the maximum number of groups to return. If this value is ≤ 0, all - * groups will be returned. - * @param numStdDev the number of standard deviations for the upper and lower error bounds, - * this value is an integer and must be one of 1, 2, or 3. - * See Number of Standard Deviations - * @param sep the separator character - * @return an ordered List of Groups of the most frequent distinct population of subset tuples - * represented by the count of entries of each group. - */ - public List getResult(final int[] priKeyIndices, final int limit, final int numStdDev, - final char sep) { - final PostProcessor proc = new PostProcessor(this, new Group(), sep); - return proc.getGroupList(priKeyIndices, numStdDev, limit); - } - - /** - * Returns the PostProcessor that enables multiple queries against the sketch results. - * This assumes the default Group and the default separator character '|'. - * @return the PostProcessor - */ - public PostProcessor getPostProcessor() { - return getPostProcessor(new Group(), '|'); - } - - /** - * Returns the PostProcessor that enables multiple queries against the sketch results. - * @param group the Group class to use during post processing. - * @param sep the separator character. - * @return the PostProcessor - */ - public PostProcessor getPostProcessor(final Group group, final char sep) { - return new PostProcessor(this, group, sep); - } - - // Restricted - - /** - * Computes LgK given the threshold and RSE. - * @param threshold the fraction, between zero and 1.0, of the total stream length that defines - * a "Frequent" (or heavy) tuple. 
- * @param rse the maximum Relative Standard Error for the estimate of the distinct population of a - * reported tuple (selected with a primary key) at the threshold. - * @return LgK - */ - static int computeLgK(final double threshold, final double rse) { - final double v = Math.ceil(1.0 / (threshold * rse * rse)); - final int lgK = (int) Math.ceil(Math.log(v) / Math.log(2)); - if (lgK > ThetaUtil.MAX_LG_NOM_LONGS) { - throw new SketchesArgumentException("Requested Sketch (LgK = " + lgK + " > 2^26), " - + "either increase the threshold, the rse or both."); - } - return lgK; - } - -} diff --git a/src/main/java/org/apache/datasketches/fdt2/Group.java b/src/main/java/org/apache/datasketches/fdt2/Group.java deleted file mode 100644 index 7ddded1db..000000000 --- a/src/main/java/org/apache/datasketches/fdt2/Group.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.fdt2; - -/** - * Defines a Group from a Frequent Distinct Tuple query. This class is called internally during - * post processing and is not intended to be called by the user. 
- * @author Lee Rhodes - */ -public class Group implements Comparable { - private int count = 0; - private double est = 0; - private double ub = 0; - private double lb = 0; - private double fraction = 0; - private double rse = 0; - private String priKey = null; - private final static String fmt = - "%,12d" + "%,15.2f" + "%,15.2f" + "%,15.2f" + "%12.6f" + "%12.6f" + " %s"; - private final static String hfmt = - "%12s" + "%15s" + "%15s" + "%15s" + "%12s" + "%12s" + " %s"; - - /** - * Construct an empty Group - */ - public Group() { } - - /** - * Specifies the parameters to be listed as columns - * @param priKey the primary key of the FDT sketch - * @param count the number of retained rows associated with this group - * @param estimate the estimate of the original population associated with this group - * @param ub the upper bound of the estimate - * @param lb the lower bound of the estimate - * @param fraction the fraction of all retained rows of the sketch associated with this group - * @param rse the estimated Relative Standard Error for this group. 
- * @return return this - */ - public Group init(final String priKey, final int count, final double estimate, final double ub, - final double lb, final double fraction, final double rse) { - this.count = count; - est = estimate; - this.ub = ub; - this.lb = lb; - this.fraction = fraction; - this.rse = rse; - this.priKey = priKey; - return this; - } - - /** - * Gets the primary key of type String - * @return priKey of type String - */ - public String getPrimaryKey() { return priKey; } - - /** - * Returns the count - * @return the count - */ - public int getCount() { return count; } - - /** - * Returns the estimate - * @return the estimate - */ - public double getEstimate() { return est; } - - /** - * Returns the upper bound - * @return the upper bound - */ - public double getUpperBound() { return ub; } - - /** - * Returns the lower bound - * @return the lower bound - */ - public double getLowerBound() { return lb; } - - /** - * Returns the fraction for this group - * @return the fraction for this group - */ - public double getFraction() { return fraction; } - - /** - * Returns the RSE - * @return the RSE - */ - public double getRse() { return rse; } - - /** - * Returns the descriptive header - * @return the descriptive header - */ - public String getHeader() { - return String.format(hfmt,"Count", "Est", "UB", "LB", "Fraction", "RSE", "PriKey"); - } - - @Override - public String toString() { - return String.format(fmt, count, est, ub, lb, fraction, rse, priKey); - } - - @Override - public int compareTo(final Group that) { - return that.count - count; //decreasing - } - - @Override - public boolean equals(final Object that) { - if (this == that) { return true; } - if (!(that instanceof Group)) { return false; } - return ((Group)that).count == count; - } - - @Override - public int hashCode() { - return Integer.MAX_VALUE - count; //MAX_VALUE is a Double Mersenne Prime = 2^31 - 1 = M_M_5 - } - -} diff --git a/src/main/java/org/apache/datasketches/fdt2/PostProcessor.java 
b/src/main/java/org/apache/datasketches/fdt2/PostProcessor.java deleted file mode 100644 index cde4eaa21..000000000 --- a/src/main/java/org/apache/datasketches/fdt2/PostProcessor.java +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.fdt2; - -import static org.apache.datasketches.common.Util.ceilingPowerOf2; -import static org.apache.datasketches.thetacommon2.HashOperations.hashSearchOrInsert; -import static org.apache.datasketches.tuple2.Util.stringHash; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Objects; - -import org.apache.datasketches.tuple2.TupleSketchIterator; -import org.apache.datasketches.tuple2.strings.ArrayOfStringsSummary; - -/** - * This processes the contents of a FDT sketch to extract the - * primary keys with the most frequent unique combinations of the non-primary dimensions. - * The source sketch is not modified. 
- * - * @author Lee Rhodes - */ -public class PostProcessor { - private final FdtSketch sketch; - private final char sep; - private int groupCount; - @SuppressWarnings("unused") - private Group group; //uninitialized - - //simple hash-map - private boolean mapValid; - private final int mapArrSize; - private final long[] hashArr; - private final String[] priKeyArr; - private final int[] counterArr; - - /** - * Construct with a populated FdtSketch - * @param sketch the given sketch to query. - * @param group the Group - * @param sep the separator character - */ - public PostProcessor(final FdtSketch sketch, final Group group, final char sep) { - Objects.requireNonNull(sketch, "sketch must be non-null"); - Objects.requireNonNull(group, "group must be non-null"); - this.sketch = sketch.copy(); - this.sep = sep; - final int numEntries = sketch.getRetainedEntries(); - mapArrSize = ceilingPowerOf2((int)(numEntries / 0.75)); - hashArr = new long[mapArrSize]; - priKeyArr = new String[mapArrSize]; - counterArr = new int[mapArrSize]; - mapValid = false; - this.group = group; - } - - /** - * Returns the number of groups in the final sketch. - * @return the number of groups in the final sketch. - */ - public int getGroupCount() { - return groupCount; - } - - /** - * Return the most frequent Groups associated with Primary Keys based on the size of the groups. - * @param priKeyIndices the indices of the primary dimensions - * @param numStdDev the number of standard deviations for the error bounds, this value is an - * integer and must be one of 1, 2, or 3. - * See Number of Standard Deviations - * @param limit the maximum number of rows to return. If ≤ 0, all rows will be returned. - * @return the most frequent Groups associated with Primary Keys based on the size of the groups. 
- */ - public List getGroupList(final int[] priKeyIndices, final int numStdDev, - final int limit) { - //allows subsequent queries with different priKeyIndices without rebuilding the map - if (!mapValid) { populateMap(priKeyIndices); } - return populateList(numStdDev, limit); - } - - /** - * Scan each entry in the sketch. Count the number of duplicate occurrences of each - * primary key in a hash map. - * @param priKeyIndices identifies the primary key indices - */ - private void populateMap(final int[] priKeyIndices) { - final TupleSketchIterator it = sketch.iterator(); - Arrays.fill(hashArr, 0L); - Arrays.fill(priKeyArr, null); - Arrays.fill(counterArr, 0); - groupCount = 0; - final int lgMapArrSize = Integer.numberOfTrailingZeros(mapArrSize); - - while (it.next()) { - //getSummary() is not a copy, but getValue() is - final String[] arr = it.getSummary().getValue(); - final String priKey = getPrimaryKey(arr, priKeyIndices, sep); - final long hash = stringHash(priKey); - final int index = hashSearchOrInsert(hashArr, lgMapArrSize, hash); - if (index < 0) { //was empty, hash inserted - final int idx = -(index + 1); //actual index - counterArr[idx] = 1; - groupCount++; - priKeyArr[idx] = priKey; - } else { //found, duplicate - counterArr[index]++; //increment - } - } - mapValid = true; - } - - /** - * Create the list of groups along with the error statistics - * @param numStdDev number of standard deviations - * @param limit the maximum size of the list to return - * @return the list of groups along with the error statistics - */ - private List populateList(final int numStdDev, final int limit) { - final List list = new ArrayList<>(); - for (int i = 0; i < mapArrSize; i++) { - if (hashArr[i] != 0) { - final String priKey = priKeyArr[i]; - final int count = counterArr[i]; - final double est = sketch.getEstimate(count); - final double ub = sketch.getUpperBound(numStdDev, count); - final double lb = sketch.getLowerBound(numStdDev, count); - final double thresh = 
(double) count / sketch.getRetainedEntries(); - final double rse = (sketch.getUpperBound(1, count) / est) - 1.0; - final Group gp = new Group(); - gp.init(priKey, count, est, ub, lb, thresh, rse); - list.add(gp); - } - } - list.sort(null); //Comparable implemented in Group - final int totLen = list.size(); - - final List returnList; - if ((limit > 0) && (limit < totLen)) { - returnList = list.subList(0, limit); - } else { - returnList = list; - } - return returnList; - } - - /** - * Extract simple string Primary Key defined by the priKeyIndices from the given tuple. - * @param tuple the given tuple containing the Primary Key - * @param priKeyIndices the indices indicating the ordering and selection of dimensions defining - * the Primary Key - * @param sep the separator character - * @return a simple string Primary Key defined by the priKeyIndices from the given tuple. - */ - //also used by test - private static String getPrimaryKey(final String[] tuple, final int[] priKeyIndices, - final char sep) { - assert priKeyIndices.length < tuple.length; - final StringBuilder sb = new StringBuilder(); - final int keys = priKeyIndices.length; - for (int i = 0; i < keys; i++) { - final int idx = priKeyIndices[i]; - sb.append(tuple[idx]); - if ((i + 1) < keys) { sb.append(sep); } - } - return sb.toString(); - } - -} diff --git a/src/main/java/org/apache/datasketches/fdt2/package-info.java b/src/main/java/org/apache/datasketches/fdt2/package-info.java deleted file mode 100644 index 85a9d7028..000000000 --- a/src/main/java/org/apache/datasketches/fdt2/package-info.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/** - * Frequent Distinct Tuples Sketch - */ -package org.apache.datasketches.fdt2; diff --git a/src/main/java/org/apache/datasketches/theta/AnotB.java b/src/main/java/org/apache/datasketches/theta/AnotB.java index d9ab37416..cb195acd1 100644 --- a/src/main/java/org/apache/datasketches/theta/AnotB.java +++ b/src/main/java/org/apache/datasketches/theta/AnotB.java @@ -19,8 +19,9 @@ package org.apache.datasketches.theta; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Family; -import org.apache.datasketches.memory.WritableMemory; /** * Computes a set difference, A-AND-NOT-B, of two theta sketches. @@ -132,15 +133,14 @@ public Family getFamily() { * @param dstOrdered If true, the result will be an ordered {@link CompactSketch}. * See Destination Ordered. * - * @param dstMem if not null the given Memory will be the target location of the result. - * See Destination Memory. + * @param dstSeg if not null the given MemorySegment will be the target location of the result. * * @param reset If true, clears this operator to the empty state after this result is * returned. Set this to false if you wish to obtain an intermediate result. * - * @return the result of this operation as a {@link CompactSketch} in the given dstMem. + * @return the result of this operation as a {@link CompactSketch} in the given dstSeg. 
*/ - public abstract CompactSketch getResult(boolean dstOrdered, WritableMemory dstMem, boolean reset); + public abstract CompactSketch getResult(boolean dstOrdered, MemorySegment dstSeg, boolean reset); /** * Perform A-and-not-B set operation on the two given sketches and return the result as an @@ -149,7 +149,7 @@ public Family getFamily() { *

              This a stateless operation and has no impact on the internal state of this operator. * Thus, this is not an accumulating update and does not interact with the {@link #setA(Sketch)}, * {@link #notB(Sketch)}, {@link #getResult(boolean)}, or - * {@link #getResult(boolean, WritableMemory, boolean)} methods.

              + * {@link #getResult(boolean, MemorySegment, boolean)} methods.

              * *

              If either argument is null an exception is thrown.

              * @@ -177,7 +177,7 @@ public CompactSketch aNotB(final Sketch skA, final Sketch skB) { *

              This a stateless operation and has no impact on the internal state of this operator. * Thus, this is not an accumulating update and does not interact with the {@link #setA(Sketch)}, * {@link #notB(Sketch)}, {@link #getResult(boolean)}, or - * {@link #getResult(boolean, WritableMemory, boolean)} methods.

              + * {@link #getResult(boolean, MemorySegment, boolean)} methods.

              * *

              If either argument is null an exception is thrown.

              * @@ -194,11 +194,10 @@ public CompactSketch aNotB(final Sketch skA, final Sketch skB) { * @param skB The incoming sketch for the second argument. It must not be null. * @param dstOrdered * See Destination Ordered. - * @param dstMem - * See Destination Memory. + * @param dstSeg the destination MemorySegment * @return the result as a CompactSketch. */ public abstract CompactSketch aNotB(Sketch skA, Sketch skB, boolean dstOrdered, - WritableMemory dstMem); + MemorySegment dstSeg); } diff --git a/src/main/java/org/apache/datasketches/theta/AnotBimpl.java b/src/main/java/org/apache/datasketches/theta/AnotBimpl.java index d3075bb13..07904f0ad 100644 --- a/src/main/java/org/apache/datasketches/theta/AnotBimpl.java +++ b/src/main/java/org/apache/datasketches/theta/AnotBimpl.java @@ -26,11 +26,11 @@ import static org.apache.datasketches.thetacommon.HashOperations.hashSearchOrInsert; import static org.apache.datasketches.thetacommon.HashOperations.minLgHashTableSize; +import java.lang.foreign.MemorySegment; import java.util.Arrays; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.ThetaUtil; /** @@ -104,17 +104,17 @@ public CompactSketch getResult(final boolean reset) { } @Override - public CompactSketch getResult(final boolean dstOrdered, final WritableMemory dstMem, + public CompactSketch getResult(final boolean dstOrdered, final MemorySegment dstSeg, final boolean reset) { final CompactSketch result = CompactOperations.componentsToCompact( - thetaLong_, curCount_, seedHash_, empty_, true, false, dstOrdered, dstMem, hashArr_.clone()); + thetaLong_, curCount_, seedHash_, empty_, true, false, dstOrdered, dstSeg, hashArr_.clone()); if (reset) { reset(); } return result; } @Override public CompactSketch aNotB(final Sketch skA, final Sketch skB, final boolean dstOrdered, - final WritableMemory dstMem) { + 
final MemorySegment dstSeg) { if (skA == null || skB == null) { throw new SketchesArgumentException("Neither argument may be null"); } @@ -122,12 +122,12 @@ public CompactSketch aNotB(final Sketch skA, final Sketch skB, final boolean dst final long minThetaLong = Math.min(skA.getThetaLong(), skB.getThetaLong()); - if (skA.isEmpty()) { return skA.compact(dstOrdered, dstMem); } + if (skA.isEmpty()) { return skA.compact(dstOrdered, dstSeg); } //A is not Empty Util.checkSeedHashes(skA.getSeedHash(), seedHash_); if (skB.isEmpty()) { - return skA.compact(dstOrdered, dstMem); + return skA.compact(dstOrdered, dstSeg); } Util.checkSeedHashes(skB.getSeedHash(), seedHash_); //Both skA & skB are not empty @@ -142,7 +142,7 @@ public CompactSketch aNotB(final Sketch skA, final Sketch skB, final boolean dst final boolean empty = countOut == 0 && minThetaLong == Long.MAX_VALUE; final CompactSketch result = CompactOperations.componentsToCompact( - minThetaLong, countOut, seedHash_, empty, true, false, dstOrdered, dstMem, hashArrOut); + minThetaLong, countOut, seedHash_, empty, true, false, dstOrdered, dstSeg, hashArrOut); return result; } @@ -234,6 +234,15 @@ long getThetaLong() { return thetaLong_; } + @Override + public boolean hasMemorySegment() { return false; } + + @Override + public boolean isDirect() { return false; } + + @Override + public boolean isSameResource( final MemorySegment that) { return false; } + @Override boolean isEmpty() { return empty_; diff --git a/src/main/java/org/apache/datasketches/theta/BitPacking.java b/src/main/java/org/apache/datasketches/theta/BitPacking.java index 99bcfb105..cd7dfe1c9 100644 --- a/src/main/java/org/apache/datasketches/theta/BitPacking.java +++ b/src/main/java/org/apache/datasketches/theta/BitPacking.java @@ -24,7 +24,9 @@ /** * Used as part of Theta compression. 
*/ -public class BitPacking { +public final class BitPacking { + + private BitPacking() { } /** * The bit packing operation diff --git a/src/main/java/org/apache/datasketches/theta/BytesCompactCompressedHashIterator.java b/src/main/java/org/apache/datasketches/theta/BytesCompactCompressedHashIterator.java index 97792da26..5b8e97be7 100644 --- a/src/main/java/org/apache/datasketches/theta/BytesCompactCompressedHashIterator.java +++ b/src/main/java/org/apache/datasketches/theta/BytesCompactCompressedHashIterator.java @@ -22,7 +22,7 @@ /* * This is to uncompress serial version 4 sketch incrementally */ -class BytesCompactCompressedHashIterator implements HashIterator { +final class BytesCompactCompressedHashIterator implements HashIterator { private byte[] bytes; private int offset; private int entryBits; diff --git a/src/main/java/org/apache/datasketches/theta/BytesCompactHashIterator.java b/src/main/java/org/apache/datasketches/theta/BytesCompactHashIterator.java index 20e21da11..cd880de17 100644 --- a/src/main/java/org/apache/datasketches/theta/BytesCompactHashIterator.java +++ b/src/main/java/org/apache/datasketches/theta/BytesCompactHashIterator.java @@ -24,7 +24,7 @@ /* * This is to iterate over serial version 3 sketch representation */ -class BytesCompactHashIterator implements HashIterator { +final class BytesCompactHashIterator implements HashIterator { final private byte[] bytes; final private int offset; final private int numEntries; diff --git a/src/main/java/org/apache/datasketches/theta/CompactOperations.java b/src/main/java/org/apache/datasketches/theta/CompactOperations.java index 2b52f59fa..4ab7fdb31 100644 --- a/src/main/java/org/apache/datasketches/theta/CompactOperations.java +++ b/src/main/java/org/apache/datasketches/theta/CompactOperations.java @@ -19,6 +19,9 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static 
java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; import static org.apache.datasketches.theta.PreambleUtil.COMPACT_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.EMPTY_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.LG_NOM_LONGS_BYTE; @@ -43,13 +46,12 @@ import static org.apache.datasketches.theta.PreambleUtil.insertSerVer; import static org.apache.datasketches.theta.PreambleUtil.insertThetaLong; +import java.lang.foreign.MemorySegment; import java.util.Arrays; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; /** * @author Lee Rhodes @@ -66,10 +68,10 @@ static CompactSketch componentsToCompact( //No error checking final boolean srcCompact, final boolean srcOrdered, final boolean dstOrdered, - final WritableMemory dstMem, + final MemorySegment dstWSeg, final long[] hashArr) //may not be compacted, ordered or unordered, may be null { - final boolean direct = dstMem != null; + final boolean direct = dstWSeg != null; final boolean empty = srcEmpty || ((curCount == 0) && (thetaLong == Long.MAX_VALUE)); final boolean single = (curCount == 1) && (thetaLong == Long.MAX_VALUE); final long[] hashArrOut; @@ -90,9 +92,9 @@ static CompactSketch componentsToCompact( //No error checking flags |= dstOrderedOut ? ORDERED_FLAG_MASK : 0; flags |= single ? 
SINGLEITEM_FLAG_MASK : 0; - final Memory mem = - loadCompactMemory(hashArrOut, seedHash, curCount, thetaLong, dstMem, (byte)flags, preLongs); - return new DirectCompactSketch(mem); + final MemorySegment seg = + loadCompactMemorySegment(hashArrOut, seedHash, curCount, thetaLong, dstWSeg, (byte)flags, preLongs); + return new DirectCompactSketch(seg); } else { //Heap if (empty) { @@ -106,26 +108,26 @@ static CompactSketch componentsToCompact( //No error checking } /** - * Heapify or convert a source Theta Sketch Memory image into a heap or target Memory CompactSketch. + * Heapify or convert a source Theta Sketch MemorySegment image into a heap or target MemorySegment CompactSketch. * This assumes hashSeed is OK; serVer = 3. - * @param srcMem the given input source Memory image + * @param srcSeg the given input source MemorySegment image. Can be Read Only. * @param dstOrdered the desired ordering of the resulting CompactSketch - * @param dstMem Used for the target CompactSketch if it is Memory-based. + * @param dstWSeg Used for the target CompactSketch if it is MemorySegment-based. Must be Writable. * @return a CompactSketch of the correct form. 
*/ @SuppressWarnings("unused") - static CompactSketch memoryToCompact( - final Memory srcMem, + static CompactSketch segmentToCompact( + final MemorySegment srcSeg, final boolean dstOrdered, - final WritableMemory dstMem) + final MemorySegment dstWSeg) { //extract Pre0 fields and Flags from srcMem - final int srcPreLongs = extractPreLongs(srcMem); - final int srcSerVer = extractSerVer(srcMem); //not used - final int srcFamId = extractFamilyID(srcMem); - final int srcLgArrLongs = extractLgArrLongs(srcMem); - final int srcFlags = extractFlags(srcMem); - final short srcSeedHash = (short) extractSeedHash(srcMem); + final int srcPreLongs = extractPreLongs(srcSeg); + final int srcSerVer = extractSerVer(srcSeg); //not used + final int srcFamId = extractFamilyID(srcSeg); + final int srcLgArrLongs = extractLgArrLongs(srcSeg); + final int srcFlags = extractFlags(srcSeg); + final short srcSeedHash = (short) extractSeedHash(srcSeg); //srcFlags final boolean srcReadOnlyFlag = (srcFlags & READ_ONLY_FLAG_MASK) > 0; @@ -138,8 +140,8 @@ static CompactSketch memoryToCompact( || SingleItemSketch.otherCheckForSingleItem(srcPreLongs, srcSerVer, srcFamId, srcFlags); //extract pre1 and pre2 fields - final int curCount = single ? 1 : (srcPreLongs > 1) ? extractCurCount(srcMem) : 0; - final long thetaLong = (srcPreLongs > 2) ? extractThetaLong(srcMem) : Long.MAX_VALUE; + final int curCount = single ? 1 : (srcPreLongs > 1) ? extractCurCount(srcSeg) : 0; + final long thetaLong = (srcPreLongs > 2) ? extractThetaLong(srcSeg) : Long.MAX_VALUE; //do some basic checks ... if (srcEmptyFlag) { assert (curCount == 0) && (thetaLong == Long.MAX_VALUE); } @@ -150,19 +152,19 @@ static CompactSketch memoryToCompact( //Note: for empty and single we always output the ordered form. final boolean dstOrderedOut = (srcEmptyFlag || single) ? 
true : dstOrdered; if (srcEmptyFlag) { - if (dstMem != null) { - dstMem.putByteArray(0, EmptyCompactSketch.EMPTY_COMPACT_SKETCH_ARR, 0, 8); - return new DirectCompactSketch(dstMem); + if (dstWSeg != null) { + MemorySegment.copy(EmptyCompactSketch.EMPTY_COMPACT_SKETCH_ARR, 0, dstWSeg, JAVA_BYTE, 0, 8); + return new DirectCompactSketch(dstWSeg); } else { return EmptyCompactSketch.getInstance(); } } if (single) { - final long hash = srcMem.getLong(srcPreLongs << 3); + final long hash = srcSeg.get(JAVA_LONG_UNALIGNED, srcPreLongs << 3); final SingleItemSketch sis = new SingleItemSketch(hash, srcSeedHash); - if (dstMem != null) { - dstMem.putByteArray(0, sis.toByteArray(), 0, 16); - return new DirectCompactSketch(dstMem); + if (dstWSeg != null) { + MemorySegment.copy(sis.toByteArray(), 0, dstWSeg, JAVA_BYTE, 0, 16); + return new DirectCompactSketch(dstWSeg); } else { //heap return sis; } @@ -172,11 +174,11 @@ static CompactSketch memoryToCompact( final long[] hashArr; if (srcCompactFlag) { hashArr = new long[curCount]; - srcMem.getLongArray(srcPreLongs << 3, hashArr, 0, curCount); + MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, srcPreLongs << 3, hashArr, 0, curCount); } else { //update sketch, thus hashTable form final int srcCacheLen = 1 << srcLgArrLongs; final long[] tempHashArr = new long[srcCacheLen]; - srcMem.getLongArray(srcPreLongs << 3, tempHashArr, 0, srcCacheLen); + MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, srcPreLongs << 3, tempHashArr, 0, srcCacheLen); hashArr = compactCache(tempHashArr, curCount, thetaLong, dstOrderedOut); } @@ -184,10 +186,10 @@ static CompactSketch memoryToCompact( | ((dstOrderedOut) ? ORDERED_FLAG_MASK : 0); //load the destination. 
- if (dstMem != null) { - final Memory tgtMem = loadCompactMemory(hashArr, srcSeedHash, curCount, thetaLong, dstMem, + if (dstWSeg != null) { + final MemorySegment tgtSeg = loadCompactMemorySegment(hashArr, srcSeedHash, curCount, thetaLong, dstWSeg, (byte)flagsOut, srcPreLongs); - return new DirectCompactSketch(tgtMem); + return new DirectCompactSketch(tgtSeg); } else { //heap return new HeapCompactSketch(hashArr, srcEmptyFlag, srcSeedHash, curCount, thetaLong, dstOrderedOut); @@ -213,27 +215,28 @@ private static final void checkFamilyAndFlags( } //All arguments must be valid and correct including flags. - // Used as helper to create byte arrays as well as loading Memory for direct compact sketches - static final Memory loadCompactMemory( + // Used as helper to create byte arrays as well as loading MemorySegment for direct compact sketches + //Input must be writable, return can be Read Only + static final MemorySegment loadCompactMemorySegment( final long[] compactHashArr, final short seedHash, final int curCount, final long thetaLong, - final WritableMemory dstMem, + final MemorySegment dstWSeg, final byte flags, final int preLongs) { - assert (dstMem != null) && (compactHashArr != null); + assert (dstWSeg != null) && (compactHashArr != null); final int outLongs = preLongs + curCount; final int outBytes = outLongs << 3; - final int dstBytes = (int) dstMem.getCapacity(); + final int dstBytes = (int) dstWSeg.byteSize(); if (outBytes > dstBytes) { - throw new SketchesArgumentException("Insufficient Memory: " + dstBytes + throw new SketchesArgumentException("Insufficient Space in MemorySegment: " + dstBytes + ", Need: " + outBytes); } final byte famID = (byte) Family.COMPACT.getID(); - //Caution: The following loads directly into Memory without creating a heap byte[] first, + //Caution: The following loads directly into a MemorySegment without creating a heap byte[] first, // which would act as a pre-clearing, initialization mechanism. 
So it is important to make sure // that all fields are initialized, even those that are not used by the CompactSketch. // Otherwise, uninitialized fields could be filled with off-heap garbage, which could cause @@ -241,30 +244,31 @@ static final Memory loadCompactMemory( // As written below, all fields are initialized avoiding an extra copy. //The first 8 bytes (pre0) - insertPreLongs(dstMem, preLongs); //RF not used = 0 - insertSerVer(dstMem, SER_VER); - insertFamilyID(dstMem, famID); + insertPreLongs(dstWSeg, preLongs); //RF not used = 0 + insertSerVer(dstWSeg, SER_VER); + insertFamilyID(dstWSeg, famID); //The following initializes the lgNomLongs and lgArrLongs to 0. //They are not used in CompactSketches. - dstMem.putShort(LG_NOM_LONGS_BYTE, (short)0); - insertFlags(dstMem, flags); - insertSeedHash(dstMem, seedHash); + dstWSeg.set(JAVA_SHORT_UNALIGNED, LG_NOM_LONGS_BYTE, (short)0); + insertFlags(dstWSeg, flags); + insertSeedHash(dstWSeg, seedHash); if ((preLongs == 1) && (curCount == 1)) { //singleItem, theta = 1.0 - dstMem.putLong(8, compactHashArr[0]); - return dstMem; + dstWSeg.set(JAVA_LONG_UNALIGNED, 8, compactHashArr[0]); + return dstWSeg; } if (preLongs > 1) { - insertCurCount(dstMem, curCount); - insertP(dstMem, (float) 1.0); + insertCurCount(dstWSeg, curCount); + insertP(dstWSeg, (float) 1.0); } if (preLongs > 2) { - insertThetaLong(dstMem, thetaLong); + insertThetaLong(dstWSeg, thetaLong); } if (curCount > 0) { //theta could be < 1.0. - dstMem.putLongArray(preLongs << 3, compactHashArr, 0, curCount); + //dstWSeg.putLongArray(preLongs << 3, compactHashArr, 0, curCount); + MemorySegment.copy(compactHashArr, 0, dstWSeg, JAVA_LONG_UNALIGNED, preLongs << 3, curCount); } - return dstMem; //if prelongs == 3 & curCount == 0, theta could be < 1.0. + return dstWSeg; //if prelongs == 3 & curCount == 0, theta could be < 1.0. 
This can be RO } /** diff --git a/src/main/java/org/apache/datasketches/theta/CompactSketch.java b/src/main/java/org/apache/datasketches/theta/CompactSketch.java index 4079f22f2..413023042 100644 --- a/src/main/java/org/apache/datasketches/theta/CompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta/CompactSketch.java @@ -19,6 +19,9 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; import static org.apache.datasketches.common.ByteArrayUtil.getShortLE; import static org.apache.datasketches.common.Family.idToFamily; import static org.apache.datasketches.theta.PreambleUtil.COMPACT_FLAG_MASK; @@ -28,22 +31,22 @@ import static org.apache.datasketches.theta.PreambleUtil.PREAMBLE_LONGS_BYTE; import static org.apache.datasketches.theta.PreambleUtil.READ_ONLY_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.SEED_HASH_SHORT; +import static org.apache.datasketches.theta.PreambleUtil.extractEntryBitsV4; import static org.apache.datasketches.theta.PreambleUtil.extractFamilyID; import static org.apache.datasketches.theta.PreambleUtil.extractFlags; +import static org.apache.datasketches.theta.PreambleUtil.extractNumEntriesBytesV4; import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs; import static org.apache.datasketches.theta.PreambleUtil.extractSeedHash; import static org.apache.datasketches.theta.PreambleUtil.extractSerVer; -import static org.apache.datasketches.theta.PreambleUtil.extractEntryBitsV4; -import static org.apache.datasketches.theta.PreambleUtil.extractNumEntriesBytesV4; import static org.apache.datasketches.theta.PreambleUtil.extractThetaLongV4; import static org.apache.datasketches.theta.PreambleUtil.wholeBytesToHoldBits; import static org.apache.datasketches.theta.SingleItemSketch.otherCheckForSingleItem; +import 
java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; /** * The parent class of all the CompactSketches. CompactSketches are never created directly. @@ -61,9 +64,9 @@ public abstract class CompactSketch extends Sketch { /** - * Heapify takes a CompactSketch image in Memory and instantiates an on-heap CompactSketch. + * Heapify takes a CompactSketch image in a MemorySegment and instantiates an on-heap CompactSketch. * - *

              The resulting sketch will not retain any link to the source Memory and all of its data will be + *

              The resulting sketch will not retain any link to the source MemorySegment and all of its data will be * copied to the heap CompactSketch.

              * *

              This method assumes that the sketch image was created with the correct hash seed, so it is not checked. @@ -71,65 +74,63 @@ public abstract class CompactSketch extends Sketch { * However, Serial Version 1 sketch images do not have a seedHash field, * so the resulting heapified CompactSketch will be given the hash of the DEFAULT_UPDATE_SEED.

              * - * @param srcMem an image of a CompactSketch. - * See Memory. + * @param srcSeg an image of a CompactSketch. * @return a CompactSketch on the heap. */ - public static CompactSketch heapify(final Memory srcMem) { - return heapify(srcMem, Util.DEFAULT_UPDATE_SEED, false); + public static CompactSketch heapify(final MemorySegment srcSeg) { + return heapify(srcSeg, Util.DEFAULT_UPDATE_SEED, false); } /** - * Heapify takes a CompactSketch image in Memory and instantiates an on-heap CompactSketch. + * Heapify takes a CompactSketch image in a MemorySegment and instantiates an on-heap CompactSketch. * - *

              The resulting sketch will not retain any link to the source Memory and all of its data will be + *

              The resulting sketch will not retain any link to the source MemorySegment and all of its data will be * copied to the heap CompactSketch.

              * - *

              This method checks if the given expectedSeed was used to create the source Memory image. + *

              This method checks if the given expectedSeed was used to create the source MemorySegment image. * However, SerialVersion 1 sketch images cannot be checked as they don't have a seedHash field, * so the resulting heapified CompactSketch will be given the hash of the expectedSeed.

              * - * @param srcMem an image of a CompactSketch that was created using the given expectedSeed. - * See Memory. - * @param expectedSeed the seed used to validate the given Memory image. + * @param srcSeg an image of a CompactSketch that was created using the given expectedSeed. + * @param expectedSeed the seed used to validate the given MemorySegment image. * See Update Hash Seed. * @return a CompactSketch on the heap. */ - public static CompactSketch heapify(final Memory srcMem, final long expectedSeed) { - return heapify(srcMem, expectedSeed, true); + public static CompactSketch heapify(final MemorySegment srcSeg, final long expectedSeed) { + return heapify(srcSeg, expectedSeed, true); } - private static CompactSketch heapify(final Memory srcMem, final long seed, final boolean enforceSeed) { - final int serVer = extractSerVer(srcMem); - final int familyID = extractFamilyID(srcMem); + private static CompactSketch heapify(final MemorySegment srcSeg, final long seed, final boolean enforceSeed) { + final int serVer = extractSerVer(srcSeg); + final int familyID = extractFamilyID(srcSeg); final Family family = idToFamily(familyID); if (family != Family.COMPACT) { throw new IllegalArgumentException("Corrupted: " + family + " is not Compact!"); } if (serVer == 4) { - return heapifyV4(srcMem, seed, enforceSeed); + return heapifyV4(srcSeg, seed, enforceSeed); } if (serVer == 3) { - final int flags = extractFlags(srcMem); + final int flags = extractFlags(srcSeg); final boolean srcOrdered = (flags & ORDERED_FLAG_MASK) != 0; final boolean empty = (flags & EMPTY_FLAG_MASK) != 0; - if (enforceSeed && !empty) { PreambleUtil.checkMemorySeedHash(srcMem, seed); } - return CompactOperations.memoryToCompact(srcMem, srcOrdered, null); + if (enforceSeed && !empty) { PreambleUtil.checkSegmentSeedHash(srcSeg, seed); } + return CompactOperations.segmentToCompact(srcSeg, srcOrdered, null); } //not SerVer 3, assume compact stored form final short seedHash = 
Util.computeSeedHash(seed); if (serVer == 1) { - return ForwardCompatibility.heapify1to3(srcMem, seedHash); + return ForwardCompatibility.heapify1to3(srcSeg, seedHash); } if (serVer == 2) { - return ForwardCompatibility.heapify2to3(srcMem, - enforceSeed ? seedHash : (short) extractSeedHash(srcMem)); + return ForwardCompatibility.heapify2to3(srcSeg, + enforceSeed ? seedHash : (short) extractSeedHash(srcSeg)); } throw new SketchesArgumentException("Unknown Serialization Version: " + serVer); } /** - * Wrap takes the CompactSketch image in given Memory and refers to it directly. + * Wrap takes the CompactSketch image in given MemorySegment and refers to it directly. * There is no data copying onto the java heap. * The wrap operation enables fast read-only merging and access to all the public read-only API. * @@ -140,22 +141,21 @@ private static CompactSketch heapify(final Memory srcMem, final long seed, final * *

              Wrapping any subclass of this class that is empty or contains only a single item will * result in heapified forms of empty and single item sketch respectively. - * This is actually faster and consumes less overall memory.

              + * This is actually faster and consumes less overall space.

              * *

              This method assumes that the sketch image was created with the correct hash seed, so it is not checked. * However, Serial Version 1 sketch images do not have a seedHash field, * so the resulting on-heap CompactSketch will be given the hash of the DEFAULT_UPDATE_SEED.

              * - * @param srcMem an image of a Sketch. - * See Memory. - * @return a CompactSketch backed by the given Memory except as above. + * @param srcSeg an image of a Sketch. + * @return a CompactSketch backed by the given MemorySegment except as above. */ - public static CompactSketch wrap(final Memory srcMem) { - return wrap(srcMem, Util.DEFAULT_UPDATE_SEED, false); + public static CompactSketch wrap(final MemorySegment srcSeg) { + return wrap(srcSeg, Util.DEFAULT_UPDATE_SEED, false); } /** - * Wrap takes the sketch image in the given Memory and refers to it directly. + * Wrap takes the sketch image in the given MemorySegment and refers to it directly. * There is no data copying onto the java heap. * The wrap operation enables fast read-only merging and access to all the public read-only API. * @@ -166,25 +166,24 @@ public static CompactSketch wrap(final Memory srcMem) { * *

              Wrapping any subclass of this class that is empty or contains only a single item will * result in heapified forms of empty and single item sketch respectively. - * This is actually faster and consumes less overall memory.

              + * This is actually faster and consumes less overall space.

              * - *

              This method checks if the given expectedSeed was used to create the source Memory image. + *

              This method checks if the given expectedSeed was used to create the source MemorySegment image. * However, SerialVersion 1 sketches cannot be checked as they don't have a seedHash field, * so the resulting heapified CompactSketch will be given the hash of the expectedSeed.

              * - * @param srcMem an image of a Sketch that was created using the given expectedSeed. - * See Memory - * @param expectedSeed the seed used to validate the given Memory image. + * @param srcSeg an image of a Sketch that was created using the given expectedSeed. + * @param expectedSeed the seed used to validate the given MemorySegment image. * See Update Hash Seed. - * @return a CompactSketch backed by the given Memory except as above. + * @return a CompactSketch backed by the given MemorySegment except as above. */ - public static CompactSketch wrap(final Memory srcMem, final long expectedSeed) { - return wrap(srcMem, expectedSeed, true); + public static CompactSketch wrap(final MemorySegment srcSeg, final long expectedSeed) { + return wrap(srcSeg, expectedSeed, true); } - private static CompactSketch wrap(final Memory srcMem, final long seed, final boolean enforceSeed) { - final int serVer = extractSerVer(srcMem); - final int familyID = extractFamilyID(srcMem); + private static CompactSketch wrap(final MemorySegment srcSeg, final long seed, final boolean enforceSeed) { + final int serVer = extractSerVer(srcSeg); + final int familyID = extractFamilyID(srcSeg); final Family family = Family.idToFamily(familyID); if (family != Family.COMPACT) { throw new IllegalArgumentException("Corrupted: " + family + " is not Compact!"); @@ -192,18 +191,18 @@ private static CompactSketch wrap(final Memory srcMem, final long seed, final bo final short seedHash = Util.computeSeedHash(seed); if (serVer == 4) { - return DirectCompactCompressedSketch.wrapInstance(srcMem, - enforceSeed ? seedHash : (short) extractSeedHash(srcMem)); + return DirectCompactCompressedSketch.wrapInstance(srcSeg, + enforceSeed ? 
seedHash : (short) extractSeedHash(srcSeg)); } else if (serVer == 3) { - if (PreambleUtil.isEmptyFlag(srcMem)) { - return EmptyCompactSketch.getHeapInstance(srcMem); + if (PreambleUtil.isEmptyFlag(srcSeg)) { + return EmptyCompactSketch.getHeapInstance(srcSeg); } - if (otherCheckForSingleItem(srcMem)) { - return SingleItemSketch.heapify(srcMem, enforceSeed ? seedHash : (short) extractSeedHash(srcMem)); + if (otherCheckForSingleItem(srcSeg)) { + return SingleItemSketch.heapify(srcSeg, enforceSeed ? seedHash : (short) extractSeedHash(srcSeg)); } //not empty & not singleItem - final int flags = extractFlags(srcMem); + final int flags = extractFlags(srcSeg); final boolean compactFlag = (flags & COMPACT_FLAG_MASK) > 0; if (!compactFlag) { throw new SketchesArgumentException( @@ -214,22 +213,22 @@ else if (serVer == 3) { throw new SketchesArgumentException( "Corrupted: COMPACT family sketch image must have Read-Only flag set"); } - return DirectCompactSketch.wrapInstance(srcMem, - enforceSeed ? seedHash : (short) extractSeedHash(srcMem)); + return DirectCompactSketch.wrapInstance(srcSeg, + enforceSeed ? seedHash : (short) extractSeedHash(srcSeg)); } //end of serVer 3 else if (serVer == 1) { - return ForwardCompatibility.heapify1to3(srcMem, seedHash); + return ForwardCompatibility.heapify1to3(srcSeg, seedHash); } else if (serVer == 2) { - return ForwardCompatibility.heapify2to3(srcMem, - enforceSeed ? seedHash : (short) extractSeedHash(srcMem)); + return ForwardCompatibility.heapify2to3(srcSeg, + enforceSeed ? seedHash : (short) extractSeedHash(srcSeg)); } throw new SketchesArgumentException( "Corrupted: Serialization Version " + serVer + " not recognized."); } /** - * Wrap takes the sketch image in the given Memory and refers to it directly. + * Wrap takes the sketch image in the given MemorySegment and refers to it directly. * There is no data copying onto the java heap. * The wrap operation enables fast read-only merging and access to all the public read-only API. 
* @@ -240,23 +239,22 @@ else if (serVer == 2) { * *

              Wrapping any subclass of this class that is empty or contains only a single item will * result in heapified forms of empty and single item sketch respectively. - * This is actually faster and consumes less overall memory.

              + * This is actually faster and consumes less overall space.

              * - *

              This method checks if the DEFAULT_UPDATE_SEED was used to create the source Memory image. + *

              This method checks if the DEFAULT_UPDATE_SEED was used to create the source MemorySegment image. * Note that SerialVersion 1 sketches cannot be checked as they don't have a seedHash field, * so the resulting heapified CompactSketch will be given the hash of DEFAULT_UPDATE_SEED.

              * * @param bytes a byte array image of a Sketch that was created using the DEFAULT_UPDATE_SEED. - * See Memory * - * @return a CompactSketch backed by the given Memory except as above. + * @return a CompactSketch backed by the given MemorySegment except as above. */ public static CompactSketch wrap(final byte[] bytes) { return wrap(bytes, Util.DEFAULT_UPDATE_SEED, false); } /** - * Wrap takes the sketch image in the given Memory and refers to it directly. + * Wrap takes the sketch image in the given MemorySegment and refers to it directly. * There is no data copying onto the java heap. * The wrap operation enables fast read-only merging and access to all the public read-only API. * @@ -267,17 +265,16 @@ public static CompactSketch wrap(final byte[] bytes) { * *

              Wrapping any subclass of this class that is empty or contains only a single item will * result in heapified forms of empty and single item sketch respectively. - * This is actually faster and consumes less overall memory.

              + * This is actually faster and consumes less overall space.

              * - *

              This method checks if the given expectedSeed was used to create the source Memory image. + *

              This method checks if the given expectedSeed was used to create the source MemorySegment image. * Note that SerialVersion 1 sketches cannot be checked as they don't have a seedHash field, * so the resulting heapified CompactSketch will be given the hash of the expectedSeed.

              * * @param bytes a byte array image of a Sketch that was created using the given expectedSeed. - * See Memory - * @param expectedSeed the seed used to validate the given Memory image. + * @param expectedSeed the seed used to validate the given MemorySegment image. * See Update Hash Seed. - * @return a CompactSketch backed by the given Memory except as above. + * @return a CompactSketch backed by the given MemorySegment except as above. */ public static CompactSketch wrap(final byte[] bytes, final long expectedSeed) { return wrap(bytes, expectedSeed, true); @@ -296,11 +293,11 @@ private static CompactSketch wrap(final byte[] bytes, final long seed, final boo } else if (serVer == 3) { final int flags = bytes[FLAGS_BYTE]; if ((flags & EMPTY_FLAG_MASK) > 0) { - return EmptyCompactSketch.getHeapInstance(Memory.wrap(bytes)); + return EmptyCompactSketch.getHeapInstance(MemorySegment.ofArray(bytes)); } final int preLongs = bytes[PREAMBLE_LONGS_BYTE]; if (otherCheckForSingleItem(preLongs, serVer, familyId, flags)) { - return SingleItemSketch.heapify(Memory.wrap(bytes), enforceSeed ? seedHash : getShortLE(bytes, SEED_HASH_SHORT)); + return SingleItemSketch.heapify(MemorySegment.ofArray(bytes), enforceSeed ? seedHash : getShortLE(bytes, SEED_HASH_SHORT)); } //not empty & not singleItem final boolean compactFlag = (flags & COMPACT_FLAG_MASK) > 0; @@ -316,9 +313,9 @@ private static CompactSketch wrap(final byte[] bytes, final long seed, final boo return WrappedCompactSketch.wrapInstance(bytes, enforceSeed ? seedHash : getShortLE(bytes, SEED_HASH_SHORT)); } else if (serVer == 1) { - return ForwardCompatibility.heapify1to3(Memory.wrap(bytes), seedHash); + return ForwardCompatibility.heapify1to3(MemorySegment.ofArray(bytes), seedHash); } else if (serVer == 2) { - return ForwardCompatibility.heapify2to3(Memory.wrap(bytes), + return ForwardCompatibility.heapify2to3(MemorySegment.ofArray(bytes), enforceSeed ? 
seedHash : getShortLE(bytes, SEED_HASH_SHORT)); } throw new SketchesArgumentException( @@ -328,7 +325,7 @@ private static CompactSketch wrap(final byte[] bytes, final long seed, final boo //Sketch Overrides @Override - public abstract CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem); + public abstract CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg); @Override public int getCompactBytes() { @@ -345,11 +342,26 @@ public Family getFamily() { return Family.COMPACT; } + @Override + public boolean hasMemorySegment() { + return (this instanceof DirectCompactSketch && ((DirectCompactSketch)this).hasMemorySegment()); + } + @Override public boolean isCompact() { return true; } + @Override + public boolean isDirect() { + return (this instanceof DirectCompactSketch && ((DirectCompactSketch)this).isDirect()); + } + + @Override + public boolean isSameResource(final MemorySegment that) { + return (this instanceof DirectCompactSketch && ((DirectCompactSketch)this).isSameResource(that)); + } + @Override public double getEstimate() { return Sketch.estimate(getThetaLong(), getRetainedEntries()); @@ -390,23 +402,23 @@ private byte[] toByteArrayV4() { final int sizeBytes = preambleLongs * Long.BYTES + numEntriesBytes + wholeBytesToHoldBits(compressedBits); final byte[] bytes = new byte[sizeBytes]; - final WritableMemory mem = WritableMemory.writableWrap(bytes); + final MemorySegment wseg = MemorySegment.ofArray(bytes); int offsetBytes = 0; - mem.putByte(offsetBytes++, (byte) preambleLongs); - mem.putByte(offsetBytes++, (byte) 4); // to do: add constant - mem.putByte(offsetBytes++, (byte) Family.COMPACT.getID()); - mem.putByte(offsetBytes++, (byte) entryBits); - mem.putByte(offsetBytes++, (byte) numEntriesBytes); - mem.putByte(offsetBytes++, (byte) (COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | ORDERED_FLAG_MASK)); - mem.putShort(offsetBytes, getSeedHash()); + wseg.set(JAVA_BYTE, offsetBytes++, (byte) preambleLongs); + 
wseg.set(JAVA_BYTE, offsetBytes++, (byte) 4); // to do: add constant + wseg.set(JAVA_BYTE, offsetBytes++, (byte) Family.COMPACT.getID()); + wseg.set(JAVA_BYTE, offsetBytes++, (byte) entryBits); + wseg.set(JAVA_BYTE, offsetBytes++, (byte) numEntriesBytes); + wseg.set(JAVA_BYTE, offsetBytes++, (byte) (COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | ORDERED_FLAG_MASK)); + wseg.set(JAVA_SHORT_UNALIGNED, offsetBytes, getSeedHash()); offsetBytes += Short.BYTES; if (isEstimationMode()) { - mem.putLong(offsetBytes, getThetaLong()); + wseg.set(JAVA_LONG_UNALIGNED, offsetBytes, getThetaLong()); offsetBytes += Long.BYTES; } int numEntries = getRetainedEntries(); for (int i = 0; i < numEntriesBytes; i++) { - mem.putByte(offsetBytes++, (byte) (numEntries & 0xff)); + wseg.set(JAVA_BYTE, offsetBytes++, (byte) (numEntries & 0xff)); numEntries >>>= 8; } long previous = 0; @@ -434,32 +446,32 @@ private byte[] toByteArrayV4() { return bytes; } - private static CompactSketch heapifyV4(final Memory srcMem, final long seed, final boolean enforceSeed) { - final int preLongs = extractPreLongs(srcMem); - final int entryBits = extractEntryBitsV4(srcMem); - final int numEntriesBytes = extractNumEntriesBytesV4(srcMem); - final short seedHash = (short) extractSeedHash(srcMem); - if (enforceSeed) { PreambleUtil.checkMemorySeedHash(srcMem, seed); } + private static CompactSketch heapifyV4(final MemorySegment srcSeg, final long seed, final boolean enforceSeed) { + final int preLongs = extractPreLongs(srcSeg); + final int entryBits = extractEntryBitsV4(srcSeg); + final int numEntriesBytes = extractNumEntriesBytesV4(srcSeg); + final short seedHash = (short) extractSeedHash(srcSeg); + if (enforceSeed) { PreambleUtil.checkSegmentSeedHash(srcSeg, seed); } int offsetBytes = 8; long theta = Long.MAX_VALUE; if (preLongs > 1) { - theta = extractThetaLongV4(srcMem); + theta = extractThetaLongV4(srcSeg); offsetBytes += Long.BYTES; } int numEntries = 0; for (int i = 0; i < numEntriesBytes; i++) { - numEntries |= 
Byte.toUnsignedInt(srcMem.getByte(offsetBytes++)) << (i << 3); + numEntries |= Byte.toUnsignedInt(srcSeg.get(JAVA_BYTE, offsetBytes++)) << (i << 3); } final long[] entries = new long[numEntries]; final byte[] bytes = new byte[entryBits]; // temporary buffer for unpacking int i; for (i = 0; i + 7 < numEntries; i += 8) { - srcMem.getByteArray(offsetBytes, bytes, 0, entryBits); + MemorySegment.copy(srcSeg, JAVA_BYTE, offsetBytes, bytes, 0, entryBits); BitPacking.unpackBitsBlock8(entries, i, bytes, 0, entryBits); offsetBytes += entryBits; } if (i < numEntries) { - srcMem.getByteArray(offsetBytes, bytes, 0, wholeBytesToHoldBits((numEntries - i) * entryBits)); + MemorySegment.copy(srcSeg, JAVA_BYTE, offsetBytes, bytes, 0, wholeBytesToHoldBits((numEntries - i) * entryBits)); int offsetBits = 0; offsetBytes = 0; for (; i < numEntries; i++) { diff --git a/src/main/java/org/apache/datasketches/theta/ConcurrentBackgroundThetaPropagation.java b/src/main/java/org/apache/datasketches/theta/ConcurrentBackgroundThetaPropagation.java index 9ae9c9c57..c3d8bfaa5 100644 --- a/src/main/java/org/apache/datasketches/theta/ConcurrentBackgroundThetaPropagation.java +++ b/src/main/java/org/apache/datasketches/theta/ConcurrentBackgroundThetaPropagation.java @@ -29,7 +29,7 @@ * * @author eshcar */ -class ConcurrentBackgroundThetaPropagation implements Runnable { +final class ConcurrentBackgroundThetaPropagation implements Runnable { // Shared sketch to absorb the data private final ConcurrentSharedThetaSketch sharedThetaSketch; diff --git a/src/main/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketch.java index dbdedebd5..d151ffdf5 100644 --- a/src/main/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketch.java @@ -19,15 +19,16 @@ package org.apache.datasketches.theta; +import static 
java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static org.apache.datasketches.theta.PreambleUtil.THETA_LONG; +import java.lang.foreign.MemorySegment; import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SuppressFBWarnings; -import org.apache.datasketches.memory.WritableMemory; /** * A concurrent shared sketch that is based on DirectQuickSelectSketch. @@ -63,18 +64,18 @@ final class ConcurrentDirectQuickSelectSketch extends DirectQuickSelectSketch private volatile long epoch_; /** - * Construct a new sketch instance and initialize the given Memory as its backing store. + * Construct a new sketch instance and initialize the given MemorySegment as its backing store. * * @param lgNomLongs See lgNomLongs. * @param seed See Update Hash Seed. * @param maxConcurrencyError the max error value including error induced by concurrency. - * @param dstMem the given Memory object destination. It cannot be null. + * @param dstSeg the given MemorySegment object destination. It cannot be null. 
*/ ConcurrentDirectQuickSelectSketch(final int lgNomLongs, final long seed, - final double maxConcurrencyError, final WritableMemory dstMem) { + final double maxConcurrencyError, final MemorySegment dstSeg) { super(lgNomLongs, seed, 1.0F, //p ResizeFactor.X1, //rf, - null, dstMem, false); //unionGadget + dstSeg, false); //unionGadget volatileThetaLong_ = Long.MAX_VALUE; volatileEstimate_ = 0; @@ -86,11 +87,10 @@ final class ConcurrentDirectQuickSelectSketch extends DirectQuickSelectSketch } ConcurrentDirectQuickSelectSketch(final UpdateSketch sketch, final long seed, - final double maxConcurrencyError, final WritableMemory dstMem) { + final double maxConcurrencyError, final MemorySegment dstSeg) { super(sketch.getLgNomLongs(), seed, 1.0F, //p ResizeFactor.X1, //rf, - null, //mem Req Svr - dstMem, + dstSeg, false); //unionGadget exactLimit_ = ConcurrentSharedThetaSketch.computeExactLimit(1L << getLgNomLongs(), @@ -101,7 +101,7 @@ final class ConcurrentDirectQuickSelectSketch extends DirectQuickSelectSketch for (final long hashIn : sketch.getCache()) { propagate(hashIn); } - wmem_.putLong(THETA_LONG, sketch.getThetaLong()); + wseg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, sketch.getThetaLong()); updateVolatileTheta(); updateEstimationSnapshot(); } diff --git a/src/main/java/org/apache/datasketches/theta/ConcurrentHeapThetaBuffer.java b/src/main/java/org/apache/datasketches/theta/ConcurrentHeapThetaBuffer.java index e7b3ddaac..d3b706aa8 100644 --- a/src/main/java/org/apache/datasketches/theta/ConcurrentHeapThetaBuffer.java +++ b/src/main/java/org/apache/datasketches/theta/ConcurrentHeapThetaBuffer.java @@ -23,6 +23,7 @@ import static org.apache.datasketches.theta.UpdateReturnState.ConcurrentPropagated; import static org.apache.datasketches.theta.UpdateReturnState.RejectedOverTheta; +import java.lang.foreign.MemorySegment; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.datasketches.common.ResizeFactor; @@ -148,8 +149,8 @@ public double 
getUpperBound(final int numStdDev) { } @Override - public boolean hasMemory() { - return shared.hasMemory(); + public boolean hasMemorySegment() { + return shared.hasMemorySegment(); } @Override @@ -167,6 +168,11 @@ public boolean isEstimationMode() { return shared.isEstimationMode(); } + @Override + public boolean isSameResource(final MemorySegment that) { + return shared.isSameResource(that); + } + //End of proxies @Override diff --git a/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java b/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java index 1bbdcc12a..3dda0c31f 100644 --- a/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java +++ b/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java @@ -19,10 +19,10 @@ package org.apache.datasketches.theta; +import java.lang.foreign.MemorySegment; import java.util.concurrent.atomic.AtomicBoolean; -import org.apache.datasketches.common.MemoryStatus; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.common.MemorySegmentStatus; /** * An internal interface to define the API of a concurrent shared theta sketch. 
@@ -31,7 +31,7 @@ * * @author eshcar */ -interface ConcurrentSharedThetaSketch extends MemoryStatus { +interface ConcurrentSharedThetaSketch extends MemorySegmentStatus { long NOT_SINGLE_HASH = -1L; double MIN_ERROR = 0.0000001; @@ -150,7 +150,7 @@ boolean propagate(final AtomicBoolean localPropagationInProgress, final Sketch s CompactSketch compact(); - CompactSketch compact(boolean ordered, WritableMemory wmem); + CompactSketch compact(boolean ordered, MemorySegment wseg); UpdateSketch rebuild(); diff --git a/src/main/java/org/apache/datasketches/theta/DirectCompactCompressedSketch.java b/src/main/java/org/apache/datasketches/theta/DirectCompactCompressedSketch.java index 60c38afb2..2bf154215 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectCompactCompressedSketch.java +++ b/src/main/java/org/apache/datasketches/theta/DirectCompactCompressedSketch.java @@ -19,6 +19,7 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; import static org.apache.datasketches.theta.PreambleUtil.extractEntryBitsV4; import static org.apache.datasketches.theta.PreambleUtil.extractNumEntriesBytesV4; import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs; @@ -26,57 +27,57 @@ import static org.apache.datasketches.theta.PreambleUtil.extractThetaLongV4; import static org.apache.datasketches.theta.PreambleUtil.wholeBytesToHoldBits; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; /** * An off-heap (Direct), compact, compressed, read-only sketch. It is not empty, not a single item and ordered. * *

              This sketch can only be associated with a Serialization Version 4 format binary image.

              * - *

              This implementation uses data in a given Memory that is owned and managed by the caller. - * This Memory can be off-heap, which if managed properly will greatly reduce the need for + *

              This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

              */ -class DirectCompactCompressedSketch extends DirectCompactSketch { +final class DirectCompactCompressedSketch extends DirectCompactSketch { /** - * Construct this sketch with the given memory. - * @param mem Read-only Memory object. + * Construct this sketch with the given MemorySegment. + * @param seg Read-only MemorySegment object. */ - DirectCompactCompressedSketch(final Memory mem) { - super(mem); + DirectCompactCompressedSketch(final MemorySegment seg) { + super(seg); } /** - * Wraps the given Memory, which must be a SerVer 4 compressed CompactSketch image. - * Must check the validity of the Memory before calling. - * @param srcMem See Memory + * Wraps the given MemorySegment, which must be a SerVer 4 compressed CompactSketch image. + * Must check the validity of the MemorySegment before calling. + * @param srcSeg The source MemorySegment * @param seedHash The update seedHash. * See Seed Hash. * @return this sketch */ - static DirectCompactCompressedSketch wrapInstance(final Memory srcMem, final short seedHash) { - Util.checkSeedHashes((short) extractSeedHash(srcMem), seedHash); - return new DirectCompactCompressedSketch(srcMem); + static DirectCompactCompressedSketch wrapInstance(final MemorySegment srcSeg, final short seedHash) { + Util.checkSeedHashes((short) extractSeedHash(srcSeg), seedHash); + return new DirectCompactCompressedSketch(srcSeg); } //Sketch Overrides @Override - public CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem) { - if (dstMem != null) { - mem_.copyTo(0, dstMem, 0, getCurrentBytes()); - return new DirectCompactSketch(dstMem); + public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) { + if (dstSeg != null) { + MemorySegment.copy(seg_, 0, dstSeg, 0, getCurrentBytes()); + return new DirectCompactSketch(dstSeg); } - return CompactSketch.heapify(mem_); + return CompactSketch.heapify(seg_); } @Override public int getCurrentBytes() { - final int preLongs = 
extractPreLongs(mem_); - final int entryBits = extractEntryBitsV4(mem_); - final int numEntriesBytes = extractNumEntriesBytesV4(mem_); + final int preLongs = extractPreLongs(seg_); + final int entryBits = extractEntryBitsV4(seg_); + final int numEntriesBytes = extractNumEntriesBytesV4(seg_); return preLongs * Long.BYTES + numEntriesBytes + wholeBytesToHoldBits(getRetainedEntries() * entryBits); } @@ -88,20 +89,20 @@ public int getRetainedEntries(final boolean valid) { //compact is always valid // number of entries is stored using variable length encoding // most significant bytes with all zeros are not stored // one byte in the preamble has the number of non-zero bytes used - final int preLongs = extractPreLongs(mem_); // if > 1 then the second long has theta - final int numEntriesBytes = extractNumEntriesBytesV4(mem_); + final int preLongs = extractPreLongs(seg_); // if > 1 then the second long has theta + final int numEntriesBytes = extractNumEntriesBytesV4(seg_); int offsetBytes = preLongs > 1 ? START_PACKED_DATA_ESTIMATION_MODE : START_PACKED_DATA_EXACT_MODE; int numEntries = 0; for (int i = 0; i < numEntriesBytes; i++) { - numEntries |= Byte.toUnsignedInt(mem_.getByte(offsetBytes++)) << (i << 3); + numEntries |= Byte.toUnsignedInt(seg_.get(JAVA_BYTE, offsetBytes++)) << (i << 3); } return numEntries; } @Override public long getThetaLong() { - final int preLongs = extractPreLongs(mem_); - return (preLongs > 1) ? extractThetaLongV4(mem_) : Long.MAX_VALUE; + final int preLongs = extractPreLongs(seg_); + return (preLongs > 1) ? extractThetaLongV4(seg_) : Long.MAX_VALUE; } @Override @@ -116,11 +117,11 @@ public boolean isOrdered() { @Override public HashIterator iterator() { - return new MemoryCompactCompressedHashIterator( - mem_, - (extractPreLongs(mem_) > 1 ? 
START_PACKED_DATA_ESTIMATION_MODE : START_PACKED_DATA_EXACT_MODE) - + extractNumEntriesBytesV4(mem_), - extractEntryBitsV4(mem_), + return new MemorySegmentCompactCompressedHashIterator( + seg_, + (extractPreLongs(seg_) > 1 ? START_PACKED_DATA_ESTIMATION_MODE : START_PACKED_DATA_EXACT_MODE) + + extractNumEntriesBytesV4(seg_), + extractEntryBitsV4(seg_), getRetainedEntries() ); } diff --git a/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java b/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java index e2f3efc86..693329110 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java @@ -19,8 +19,10 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static org.apache.datasketches.theta.CompactOperations.checkIllegalCurCountAndEmpty; -import static org.apache.datasketches.theta.CompactOperations.memoryToCompact; +import static org.apache.datasketches.theta.CompactOperations.segmentToCompact; import static org.apache.datasketches.theta.PreambleUtil.ORDERED_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.extractCurCount; import static org.apache.datasketches.theta.PreambleUtil.extractFlags; @@ -29,9 +31,9 @@ import static org.apache.datasketches.theta.PreambleUtil.extractThetaLong; import static org.apache.datasketches.theta.SingleItemSketch.otherCheckForSingleItem; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; /** * An off-heap (Direct), compact, read-only sketch. The internal hash array can be either ordered @@ -39,78 +41,78 @@ * *

              This sketch can only be associated with a Serialization Version 3 format binary image.

              * - *

              This implementation uses data in a given Memory that is owned and managed by the caller. - * This Memory can be off-heap, which if managed properly will greatly reduce the need for + *

              This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

              * * @author Lee Rhodes */ class DirectCompactSketch extends CompactSketch { - final Memory mem_; + final MemorySegment seg_; /** - * Construct this sketch with the given memory. - * @param mem Read-only Memory object with the order bit properly set. + * Construct this sketch with the given MemorySegment. + * @param seg Read-only MemorySegment object with the order bit properly set. */ - DirectCompactSketch(final Memory mem) { - mem_ = mem; + DirectCompactSketch(final MemorySegment seg) { + seg_ = seg; } /** - * Wraps the given Memory, which must be a SerVer 3, CompactSketch image. - * Must check the validity of the Memory before calling. The order bit must be set properly. - * @param srcMem See Memory + * Wraps the given MemorySegment, which must be a SerVer 3, CompactSketch image. + * Must check the validity of the MemorySegment before calling. The order bit must be set properly. + * @param srcSeg the given MemorySegment * @param seedHash The update seedHash. * See Seed Hash. * @return this sketch */ - static DirectCompactSketch wrapInstance(final Memory srcMem, final short seedHash) { - Util.checkSeedHashes((short) extractSeedHash(srcMem), seedHash); - return new DirectCompactSketch(srcMem); + static DirectCompactSketch wrapInstance(final MemorySegment srcSeg, final short seedHash) { + Util.checkSeedHashes((short) extractSeedHash(srcSeg), seedHash); + return new DirectCompactSketch(srcSeg); } //Sketch Overrides @Override - public CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem) { - return memoryToCompact(mem_, dstOrdered, dstMem); + public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) { + return segmentToCompact(seg_, dstOrdered, dstSeg); } @Override public int getCurrentBytes() { - if (otherCheckForSingleItem(mem_)) { return 16; } - final int preLongs = extractPreLongs(mem_); - final int curCount = (preLongs == 1) ? 
0 : extractCurCount(mem_); + if (otherCheckForSingleItem(seg_)) { return 16; } + final int preLongs = extractPreLongs(seg_); + final int curCount = (preLongs == 1) ? 0 : extractCurCount(seg_); return (preLongs + curCount) << 3; } @Override public int getRetainedEntries(final boolean valid) { //compact is always valid - if (otherCheckForSingleItem(mem_)) { return 1; } - final int preLongs = extractPreLongs(mem_); - final int curCount = (preLongs == 1) ? 0 : extractCurCount(mem_); + if (otherCheckForSingleItem(seg_)) { return 1; } + final int preLongs = extractPreLongs(seg_); + final int curCount = (preLongs == 1) ? 0 : extractCurCount(seg_); return curCount; } @Override public long getThetaLong() { - final int preLongs = extractPreLongs(mem_); - return (preLongs > 2) ? extractThetaLong(mem_) : Long.MAX_VALUE; + final int preLongs = extractPreLongs(seg_); + return (preLongs > 2) ? extractThetaLong(seg_) : Long.MAX_VALUE; } @Override - public boolean hasMemory() { - return mem_ != null; + public boolean hasMemorySegment() { + return seg_ != null && seg_.scope().isAlive(); } @Override public boolean isDirect() { - return hasMemory() ? mem_.isDirect() : false; + return hasMemorySegment() && seg_.isNative(); } @Override public boolean isEmpty() { - final boolean emptyFlag = PreambleUtil.isEmptyFlag(mem_); + final boolean emptyFlag = PreambleUtil.isEmptyFlag(seg_); final long thetaLong = getThetaLong(); final int curCount = getRetainedEntries(true); return emptyFlag || ((curCount == 0) && (thetaLong == Long.MAX_VALUE)); @@ -118,17 +120,18 @@ public boolean isEmpty() { @Override public boolean isOrdered() { - return (extractFlags(mem_) & ORDERED_FLAG_MASK) > 0; + return (extractFlags(seg_) & ORDERED_FLAG_MASK) > 0; } @Override - public boolean isSameResource(final Memory that) { - return hasMemory() ? 
mem_.isSameResource(that) : false; + public boolean isSameResource(final MemorySegment that) { + return hasMemorySegment() && Util.isSameResource(seg_, that); + } @Override public HashIterator iterator() { - return new MemoryHashIterator(mem_, getRetainedEntries(true), getThetaLong()); + return new MemorySegmentHashIterator(seg_, getRetainedEntries(true), getThetaLong()); } @Override @@ -136,7 +139,7 @@ public byte[] toByteArray() { checkIllegalCurCountAndEmpty(isEmpty(), getRetainedEntries()); final int outBytes = getCurrentBytes(); final byte[] byteArrOut = new byte[outBytes]; - mem_.getByteArray(0, byteArrOut, 0, outBytes); + MemorySegment.copy(seg_, JAVA_BYTE, 0, byteArrOut, 0, outBytes); return byteArrOut; } @@ -144,12 +147,12 @@ public byte[] toByteArray() { @Override long[] getCache() { - if (otherCheckForSingleItem(mem_)) { return new long[] { mem_.getLong(8) }; } - final int preLongs = extractPreLongs(mem_); - final int curCount = (preLongs == 1) ? 0 : extractCurCount(mem_); + if (otherCheckForSingleItem(seg_)) { return new long[] { seg_.get(JAVA_LONG_UNALIGNED, 8) }; } + final int preLongs = extractPreLongs(seg_); + final int curCount = (preLongs == 1) ? 
0 : extractCurCount(seg_); if (curCount > 0) { final long[] cache = new long[curCount]; - mem_.getLongArray(preLongs << 3, cache, 0, curCount); + MemorySegment.copy(seg_, JAVA_LONG_UNALIGNED, preLongs << 3, cache, 0, curCount); return cache; } return new long[0]; @@ -157,21 +160,21 @@ long[] getCache() { @Override int getCompactPreambleLongs() { - return extractPreLongs(mem_); + return extractPreLongs(seg_); } @Override int getCurrentPreambleLongs() { - return extractPreLongs(mem_); + return extractPreLongs(seg_); } @Override - Memory getMemory() { - return mem_; + MemorySegment getMemorySegment() { + return seg_; } @Override short getSeedHash() { - return (short) extractSeedHash(mem_); + return (short) extractSeedHash(seg_); } } diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java index cd18a093d..7356d4d5f 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java @@ -19,6 +19,10 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; import static org.apache.datasketches.theta.PreambleUtil.EMPTY_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.FLAGS_BYTE; @@ -30,7 +34,7 @@ import static org.apache.datasketches.theta.PreambleUtil.extractLgArrLongs; import static org.apache.datasketches.theta.PreambleUtil.extractLgNomLongs; import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs; -import static org.apache.datasketches.theta.PreambleUtil.getMemBytes; +import static 
org.apache.datasketches.theta.PreambleUtil.getSegBytes; import static org.apache.datasketches.theta.PreambleUtil.insertCurCount; import static org.apache.datasketches.theta.PreambleUtil.insertFamilyID; import static org.apache.datasketches.theta.PreambleUtil.insertFlags; @@ -53,13 +57,12 @@ import static org.apache.datasketches.theta.UpdateReturnState.RejectedDuplicate; import static org.apache.datasketches.theta.UpdateReturnState.RejectedOverTheta; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.MemoryRequestServer; -import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.HashOperations; import org.apache.datasketches.thetacommon.ThetaUtil; @@ -67,35 +70,31 @@ * The default Theta Sketch using the QuickSelect algorithm. * This subclass implements methods, which affect the state (update, rebuild, reset) * - *

              This implementation uses data in a given Memory that is owned and managed by the caller. - * This Memory can be off-heap, which if managed properly will greatly reduce the need for + *

              This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

              * * @author Lee Rhodes * @author Kevin Lang */ class DirectQuickSelectSketch extends DirectQuickSelectSketchR { - MemoryRequestServer memReqSvr_ = null; //never serialized private DirectQuickSelectSketch( final long seed, - final WritableMemory wmem) { - super(seed, wmem); + final MemorySegment wseg) { + super(seed, wseg); } /** - * Construct a new sketch instance and initialize the given Memory as its backing store. + * Construct a new sketch instance and initialize the given MemorySegment as its backing store. * * @param lgNomLongs See lgNomLongs. * @param seed See Update Hash Seed. * @param p * See Sampling Probability, p - * @param rf Currently internally fixed at 2. Unless dstMem is not configured with a valid - * MemoryRequest, in which case the rf is effectively 1, which is no resizing at all and the - * dstMem must be large enough for a full sketch. + * @param rf Resize Factor * See Resize Factor - * @param memReqSvr the given MemoryRequestServer - * @param dstMem the given Memory object destination. It cannot be null. + * @param dstSeg the given MemorySegment object destination. It cannot be null. * It will be cleared prior to use. * @param unionGadget true if this sketch is implementing the Union gadget function. * Otherwise, it is behaving as a normal QuickSelectSketch. @@ -105,19 +104,17 @@ private DirectQuickSelectSketch( final long seed, final float p, final ResizeFactor rf, - final MemoryRequestServer memReqSvr, - final WritableMemory dstMem, + final MemorySegment dstSeg, final boolean unionGadget) { this( - checkMemSize(lgNomLongs, rf, dstMem, unionGadget), + checkSegSize(lgNomLongs, rf, dstSeg, unionGadget), //SpotBugs CT_CONSTRUCTOR_THROW is false positive. 
//this construction scheme is compliant with SEI CERT Oracle Coding Standard for Java / OBJ11-J lgNomLongs, seed, p, rf, - memReqSvr, - dstMem, + dstSeg, unionGadget); } @@ -127,10 +124,9 @@ private DirectQuickSelectSketch( final long seed, final float p, final ResizeFactor rf, - final MemoryRequestServer memReqSvr, - final WritableMemory dstMem, + final MemorySegment dstSeg, final boolean unionGadget) { - super(seed, dstMem); + super(seed, dstSeg); //Choose family, preambleLongs final Family family; final int preambleLongs; @@ -149,86 +145,83 @@ private DirectQuickSelectSketch( //@formatter:off //Build preamble - insertPreLongs(dstMem, preambleLongs); //byte 0 - insertLgResizeFactor(dstMem, lgRF); //byte 0 - insertSerVer(dstMem, SER_VER); //byte 1 - insertFamilyID(dstMem, family.getID()); //byte 2 - insertLgNomLongs(dstMem, lgNomLongs); //byte 3 - insertLgArrLongs(dstMem, lgArrLongs); //byte 4 + insertPreLongs(dstSeg, preambleLongs); //byte 0 + insertLgResizeFactor(dstSeg, lgRF); //byte 0 + insertSerVer(dstSeg, SER_VER); //byte 1 + insertFamilyID(dstSeg, family.getID()); //byte 2 + insertLgNomLongs(dstSeg, lgNomLongs); //byte 3 + insertLgArrLongs(dstSeg, lgArrLongs); //byte 4 //flags: bigEndian = readOnly = compact = ordered = false; empty = true : 00100 = 4 - insertFlags(dstMem, EMPTY_FLAG_MASK); //byte 5 - insertSeedHash(dstMem, Util.computeSeedHash(seed)); //bytes 6,7 - insertCurCount(dstMem, 0); //bytes 8-11 - insertP(dstMem, p); //bytes 12-15 + insertFlags(dstSeg, EMPTY_FLAG_MASK); //byte 5 + insertSeedHash(dstSeg, Util.computeSeedHash(seed)); //bytes 6,7 + insertCurCount(dstSeg, 0); //bytes 8-11 + insertP(dstSeg, p); //bytes 12-15 final long thetaLong = (long)(p * LONG_MAX_VALUE_AS_DOUBLE); - insertThetaLong(dstMem, thetaLong); //bytes 16-23 + insertThetaLong(dstSeg, thetaLong); //bytes 16-23 if (unionGadget) { - insertUnionThetaLong(dstMem, thetaLong); + insertUnionThetaLong(dstSeg, thetaLong); } //@formatter:on //clear hash table area - 
dstMem.clear(preambleLongs << 3, 8 << lgArrLongs); + dstSeg.asSlice(preambleLongs << 3, Long.BYTES << lgArrLongs).fill((byte)0); hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); - memReqSvr_ = memReqSvr; } - private static final boolean checkMemSize( - final int lgNomLongs, final ResizeFactor rf, final Memory dstMem, final boolean unionGadget) { + private static final boolean checkSegSize( + final int lgNomLongs, final ResizeFactor rf, final MemorySegment dstSeg, final boolean unionGadget) { final int preambleLongs = (unionGadget) ? Family.UNION.getMinPreLongs() : Family.QUICKSELECT.getMinPreLongs(); final int lgRF = rf.lg(); final int lgArrLongs = (lgRF == 0) ? lgNomLongs + 1 : ThetaUtil.MIN_LG_ARR_LONGS; - final int minReqBytes = getMemBytes(lgArrLongs, preambleLongs); - final long curMemCapBytes = dstMem.getCapacity(); - if (curMemCapBytes < minReqBytes) { + final int minReqBytes = getSegBytes(lgArrLongs, preambleLongs); + final long curSegCapBytes = dstSeg.byteSize(); + if (curSegCapBytes < minReqBytes) { throw new SketchesArgumentException( - "Memory capacity is too small: " + curMemCapBytes + " < " + minReqBytes); + "MemorySegment capacity is too small: " + curSegCapBytes + " < " + minReqBytes); } return true; } /** - * Wrap a sketch around the given source Memory containing sketch data that originated from + * Wrap a sketch around the given source MemorySegment containing sketch data that originated from * this sketch. - * @param srcMem See Memory - * The given Memory object must be in hash table form and not read only. + * @param srcSeg The given MemorySegment object must be in hash table form and not read only. 
* @param seed See Update Hash Seed * @return instance of this sketch */ - static DirectQuickSelectSketch writableWrap(final WritableMemory srcMem, final long seed) { - final int preambleLongs = extractPreLongs(srcMem); //byte 0 - final int lgNomLongs = extractLgNomLongs(srcMem); //byte 3 - final int lgArrLongs = extractLgArrLongs(srcMem); //byte 4 + static DirectQuickSelectSketch writableWrap(final MemorySegment srcSeg, final long seed) { + final int preambleLongs = extractPreLongs(srcSeg); //byte 0 + final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3 + final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4 - UpdateSketch.checkUnionQuickSelectFamily(srcMem, preambleLongs, lgNomLongs); - checkMemIntegrity(srcMem, seed, preambleLongs, lgNomLongs, lgArrLongs); + UpdateSketch.checkUnionQuickSelectFamily(srcSeg, preambleLongs, lgNomLongs); + checkSegIntegrity(srcSeg, seed, preambleLongs, lgNomLongs, lgArrLongs); - if (isResizeFactorIncorrect(srcMem, lgNomLongs, lgArrLongs)) { + if (isResizeFactorIncorrect(srcSeg, lgNomLongs, lgArrLongs)) { //If incorrect it sets it to X2 which always works. - insertLgResizeFactor(srcMem, ResizeFactor.X2.lg()); + insertLgResizeFactor(srcSeg, ResizeFactor.X2.lg()); } final DirectQuickSelectSketch dqss = - new DirectQuickSelectSketch(seed, srcMem); + new DirectQuickSelectSketch(seed, srcSeg); dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); return dqss; } /** - * Fast-wrap a sketch around the given source Memory containing sketch data that originated from - * this sketch. This does NO validity checking of the given Memory. - * @param srcMem See Memory - * The given Memory object must be in hash table form and not read only. + * Fast-wrap a sketch around the given source MemorySegment containing sketch data that originated from + * this sketch. This does NO validity checking of the given MemorySegment. + * @param srcSeg The given MemorySegment must be in hash table form and not read only. 
* @param seed See Update Hash Seed * @return instance of this sketch */ - static DirectQuickSelectSketch fastWritableWrap(final WritableMemory srcMem, final long seed) { - final int lgNomLongs = extractLgNomLongs(srcMem); //byte 3 - final int lgArrLongs = extractLgArrLongs(srcMem); //byte 4 + static DirectQuickSelectSketch fastWritableWrap(final MemorySegment srcSeg, final long seed) { + final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3 + final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4 final DirectQuickSelectSketch dqss = - new DirectQuickSelectSketch(seed, srcMem); + new DirectQuickSelectSketch(seed, srcSeg); dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); return dqss; } @@ -240,9 +233,9 @@ static DirectQuickSelectSketch fastWritableWrap(final WritableMemory srcMem, fin @Override public UpdateSketch rebuild() { final int lgNomLongs = getLgNomLongs(); - final int preambleLongs = wmem_.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; + final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; if (getRetainedEntries(true) > (1 << lgNomLongs)) { - quickSelectAndRebuild(wmem_, preambleLongs, lgNomLongs); + quickSelectAndRebuild(wseg_, preambleLongs, lgNomLongs); } return this; } @@ -254,15 +247,15 @@ public void reset() { //lgArrLongs stays the same //thetaLongs resets to p final int arrLongs = 1 << getLgArrLongs(); - final int preambleLongs = wmem_.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; + final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; final int preBytes = preambleLongs << 3; - wmem_.clear(preBytes, arrLongs * 8L); //clear data array + wseg_.asSlice(preBytes, arrLongs * 8L).fill((byte)0); //flags: bigEndian = readOnly = compact = ordered = false; empty = true. 
- wmem_.putByte(FLAGS_BYTE, (byte) EMPTY_FLAG_MASK); - wmem_.putInt(RETAINED_ENTRIES_INT, 0); - final float p = wmem_.getFloat(P_FLOAT); + wseg_.set(JAVA_BYTE, FLAGS_BYTE, (byte) EMPTY_FLAG_MASK); + wseg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, 0); + final float p = wseg_.get(JAVA_FLOAT_UNALIGNED, P_FLOAT); final long thetaLong = (long) (p * LONG_MAX_VALUE_AS_DOUBLE); - wmem_.putLong(THETA_LONG, thetaLong); + wseg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong); } //restricted methods @@ -271,7 +264,7 @@ public void reset() { UpdateReturnState hashUpdate(final long hash) { HashOperations.checkHashCorruption(hash); - wmem_.putByte(FLAGS_BYTE, (byte) (wmem_.getByte(FLAGS_BYTE) & ~EMPTY_FLAG_MASK)); + wseg_.set(JAVA_BYTE, FLAGS_BYTE, (byte) (wseg_.get(JAVA_BYTE, FLAGS_BYTE) & ~EMPTY_FLAG_MASK)); final long thetaLong = getThetaLong(); final int lgNomLongs = getLgNomLongs(); //The over-theta test @@ -280,64 +273,54 @@ UpdateReturnState hashUpdate(final long hash) { } final int lgArrLongs = getLgArrLongs(); - final int preambleLongs = wmem_.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; + final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; //The duplicate test final int index = - HashOperations.hashSearchOrInsertMemory(wmem_, lgArrLongs, hash, preambleLongs << 3); + HashOperations.hashSearchOrInsertMemorySegment(wseg_, lgArrLongs, hash, preambleLongs << 3); if (index >= 0) { return RejectedDuplicate; //Duplicate, not inserted } //insertion occurred, increment curCount final int curCount = getRetainedEntries(true) + 1; - wmem_.putInt(RETAINED_ENTRIES_INT, curCount); //update curCount + wseg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, curCount); //update curCount if (isOutOfSpace(curCount)) { //we need to do something, we are out of space if (lgArrLongs > lgNomLongs) { //at full size, rebuild //Assumes no dirty values, changes thetaLong, curCount_ - assert (lgArrLongs == (lgNomLongs + 1)) - : "lgArr: " + lgArrLongs + ", lgNom: " + lgNomLongs; + 
assert (lgArrLongs == (lgNomLongs + 1)) : "lgArr: " + lgArrLongs + ", lgNom: " + lgNomLongs; //rebuild, refresh curCount based on # values in the hashtable. - quickSelectAndRebuild(wmem_, preambleLongs, lgNomLongs); + quickSelectAndRebuild(wseg_, preambleLongs, lgNomLongs); return InsertedCountIncrementedRebuilt; } //end of rebuild, exit - else { //Not at full size, resize. Should not get here if lgRF = 0 and memCap is too small. + else { //Not at full size, resize. Should not get here if lgRF = 0 and segCap is too small. final int lgRF = getLgRF(); - final int actLgRF = actLgResizeFactor(wmem_.getCapacity(), lgArrLongs, preambleLongs, lgRF); + final int actLgRF = actLgResizeFactor(wseg_.byteSize(), lgArrLongs, preambleLongs, lgRF); int tgtLgArrLongs = Math.min(lgArrLongs + actLgRF, lgNomLongs + 1); - if (actLgRF > 0) { //Expand in current Memory + if (actLgRF > 0) { //Expand in current MemorySegment //lgArrLongs will change; thetaLong, curCount will not - resize(wmem_, preambleLongs, lgArrLongs, tgtLgArrLongs); + resize(wseg_, preambleLongs, lgArrLongs, tgtLgArrLongs); hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, tgtLgArrLongs); return InsertedCountIncrementedResized; - } //end of Expand in current memory, exit. + } //end of Expand in current MemorySegment, exit. else { - //Request more memory, then resize. lgArrLongs will change; thetaLong, curCount will not + //Request more space, then resize. lgArrLongs will change; thetaLong, curCount will not final int preBytes = preambleLongs << 3; tgtLgArrLongs = Math.min(lgArrLongs + lgRF, lgNomLongs + 1); final int tgtArrBytes = 8 << tgtLgArrLongs; final int reqBytes = tgtArrBytes + preBytes; + final MemorySegment newDstSeg = MemorySegment.ofArray(new byte[reqBytes]); - memReqSvr_ = (memReqSvr_ == null) ? wmem_.getMemoryRequestServer() : memReqSvr_; - - if (memReqSvr_ == null) { //in case the MRS is not enabled or null. 
- throw new SketchesArgumentException("Out of Memory, MemoryRequestServer is null, cannot expand."); - } - - final WritableMemory newDstMem = memReqSvr_.request(wmem_, reqBytes); - - moveAndResize(wmem_, preambleLongs, lgArrLongs, newDstMem, tgtLgArrLongs, thetaLong); - - memReqSvr_.requestClose(wmem_); + moveAndResize(wseg_, preambleLongs, lgArrLongs, newDstSeg, tgtLgArrLongs, thetaLong); + wseg_ = newDstSeg; - wmem_ = newDstMem; hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, tgtLgArrLongs); return InsertedCountIncrementedResized; - } //end of Request more memory to resize + } //end of Request more space to resize } //end of resize } //end of isOutOfSpace return InsertedCountIncremented; diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java index fb2aed2a5..2c25ee618 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java +++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java @@ -19,6 +19,10 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static org.apache.datasketches.theta.CompactOperations.checkIllegalCurCountAndEmpty; import static org.apache.datasketches.theta.CompactOperations.computeCompactPreLongs; import static org.apache.datasketches.theta.CompactOperations.correctThetaOnCompact; @@ -37,20 +41,21 @@ import static org.apache.datasketches.theta.PreambleUtil.extractThetaLong; import static org.apache.datasketches.theta.PreambleUtil.insertThetaLong; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import 
org.apache.datasketches.common.SketchesReadOnlyException; import org.apache.datasketches.common.SuppressFBWarnings; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon.ThetaUtil; /** * The default Theta Sketch using the QuickSelect algorithm. * This is the read-only implementation with non-functional methods, which affect the state. * - *

              This implementation uses data in a given Memory that is owned and managed by the caller. - * This Memory can be off-heap, which if managed properly will greatly reduce the need for + *

              This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

              * * @author Lee Rhodes @@ -60,50 +65,48 @@ class DirectQuickSelectSketchR extends UpdateSketch { static final double DQS_RESIZE_THRESHOLD = 15.0 / 16.0; //tuned for space final long seed_; //provided, kept only on heap, never serialized. int hashTableThreshold_; //computed, kept only on heap, never serialized. - WritableMemory wmem_; //A WritableMemory for child class, but no write methods here + MemorySegment wseg_; //A MemorySegment for child class, but no write methods here //only called by DirectQuickSelectSketch and below - DirectQuickSelectSketchR(final long seed, final WritableMemory wmem) { + DirectQuickSelectSketchR(final long seed, final MemorySegment wseg) { seed_ = seed; - wmem_ = wmem; + wseg_ = wseg; } /** - * Wrap a sketch around the given source Memory containing sketch data that originated from + * Wrap a sketch around the given source MemorySegment containing sketch data that originated from * this sketch. - * @param srcMem See Memory - * The given Memory object must be in hash table form and not read only. + * @param srcSeg the source MemorySegment. + * The given MemorySegment object must be in hash table form and not read only. 
* @param seed See Update Hash Seed * @return instance of this sketch */ - static DirectQuickSelectSketchR readOnlyWrap(final Memory srcMem, final long seed) { - final int preambleLongs = extractPreLongs(srcMem); //byte 0 - final int lgNomLongs = extractLgNomLongs(srcMem); //byte 3 - final int lgArrLongs = extractLgArrLongs(srcMem); //byte 4 + static DirectQuickSelectSketchR readOnlyWrap(final MemorySegment srcSeg, final long seed) { + final int preambleLongs = extractPreLongs(srcSeg); //byte 0 + final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3 + final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4 - UpdateSketch.checkUnionQuickSelectFamily(srcMem, preambleLongs, lgNomLongs); - checkMemIntegrity(srcMem, seed, preambleLongs, lgNomLongs, lgArrLongs); + UpdateSketch.checkUnionQuickSelectFamily(srcSeg, preambleLongs, lgNomLongs); + checkSegIntegrity(srcSeg, seed, preambleLongs, lgNomLongs, lgArrLongs); final DirectQuickSelectSketchR dqssr = - new DirectQuickSelectSketchR(seed, (WritableMemory) srcMem); + new DirectQuickSelectSketchR(seed, srcSeg); dqssr.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); return dqssr; } /** - * Fast-wrap a sketch around the given source Memory containing sketch data that originated from - * this sketch. This does NO validity checking of the given Memory. - * @param srcMem See Memory - * The given Memory object must be in hash table form and not read only. + * Fast-wrap a sketch around the given source MemorySegment containing sketch data that originated from + * this sketch. This does NO validity checking of the given MemorySegment. + * @param srcSeg The given MemorySegment object must be in hash table form and not read only. 
* @param seed See Update Hash Seed * @return instance of this sketch */ - static DirectQuickSelectSketchR fastReadOnlyWrap(final Memory srcMem, final long seed) { - final int lgNomLongs = srcMem.getByte(LG_NOM_LONGS_BYTE) & 0XFF; - final int lgArrLongs = srcMem.getByte(LG_ARR_LONGS_BYTE) & 0XFF; + static DirectQuickSelectSketchR fastReadOnlyWrap(final MemorySegment srcSeg, final long seed) { + final int lgNomLongs = srcSeg.get(JAVA_BYTE, LG_NOM_LONGS_BYTE) & 0XFF; + final int lgArrLongs = srcSeg.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF; - final DirectQuickSelectSketchR dqss = - new DirectQuickSelectSketchR(seed, (WritableMemory) srcMem); + final DirectQuickSelectSketchR dqss = new DirectQuickSelectSketchR(seed, srcSeg); dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); return dqss; } @@ -113,70 +116,70 @@ static DirectQuickSelectSketchR fastReadOnlyWrap(final Memory srcMem, final long @Override public int getCurrentBytes() { //not compact - final byte lgArrLongs = wmem_.getByte(LG_ARR_LONGS_BYTE); - final int preLongs = wmem_.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; + final byte lgArrLongs = wseg_.get(JAVA_BYTE, LG_ARR_LONGS_BYTE); + final int preLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; final int lengthBytes = (preLongs + (1 << lgArrLongs)) << 3; return lengthBytes; } @Override public double getEstimate() { - final int curCount = extractCurCount(wmem_); - final long thetaLong = extractThetaLong(wmem_); + final int curCount = extractCurCount(wseg_); + final long thetaLong = extractThetaLong(wseg_); return Sketch.estimate(thetaLong, curCount); } @Override public Family getFamily() { - final int familyID = wmem_.getByte(FAMILY_BYTE) & 0XFF; + final int familyID = wseg_.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF; return Family.idToFamily(familyID); } @Override public int getRetainedEntries(final boolean valid) { //always valid - return wmem_.getInt(RETAINED_ENTRIES_INT); + return wseg_.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT); } 
@Override public long getThetaLong() { - return isEmpty() ? Long.MAX_VALUE : wmem_.getLong(THETA_LONG); + return isEmpty() ? Long.MAX_VALUE : wseg_.get(JAVA_LONG_UNALIGNED, THETA_LONG); } @Override - public boolean hasMemory() { - return wmem_ != null; + public boolean hasMemorySegment() { + return wseg_ != null && wseg_.scope().isAlive(); } @Override public boolean isDirect() { - return hasMemory() ? wmem_.isDirect() : false; + return hasMemorySegment() && wseg_.isNative(); } @Override public boolean isEmpty() { - return PreambleUtil.isEmptyFlag(wmem_); + return PreambleUtil.isEmptyFlag(wseg_); } @Override - public boolean isSameResource(final Memory that) { - return hasMemory() ? wmem_.isSameResource(that) : false; + public boolean isSameResource(final MemorySegment that) { + return hasMemorySegment() && Util.isSameResource(wseg_, that); } @Override public HashIterator iterator() { - return new MemoryHashIterator(wmem_, 1 << getLgArrLongs(), getThetaLong()); + return new MemorySegmentHashIterator(wseg_, 1 << getLgArrLongs(), getThetaLong()); } @Override - public byte[] toByteArray() { //MY_FAMILY is stored in wmem_ - checkIllegalCurCountAndEmpty(isEmpty(), extractCurCount(wmem_)); + public byte[] toByteArray() { //MY_FAMILY is stored in wseg_ + checkIllegalCurCountAndEmpty(isEmpty(), extractCurCount(wseg_)); final int lengthBytes = getCurrentBytes(); final byte[] byteArray = new byte[lengthBytes]; - final WritableMemory mem = WritableMemory.writableWrap(byteArray); - wmem_.copyTo(0, mem, 0, lengthBytes); + final MemorySegment seg = MemorySegment.ofArray(byteArray); + MemorySegment.copy(wseg_, 0, seg, 0, lengthBytes); final long thetaLong = - correctThetaOnCompact(isEmpty(), extractCurCount(wmem_), extractThetaLong(wmem_)); - insertThetaLong(wmem_, thetaLong); + correctThetaOnCompact(isEmpty(), extractCurCount(wseg_), extractThetaLong(wseg_)); + insertThetaLong(wseg_, thetaLong); return byteArray; } @@ -184,12 +187,12 @@ public byte[] toByteArray() { //MY_FAMILY 
is stored in wmem_ @Override public final int getLgNomLongs() { - return PreambleUtil.extractLgNomLongs(wmem_); + return PreambleUtil.extractLgNomLongs(wseg_); } @Override float getP() { - return wmem_.getFloat(P_FLOAT); + return wseg_.get(JAVA_FLOAT_UNALIGNED, P_FLOAT); } @Override @@ -216,11 +219,10 @@ public void reset() { @Override long[] getCache() { - final long lgArrLongs = wmem_.getByte(LG_ARR_LONGS_BYTE) & 0XFF; - final int preambleLongs = wmem_.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; + final long lgArrLongs = wseg_.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF; + final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; final long[] cacheArr = new long[1 << lgArrLongs]; - final WritableMemory mem = WritableMemory.writableWrap(cacheArr); - wmem_.copyTo(preambleLongs << 3, mem, 0, 8 << lgArrLongs); + MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, preambleLongs << 3, cacheArr, 0, 1 << lgArrLongs); return cacheArr; } @@ -231,17 +233,17 @@ int getCompactPreambleLongs() { @Override int getCurrentPreambleLongs() { - return PreambleUtil.extractPreLongs(wmem_); + return PreambleUtil.extractPreLongs(wseg_); } @Override - WritableMemory getMemory() { - return wmem_; + MemorySegment getMemorySegment() { + return wseg_; } @Override short getSeedHash() { - return (short) PreambleUtil.extractSeedHash(wmem_); + return (short) PreambleUtil.extractSeedHash(wseg_); } @Override @@ -256,11 +258,11 @@ boolean isOutOfSpace(final int numEntries) { @Override int getLgArrLongs() { - return wmem_.getByte(LG_ARR_LONGS_BYTE) & 0XFF; + return wseg_.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF; } int getLgRF() { //only Direct needs this - return (wmem_.getByte(PREAMBLE_LONGS_BYTE) >>> LG_RESIZE_FACTOR_BIT) & 0X3; + return (wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) >>> LG_RESIZE_FACTOR_BIT) & 0X3; } @Override diff --git a/src/main/java/org/apache/datasketches/theta/EmptyCompactSketch.java b/src/main/java/org/apache/datasketches/theta/EmptyCompactSketch.java index 
8f6e4972a..45a17d40d 100644 --- a/src/main/java/org/apache/datasketches/theta/EmptyCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta/EmptyCompactSketch.java @@ -19,9 +19,12 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; + +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; /** * Singleton empty CompactSketch. @@ -46,14 +49,14 @@ static synchronized EmptyCompactSketch getInstance() { } //This should be a heapify - static synchronized EmptyCompactSketch getHeapInstance(final Memory srcMem) { - final long pre0 = srcMem.getLong(0); + static synchronized EmptyCompactSketch getHeapInstance(final MemorySegment srcSeg) { + final long pre0 = srcSeg.get(JAVA_LONG_UNALIGNED, 0); if (testCandidatePre0(pre0)) { return EMPTY_COMPACT_SKETCH; } final long maskedPre0 = pre0 & EMPTY_SKETCH_MASK; - throw new SketchesArgumentException("Input Memory does not match required Preamble. " - + "Memory Pre0: " + Long.toHexString(maskedPre0) + throw new SketchesArgumentException("Input MemorySegment does not match required Preamble. " + + "MemorySegment Pre0: " + Long.toHexString(maskedPre0) + ", required Pre0: " + Long.toHexString(EMPTY_SKETCH_TEST)); } @@ -61,10 +64,11 @@ static synchronized EmptyCompactSketch getHeapInstance(final Memory srcMem) { // This returns with ordered flag = true independent of dstOrdered. // This is required for fast detection. // The hashSeed is ignored and set == 0. 
- public CompactSketch compact(final boolean dstOrdered, final WritableMemory wmem) { - if (wmem == null) { return EmptyCompactSketch.getInstance(); } - wmem.putByteArray(0, EMPTY_COMPACT_SKETCH_ARR, 0, 8); - return new DirectCompactSketch(wmem); + public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstWSeg) { + if (dstWSeg == null) { return EmptyCompactSketch.getInstance(); } + //dstWSeg.putByteArray(0, EMPTY_COMPACT_SKETCH_ARR, 0, 8); + MemorySegment.copy(EMPTY_COMPACT_SKETCH_ARR, 0, dstWSeg, JAVA_BYTE, 0, 8); + return new DirectCompactSketch(dstWSeg); } //static @@ -131,11 +135,6 @@ int getCurrentPreambleLongs() { return 1; } - @Override - Memory getMemory() { - return null; - } - @Override short getSeedHash() { return 0; diff --git a/src/main/java/org/apache/datasketches/theta/ForwardCompatibility.java b/src/main/java/org/apache/datasketches/theta/ForwardCompatibility.java index 16172d247..723a8b651 100644 --- a/src/main/java/org/apache/datasketches/theta/ForwardCompatibility.java +++ b/src/main/java/org/apache/datasketches/theta/ForwardCompatibility.java @@ -19,13 +19,15 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static org.apache.datasketches.theta.PreambleUtil.extractCurCount; import static org.apache.datasketches.theta.PreambleUtil.extractFamilyID; import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs; import static org.apache.datasketches.theta.PreambleUtil.extractThetaLong; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; /** * Used to convert older serialization versions 1 and 2 to version 3. The Serialization @@ -36,13 +38,15 @@ */ final class ForwardCompatibility { + private ForwardCompatibility() { } + /** * Convert a serialization version (SerVer) 1 sketch (~Feb 2014) to a SerVer 3 sketch. 
* Note: SerVer 1 sketches always have (metadata) preamble-longs of 3 and are always stored * in a compact ordered form, but with 3 different sketch types. All SerVer 1 sketches will * be converted to a SerVer 3 sketches. There is no concept of p-sampling, no empty bit. * - * @param srcMem the image of a SerVer 1 sketch + * @param srcSeg the image of a SerVer 1 sketch * * @param seedHash See Seed Hash. * The seedHash that matches the seedHash of the original seed used to construct the sketch. @@ -50,35 +54,35 @@ final class ForwardCompatibility { * MUST be derived from the actual seed that was used when the SerVer 1 sketches were built. * @return a SerVer 3 {@link CompactSketch}. */ - static final CompactSketch heapify1to3(final Memory srcMem, final short seedHash) { - final int memCap = (int) srcMem.getCapacity(); - final int preLongs = extractPreLongs(srcMem); //always 3 for serVer 1 + static final CompactSketch heapify1to3(final MemorySegment srcSeg, final short seedHash) { + final int segCap = (int) srcSeg.byteSize(); + final int preLongs = extractPreLongs(srcSeg); //always 3 for serVer 1 if (preLongs != 3) { throw new SketchesArgumentException("PreLongs must be 3 for SerVer 1: " + preLongs); } - final int familyId = extractFamilyID(srcMem); //1,2,3 + final int familyId = extractFamilyID(srcSeg); //1,2,3 if ((familyId < 1) || (familyId > 3)) { throw new SketchesArgumentException("Family ID (Sketch Type) must be 1 to 3: " + familyId); } - final int curCount = extractCurCount(srcMem); - final long thetaLong = extractThetaLong(srcMem); + final int curCount = extractCurCount(srcSeg); + final long thetaLong = extractThetaLong(srcSeg); final boolean empty = (curCount == 0) && (thetaLong == Long.MAX_VALUE); - if (empty || (memCap <= 24)) { //return empty + if (empty || (segCap <= 24)) { //return empty return EmptyCompactSketch.getInstance(); } final int reqCap = (curCount + preLongs) << 3; - validateInputSize(reqCap, memCap); + validateInputSize(reqCap, segCap); if 
((thetaLong == Long.MAX_VALUE) && (curCount == 1)) { - final long hash = srcMem.getLong(preLongs << 3); + final long hash = srcSeg.get(JAVA_LONG_UNALIGNED, preLongs << 3); return new SingleItemSketch(hash, seedHash); } //theta < 1.0 and/or curCount > 1 final long[] compactOrderedCache = new long[curCount]; - srcMem.getLongArray(preLongs << 3, compactOrderedCache, 0, curCount); + MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preLongs << 3, compactOrderedCache, 0, curCount); return new HeapCompactSketch(compactOrderedCache, false, seedHash, curCount, thetaLong, true); } @@ -86,15 +90,15 @@ static final CompactSketch heapify1to3(final Memory srcMem, final short seedHash * Convert a serialization version (SerVer) 2 sketch to a SerVer 3 HeapCompactOrderedSketch. * Note: SerVer 2 sketches can have metadata-longs of 1,2 or 3 and are always stored * in a compact ordered form (not as a hash table), but with 4 different sketch types. - * @param srcMem the image of a SerVer 2 sketch + * @param srcSeg the image of a SerVer 2 sketch * @param seedHash See Seed Hash. 
* The seed used for building the sketch image in srcMem * @return a SerVer 3 HeapCompactOrderedSketch */ - static final CompactSketch heapify2to3(final Memory srcMem, final short seedHash) { - final int memCap = (int) srcMem.getCapacity(); - final int preLongs = extractPreLongs(srcMem); //1,2 or 3 - final int familyId = extractFamilyID(srcMem); //1,2,3,4 + static final CompactSketch heapify2to3(final MemorySegment srcSeg, final short seedHash) { + final int segCap = (int) srcSeg.byteSize(); + final int preLongs = extractPreLongs(srcSeg); //1,2 or 3 + final int familyId = extractFamilyID(srcSeg); //1,2,3,4 if ((familyId < 1) || (familyId > 4)) { throw new SketchesArgumentException("Family (Sketch Type) must be 1 to 4: " + familyId); } @@ -103,58 +107,59 @@ static final CompactSketch heapify2to3(final Memory srcMem, final short seedHash long thetaLong = Long.MAX_VALUE; if (preLongs == 1) { reqBytesIn = 8; - validateInputSize(reqBytesIn, memCap); + validateInputSize(reqBytesIn, segCap); return EmptyCompactSketch.getInstance(); } if (preLongs == 2) { //includes pre0 + count, no theta (== 1.0) reqBytesIn = preLongs << 3; - validateInputSize(reqBytesIn, memCap); - curCount = extractCurCount(srcMem); + validateInputSize(reqBytesIn, segCap); + curCount = extractCurCount(srcSeg); if (curCount == 0) { return EmptyCompactSketch.getInstance(); } if (curCount == 1) { reqBytesIn = (preLongs + 1) << 3; - validateInputSize(reqBytesIn, memCap); - final long hash = srcMem.getLong(preLongs << 3); + validateInputSize(reqBytesIn, segCap); + final long hash = srcSeg.get(JAVA_LONG_UNALIGNED, preLongs << 3); return new SingleItemSketch(hash, seedHash); } //curCount > 1 reqBytesIn = (curCount + preLongs) << 3; - validateInputSize(reqBytesIn, memCap); + validateInputSize(reqBytesIn, segCap); final long[] compactOrderedCache = new long[curCount]; - srcMem.getLongArray(preLongs << 3, compactOrderedCache, 0, curCount); + MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preLongs << 3, 
compactOrderedCache, 0, curCount); return new HeapCompactSketch(compactOrderedCache, false, seedHash, curCount, thetaLong,true); } if (preLongs == 3) { //pre0 + count + theta reqBytesIn = (preLongs) << 3; // - validateInputSize(reqBytesIn, memCap); - curCount = extractCurCount(srcMem); - thetaLong = extractThetaLong(srcMem); + validateInputSize(reqBytesIn, segCap); + curCount = extractCurCount(srcSeg); + thetaLong = extractThetaLong(srcSeg); if ((curCount == 0) && (thetaLong == Long.MAX_VALUE)) { return EmptyCompactSketch.getInstance(); } if ((curCount == 1) && (thetaLong == Long.MAX_VALUE)) { reqBytesIn = (preLongs + 1) << 3; - validateInputSize(reqBytesIn, memCap); - final long hash = srcMem.getLong(preLongs << 3); + validateInputSize(reqBytesIn, segCap); + final long hash = srcSeg.get(JAVA_LONG_UNALIGNED, preLongs << 3); return new SingleItemSketch(hash, seedHash); } //curCount > 1 and/or theta < 1.0 reqBytesIn = (curCount + preLongs) << 3; - validateInputSize(reqBytesIn, memCap); + validateInputSize(reqBytesIn, segCap); final long[] compactOrderedCache = new long[curCount]; - srcMem.getLongArray(preLongs << 3, compactOrderedCache, 0, curCount); + //srcSeg.getLongArray(preLongs << 3, compactOrderedCache, 0, curCount); + MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preLongs << 3, compactOrderedCache, 0, curCount); return new HeapCompactSketch(compactOrderedCache, false, seedHash, curCount, thetaLong, true); } throw new SketchesArgumentException("PreLongs must be 1,2, or 3: " + preLongs); } - private static final void validateInputSize(final int reqBytesIn, final int memCap) { - if (reqBytesIn > memCap) { + private static final void validateInputSize(final int reqBytesIn, final int segCap) { + if (reqBytesIn > segCap) { throw new SketchesArgumentException( - "Input Memory or byte[] size is too small: Required Bytes: " + reqBytesIn - + ", bytesIn: " + memCap); + "Input MemorySegment or byte[] size is too small: Required Bytes: " + reqBytesIn + + ", bytesIn: " + 
segCap); } } diff --git a/src/main/java/org/apache/datasketches/theta/HeapAlphaSketch.java b/src/main/java/org/apache/datasketches/theta/HeapAlphaSketch.java index 3ee7c4880..b4b7848c5 100644 --- a/src/main/java/org/apache/datasketches/theta/HeapAlphaSketch.java +++ b/src/main/java/org/apache/datasketches/theta/HeapAlphaSketch.java @@ -22,6 +22,7 @@ import static java.lang.Math.max; import static java.lang.Math.min; import static java.lang.Math.sqrt; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; import static org.apache.datasketches.common.Util.checkBounds; import static org.apache.datasketches.theta.PreambleUtil.extractCurCount; @@ -38,13 +39,12 @@ import static org.apache.datasketches.theta.UpdateReturnState.RejectedOverTheta; import static org.apache.datasketches.thetacommon.HashOperations.STRIDE_MASK; +import java.lang.foreign.MemorySegment; import java.util.Objects; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.HashOperations; import org.apache.datasketches.thetacommon.ThetaUtil; @@ -113,44 +113,43 @@ static HeapAlphaSketch newHeapInstance(final int lgNomLongs, final long seed, fi } /** - * Heapify a sketch from a Memory object containing sketch data. - * @param srcMem The source Memory object. - * See Memory + * Heapify a sketch from a MemorySegment object containing sketch data. + * @param srcSeg The source MemorySegment object. * It must have a size of at least 24 bytes. - * @param expectedSeed the seed used to validate the given Memory image. + * @param expectedSeed the seed used to validate the given MemorySegment image. 
* See seed * @return instance of this sketch */ - static HeapAlphaSketch heapifyInstance(final Memory srcMem, final long expectedSeed) { - Objects.requireNonNull(srcMem, "Source Memory must not be null"); - checkBounds(0, 24, srcMem.getCapacity()); - final int preambleLongs = extractPreLongs(srcMem); //byte 0 - final int lgNomLongs = extractLgNomLongs(srcMem); //byte 3 - final int lgArrLongs = extractLgArrLongs(srcMem); //byte 4 + static HeapAlphaSketch heapifyInstance(final MemorySegment srcSeg, final long expectedSeed) { + Objects.requireNonNull(srcSeg, "Source MemorySegment must not be null"); + checkBounds(0, 24, srcSeg.byteSize()); + final int preambleLongs = extractPreLongs(srcSeg); //byte 0 + final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3 + final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4 - checkAlphaFamily(srcMem, preambleLongs, lgNomLongs); - checkMemIntegrity(srcMem, expectedSeed, preambleLongs, lgNomLongs, lgArrLongs); + checkAlphaFamily(srcSeg, preambleLongs, lgNomLongs); + checkSegIntegrity(srcSeg, expectedSeed, preambleLongs, lgNomLongs, lgArrLongs); - final float p = extractP(srcMem); //bytes 12-15 - final int memlgRF = extractLgResizeFactor(srcMem); //byte 0 - ResizeFactor memRF = ResizeFactor.getRF(memlgRF); + final float p = extractP(srcSeg); //bytes 12-15 + final int seglgRF = extractLgResizeFactor(srcSeg); //byte 0 + ResizeFactor segRF = ResizeFactor.getRF(seglgRF); final double nomLongs = (1L << lgNomLongs); final double alpha = nomLongs / (nomLongs + 1.0); final long split1 = (long) (((p * (alpha + 1.0)) / 2.0) * LONG_MAX_VALUE_AS_DOUBLE); - if (isResizeFactorIncorrect(srcMem, lgNomLongs, lgArrLongs)) { - memRF = ResizeFactor.X2; //X2 always works. + if (isResizeFactorIncorrect(srcSeg, lgNomLongs, lgArrLongs)) { + segRF = ResizeFactor.X2; //X2 always works. 
} - final HeapAlphaSketch has = new HeapAlphaSketch(lgNomLongs, expectedSeed, p, memRF, alpha, split1); + final HeapAlphaSketch has = new HeapAlphaSketch(lgNomLongs, expectedSeed, p, segRF, alpha, split1); has.lgArrLongs_ = lgArrLongs; has.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs); - has.curCount_ = extractCurCount(srcMem); - has.thetaLong_ = extractThetaLong(srcMem); - has.empty_ = PreambleUtil.isEmptyFlag(srcMem); + has.curCount_ = extractCurCount(srcSeg); + has.thetaLong_ = extractThetaLong(srcSeg); + has.empty_ = PreambleUtil.isEmptyFlag(srcSeg); has.cache_ = new long[1 << lgArrLongs]; - srcMem.getLongArray(preambleLongs << 3, has.cache_, 0, 1 << lgArrLongs); //read in as hash table + MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preambleLongs << 3, has.cache_, 0, 1 << lgArrLongs); //read in as hash table return has; } @@ -294,11 +293,6 @@ int getCurrentPreambleLongs() { return Family.ALPHA.getMinPreLongs(); } - @Override - WritableMemory getMemory() { - return null; - } - @Override long[] getCache() { return cache_; @@ -576,9 +570,9 @@ private static final int setHashTableThreshold(final int lgNomLongs, final int l return (int) Math.floor(fraction * (1 << lgArrLongs)); } - static void checkAlphaFamily(final Memory mem, final int preambleLongs, final int lgNomLongs) { + static void checkAlphaFamily(final MemorySegment seg, final int preambleLongs, final int lgNomLongs) { //Check Family - final int familyID = extractFamilyID(mem); //byte 2 + final int familyID = extractFamilyID(seg); //byte 2 final Family family = Family.idToFamily(familyID); if (family.equals(Family.ALPHA)) { if (preambleLongs != Family.ALPHA.getMinPreLongs()) { diff --git a/src/main/java/org/apache/datasketches/theta/HeapCompactHashIterator.java b/src/main/java/org/apache/datasketches/theta/HeapCompactHashIterator.java index d5a6289c7..e8e5d8305 100644 --- a/src/main/java/org/apache/datasketches/theta/HeapCompactHashIterator.java +++ 
b/src/main/java/org/apache/datasketches/theta/HeapCompactHashIterator.java @@ -19,7 +19,7 @@ package org.apache.datasketches.theta; -class HeapCompactHashIterator implements HashIterator { +final class HeapCompactHashIterator implements HashIterator { private long[] cache; private int index; diff --git a/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java b/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java index 2572ce5d5..6b5708901 100644 --- a/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java @@ -24,22 +24,21 @@ import static org.apache.datasketches.theta.CompactOperations.computeCompactPreLongs; import static org.apache.datasketches.theta.CompactOperations.correctThetaOnCompact; import static org.apache.datasketches.theta.CompactOperations.isSingleItem; -import static org.apache.datasketches.theta.CompactOperations.loadCompactMemory; +import static org.apache.datasketches.theta.CompactOperations.loadCompactMemorySegment; import static org.apache.datasketches.theta.PreambleUtil.COMPACT_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.EMPTY_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.ORDERED_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.READ_ONLY_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.SINGLEITEM_FLAG_MASK; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; +import java.lang.foreign.MemorySegment; /** * Parent class of the Heap Compact Sketches. 
* * @author Lee Rhodes */ -class HeapCompactSketch extends CompactSketch { +final class HeapCompactSketch extends CompactSketch { private final long thetaLong_; //computed private final int curCount_; private final int preLongs_; //computed @@ -76,10 +75,10 @@ class HeapCompactSketch extends CompactSketch { //Sketch @Override - public CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem) { - if (dstMem == null && (dstOrdered == false || this.ordered_ == dstOrdered)) { return this; } + public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) { + if (dstSeg == null && (dstOrdered == false || this.ordered_ == dstOrdered)) { return this; } return componentsToCompact(getThetaLong(), getRetainedEntries(true), getSeedHash(), isEmpty(), - true, ordered_, dstOrdered, dstMem, getCache().clone()); + true, ordered_, dstOrdered, dstSeg, getCache().clone()); } @Override @@ -129,30 +128,25 @@ int getCompactPreambleLongs() { return preLongs_; } - @Override - Memory getMemory() { - return null; - } - @Override short getSeedHash() { return seedHash_; } - //use of Memory is convenient. The byteArray and Memory are loaded simultaneously. + //use of a MemorySegment is convenient. The byteArray and MemorySegment are loaded simultaneously. @Override public byte[] toByteArray() { final int bytes = getCurrentBytes(); final byte[] byteArray = new byte[bytes]; - final WritableMemory dstMem = WritableMemory.writableWrap(byteArray); + final MemorySegment dstSeg = MemorySegment.ofArray(byteArray); final int emptyBit = isEmpty() ? EMPTY_FLAG_MASK : 0; final int orderedBit = ordered_ ? ORDERED_FLAG_MASK : 0; final int singleItemBit = singleItem_ ? 
SINGLEITEM_FLAG_MASK : 0; final byte flags = (byte) (emptyBit | READ_ONLY_FLAG_MASK | COMPACT_FLAG_MASK | orderedBit | singleItemBit); final int preLongs = getCompactPreambleLongs(); - loadCompactMemory(getCache(), getSeedHash(), getRetainedEntries(true), getThetaLong(), - dstMem, flags, preLongs); + loadCompactMemorySegment(getCache(), getSeedHash(), getRetainedEntries(true), getThetaLong(), + dstSeg, flags, preLongs); return byteArray; } diff --git a/src/main/java/org/apache/datasketches/theta/HeapHashIterator.java b/src/main/java/org/apache/datasketches/theta/HeapHashIterator.java index 9562da7d0..d689b912e 100644 --- a/src/main/java/org/apache/datasketches/theta/HeapHashIterator.java +++ b/src/main/java/org/apache/datasketches/theta/HeapHashIterator.java @@ -22,7 +22,7 @@ /** * @author Lee Rhodes */ -class HeapHashIterator implements HashIterator { +final class HeapHashIterator implements HashIterator { private long[] cache; private long thetaLong; private int index; diff --git a/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java index cf7249ed8..46e469004 100644 --- a/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java @@ -21,6 +21,7 @@ import static java.lang.Math.max; import static java.lang.Math.min; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static org.apache.datasketches.common.QuickSelect.selectExcludingZeros; import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; import static org.apache.datasketches.theta.PreambleUtil.extractCurCount; @@ -37,10 +38,10 @@ import static org.apache.datasketches.theta.UpdateReturnState.RejectedDuplicate; import static org.apache.datasketches.theta.UpdateReturnState.RejectedOverTheta; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Family; import 
org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.HashOperations; import org.apache.datasketches.thetacommon.ThetaUtil; @@ -100,40 +101,39 @@ private HeapQuickSelectSketch(final int lgNomLongs, final long seed, final float } /** - * Heapify a sketch from a Memory UpdateSketch or Union object + * Heapify a sketch from a MemorySegment UpdateSketch or Union object * containing sketch data. - * @param srcMem The source Memory object. - * See Memory + * @param srcSeg The source MemorySegment object. * @param seed See seed * @return instance of this sketch */ - static HeapQuickSelectSketch heapifyInstance(final Memory srcMem, final long seed) { - final int preambleLongs = extractPreLongs(srcMem); //byte 0 - final int lgNomLongs = extractLgNomLongs(srcMem); //byte 3 - final int lgArrLongs = extractLgArrLongs(srcMem); //byte 4 - - checkUnionQuickSelectFamily(srcMem, preambleLongs, lgNomLongs); - checkMemIntegrity(srcMem, seed, preambleLongs, lgNomLongs, lgArrLongs); - - final float p = extractP(srcMem); //bytes 12-15 - final int memlgRF = extractLgResizeFactor(srcMem); //byte 0 - ResizeFactor memRF = ResizeFactor.getRF(memlgRF); - final int familyID = extractFamilyID(srcMem); + static HeapQuickSelectSketch heapifyInstance(final MemorySegment srcSeg, final long seed) { + final int preambleLongs = extractPreLongs(srcSeg); //byte 0 + final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3 + final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4 + + checkUnionQuickSelectFamily(srcSeg, preambleLongs, lgNomLongs); + checkSegIntegrity(srcSeg, seed, preambleLongs, lgNomLongs, lgArrLongs); + + final float p = extractP(srcSeg); //bytes 12-15 + final int seglgRF = extractLgResizeFactor(srcSeg); //byte 0 + ResizeFactor segRF = ResizeFactor.getRF(seglgRF); + final int familyID = extractFamilyID(srcSeg); final Family family = 
Family.idToFamily(familyID); - if (isResizeFactorIncorrect(srcMem, lgNomLongs, lgArrLongs)) { - memRF = ResizeFactor.X2; //X2 always works. + if (isResizeFactorIncorrect(srcSeg, lgNomLongs, lgArrLongs)) { + segRF = ResizeFactor.X2; //X2 always works. } - final HeapQuickSelectSketch hqss = new HeapQuickSelectSketch(lgNomLongs, seed, p, memRF, + final HeapQuickSelectSketch hqss = new HeapQuickSelectSketch(lgNomLongs, seed, p, segRF, preambleLongs, family); hqss.lgArrLongs_ = lgArrLongs; hqss.hashTableThreshold_ = getHashTableThreshold(lgNomLongs, lgArrLongs); - hqss.curCount_ = extractCurCount(srcMem); - hqss.thetaLong_ = extractThetaLong(srcMem); - hqss.empty_ = PreambleUtil.isEmptyFlag(srcMem); + hqss.curCount_ = extractCurCount(srcSeg); + hqss.thetaLong_ = extractThetaLong(srcSeg); + hqss.empty_ = PreambleUtil.isEmptyFlag(srcSeg); hqss.cache_ = new long[1 << lgArrLongs]; - srcMem.getLongArray(preambleLongs << 3, hqss.cache_, 0, 1 << lgArrLongs); //read in as hash table + MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preambleLongs << 3, hqss.cache_, 0, 1 << lgArrLongs); //read in as hash table return hqss; } @@ -230,11 +230,6 @@ int getLgArrLongs() { return lgArrLongs_; } - @Override - WritableMemory getMemory() { - return null; - } - @Override UpdateReturnState hashUpdate(final long hash) { HashOperations.checkHashCorruption(hash); diff --git a/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java b/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java index b17af35db..87e1892b8 100644 --- a/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java +++ b/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java @@ -19,6 +19,7 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static org.apache.datasketches.theta.CompactOperations.checkIllegalCurCountAndEmpty; import static org.apache.datasketches.theta.CompactOperations.correctThetaOnCompact; import static 
org.apache.datasketches.theta.PreambleUtil.EMPTY_FLAG_MASK; @@ -35,9 +36,10 @@ import static org.apache.datasketches.theta.PreambleUtil.insertSerVer; import static org.apache.datasketches.theta.PreambleUtil.insertThetaLong; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.ThetaUtil; /** @@ -103,33 +105,35 @@ byte[] toByteArray(final int preLongs, final byte familyID) { final int preBytes = (preLongs << 3) & 0X3F; //24 bytes final int dataBytes = getCurrentDataLongs() << 3; final byte[] byteArrOut = new byte[preBytes + dataBytes]; - final WritableMemory memOut = WritableMemory.writableWrap(byteArrOut); + + final MemorySegment segOut = MemorySegment.ofArray(byteArrOut); //preamble first 8 bytes. Note: only compact can be reduced to 8 bytes. final int lgRf = getResizeFactor().lg() & 0x3; - insertPreLongs(memOut, preLongs); //byte 0 low 6 bits - insertLgResizeFactor(memOut, lgRf); //byte 0 high 2 bits - insertSerVer(memOut, SER_VER); //byte 1 - insertFamilyID(memOut, familyID); //byte 2 - insertLgNomLongs(memOut, getLgNomLongs()); //byte 3 - insertLgArrLongs(memOut, getLgArrLongs()); //byte 4 - insertSeedHash(memOut, getSeedHash()); //bytes 6 & 7 - - insertCurCount(memOut, this.getRetainedEntries(true)); - insertP(memOut, getP()); + insertPreLongs(segOut, preLongs); //byte 0 low 6 bits + insertLgResizeFactor(segOut, lgRf); //byte 0 high 2 bits + insertSerVer(segOut, SER_VER); //byte 1 + insertFamilyID(segOut, familyID); //byte 2 + insertLgNomLongs(segOut, getLgNomLongs()); //byte 3 + insertLgArrLongs(segOut, getLgArrLongs()); //byte 4 + insertSeedHash(segOut, getSeedHash()); //bytes 6 & 7 + + insertCurCount(segOut, this.getRetainedEntries(true)); + insertP(segOut, getP()); final long thetaLong = correctThetaOnCompact(isEmpty(), getRetainedEntries(true), getThetaLong()); - 
insertThetaLong(memOut, thetaLong); + insertThetaLong(segOut, thetaLong); //Flags: BigEnd=0, ReadOnly=0, Empty=X, compact=0, ordered=0 final byte flags = isEmpty() ? (byte) EMPTY_FLAG_MASK : 0; - insertFlags(memOut, flags); + insertFlags(segOut, flags); //Data final int arrLongs = 1 << getLgArrLongs(); final long[] cache = getCache(); - memOut.putLongArray(preBytes, cache, 0, arrLongs); //load byteArrOut + //segOut.putLongArray(preBytes, cache, 0, arrLongs); //load byteArrOut + MemorySegment.copy(cache, 0, segOut, JAVA_LONG_UNALIGNED, preBytes, arrLongs); return byteArrOut; } diff --git a/src/main/java/org/apache/datasketches/theta/Intersection.java b/src/main/java/org/apache/datasketches/theta/Intersection.java index 111a9ce55..a31dc3ef9 100644 --- a/src/main/java/org/apache/datasketches/theta/Intersection.java +++ b/src/main/java/org/apache/datasketches/theta/Intersection.java @@ -19,21 +19,23 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; import static org.apache.datasketches.common.Util.floorPowerOf2; import static org.apache.datasketches.theta.PreambleUtil.EMPTY_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.SER_VER; +import static org.apache.datasketches.theta.PreambleUtil.SER_VER_BYTE; import static org.apache.datasketches.theta.PreambleUtil.extractCurCount; import static org.apache.datasketches.theta.PreambleUtil.extractFamilyID; import static org.apache.datasketches.theta.PreambleUtil.extractFlags; import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs; import static org.apache.datasketches.theta.PreambleUtil.extractSerVer; +import java.lang.foreign.MemorySegment; import java.util.Arrays; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.common.Util; import 
org.apache.datasketches.thetacommon.ThetaUtil; /** @@ -61,7 +63,7 @@ public CompactSketch getResult() { } /** - * Gets the result of this operation as a CompactSketch in the given dstMem. + * Gets the result of this operation as a CompactSketch in the given dstSeg. * This does not disturb the underlying data structure of this intersection. * The {@link #intersect(Sketch)} method must have been called at least once, otherwise an * exception will be thrown. This is because a virgin Intersection object represents the @@ -77,13 +79,12 @@ public CompactSketch getResult() { * @param dstOrdered * See Destination Ordered * - * @param dstMem - * See Destination Memory. + * @param dstSeg the destination MemorySegment. * - * @return the result of this operation as a CompactSketch stored in the given dstMem, + * @return the result of this operation as a CompactSketch stored in the given dstSeg, * which can be either on or off-heap.. */ - public abstract CompactSketch getResult(boolean dstOrdered, WritableMemory dstMem); + public abstract CompactSketch getResult(boolean dstOrdered, MemorySegment dstSeg); /** * Returns true if there is a valid intersection result available @@ -131,32 +132,57 @@ public CompactSketch intersect(final Sketch a, final Sketch b) { * @param b The second sketch argument * @param dstOrdered * See Destination Ordered. - * @param dstMem - * See Destination Memory. + * @param dstSeg the destination MemorySegment. * @return the result as a CompactSketch. */ public abstract CompactSketch intersect(Sketch a, Sketch b, boolean dstOrdered, - WritableMemory dstMem); + MemorySegment dstSeg); + + /** + * Factory: Wrap an Intersection target around the given source MemorySegment containing intersection data. + * This method assumes the Default Update Seed. + * If the given source MemorySegment is read-only, the returned object will also be read-only. + * @param srcSeg The source MemorySegment image. 
+ * @return an Intersection that wraps a source MemorySegment that contains an Intersection image + */ + public static Intersection wrap(final MemorySegment srcSeg) { + return wrap(srcSeg, Util.DEFAULT_UPDATE_SEED); + } + + /** + * Factory: Wrap an Intersection target around the given source MemorySegment containing intersection data. + * If the given source MemorySegment is read-only, the returned object will also be read-only. + * @param srcSeg The source MemorySegment image. + * @param expectedSeed See seed + * @return an Intersection that wraps a source MemorySegment that contains an Intersection image + */ + public static Intersection wrap(final MemorySegment srcSeg, final long expectedSeed) { + final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE); + if (serVer != 3) { + throw new SketchesArgumentException("SerVer must be 3: " + serVer); + } + return IntersectionImpl.wrapInstance(srcSeg, expectedSeed, srcSeg.isReadOnly() ); + } // Restricted /** - * Returns the maximum lgArrLongs given the capacity of the Memory. - * @param dstMem the given Memory - * @return the maximum lgArrLongs given the capacity of the Memory + * Returns the maximum lgArrLongs given the capacity of the MemorySegment. 
+ * @param dstSeg the given MemorySegment + * @return the maximum lgArrLongs given the capacity of the MemorySegment */ - protected static int getMaxLgArrLongs(final Memory dstMem) { + protected static int getMaxLgArrLongs(final MemorySegment dstSeg) { final int preBytes = CONST_PREAMBLE_LONGS << 3; - final long cap = dstMem.getCapacity(); + final long cap = dstSeg.byteSize(); return Integer.numberOfTrailingZeros(floorPowerOf2((int)(cap - preBytes)) >>> 3); } - protected static void checkMinSizeMemory(final Memory mem) { + protected static void checkMinSizeMemorySegment(final MemorySegment seg) { final int minBytes = (CONST_PREAMBLE_LONGS << 3) + (8 << ThetaUtil.MIN_LG_ARR_LONGS);//280 - final long cap = mem.getCapacity(); + final long cap = seg.byteSize(); if (cap < minBytes) { throw new SketchesArgumentException( - "Memory must be at least " + minBytes + " bytes. Actual capacity: " + cap); + "MemorySegment must be at least " + minBytes + " bytes. Actual capacity: " + cap); } } @@ -191,19 +217,19 @@ static final long[] compactCachePart(final long[] srcCache, final int lgArrLongs return cacheOut; } - protected static void memChecks(final Memory srcMem) { + protected static void segChecks(final MemorySegment srcSeg) { //Get Preamble //Note: Intersection does not use lgNomLongs (or k), per se. 
//seedHash loaded and checked in private constructor - final int preLongs = extractPreLongs(srcMem); - final int serVer = extractSerVer(srcMem); - final int famID = extractFamilyID(srcMem); - final boolean empty = (extractFlags(srcMem) & EMPTY_FLAG_MASK) > 0; - final int curCount = extractCurCount(srcMem); + final int preLongs = extractPreLongs(srcSeg); + final int serVer = extractSerVer(srcSeg); + final int famID = extractFamilyID(srcSeg); + final boolean empty = (extractFlags(srcSeg) & EMPTY_FLAG_MASK) > 0; + final int curCount = extractCurCount(srcSeg); //Checks if (preLongs != CONST_PREAMBLE_LONGS) { throw new SketchesArgumentException( - "Memory PreambleLongs must equal " + CONST_PREAMBLE_LONGS + ": " + preLongs); + "MemorySegment PreambleLongs must equal " + CONST_PREAMBLE_LONGS + ": " + preLongs); } if (serVer != SER_VER) { throw new SketchesArgumentException("Serialization Version must equal " + SER_VER); @@ -212,7 +238,7 @@ protected static void memChecks(final Memory srcMem) { if (empty) { if (curCount != 0) { throw new SketchesArgumentException( - "srcMem empty state inconsistent with curCount: " + empty + "," + curCount); + "srcSeg empty state inconsistent with curCount: " + empty + "," + curCount); } //empty = true AND curCount_ = 0: OK } //else empty = false, curCount could be anything diff --git a/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java b/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java index ce63bd03b..6bf922be0 100644 --- a/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java +++ b/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java @@ -20,6 +20,13 @@ package org.apache.datasketches.theta; import static java.lang.Math.min; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import 
static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; +import static org.apache.datasketches.common.Util.clearBits; +import static org.apache.datasketches.common.Util.setBits; import static org.apache.datasketches.theta.PreambleUtil.EMPTY_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.FAMILY_BYTE; import static org.apache.datasketches.theta.PreambleUtil.FLAGS_BYTE; @@ -47,10 +54,11 @@ import static org.apache.datasketches.theta.PreambleUtil.setEmpty; import static org.apache.datasketches.thetacommon.HashOperations.continueCondition; import static org.apache.datasketches.thetacommon.HashOperations.hashInsertOnly; -import static org.apache.datasketches.thetacommon.HashOperations.hashInsertOnlyMemory; +import static org.apache.datasketches.thetacommon.HashOperations.hashInsertOnlyMemorySegment; import static org.apache.datasketches.thetacommon.HashOperations.hashSearch; import static org.apache.datasketches.thetacommon.HashOperations.minLgHashTableSize; +import java.lang.foreign.MemorySegment; import java.util.Arrays; import org.apache.datasketches.common.Family; @@ -58,26 +66,24 @@ import org.apache.datasketches.common.SketchesReadOnlyException; import org.apache.datasketches.common.SketchesStateException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.ThetaUtil; /** * Intersection operation for Theta Sketches. * - *

              This implementation uses data either on-heap or off-heap in a given Memory + *

              This implementation uses data either on-heap or off-heap in a given MemorySegment * that is owned and managed by the caller. - * The off-heap Memory, which if managed properly, will greatly reduce the need for + * The off-heap MemorySegment, which if managed properly, will greatly reduce the need for * the JVM to perform garbage collection.

              * * @author Lee Rhodes * @author Kevin Lang */ -class IntersectionImpl extends Intersection { +final class IntersectionImpl extends Intersection { protected final short seedHash_; protected final boolean readOnly_; //True if this sketch is to be treated as read only - protected final WritableMemory wmem_; - protected final int maxLgArrLongs_; //only used with WritableMemory, not serialized + protected final MemorySegment wseg_; + protected final int maxLgArrLongs_; //only used with MemorySegment, not serialized //Note: Intersection does not use lgNomLongs or k, per se. protected int lgArrLongs_; //current size of hash table @@ -88,28 +94,28 @@ class IntersectionImpl extends Intersection { /** * Constructor: Sets the class finals and computes, sets and checks the seedHash. - * @param wmem Can be either a Source(e.g. wrap) or Destination (new Direct) WritableMemory. + * @param wseg Can be either a Source(e.g. wrap) or Destination (new offHeap) MemorySegment. * @param seed Used to validate incoming sketch arguments. - * @param dstMemFlag The given memory is a Destination (new Direct) WritableMemory. - * @param readOnly True if memory is to be treated as read only. + * @param dstSegFlag The given MemorySegment is a Destination (new offHeap) MemorySegment. + * @param readOnly True if MemorySegment is to be treated as read only. */ - protected IntersectionImpl(final WritableMemory wmem, final long seed, final boolean dstMemFlag, + protected IntersectionImpl(final MemorySegment wseg, final long seed, final boolean dstSegFlag, final boolean readOnly) { readOnly_ = readOnly; - if (wmem != null) { - wmem_ = wmem; - if (dstMemFlag) { //DstMem: compute & store seedHash, no seedhash checking - checkMinSizeMemory(wmem); - maxLgArrLongs_ = !readOnly ? 
getMaxLgArrLongs(wmem) : 0; //Only Off Heap + if (wseg != null) { + wseg_ = wseg; + if (dstSegFlag) { //DstSeg: compute & store seedHash, no seedHash checking + checkMinSizeMemorySegment(wseg); + maxLgArrLongs_ = !readOnly ? getMaxLgArrLongs(wseg) : 0; //Only Off Heap seedHash_ = Util.computeSeedHash(seed); - wmem_.putShort(SEED_HASH_SHORT, seedHash_); - } else { //SrcMem:gets and stores the seedHash, checks mem_seedHash against the seed - seedHash_ = wmem_.getShort(SEED_HASH_SHORT); + wseg_.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, seedHash_); + } else { //SrcSeg:gets and stores the seedHash, checks seg_seedHash against the seed + seedHash_ = wseg_.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT); Util.checkSeedHashes(seedHash_, Util.computeSeedHash(seed)); //check for seed hash conflict maxLgArrLongs_ = 0; } } else { //compute & store seedHash - wmem_ = null; + wseg_ = null; maxLgArrLongs_ = 0; seedHash_ = Util.computeSeedHash(seed); } @@ -123,105 +129,101 @@ protected IntersectionImpl(final WritableMemory wmem, final long seed, final boo * @return a new IntersectionImpl on the Java heap */ static IntersectionImpl initNewHeapInstance(final long seed) { - final boolean dstMemFlag = false; + final boolean dstSegFlag = false; final boolean readOnly = false; - final IntersectionImpl impl = new IntersectionImpl(null, seed, dstMemFlag, readOnly); + final IntersectionImpl impl = new IntersectionImpl(null, seed, dstSegFlag, readOnly); impl.hardReset(); return impl; } /** - * Factory: Construct a new Intersection target direct to the given destination Memory. + * Factory: Construct a new Intersection target direct to the given destination MemorySegment. * Called by SetOperationBuilder, test. 
* * @param seed See Seed - * @param dstMem destination Memory - * See Memory + * @param dstSeg destination MemorySegment * @return a new IntersectionImpl that may be off-heap */ - static IntersectionImpl initNewDirectInstance(final long seed, final WritableMemory dstMem) { + static IntersectionImpl initNewDirectInstance(final long seed, final MemorySegment dstSeg) { //Load Preamble //Pre0 - dstMem.clear(0, CONST_PREAMBLE_LONGS << 3); - insertPreLongs(dstMem, CONST_PREAMBLE_LONGS); //RF not used = 0 - insertSerVer(dstMem, SER_VER); - insertFamilyID(dstMem, Family.INTERSECTION.getID()); + dstSeg.asSlice(0, CONST_PREAMBLE_LONGS << 3).fill((byte)0); + insertPreLongs(dstSeg, CONST_PREAMBLE_LONGS); //RF not used = 0 + insertSerVer(dstSeg, SER_VER); + insertFamilyID(dstSeg, Family.INTERSECTION.getID()); //lgNomLongs not used by Intersection //lgArrLongs set by hardReset //flags are already 0: bigEndian = readOnly = compact = ordered = empty = false; //seedHash loaded and checked in IntersectionImpl constructor //Pre1 //CurCount set by hardReset - insertP(dstMem, (float) 1.0); //not used by intersection + insertP(dstSeg, (float) 1.0); //not used by intersection //Pre2 //thetaLong set by hardReset //Initialize - final boolean dstMemFlag = true; + final boolean dstSegFlag = true; final boolean readOnly = false; - final IntersectionImpl impl = new IntersectionImpl(dstMem, seed, dstMemFlag, readOnly); + final IntersectionImpl impl = new IntersectionImpl(dstSeg, seed, dstSegFlag, readOnly); impl.hardReset(); return impl; } /** - * Factory: Heapify an intersection target from a Memory image containing data. - * @param srcMem The source Memory object. - * See Memory + * Factory: Heapify an intersection target from a MemorySegment image containing data. + * @param srcSeg The source MemorySegment object. 
* @param seed See seed * @return a IntersectionImpl instance on the Java heap */ - static IntersectionImpl heapifyInstance(final Memory srcMem, final long seed) { - final boolean dstMemFlag = false; + static IntersectionImpl heapifyInstance(final MemorySegment srcSeg, final long seed) { + final boolean dstSegFlag = false; final boolean readOnly = false; - final IntersectionImpl impl = new IntersectionImpl(null, seed, dstMemFlag, readOnly); - memChecks(srcMem); + final IntersectionImpl impl = new IntersectionImpl(null, seed, dstSegFlag, readOnly); + segChecks(srcSeg); //Initialize - impl.lgArrLongs_ = extractLgArrLongs(srcMem); - impl.curCount_ = extractCurCount(srcMem); - impl.thetaLong_ = extractThetaLong(srcMem); - impl.empty_ = (extractFlags(srcMem) & EMPTY_FLAG_MASK) > 0; + impl.lgArrLongs_ = extractLgArrLongs(srcSeg); + impl.curCount_ = extractCurCount(srcSeg); + impl.thetaLong_ = extractThetaLong(srcSeg); + impl.empty_ = (extractFlags(srcSeg) & EMPTY_FLAG_MASK) > 0; if (!impl.empty_) { if (impl.curCount_ > 0) { impl.hashTable_ = new long[1 << impl.lgArrLongs_]; - srcMem.getLongArray(CONST_PREAMBLE_LONGS << 3, impl.hashTable_, 0, 1 << impl.lgArrLongs_); + MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, impl.hashTable_, 0, 1 << impl.lgArrLongs_); } } return impl; } /** - * Factory: Wrap an Intersection target around the given source WritableMemory containing - * intersection data. - * @param srcMem The source WritableMemory image. - * See Memory + * Factory: Wrap an Intersection target around the given source MemorySegment containing intersection data. + * If the given source MemorySegment is read-only, the returned object will also be read-only. + * @param srcSeg The source MemorySegment image. 
* @param seed See seed - * @param readOnly True if memory is to be treated as read only - * @return a IntersectionImpl that wraps a source WritableMemory that contains an Intersection image + * @param readOnly True if MemorySegment is to be treated as read only + * @return a IntersectionImpl that wraps a source MemorySegment that contains an Intersection image */ static IntersectionImpl wrapInstance( - final WritableMemory srcMem, + final MemorySegment srcSeg, final long seed, final boolean readOnly) { - final boolean dstMemFlag = false; - final IntersectionImpl impl = new IntersectionImpl(srcMem, seed, dstMemFlag, readOnly); - memChecks(srcMem); - impl.lgArrLongs_ = extractLgArrLongs(srcMem); - impl.curCount_ = extractCurCount(srcMem); - impl.thetaLong_ = extractThetaLong(srcMem); - impl.empty_ = (extractFlags(srcMem) & EMPTY_FLAG_MASK) > 0; + final boolean dstSegFlag = false; + final IntersectionImpl impl = new IntersectionImpl(srcSeg, seed, dstSegFlag, readOnly); + segChecks(srcSeg); + impl.lgArrLongs_ = extractLgArrLongs(srcSeg); + impl.curCount_ = extractCurCount(srcSeg); + impl.thetaLong_ = extractThetaLong(srcSeg); + impl.empty_ = (extractFlags(srcSeg) & EMPTY_FLAG_MASK) > 0; return impl; } @Override - public CompactSketch intersect(final Sketch a, final Sketch b, final boolean dstOrdered, - final WritableMemory dstMem) { - if (wmem_ != null && readOnly_) { throw new SketchesReadOnlyException(); } + public CompactSketch intersect(final Sketch a, final Sketch b, final boolean dstOrdered, final MemorySegment dstSeg) { + if (wseg_ != null && readOnly_) { throw new SketchesReadOnlyException(); } hardReset(); intersect(a); intersect(b); - final CompactSketch csk = getResult(dstOrdered, dstMem); + final CompactSketch csk = getResult(dstOrdered, dstSeg); hardReset(); return csk; } @@ -231,7 +233,7 @@ public void intersect(final Sketch sketchIn) { if (sketchIn == null) { throw new SketchesArgumentException("Intersection argument must not be null."); } - if (wmem_ != 
null && readOnly_) { throw new SketchesReadOnlyException(); } + if (wseg_ != null && readOnly_) { throw new SketchesReadOnlyException(); } if (empty_ || sketchIn.isEmpty()) { //empty rule //Because of the def of null above and the Empty Rule (which is OR), empty_ must be true. //Whatever the current internal state, we make our local empty. @@ -242,9 +244,9 @@ public void intersect(final Sketch sketchIn) { //Set minTheta thetaLong_ = min(thetaLong_, sketchIn.getThetaLong()); //Theta rule empty_ = false; - if (wmem_ != null) { - insertThetaLong(wmem_, thetaLong_); - clearEmpty(wmem_); //false + if (wseg_ != null) { + insertThetaLong(wseg_, thetaLong_); + clearEmpty(wseg_); //false } // The truth table for the following state machine. MinTheta is set above. @@ -262,8 +264,8 @@ public void intersect(final Sketch sketchIn) { //states 1,2,3,6 if (curCount_ == 0 || sketchInEntries == 0) { curCount_ = 0; - if (wmem_ != null) { insertCurCount(wmem_, 0); } - hashTable_ = null; //No need for a HT. Don't bother clearing mem if valid + if (wseg_ != null) { insertCurCount(wseg_, 0); } + hashTable_ = null; //No need for a HT. 
Don't bother clearing seg if valid } //end of states 1,2,3,6 // state 5 @@ -273,17 +275,17 @@ else if (curCount_ < 0 && sketchInEntries > 0) { final int priorLgArrLongs = lgArrLongs_; //prior only used in error message lgArrLongs_ = requiredLgArrLongs; - if (wmem_ != null) { //Off heap, check if current dstMem is large enough - insertCurCount(wmem_, curCount_); - insertLgArrLongs(wmem_, lgArrLongs_); + if (wseg_ != null) { //Off heap, check if current dstSeg is large enough + insertCurCount(wseg_, curCount_); + insertLgArrLongs(wseg_, lgArrLongs_); if (requiredLgArrLongs <= maxLgArrLongs_) { - wmem_.clear(CONST_PREAMBLE_LONGS << 3, 8 << lgArrLongs_); //clear only what required + wseg_.asSlice(CONST_PREAMBLE_LONGS << 3, 8 << lgArrLongs_).fill((byte)0); } - else { //not enough space in dstMem + else { //not enough space in dstSeg final int requiredBytes = (8 << requiredLgArrLongs) + 24; final int givenBytes = (8 << priorLgArrLongs) + 24; throw new SketchesArgumentException( - "Insufficient internal Memory space: " + requiredBytes + " > " + givenBytes); + "Insufficient internal MemorySegment space: " + requiredBytes + " > " + givenBytes); } } else { //On the heap, allocate a HT @@ -304,7 +306,10 @@ else if (curCount_ > 0 && sketchInEntries > 0) { } @Override - public CompactSketch getResult(final boolean dstOrdered, final WritableMemory dstMem) { + MemorySegment getMemorySegment() { return wseg_; } + + @Override + public CompactSketch getResult(final boolean dstOrdered, final MemorySegment dstSeg) { if (curCount_ < 0) { throw new SketchesStateException( "Calling getResult() with no intervening intersections would represent the infinite set, " @@ -315,17 +320,17 @@ public CompactSketch getResult(final boolean dstOrdered, final WritableMemory ds if (curCount_ == 0) { compactCache = new long[0]; srcCompact = true; - srcOrdered = false; //hashTable, even tho empty + srcOrdered = false; //hashTable, even though empty return CompactOperations.componentsToCompact( 
thetaLong_, curCount_, seedHash_, empty_, srcCompact, srcOrdered, dstOrdered, - dstMem, compactCache); + dstSeg, compactCache); } //else curCount > 0 final long[] hashTable; - if (wmem_ != null) { + if (wseg_ != null) { final int htLen = 1 << lgArrLongs_; hashTable = new long[htLen]; - wmem_.getLongArray(CONST_PREAMBLE_LONGS << 3, hashTable, 0, htLen); + MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, hashTable, 0, htLen); } else { hashTable = hashTable_; } @@ -334,27 +339,27 @@ public CompactSketch getResult(final boolean dstOrdered, final WritableMemory ds srcOrdered = dstOrdered; return CompactOperations.componentsToCompact( thetaLong_, curCount_, seedHash_, empty_, srcCompact, srcOrdered, dstOrdered, - dstMem, compactCache); + dstSeg, compactCache); } @Override - public boolean hasMemory() { - return wmem_ != null; + public boolean hasMemorySegment() { + return wseg_ != null && wseg_.scope().isAlive(); } @Override public boolean hasResult() { - return hasMemory() ? wmem_.getInt(RETAINED_ENTRIES_INT) >= 0 : curCount_ >= 0; + return hasMemorySegment() ? wseg_.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT) >= 0 : curCount_ >= 0; } @Override public boolean isDirect() { - return hasMemory() ? wmem_.isDirect() : false; + return hasMemorySegment() && wseg_.isNative(); } @Override - public boolean isSameResource(final Memory that) { - return hasMemory() ? wmem_.isSameResource(that) : false; + public boolean isSameResource(final MemorySegment that) { + return hasMemorySegment() && Util.isSameResource(wseg_, that); } @Override @@ -367,28 +372,28 @@ public byte[] toByteArray() { final int preBytes = CONST_PREAMBLE_LONGS << 3; final int dataBytes = curCount_ > 0 ? 
8 << lgArrLongs_ : 0; final byte[] byteArrOut = new byte[preBytes + dataBytes]; - if (wmem_ != null) { - wmem_.getByteArray(0, byteArrOut, 0, preBytes + dataBytes); + if (wseg_ != null) { + MemorySegment.copy(wseg_, JAVA_BYTE, 0, byteArrOut, 0, preBytes + dataBytes); } else { - final WritableMemory memOut = WritableMemory.writableWrap(byteArrOut); + final MemorySegment segOut = MemorySegment.ofArray(byteArrOut); //preamble - memOut.putByte(PREAMBLE_LONGS_BYTE, (byte) CONST_PREAMBLE_LONGS); //RF not used = 0 - memOut.putByte(SER_VER_BYTE, (byte) SER_VER); - memOut.putByte(FAMILY_BYTE, (byte) Family.INTERSECTION.getID()); - memOut.putByte(LG_NOM_LONGS_BYTE, (byte) 0); //not used - memOut.putByte(LG_ARR_LONGS_BYTE, (byte) lgArrLongs_); - if (empty_) { memOut.setBits(FLAGS_BYTE, (byte) EMPTY_FLAG_MASK); } - else { memOut.clearBits(FLAGS_BYTE, (byte) EMPTY_FLAG_MASK); } - memOut.putShort(SEED_HASH_SHORT, seedHash_); - memOut.putInt(RETAINED_ENTRIES_INT, curCount_); - memOut.putFloat(P_FLOAT, (float) 1.0); - memOut.putLong(THETA_LONG, thetaLong_); + segOut.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) CONST_PREAMBLE_LONGS); //RF not used = 0 + segOut.set(JAVA_BYTE, SER_VER_BYTE, (byte) SER_VER); + segOut.set(JAVA_BYTE, FAMILY_BYTE, (byte) Family.INTERSECTION.getID()); + segOut.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte) 0); //not used + segOut.set(JAVA_BYTE, LG_ARR_LONGS_BYTE, (byte) lgArrLongs_); + if (empty_) { setBits(segOut, FLAGS_BYTE, (byte) EMPTY_FLAG_MASK); } + else { clearBits(segOut, FLAGS_BYTE, (byte) EMPTY_FLAG_MASK); } + segOut.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, seedHash_); + segOut.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, curCount_); + segOut.set(JAVA_FLOAT_UNALIGNED, P_FLOAT, (float) 1.0); + segOut.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); //data if (curCount_ > 0) { - memOut.putLongArray(preBytes, hashTable_, 0, 1 << lgArrLongs_); + MemorySegment.copy(hashTable_, 0, segOut, JAVA_LONG_UNALIGNED, preBytes, 1 << lgArrLongs_); } } return 
byteArrOut; @@ -412,13 +417,13 @@ boolean isEmpty() { @Override long[] getCache() { - if (wmem_ == null) { + if (wseg_ == null) { return hashTable_ != null ? hashTable_ : new long[0]; } - //Direct + //offHeap final int arrLongs = 1 << lgArrLongs_; final long[] outArr = new long[arrLongs]; - wmem_.getLongArray(CONST_PREAMBLE_LONGS << 3, outArr, 0, arrLongs); + MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, outArr, 0, arrLongs); return outArr; } @@ -436,10 +441,10 @@ private void performIntersect(final Sketch sketchIn) { // curCount and input data are nonzero, match against HT assert curCount_ > 0 && !empty_; final long[] hashTable; - if (wmem_ != null) { + if (wseg_ != null) { final int htLen = 1 << lgArrLongs_; hashTable = new long[htLen]; - wmem_.getLongArray(CONST_PREAMBLE_LONGS << 3, hashTable, 0, htLen); + MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, hashTable, 0, htLen); } else { hashTable = hashTable_; } @@ -463,10 +468,10 @@ private void performIntersect(final Sketch sketchIn) { //reduce effective array size to minimum curCount_ = matchSetCount; lgArrLongs_ = minLgHashTableSize(matchSetCount, ThetaUtil.REBUILD_THRESHOLD); - if (wmem_ != null) { - insertCurCount(wmem_, matchSetCount); - insertLgArrLongs(wmem_, lgArrLongs_); - wmem_.clear(CONST_PREAMBLE_LONGS << 3, 8 << lgArrLongs_); //clear for rebuild + if (wseg_ != null) { + insertCurCount(wseg_, matchSetCount); + insertLgArrLongs(wseg_, lgArrLongs_); + wseg_.asSlice(CONST_PREAMBLE_LONGS << 3, 8 << lgArrLongs_).fill((byte)0); //clear for rebuild } else { Arrays.fill(hashTable_, 0, 1 << lgArrLongs_, 0L); //clear for rebuild } @@ -483,14 +488,14 @@ private void performIntersect(final Sketch sketchIn) { private void moveDataToTgt(final long[] arr, final int count) { final int arrLongsIn = arr.length; int tmpCnt = 0; - if (wmem_ != null) { //Off Heap puts directly into mem + if (wseg_ != null) { //Off Heap puts directly into mem final int preBytes = 
CONST_PREAMBLE_LONGS << 3; final int lgArrLongs = lgArrLongs_; final long thetaLong = thetaLong_; for (int i = 0; i < arrLongsIn; i++ ) { final long hashIn = arr[i]; if (continueCondition(thetaLong, hashIn)) { continue; } - hashInsertOnlyMemory(wmem_, lgArrLongs, hashIn, preBytes); + hashInsertOnlyMemorySegment(wseg_, lgArrLongs, hashIn, preBytes); tmpCnt++; } } else { //On Heap. Assumes HT exists and is large enough @@ -507,7 +512,7 @@ private void moveDataToTgt(final long[] arr, final int count) { private void moveDataToTgt(final Sketch sketch) { final int count = sketch.getRetainedEntries(); int tmpCnt = 0; - if (wmem_ != null) { //Off Heap puts directly into mem + if (wseg_ != null) { //Off Heap puts directly into mem final int preBytes = CONST_PREAMBLE_LONGS << 3; final int lgArrLongs = lgArrLongs_; final long thetaLong = thetaLong_; @@ -515,7 +520,7 @@ private void moveDataToTgt(final Sketch sketch) { while (it.next()) { final long hash = it.get(); if (continueCondition(thetaLong, hash)) { continue; } - hashInsertOnlyMemory(wmem_, lgArrLongs, hash, preBytes); + hashInsertOnlyMemorySegment(wseg_, lgArrLongs, hash, preBytes); tmpCnt++; } } else { //On Heap. 
Assumes HT exists and is large enough @@ -532,9 +537,9 @@ private void moveDataToTgt(final Sketch sketch) { private void hardReset() { resetCommon(); - if (wmem_ != null) { - insertCurCount(wmem_, -1); //Universal Set - clearEmpty(wmem_); //false + if (wseg_ != null) { + insertCurCount(wseg_, -1); //Universal Set + clearEmpty(wseg_); //false } curCount_ = -1; //Universal Set empty_ = false; @@ -542,20 +547,20 @@ private void hardReset() { private void resetToEmpty() { resetCommon(); - if (wmem_ != null) { - insertCurCount(wmem_, 0); - setEmpty(wmem_); //true + if (wseg_ != null) { + insertCurCount(wseg_, 0); + setEmpty(wseg_); //true } curCount_ = 0; empty_ = true; } private void resetCommon() { - if (wmem_ != null) { + if (wseg_ != null) { if (readOnly_) { throw new SketchesReadOnlyException(); } - wmem_.clear(CONST_PREAMBLE_LONGS << 3, 8 << ThetaUtil.MIN_LG_ARR_LONGS); - insertLgArrLongs(wmem_, ThetaUtil.MIN_LG_ARR_LONGS); - insertThetaLong(wmem_, Long.MAX_VALUE); + wseg_.asSlice(CONST_PREAMBLE_LONGS << 3, 8 << ThetaUtil.MIN_LG_ARR_LONGS).fill((byte)0); + insertLgArrLongs(wseg_, ThetaUtil.MIN_LG_ARR_LONGS); + insertThetaLong(wseg_, Long.MAX_VALUE); } lgArrLongs_ = ThetaUtil.MIN_LG_ARR_LONGS; thetaLong_ = Long.MAX_VALUE; diff --git a/src/main/java/org/apache/datasketches/theta/JaccardSimilarity.java b/src/main/java/org/apache/datasketches/theta/JaccardSimilarity.java index 1f9e710c0..a058f649a 100644 --- a/src/main/java/org/apache/datasketches/theta/JaccardSimilarity.java +++ b/src/main/java/org/apache/datasketches/theta/JaccardSimilarity.java @@ -37,6 +37,8 @@ public final class JaccardSimilarity { private static final double[] ZEROS = {0.0, 0.0, 0.0}; // LB, Estimate, UB private static final double[] ONES = {1.0, 1.0, 1.0}; + private JaccardSimilarity() { } + /** * Computes the Jaccard similarity index with upper and lower bounds. 
The Jaccard similarity index * J(A,B) = (A ^ B)/(A U B) is used to measure how similar the two sketches are to each diff --git a/src/main/java/org/apache/datasketches/theta/MemoryCompactCompressedHashIterator.java b/src/main/java/org/apache/datasketches/theta/MemoryCompactCompressedHashIterator.java deleted file mode 100644 index d5f37de96..000000000 --- a/src/main/java/org/apache/datasketches/theta/MemoryCompactCompressedHashIterator.java +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta; - -import static org.apache.datasketches.theta.PreambleUtil.wholeBytesToHoldBits; - -import org.apache.datasketches.memory.Memory; - -/* - * This is to uncompress serial version 4 sketch incrementally - */ -class MemoryCompactCompressedHashIterator implements HashIterator { - private Memory mem; - private int offset; - private int entryBits; - private int numEntries; - private int index; - private long previous; - private int offsetBits; - private long[] buffer; - private byte[] bytes; - private boolean isBlockMode; - private boolean isFirstUnpack1; - - MemoryCompactCompressedHashIterator( - final Memory mem, - final int offset, - final int entryBits, - final int numEntries - ) { - this.mem = mem; - this.offset = offset; - this.entryBits = entryBits; - this.numEntries = numEntries; - index = -1; - previous = 0; - offsetBits = 0; - buffer = new long[8]; - bytes = new byte[entryBits]; - isBlockMode = numEntries >= 8; - isFirstUnpack1 = true; - } - - @Override - public long get() { - return buffer[index & 7]; - } - - @Override - public boolean next() { - if (++index == numEntries) { return false; } - if (isBlockMode) { - if ((index & 7) == 0) { - if (numEntries - index >= 8) { - unpack8(); - } else { - isBlockMode = false; - unpack1(); - } - } - } else { - unpack1(); - } - return true; - } - - private void unpack1() { - if (isFirstUnpack1) { - mem.getByteArray(offset, bytes, 0, wholeBytesToHoldBits((numEntries - index) * entryBits)); - offset = 0; - isFirstUnpack1 = false; - } - final int i = index & 7; - BitPacking.unpackBits(buffer, i, entryBits, bytes, offset, offsetBits); - offset += (offsetBits + entryBits) >>> 3; - offsetBits = (offsetBits + entryBits) & 7; - buffer[i] += previous; - previous = buffer[i]; - } - - private void unpack8() { - mem.getByteArray(offset, bytes, 0, entryBits); - BitPacking.unpackBitsBlock8(buffer, 0, bytes, 0, entryBits); - offset += entryBits; - for (int i = 0; i < 8; i++) { - buffer[i] += 
previous; - previous = buffer[i]; - } - } -} diff --git a/src/main/java/org/apache/datasketches/theta/MemoryHashIterator.java b/src/main/java/org/apache/datasketches/theta/MemoryHashIterator.java deleted file mode 100644 index 926d0ad9e..000000000 --- a/src/main/java/org/apache/datasketches/theta/MemoryHashIterator.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta; - -import org.apache.datasketches.memory.Memory; - -/** - * @author Lee Rhodes - */ -class MemoryHashIterator implements HashIterator { - private Memory mem; - private int arrLongs; - private long thetaLong; - private long offsetBytes; - private int index; - private long hash; - - MemoryHashIterator(final Memory mem, final int arrLongs, final long thetaLong) { - this.mem = mem; - this.arrLongs = arrLongs; - this.thetaLong = thetaLong; - offsetBytes = PreambleUtil.extractPreLongs(mem) << 3; - index = -1; - hash = 0; - } - - @Override - public long get() { - return hash; - } - - @Override - public boolean next() { - while (++index < arrLongs) { - hash = mem.getLong(offsetBytes + (index << 3)); - if ((hash != 0) && (hash < thetaLong)) { - return true; - } - } - return false; - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/MemorySegmentCompactCompressedHashIterator.java b/src/main/java/org/apache/datasketches/theta/MemorySegmentCompactCompressedHashIterator.java similarity index 96% rename from src/main/java/org/apache/datasketches/theta2/MemorySegmentCompactCompressedHashIterator.java rename to src/main/java/org/apache/datasketches/theta/MemorySegmentCompactCompressedHashIterator.java index 89804f63b..031cee687 100644 --- a/src/main/java/org/apache/datasketches/theta2/MemorySegmentCompactCompressedHashIterator.java +++ b/src/main/java/org/apache/datasketches/theta/MemorySegmentCompactCompressedHashIterator.java @@ -17,10 +17,10 @@ * under the License. 
*/ -package org.apache.datasketches.theta2; +package org.apache.datasketches.theta; import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.wholeBytesToHoldBits; +import static org.apache.datasketches.theta.PreambleUtil.wholeBytesToHoldBits; import java.lang.foreign.MemorySegment; diff --git a/src/main/java/org/apache/datasketches/theta2/MemorySegmentHashIterator.java b/src/main/java/org/apache/datasketches/theta/MemorySegmentHashIterator.java similarity index 97% rename from src/main/java/org/apache/datasketches/theta2/MemorySegmentHashIterator.java rename to src/main/java/org/apache/datasketches/theta/MemorySegmentHashIterator.java index aefc44ffd..548c79ef3 100644 --- a/src/main/java/org/apache/datasketches/theta2/MemorySegmentHashIterator.java +++ b/src/main/java/org/apache/datasketches/theta/MemorySegmentHashIterator.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.datasketches.theta2; +package org.apache.datasketches.theta; import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; diff --git a/src/main/java/org/apache/datasketches/theta/PreambleUtil.java b/src/main/java/org/apache/datasketches/theta/PreambleUtil.java index de8d8dca9..ed368bd8b 100644 --- a/src/main/java/org/apache/datasketches/theta/PreambleUtil.java +++ b/src/main/java/org/apache/datasketches/theta/PreambleUtil.java @@ -19,17 +19,21 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; import static org.apache.datasketches.common.Util.LS; import static org.apache.datasketches.common.Util.zeroPad; +import java.lang.foreign.MemorySegment; import java.nio.ByteOrder; import 
org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; //@formatter:off @@ -197,14 +201,13 @@ private PreambleUtil() {} (ByteOrder.nativeOrder() == ByteOrder.BIG_ENDIAN); /** - * Computes the number of bytes required for a non-full sized sketch in hash-table form. - * This can be used to compute current storage size for heap sketches, or current off-heap memory - * required for off-heap (direct) sketches. This does not apply for compact sketches. + * Computes the number of bytes required for an updatable sketch using a hash-table cache. + * This does not apply for compact sketches. * @param lgArrLongs log2(current hash-table size) * @param preambleLongs current preamble size * @return the size in bytes */ - static final int getMemBytes(final int lgArrLongs, final int preambleLongs) { + static final int getSegBytes(final int lgArrLongs, final int preambleLongs) { return (8 << lgArrLongs) + (preambleLongs << 3); } @@ -218,31 +221,31 @@ static final int getMemBytes(final int lgArrLongs, final int preambleLongs) { * @return the summary preamble string. */ static String preambleToString(final byte[] byteArr) { - final Memory mem = Memory.wrap(byteArr); - return preambleToString(mem); + final MemorySegment seg = MemorySegment.ofArray(byteArr); + return preambleToString(seg); } /** - * Returns a human readable string summary of the preamble state of the given Memory. - * Note: other than making sure that the given Memory size is large + * Returns a human readable string summary of the preamble state of the given MemorySegment. 
+ * Note: other than making sure that the given MemorySegment size is large * enough for just the preamble, this does not do much value checking of the contents of the * preamble as this is primarily a tool for debugging the preamble visually. * - * @param mem the given Memory. + * @param seg the given MemorySegment. * @return the summary preamble string. */ - static String preambleToString(final Memory mem) { - final int preLongs = getAndCheckPreLongs(mem); - final int rfId = extractLgResizeFactor(mem); + static String preambleToString(final MemorySegment seg) { + final int preLongs = getAndCheckPreLongs(seg); + final int rfId = extractLgResizeFactor(seg); final ResizeFactor rf = ResizeFactor.getRF(rfId); - final int serVer = extractSerVer(mem); - final int familyId = extractFamilyID(mem); + final int serVer = extractSerVer(seg); + final int familyId = extractFamilyID(seg); final Family family = Family.idToFamily(familyId); - final int lgNomLongs = extractLgNomLongs(mem); - final int lgArrLongs = extractLgArrLongs(mem); + final int lgNomLongs = extractLgNomLongs(seg); + final int lgArrLongs = extractLgArrLongs(seg); //Flags - final int flags = extractFlags(mem); + final int flags = extractFlags(seg); final String flagsStr = (flags) + ", 0x" + (Integer.toHexString(flags)) + ", " + zeroPad(Integer.toBinaryString(flags), 8); final String nativeOrder = ByteOrder.nativeOrder().toString(); @@ -253,7 +256,7 @@ static String preambleToString(final Memory mem) { final boolean ordered = (flags & ORDERED_FLAG_MASK) > 0; final boolean singleItem = (flags & SINGLEITEM_FLAG_MASK) > 0; //!empty && (preLongs == 1); - final int seedHash = extractSeedHash(mem); + final int seedHash = extractSeedHash(seg); //assumes preLongs == 1; empty or singleItem int curCount = singleItem ? 
1 : 0; @@ -262,20 +265,20 @@ static String preambleToString(final Memory mem) { long thetaULong = thetaLong; //preLongs 1, 2 or 3 if (preLongs == 2) { //exact (non-estimating) CompactSketch - curCount = extractCurCount(mem); - p = extractP(mem); + curCount = extractCurCount(seg); + p = extractP(seg); } else if (preLongs == 3) { //Update Sketch - curCount = extractCurCount(mem); - p = extractP(mem); - thetaLong = extractThetaLong(mem); + curCount = extractCurCount(seg); + p = extractP(seg); + thetaLong = extractThetaLong(seg); thetaULong = thetaLong; } else if (preLongs == 4) { //Union - curCount = extractCurCount(mem); - p = extractP(mem); - thetaLong = extractThetaLong(mem); - thetaULong = extractUnionThetaLong(mem); + curCount = extractCurCount(seg); + p = extractP(seg); + thetaLong = extractThetaLong(seg); + thetaULong = extractUnionThetaLong(seg); } //else the same as an empty sketch or singleItem @@ -339,171 +342,171 @@ else if (preLongs == 3) { sb.append( "Preamble Bytes : ").append(preLongs * 8).append(LS); sb.append( "Data Bytes : ").append(curCount * 8).append(LS); sb.append( "TOTAL Sketch Bytes : ").append((preLongs + curCount) * 8).append(LS); - sb.append( "TOTAL Capacity Bytes : ").append(mem.getCapacity()).append(LS); + sb.append( "TOTAL Capacity Bytes : ").append(seg.byteSize()).append(LS); sb.append("### END SKETCH PREAMBLE SUMMARY").append(LS); return sb.toString(); } //@formatter:on - static int extractPreLongs(final Memory mem) { - return mem.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; + static int extractPreLongs(final MemorySegment seg) { + return seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; } - static int extractLgResizeFactor(final Memory mem) { - return (mem.getByte(PREAMBLE_LONGS_BYTE) >>> LG_RESIZE_FACTOR_BIT) & 0X3; + static int extractLgResizeFactor(final MemorySegment seg) { + return (seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) >>> LG_RESIZE_FACTOR_BIT) & 0X3; } - static int extractLgResizeRatioV1(final Memory mem) { - return 
mem.getByte(LG_RESIZE_RATIO_BYTE_V1) & 0X3; + static int extractLgResizeRatioV1(final MemorySegment seg) { + return seg.get(JAVA_BYTE, LG_RESIZE_RATIO_BYTE_V1) & 0X3; } - static int extractSerVer(final Memory mem) { - return mem.getByte(SER_VER_BYTE) & 0XFF; + static int extractSerVer(final MemorySegment seg) { + return seg.get(JAVA_BYTE, SER_VER_BYTE) & 0XFF; } - static int extractFamilyID(final Memory mem) { - return mem.getByte(FAMILY_BYTE) & 0XFF; + static int extractFamilyID(final MemorySegment seg) { + return seg.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF; } - static int extractLgNomLongs(final Memory mem) { - return mem.getByte(LG_NOM_LONGS_BYTE) & 0XFF; + static int extractLgNomLongs(final MemorySegment seg) { + return seg.get(JAVA_BYTE, LG_NOM_LONGS_BYTE) & 0XFF; } - static int extractLgArrLongs(final Memory mem) { - return mem.getByte(LG_ARR_LONGS_BYTE) & 0XFF; + static int extractLgArrLongs(final MemorySegment seg) { + return seg.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF; } - static int extractFlags(final Memory mem) { - return mem.getByte(FLAGS_BYTE) & 0XFF; + static int extractFlags(final MemorySegment seg) { + return seg.get(JAVA_BYTE, FLAGS_BYTE) & 0XFF; } - static int extractFlagsV1(final Memory mem) { - return mem.getByte(FLAGS_BYTE_V1) & 0XFF; + static int extractFlagsV1(final MemorySegment seg) { + return seg.get(JAVA_BYTE, FLAGS_BYTE_V1) & 0XFF; } - static int extractSeedHash(final Memory mem) { - return mem.getShort(SEED_HASH_SHORT) & 0XFFFF; + static int extractSeedHash(final MemorySegment seg) { + return seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT) & 0XFFFF; } - static int extractCurCount(final Memory mem) { - return mem.getInt(RETAINED_ENTRIES_INT); + static int extractCurCount(final MemorySegment seg) { + return seg.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT); } - static float extractP(final Memory mem) { - return mem.getFloat(P_FLOAT); + static float extractP(final MemorySegment seg) { + return seg.get(JAVA_FLOAT_UNALIGNED, P_FLOAT); } - static 
long extractThetaLong(final Memory mem) { - return mem.getLong(THETA_LONG); + static long extractThetaLong(final MemorySegment seg) { + return seg.get(JAVA_LONG_UNALIGNED, THETA_LONG); } - static long extractUnionThetaLong(final Memory mem) { - return mem.getLong(UNION_THETA_LONG); + static long extractUnionThetaLong(final MemorySegment seg) { + return seg.get(JAVA_LONG_UNALIGNED, UNION_THETA_LONG); } - static int extractEntryBitsV4(final Memory mem) { - return mem.getByte(ENTRY_BITS_BYTE_V4) & 0XFF; + static int extractEntryBitsV4(final MemorySegment seg) { + return seg.get(JAVA_BYTE, ENTRY_BITS_BYTE_V4) & 0XFF; } - static int extractNumEntriesBytesV4(final Memory mem) { - return mem.getByte(NUM_ENTRIES_BYTES_BYTE_V4) & 0XFF; + static int extractNumEntriesBytesV4(final MemorySegment seg) { + return seg.get(JAVA_BYTE, NUM_ENTRIES_BYTES_BYTE_V4) & 0XFF; } - static long extractThetaLongV4(final Memory mem) { - return mem.getLong(THETA_LONG_V4); + static long extractThetaLongV4(final MemorySegment seg) { + return seg.get(JAVA_LONG_UNALIGNED, THETA_LONG_V4); } /** * Sets PreLongs in the low 6 bits and sets LgRF in the upper 2 bits = 0. - * @param wmem the target WritableMemory + * @param seg the target MemorySegment * @param preLongs the given number of preamble longs */ - static void insertPreLongs(final WritableMemory wmem, final int preLongs) { - wmem.putByte(PREAMBLE_LONGS_BYTE, (byte) (preLongs & 0X3F)); + static void insertPreLongs(final MemorySegment seg, final int preLongs) { + seg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) (preLongs & 0X3F)); } /** * Sets the top 2 lgRF bits and does not affect the lower 6 bits (PreLongs). * To work properly, this should be called after insertPreLongs(). 
- * @param wmem the target WritableMemory + * @param seg the target MemorySegment * @param rf the given lgRF bits */ - static void insertLgResizeFactor(final WritableMemory wmem, final int rf) { - final int curByte = wmem.getByte(PREAMBLE_LONGS_BYTE) & 0xFF; + static void insertLgResizeFactor(final MemorySegment seg, final int rf) { + final int curByte = seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0xFF; final int shift = LG_RESIZE_FACTOR_BIT; // shift in bits final int mask = 3; final byte newByte = (byte) (((rf & mask) << shift) | (~(mask << shift) & curByte)); - wmem.putByte(PREAMBLE_LONGS_BYTE, newByte); + seg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, newByte); } - static void insertSerVer(final WritableMemory wmem, final int serVer) { - wmem.putByte(SER_VER_BYTE, (byte) serVer); + static void insertSerVer(final MemorySegment seg, final int serVer) { + seg.set(JAVA_BYTE, SER_VER_BYTE, (byte) serVer); } - static void insertFamilyID(final WritableMemory wmem, final int famId) { - wmem.putByte(FAMILY_BYTE, (byte) famId); + static void insertFamilyID(final MemorySegment seg, final int famId) { + seg.set(JAVA_BYTE, FAMILY_BYTE, (byte) famId); } - static void insertLgNomLongs(final WritableMemory wmem, final int lgNomLongs) { - wmem.putByte(LG_NOM_LONGS_BYTE, (byte) lgNomLongs); + static void insertLgNomLongs(final MemorySegment seg, final int lgNomLongs) { + seg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte) lgNomLongs); } - static void insertLgArrLongs(final WritableMemory wmem, final int lgArrLongs) { - wmem.putByte(LG_ARR_LONGS_BYTE, (byte) lgArrLongs); + static void insertLgArrLongs(final MemorySegment seg, final int lgArrLongs) { + seg.set(JAVA_BYTE, LG_ARR_LONGS_BYTE, (byte) lgArrLongs); } - static void insertFlags(final WritableMemory wmem, final int flags) { - wmem.putByte(FLAGS_BYTE, (byte) flags); + static void insertFlags(final MemorySegment seg, final int flags) { + seg.set(JAVA_BYTE, FLAGS_BYTE, (byte) flags); } - static void insertSeedHash(final WritableMemory wmem, 
final int seedHash) { - wmem.putShort(SEED_HASH_SHORT, (short) seedHash); + static void insertSeedHash(final MemorySegment seg, final int seedHash) { + seg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, (short) seedHash); } - static void insertCurCount(final WritableMemory wmem, final int curCount) { - wmem.putInt(RETAINED_ENTRIES_INT, curCount); + static void insertCurCount(final MemorySegment seg, final int curCount) { + seg.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, curCount); } - static void insertP(final WritableMemory wmem, final float p) { - wmem.putFloat(P_FLOAT, p); + static void insertP(final MemorySegment seg, final float p) { + seg.set(JAVA_FLOAT_UNALIGNED, P_FLOAT, p); } - static void insertThetaLong(final WritableMemory wmem, final long thetaLong) { - wmem.putLong(THETA_LONG, thetaLong); + static void insertThetaLong(final MemorySegment seg, final long thetaLong) { + seg.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong); } - static void insertUnionThetaLong(final WritableMemory wmem, final long unionThetaLong) { - wmem.putLong(UNION_THETA_LONG, unionThetaLong); + static void insertUnionThetaLong(final MemorySegment seg, final long unionThetaLong) { + seg.set(JAVA_LONG_UNALIGNED, UNION_THETA_LONG, unionThetaLong); } - static void setEmpty(final WritableMemory wmem) { - int flags = wmem.getByte(FLAGS_BYTE) & 0XFF; + static void setEmpty(final MemorySegment seg) { + int flags = seg.get(JAVA_BYTE, FLAGS_BYTE) & 0XFF; flags |= EMPTY_FLAG_MASK; - wmem.putByte(FLAGS_BYTE, (byte) flags); + seg.set(JAVA_BYTE, FLAGS_BYTE, (byte) flags); } - static void clearEmpty(final WritableMemory wmem) { - int flags = wmem.getByte(FLAGS_BYTE) & 0XFF; + static void clearEmpty(final MemorySegment seg) { + int flags = seg.get(JAVA_BYTE, FLAGS_BYTE) & 0XFF; flags &= ~EMPTY_FLAG_MASK; - wmem.putByte(FLAGS_BYTE, (byte) flags); + seg.set(JAVA_BYTE, FLAGS_BYTE, (byte) flags); } - static boolean isEmptyFlag(final Memory mem) { - return ((extractFlags(mem) & EMPTY_FLAG_MASK) > 0); + 
static boolean isEmptyFlag(final MemorySegment seg) { + return ((extractFlags(seg) & EMPTY_FLAG_MASK) > 0); } /** - * Checks Memory for capacity to hold the preamble and returns the extracted preLongs. - * @param mem the given Memory + * Checks MemorySegment for capacity to hold the preamble and returns the extracted preLongs. + * @param seg the given MemorySegment * @return the extracted prelongs value. */ - static int getAndCheckPreLongs(final Memory mem) { - final long cap = mem.getCapacity(); + static int getAndCheckPreLongs(final MemorySegment seg) { + final long cap = seg.byteSize(); if (cap < 8) { throwNotBigEnough(cap, 8); } - final int preLongs = extractPreLongs(mem); + final int preLongs = extractPreLongs(seg); final int required = Math.max(preLongs << 3, 8); if (cap < required) { throwNotBigEnough(cap, required); @@ -511,15 +514,15 @@ static int getAndCheckPreLongs(final Memory mem) { return preLongs; } - static final short checkMemorySeedHash(final Memory mem, final long seed) { - final short seedHashMem = (short) extractSeedHash(mem); - Util.checkSeedHashes(seedHashMem, Util.computeSeedHash(seed)); //throws if bad seedHash - return seedHashMem; + static final short checkSegmentSeedHash(final MemorySegment seg, final long seed) { + final short seedHashSeg = (short) extractSeedHash(seg); + Util.checkSeedHashes(seedHashSeg, Util.computeSeedHash(seed)); //throws if bad seedHash + return seedHashSeg; } private static void throwNotBigEnough(final long cap, final int required) { throw new SketchesArgumentException( - "Possible Corruption: Size of byte array or Memory not large enough: Size: " + cap + "Possible Corruption: Size of byte array or MemorySegment not large enough: Size: " + cap + ", Required: " + required); } diff --git a/src/main/java/org/apache/datasketches/theta/Rebuilder.java b/src/main/java/org/apache/datasketches/theta/Rebuilder.java index c2e78edab..54b1e190b 100644 --- a/src/main/java/org/apache/datasketches/theta/Rebuilder.java +++ 
b/src/main/java/org/apache/datasketches/theta/Rebuilder.java @@ -19,6 +19,8 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static org.apache.datasketches.common.QuickSelect.selectExcludingZeros; import static org.apache.datasketches.theta.PreambleUtil.LG_ARR_LONGS_BYTE; import static org.apache.datasketches.theta.PreambleUtil.extractCurCount; @@ -28,14 +30,18 @@ import static org.apache.datasketches.theta.PreambleUtil.insertLgArrLongs; import static org.apache.datasketches.theta.PreambleUtil.insertThetaLong; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.HashOperations; /** - * Handles common resize, rebuild and move operations. - * The Memory based operations assume a specific data structure that is unique to the theta sketches. + * This class performs resize, rebuild and move operations where the input and output are Theta sketch images in MemorySegments. + * + *

              NOTE: These operations copy data from the input MemorySegment into local arrays, perform the required operations on the + * arrays, and then copy the result to the destination MemorySegment. Attempting to perform these operations directly on the + * MemorySegments would be slower due to MemorySegment internal checks. Meanwhile, the bulk copies performed by the MemorySegments are + * vectorized at the machine level and are quite fast. Measurements reveal that this is a good tradeoff.

              * * @author Lee Rhodes */ @@ -44,73 +50,63 @@ final class Rebuilder { private Rebuilder() {} /** - * Rebuild the hashTable in the given Memory at its current size. Changes theta and thus count. - * This assumes a Memory preamble of standard form with correct values of curCount and thetaLong. + * Rebuild the hashTable in the given MemorySegment at its current size. Changes theta and thus count. + * This assumes a MemorySegment preamble of standard form with correct values of curCount and thetaLong. * ThetaLong and curCount will change. - * Afterwards, caller must update local class members curCount and thetaLong from Memory. + * Afterwards, caller must update local class members curCount and thetaLong from MemorySegment. * - * @param mem the Memory the given Memory + * @param seg the given MemorySegment * @param preambleLongs size of preamble in longs * @param lgNomLongs the log_base2 of k, the configuration parameter of the sketch */ - static final void quickSelectAndRebuild(final WritableMemory mem, final int preambleLongs, - final int lgNomLongs) { - //Note: This copies the Memory data onto the heap and then at the end copies the result - // back to Memory. Even if we tried to do this directly into Memory it would require pre-clearing, - // and the internal loops would be slower. The bulk copies are performed at a low level and - // are quite fast. Measurements reveal that we are not paying much of a penalty. 
- - //Pull data into tmp arr for QS algo - final int lgArrLongs = extractLgArrLongs(mem); - final int curCount = extractCurCount(mem); + static final void quickSelectAndRebuild(final MemorySegment seg, final int preambleLongs, final int lgNomLongs) { + + //Copy data from input segment into local buffer array for QS algorithm + final int lgArrLongs = extractLgArrLongs(seg); final int arrLongs = 1 << lgArrLongs; final long[] tmpArr = new long[arrLongs]; final int preBytes = preambleLongs << 3; - mem.getLongArray(preBytes, tmpArr, 0, arrLongs); //copy mem data to tmpArr + MemorySegment.copy(seg, JAVA_LONG_UNALIGNED, preBytes, tmpArr, 0, arrLongs); //Do the QuickSelect on a tmp arr to create new thetaLong final int pivot = (1 << lgNomLongs) + 1; // (K+1) pivot for QS - final long newThetaLong = selectExcludingZeros(tmpArr, curCount, pivot); - insertThetaLong(mem, newThetaLong); //UPDATE thetalong + final long newThetaLong = selectExcludingZeros(tmpArr, extractCurCount(seg), pivot); + insertThetaLong(seg, newThetaLong); //UPDATE thetaLong //Rebuild to clean up dirty data, update count final long[] tgtArr = new long[arrLongs]; final int newCurCount = HashOperations.hashArrayInsert(tmpArr, tgtArr, lgArrLongs, newThetaLong); - insertCurCount(mem, newCurCount); //UPDATE curCount + insertCurCount(seg, newCurCount); //UPDATE curCount - //put the rebuilt array back into memory - mem.putLongArray(preBytes, tgtArr, 0, arrLongs); + //put the rebuilt array back into MemorySegment + MemorySegment.copy(tgtArr, 0, seg, JAVA_LONG_UNALIGNED, preBytes, arrLongs); } /** - * Moves me (the entire updatable sketch) to a new larger Memory location and rebuilds the hash table. - * This assumes a Memory preamble of standard form with the correct value of thetaLong. - * Afterwards, the caller must update the local Memory reference, lgArrLongs - * and hashTableThreshold from the dstMemory and free the source Memory. 
+ * Moves me (the entire updatable sketch) to a new larger MemorySegment location and rebuilds the hash table. + * This assumes a MemorySegment preamble of standard form with the correct value of thetaLong. + * Afterwards, the caller must update the local MemorySegment reference, lgArrLongs + * and hashTableThreshold from the destination MemorySegment and free the source MemorySegment. * - * @param srcMem the source Memory + * @param srcSeg the source MemorySegment * @param preambleLongs size of preamble in longs * @param srcLgArrLongs size (log_base2) of source hash table - * @param dstMem the destination Memory, which may be garbage + * @param dstSeg the destination MemorySegment, which may be garbage * @param dstLgArrLongs the destination hash table target size * @param thetaLong theta as a long */ - static final void moveAndResize(final Memory srcMem, final int preambleLongs, - final int srcLgArrLongs, final WritableMemory dstMem, final int dstLgArrLongs, final long thetaLong) { - //Note: This copies the Memory data onto the heap and then at the end copies the result - // back to Memory. Even if we tried to do this directly into Memory it would require pre-clearing, - // and the internal loops would be slower. The bulk copies are performed at a low level and - // are quite fast. Measurements reveal that we are not paying much of a penalty. 
- - //Move Preamble to destination memory + static final void moveAndResize(final MemorySegment srcSeg, final int preambleLongs, + final int srcLgArrLongs, final MemorySegment dstSeg, final int dstLgArrLongs, final long thetaLong) { + + //Move Preamble to destination MemorySegment final int preBytes = preambleLongs << 3; - srcMem.copyTo(0, dstMem, 0, preBytes); //copy the preamble + MemorySegment.copy(srcSeg, 0, dstSeg, 0, preBytes); - //Bulk copy source to on-heap buffer + //Bulk copy source Hash Table to local buffer array final int srcHTLen = 1 << srcLgArrLongs; final long[] srcHTArr = new long[srcHTLen]; - srcMem.getLongArray(preBytes, srcHTArr, 0, srcHTLen); + MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preBytes, srcHTArr, 0, srcHTLen); //Create destination buffer final int dstHTLen = 1 << dstLgArrLongs; @@ -119,51 +115,51 @@ static final void moveAndResize(final Memory srcMem, final int preambleLongs, //Rebuild hash table in destination buffer HashOperations.hashArrayInsert(srcHTArr, dstHTArr, dstLgArrLongs, thetaLong); - //Bulk copy to destination memory - dstMem.putLongArray(preBytes, dstHTArr, 0, dstHTLen); - dstMem.putByte(LG_ARR_LONGS_BYTE, (byte)dstLgArrLongs); //update in dstMem + //Bulk copy to destination MemorySegment + MemorySegment.copy(dstHTArr, 0, dstSeg, JAVA_LONG_UNALIGNED, preBytes, dstHTLen); + dstSeg.set(JAVA_BYTE, LG_ARR_LONGS_BYTE, (byte)dstLgArrLongs); //update lgArrLongs in dstSeg } /** - * Resizes existing hash array into a larger one within a single Memory assuming enough space. - * This assumes a Memory preamble of standard form with the correct value of thetaLong. - * The Memory lgArrLongs will change. - * Afterwards, the caller must update local copies of lgArrLongs and hashTableThreshold from - * Memory. + * Resizes existing hash array into a larger one within a single MemorySegment, assuming enough space. + * This assumes a preamble of standard form with the correct value of thetaLong. + * The lgArrLongs will change. 
+ * Afterwards, the caller must update the caller's local copies of lgArrLongs and hashTableThreshold + * from the returned MemorySegment. * - * @param mem the Memory + * @param seg the source and destination MemorySegment * @param preambleLongs the size of the preamble in longs * @param srcLgArrLongs the size of the source hash table * @param tgtLgArrLongs the LgArrLongs value for the new hash table */ - static final void resize(final WritableMemory mem, final int preambleLongs, + static final void resize(final MemorySegment seg, final int preambleLongs, final int srcLgArrLongs, final int tgtLgArrLongs) { - //Note: This copies the Memory data onto the heap and then at the end copies the result - // back to Memory. Even if we tried to do this directly into Memory it would require pre-clearing, - // and the internal loops would be slower. The bulk copies are performed at a low level and - // are quite fast. Measurements reveal that we are not paying much of a penalty. //Preamble stays in place final int preBytes = preambleLongs << 3; + //Bulk copy source to on-heap buffer final int srcHTLen = 1 << srcLgArrLongs; //current value final long[] srcHTArr = new long[srcHTLen]; //on-heap src buffer - mem.getLongArray(preBytes, srcHTArr, 0, srcHTLen); + //seg.getLongArray(preBytes, srcHTArr, 0, srcHTLen); + MemorySegment.copy(seg, JAVA_LONG_UNALIGNED, preBytes, srcHTArr, 0, srcHTLen); + //Create destination on-heap buffer final int dstHTLen = 1 << tgtLgArrLongs; final long[] dstHTArr = new long[dstHTLen]; //on-heap dst buffer + //Rebuild hash table in destination buffer - final long thetaLong = extractThetaLong(mem); - HashOperations.hashArrayInsert(srcHTArr, dstHTArr, tgtLgArrLongs, thetaLong); - //Bulk copy to destination memory - mem.putLongArray(preBytes, dstHTArr, 0, dstHTLen); //put it back, no need to clear - insertLgArrLongs(mem, tgtLgArrLongs); //update in mem + HashOperations.hashArrayInsert(srcHTArr, dstHTArr, tgtLgArrLongs, extractThetaLong(seg)); + + //Bulk 
copy to destination segment + MemorySegment.copy(dstHTArr, 0, seg, JAVA_LONG_UNALIGNED, preBytes, dstHTLen); + insertLgArrLongs(seg, tgtLgArrLongs); //update in mem } /** * Returns the actual log2 Resize Factor that can be used to grow the hash table. This will be * an integer value between zero and the given lgRF, inclusive; - * @param capBytes the current memory capacity in bytes + * @param capBytes the current MemorySegment capacity in bytes * @param lgArrLongs the current lg hash table size in longs * @param preLongs the current preamble size in longs * @param lgRF the configured lg Resize Factor diff --git a/src/main/java/org/apache/datasketches/theta/SetOperation.java b/src/main/java/org/apache/datasketches/theta/SetOperation.java index 54797dc88..126a9298a 100644 --- a/src/main/java/org/apache/datasketches/theta/SetOperation.java +++ b/src/main/java/org/apache/datasketches/theta/SetOperation.java @@ -19,24 +19,25 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; import static org.apache.datasketches.common.Family.idToFamily; import static org.apache.datasketches.common.Util.ceilingPowerOf2; import static org.apache.datasketches.theta.PreambleUtil.FAMILY_BYTE; import static org.apache.datasketches.theta.PreambleUtil.SER_VER_BYTE; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.MemoryStatus; +import org.apache.datasketches.common.MemorySegmentStatus; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; /** * The parent API for all Set Operations * * @author Lee Rhodes */ -public abstract class SetOperation implements MemoryStatus { +public abstract class SetOperation implements MemorySegmentStatus { static final int CONST_PREAMBLE_LONGS = 3; /** @@ -54,45 +55,43 @@ public static 
final SetOperationBuilder builder() { } /** - * Heapify takes the SetOperations image in Memory and instantiates an on-heap + * Heapify takes the SetOperations image in MemorySegment and instantiates an on-heap * SetOperation using the * Default Update Seed. - * The resulting SetOperation will not retain any link to the source Memory. + * The resulting SetOperation will not retain any link to the source MemorySegment. * *

              Note: Only certain set operators during stateful operations can be serialized and thus * heapified.

              * - * @param srcMem an image of a SetOperation where the image seed hash matches the default seed hash. - * See Memory - * @return a Heap-based SetOperation from the given Memory + * @param srcSeg an image of a SetOperation where the image seed hash matches the default seed hash. + * @return a Heap-based SetOperation from the given MemorySegment */ - public static SetOperation heapify(final Memory srcMem) { - return heapify(srcMem, Util.DEFAULT_UPDATE_SEED); + public static SetOperation heapify(final MemorySegment srcSeg) { + return heapify(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** - * Heapify takes the SetOperation image in Memory and instantiates an on-heap + * Heapify takes the SetOperation image in MemorySegment and instantiates an on-heap * SetOperation using the given expectedSeed. - * The resulting SetOperation will not retain any link to the source Memory. + * The resulting SetOperation will not retain any link to the source MemorySegment. * *

              Note: Only certain set operators during stateful operations can be serialized and thus * heapified.

              * - * @param srcMem an image of a SetOperation where the hash of the given expectedSeed matches the image seed hash. - * See Memory - * @param expectedSeed the seed used to validate the given Memory image. + * @param srcSeg an image of a SetOperation where the hash of the given expectedSeed matches the image seed hash. + * @param expectedSeed the seed used to validate the given MemorySegment image. * See Update Hash Seed. - * @return a Heap-based SetOperation from the given Memory + * @return a Heap-based SetOperation from the given MemorySegment */ - public static SetOperation heapify(final Memory srcMem, final long expectedSeed) { - final byte famID = srcMem.getByte(FAMILY_BYTE); + public static SetOperation heapify(final MemorySegment srcSeg, final long expectedSeed) { + final byte famID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE); final Family family = idToFamily(famID); switch (family) { case UNION : { - return UnionImpl.heapifyInstance(srcMem, expectedSeed); + return UnionImpl.heapifyInstance(srcSeg, expectedSeed); } case INTERSECTION : { - return IntersectionImpl.heapifyInstance(srcMem, expectedSeed); + return IntersectionImpl.heapifyInstance(srcSeg, expectedSeed); } default: { throw new SketchesArgumentException("SetOperation cannot heapify family: " @@ -102,104 +101,51 @@ public static SetOperation heapify(final Memory srcMem, final long expectedSeed) } /** - * Wrap takes the SetOperation image in Memory and refers to it directly. + * Wrap takes the SetOperation image in MemorySegment and refers to it directly. * There is no data copying onto the java heap. - * This method assumes the - * Default Update Seed. + * This method assumes the Default Update Seed. + * If the given source MemorySegment is read-only, the returned object will also be read-only. * - *

              Note: Only certain set operators during stateful operations can be serialized and thus - * wrapped.

              + *

              Note: Only certain set operators during stateful operations can be serialized and thus wrapped.

              * - * @param srcMem an image of a SetOperation where the image seed hash matches the default seed hash. - * See Memory - * @return a SetOperation backed by the given Memory + * @param srcSeg an image of a SetOperation where the image seed hash matches the default seed hash. + * @return a SetOperation backed by the given MemorySegment */ - public static SetOperation wrap(final Memory srcMem) { - return wrap(srcMem, Util.DEFAULT_UPDATE_SEED); + public static SetOperation wrap(final MemorySegment srcSeg) { + return wrap(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** - * Wrap takes the SetOperation image in Memory and refers to it directly. + * Wrap takes the SetOperation image in MemorySegment and refers to it directly. * There is no data copying onto the java heap. + * If the given source MemorySegment is read-only, the returned object will also be read-only. * - *

              Note: Only certain set operators during stateful operations can be serialized and thus - * wrapped.

              + *

              Note: Only certain set operators during stateful operations can be serialized and thus wrapped.

              * - * @param srcMem an image of a SetOperation where the hash of the given expectedSeed matches the image seed hash. - * See Memory - * @param expectedSeed the seed used to validate the given Memory image. + * @param srcSeg an image of a SetOperation where the hash of the given expectedSeed matches the image seed hash. + * @param expectedSeed the seed used to validate the given MemorySegment image. * See Update Hash Seed. - * @return a SetOperation backed by the given Memory + * @return a SetOperation backed by the given MemorySegment */ - public static SetOperation wrap(final Memory srcMem, final long expectedSeed) { - final byte famID = srcMem.getByte(FAMILY_BYTE); + public static SetOperation wrap(final MemorySegment srcSeg, final long expectedSeed) { + final byte famID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE); final Family family = idToFamily(famID); - final int serVer = srcMem.getByte(SER_VER_BYTE); + final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE); if (serVer != 3) { throw new SketchesArgumentException("SerVer must be 3: " + serVer); } switch (family) { case UNION : { - return UnionImpl.wrapInstance(srcMem, expectedSeed); + return UnionImpl.wrapInstance(srcSeg, expectedSeed); } case INTERSECTION : { - return IntersectionImpl.wrapInstance((WritableMemory)srcMem, expectedSeed, true); + return IntersectionImpl.wrapInstance(srcSeg, expectedSeed, srcSeg.isReadOnly() ); } default: throw new SketchesArgumentException("SetOperation cannot wrap family: " + family.toString()); } } - /** - * Wrap takes the SetOperation image in Memory and refers to it directly. - * There is no data copying onto the java heap. - * This method assumes the - * Default Update Seed. - * - *

              Note: Only certain set operators during stateful operations can be serialized and thus - * wrapped.

              - * - * @param srcMem an image of a SetOperation where the image seed hash matches the default seed hash. - * See Memory - * @return a SetOperation backed by the given Memory - */ - public static SetOperation wrap(final WritableMemory srcMem) { - return wrap(srcMem, Util.DEFAULT_UPDATE_SEED); - } - - /** - * Wrap takes the SetOperation image in Memory and refers to it directly. - * There is no data copying onto the java heap. - * - *

              Note: Only certain set operators during stateful operations can be serialized and thus - * wrapped.

              - * - * @param srcMem an image of a SetOperation where the hash of the given expectedSeed matches the image seed hash. - * See Memory - * @param expectedSeed the seed used to validate the given Memory image. - * See Update Hash Seed. - * @return a SetOperation backed by the given Memory - */ - public static SetOperation wrap(final WritableMemory srcMem, final long expectedSeed) { - final byte famID = srcMem.getByte(FAMILY_BYTE); - final Family family = idToFamily(famID); - final int serVer = srcMem.getByte(SER_VER_BYTE); - if (serVer != 3) { - throw new SketchesArgumentException("SerVer must be 3: " + serVer); - } - switch (family) { - case UNION : { - return UnionImpl.wrapInstance(srcMem, expectedSeed); - } - case INTERSECTION : { - return IntersectionImpl.wrapInstance(srcMem, expectedSeed, false); - } - default: - throw new SketchesArgumentException("SetOperation cannot wrap family: " - + family.toString()); - } - } - /** * Returns the maximum required storage bytes given a nomEntries parameter for Union operations * @param nomEntries Nominal Entries @@ -251,6 +197,12 @@ public static int getMaxAnotBResultBytes(final int nomEntries) { */ abstract long[] getCache(); + /** + * Returns the backing MemorySegment object if it exists, otherwise null. + * @return the backing MemorySegment object if it exists, otherwise null. + */ + MemorySegment getMemorySegment() { return null; } + /** * Gets the current count of retained entries. * This is only useful during stateful operations. @@ -274,6 +226,12 @@ public static int getMaxAnotBResultBytes(final int nomEntries) { */ abstract long getThetaLong(); + @Override + public abstract boolean hasMemorySegment(); + + @Override + public abstract boolean isDirect(); + /** * Returns true if this set operator is empty. * Only useful during stateful operations. 
@@ -282,4 +240,7 @@ public static int getMaxAnotBResultBytes(final int nomEntries) { */ abstract boolean isEmpty(); + @Override + public abstract boolean isSameResource(final MemorySegment seg); + } diff --git a/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java b/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java index a978e3c7b..d4af63681 100644 --- a/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java +++ b/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java @@ -23,13 +23,12 @@ import static org.apache.datasketches.common.Util.TAB; import static org.apache.datasketches.common.Util.ceilingPowerOf2; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.DefaultMemoryRequestServer; -import org.apache.datasketches.memory.MemoryRequestServer; -import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.ThetaUtil; /** @@ -37,12 +36,11 @@ * * @author Lee Rhodes */ -public class SetOperationBuilder { +public final class SetOperationBuilder { private int bLgNomLongs; private long bSeed; private ResizeFactor bRF; private float bP; - private MemoryRequestServer bMemReqSvr; /** * Constructor for building a new SetOperation. The default configuration is @@ -52,7 +50,7 @@ public class SetOperationBuilder { *
            • Seed: {@value org.apache.datasketches.common.Util#DEFAULT_UPDATE_SEED}
            • *
            • {@link ResizeFactor#X8}
            • *
            • Input Sampling Probability: 1.0
            • - *
            • Memory: null
            • + *
            • MemorySegment: null
            • *
            */ public SetOperationBuilder() { @@ -60,7 +58,6 @@ public SetOperationBuilder() { bSeed = Util.DEFAULT_UPDATE_SEED; bP = (float) 1.0; bRF = ResizeFactor.X8; - bMemReqSvr = new DefaultMemoryRequestServer(); } /** @@ -161,24 +158,6 @@ public ResizeFactor getResizeFactor() { return bRF; } - /** - * Set the MemoryRequestServer - * @param memReqSvr the given MemoryRequestServer - * @return this SetOperationBuilder - */ - public SetOperationBuilder setMemoryRequestServer(final MemoryRequestServer memReqSvr) { - bMemReqSvr = memReqSvr; - return this; - } - - /** - * Returns the MemoryRequestServer - * @return the MemoryRequestServer - */ - public MemoryRequestServer getMemoryRequestServer() { - return bMemReqSvr; - } - /** * Returns a SetOperation with the current configuration of this Builder and the given Family. * @param family the chosen SetOperation family @@ -190,34 +169,34 @@ public SetOperation build(final Family family) { /** * Returns a SetOperation with the current configuration of this Builder, the given Family - * and the given destination memory. Note that the destination memory cannot be used with AnotB. + * and the given destination MemorySegment. Note that the destination MemorySegment cannot be used with AnotB. * @param family the chosen SetOperation family - * @param dstMem The destination Memory. + * @param dstSeg The destination MemorySegment. 
* @return a SetOperation */ - public SetOperation build(final Family family, final WritableMemory dstMem) { + public SetOperation build(final Family family, final MemorySegment dstSeg) { SetOperation setOp = null; switch (family) { case UNION: { - if (dstMem == null) { + if (dstSeg == null) { setOp = UnionImpl.initNewHeapInstance(bLgNomLongs, bSeed, bP, bRF); } else { - setOp = UnionImpl.initNewDirectInstance(bLgNomLongs, bSeed, bP, bRF, bMemReqSvr, dstMem); + setOp = UnionImpl.initNewDirectInstance(bLgNomLongs, bSeed, bP, bRF, dstSeg); } break; } case INTERSECTION: { - if (dstMem == null) { + if (dstSeg == null) { setOp = IntersectionImpl.initNewHeapInstance(bSeed); } else { - setOp = IntersectionImpl.initNewDirectInstance(bSeed, dstMem); + setOp = IntersectionImpl.initNewDirectInstance(bSeed, dstSeg); } break; } case A_NOT_B: { - if (dstMem == null) { + if (dstSeg == null) { setOp = new AnotBimpl(bSeed); } else { @@ -245,12 +224,12 @@ public Union buildUnion() { /** * Convenience method, returns a configured SetOperation Union with * Default Nominal Entries - * and the given destination memory. - * @param dstMem The destination Memory. + * and the given destination MemorySegment. + * @param dstSeg The destination MemorySegment. * @return a Union object */ - public Union buildUnion(final WritableMemory dstMem) { - return (Union) build(Family.UNION, dstMem); + public Union buildUnion(final MemorySegment dstSeg) { + return (Union) build(Family.UNION, dstSeg); } /** @@ -265,12 +244,12 @@ public Intersection buildIntersection() { /** * Convenience method, returns a configured SetOperation Intersection with * Default Nominal Entries - * and the given destination memory. - * @param dstMem The destination Memory. + * and the given destination MemorySegment. + * @param dstSeg The destination MemorySegment. 
* @return an Intersection object */ - public Intersection buildIntersection(final WritableMemory dstMem) { - return (Intersection) build(Family.INTERSECTION, dstMem); + public Intersection buildIntersection(final MemorySegment dstSeg) { + return (Intersection) build(Family.INTERSECTION, dstSeg); } /** @@ -291,8 +270,6 @@ public String toString() { sb.append("Seed:").append(TAB).append(bSeed).append(LS); sb.append("p:").append(TAB).append(bP).append(LS); sb.append("ResizeFactor:").append(TAB).append(bRF).append(LS); - final String mrsStr = bMemReqSvr.getClass().getSimpleName(); - sb.append("MemoryRequestServer:").append(TAB).append(mrsStr).append(LS); return sb.toString(); } diff --git a/src/main/java/org/apache/datasketches/theta/SingleItemSketch.java b/src/main/java/org/apache/datasketches/theta/SingleItemSketch.java index 5bef18392..c6ed7ec52 100644 --- a/src/main/java/org/apache/datasketches/theta/SingleItemSketch.java +++ b/src/main/java/org/apache/datasketches/theta/SingleItemSketch.java @@ -19,6 +19,7 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.datasketches.common.ByteArrayUtil.putLongLE; import static org.apache.datasketches.hash.MurmurHash3.hash; @@ -29,11 +30,11 @@ import static org.apache.datasketches.theta.PreambleUtil.extractSeedHash; import static org.apache.datasketches.theta.PreambleUtil.extractSerVer; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; /** * A CompactSketch that holds only one item hash. 
@@ -75,26 +76,26 @@ private SingleItemSketch(final long hash) { } /** - * Creates a SingleItemSketch on the heap given a SingleItemSketch Memory image and a seedHash. - * Checks the seed hash of the given Memory against the given seedHash. - * @param srcMem the Memory to be heapified. - * @param expectedSeedHash the given seedHash to be checked against the srcMem seedHash + * Creates a SingleItemSketch on the heap given a SingleItemSketch MemorySegment image and a seedHash. + * Checks the seed hash of the given MemorySegment against the given seedHash. + * @param srcSeg the MemorySegment to be heapified. + * @param expectedSeedHash the given seedHash to be checked against the srcSeg seedHash * @return a SingleItemSketch */ //does not override Sketch - static SingleItemSketch heapify(final Memory srcMem, final short expectedSeedHash) { - Util.checkSeedHashes((short) extractSeedHash(srcMem), expectedSeedHash); - final boolean singleItem = otherCheckForSingleItem(srcMem); - if (singleItem) { return new SingleItemSketch(srcMem.getLong(8), expectedSeedHash); } - throw new SketchesArgumentException("Input Memory is not a SingleItemSketch."); + static SingleItemSketch heapify(final MemorySegment srcSeg, final short expectedSeedHash) { + Util.checkSeedHashes((short) extractSeedHash(srcSeg), expectedSeedHash); + final boolean singleItem = otherCheckForSingleItem(srcSeg); + if (singleItem) { return new SingleItemSketch(srcSeg.get(JAVA_LONG_UNALIGNED, 8), expectedSeedHash); } + throw new SketchesArgumentException("Input MemorySegment is not a SingleItemSketch."); } @Override - public CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem) { - if (dstMem == null) { return this; } + public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) { + if (dstSeg == null) { return this; } else { - dstMem.putLong(0, pre0_); - dstMem.putLong(8, hash_); - return new DirectCompactSketch(dstMem); + dstSeg.set(JAVA_LONG_UNALIGNED, 0, pre0_); + 
dstSeg.set(JAVA_LONG_UNALIGNED, 8, hash_); + return new DirectCompactSketch(dstSeg); } } @@ -378,19 +379,14 @@ int getCurrentPreambleLongs() { return 1; } - @Override - Memory getMemory() { - return null; - } - @Override short getSeedHash() { return (short) (pre0_ >>> 48); } - static final boolean otherCheckForSingleItem(final Memory mem) { - return otherCheckForSingleItem(extractPreLongs(mem), extractSerVer(mem), - extractFamilyID(mem), extractFlags(mem) ); + static final boolean otherCheckForSingleItem(final MemorySegment seg) { + return otherCheckForSingleItem(extractPreLongs(seg), extractSerVer(seg), + extractFamilyID(seg), extractFlags(seg) ); } static final boolean otherCheckForSingleItem(final int preLongs, final int serVer, diff --git a/src/main/java/org/apache/datasketches/theta/Sketch.java b/src/main/java/org/apache/datasketches/theta/Sketch.java index 8370b295c..6310d82c4 100644 --- a/src/main/java/org/apache/datasketches/theta/Sketch.java +++ b/src/main/java/org/apache/datasketches/theta/Sketch.java @@ -19,6 +19,7 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; import static org.apache.datasketches.common.Family.idToFamily; import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; import static org.apache.datasketches.common.Util.LS; @@ -31,12 +32,12 @@ import static org.apache.datasketches.theta.PreambleUtil.SER_VER_BYTE; import static org.apache.datasketches.thetacommon.HashOperations.count; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.MemoryStatus; +import org.apache.datasketches.common.MemorySegmentStatus; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.BinomialBoundsN; import 
org.apache.datasketches.thetacommon.ThetaUtil; @@ -46,64 +47,62 @@ * * @author Lee Rhodes */ -public abstract class Sketch implements MemoryStatus { - static final int DEFAULT_LG_RESIZE_FACTOR = 3; //Unique to Heap +public abstract class Sketch implements MemorySegmentStatus { Sketch() {} //public static factory constructor-type methods /** - * Heapify takes the sketch image in Memory and instantiates an on-heap Sketch. + * Heapify takes the sketch image in MemorySegment and instantiates an on-heap Sketch. * - *

            The resulting sketch will not retain any link to the source Memory.

            + *

            The resulting sketch will not retain any link to the source MemorySegment.

            * *

            For Update Sketches this method checks if the * Default Update Seed

            - * was used to create the source Memory image. + * was used to create the source MemorySegment image. * *

            For Compact Sketches this method assumes that the sketch image was created with the * correct hash seed, so it is not checked.

            * - * @param srcMem an image of a Sketch. - * See Memory. + * @param srcSeg an image of a Sketch. + * * @return a Sketch on the heap. */ - public static Sketch heapify(final Memory srcMem) { - final byte familyID = srcMem.getByte(FAMILY_BYTE); + public static Sketch heapify(final MemorySegment srcSeg) { + final byte familyID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE); final Family family = idToFamily(familyID); if (family == Family.COMPACT) { - return CompactSketch.heapify(srcMem); + return CompactSketch.heapify(srcSeg); } - return heapifyUpdateFromMemory(srcMem, Util.DEFAULT_UPDATE_SEED); + return heapifyUpdateFromMemorySegment(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** - * Heapify takes the sketch image in Memory and instantiates an on-heap Sketch. + * Heapify takes the sketch image in MemorySegment and instantiates an on-heap Sketch. * - *

            The resulting sketch will not retain any link to the source Memory.

            + *

            The resulting sketch will not retain any link to the source MemorySegment.

            * *

            For Update and Compact Sketches this method checks if the given expectedSeed was used to - * create the source Memory image. However, SerialVersion 1 sketches cannot be checked.

            + * create the source MemorySegment image. However, SerialVersion 1 sketches cannot be checked.

            * - * @param srcMem an image of a Sketch that was created using the given expectedSeed. - * See Memory. - * @param expectedSeed the seed used to validate the given Memory image. + * @param srcSeg an image of a Sketch that was created using the given expectedSeed. + * @param expectedSeed the seed used to validate the given MemorySegment image. * See Update Hash Seed. * Compact sketches store a 16-bit hash of the seed, but not the seed itself. * @return a Sketch on the heap. */ - public static Sketch heapify(final Memory srcMem, final long expectedSeed) { - final byte familyID = srcMem.getByte(FAMILY_BYTE); + public static Sketch heapify(final MemorySegment srcSeg, final long expectedSeed) { + final byte familyID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE); final Family family = idToFamily(familyID); if (family == Family.COMPACT) { - return CompactSketch.heapify(srcMem, expectedSeed); + return CompactSketch.heapify(srcSeg, expectedSeed); } - return heapifyUpdateFromMemory(srcMem, expectedSeed); + return heapifyUpdateFromMemorySegment(srcSeg, expectedSeed); } /** - * Wrap takes the sketch image in the given Memory and refers to it directly. + * Wrap takes the sketch image in the given MemorySegment and refers to it directly. * There is no data copying onto the java heap. * The wrap operation enables fast read-only merging and access to all the public read-only API. * @@ -115,41 +114,40 @@ public static Sketch heapify(final Memory srcMem, final long expectedSeed) { * *

            Wrapping any subclass of this class that is empty or contains only a single item will * result in on-heap equivalent forms of empty and single item sketch respectively. - * This is actually faster and consumes less overall memory.

            + * This is actually faster and consumes less overall space.

            * *

            For Update Sketches this method checks if the * Default Update Seed

            - * was used to create the source Memory image. + * was used to create the source MemorySegment image. * *

            For Compact Sketches this method assumes that the sketch image was created with the * correct hash seed, so it is not checked.

            * - * @param srcMem an image of a Sketch. - * See Memory. - * @return a Sketch backed by the given Memory - */ - public static Sketch wrap(final Memory srcMem) { - final int preLongs = srcMem.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; - final int serVer = srcMem.getByte(SER_VER_BYTE) & 0XFF; - final int familyID = srcMem.getByte(FAMILY_BYTE) & 0XFF; + * @param srcSeg an image of a Sketch. + * @return a Sketch backed by the given MemorySegment + */ + public static Sketch wrap(final MemorySegment srcSeg) { + final int preLongs = srcSeg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; + final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE) & 0XFF; + final int familyID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF; final Family family = Family.idToFamily(familyID); if (family == Family.QUICKSELECT) { if (serVer == 3 && preLongs == 3) { - return DirectQuickSelectSketchR.readOnlyWrap(srcMem, Util.DEFAULT_UPDATE_SEED); + return DirectQuickSelectSketchR.readOnlyWrap(srcSeg, Util.DEFAULT_UPDATE_SEED); } else { throw new SketchesArgumentException( "Corrupted: " + family + " family image: must have SerVer = 3 and preLongs = 3"); } } if (family == Family.COMPACT) { - return CompactSketch.wrap(srcMem); + return CompactSketch.wrap(srcSeg); } throw new SketchesArgumentException( "Cannot wrap family: " + family + " as a Sketch"); } /** - * Wrap takes the sketch image in the given Memory and refers to it directly. + * Wrap takes the sketch image in the given MemorySegment and refers to it directly. * There is no data copying onto the java heap. * The wrap operation enables fast read-only merging and access to all the public read-only API. * @@ -161,32 +159,31 @@ public static Sketch wrap(final Memory srcMem) { * *

            Wrapping any subclass of this class that is empty or contains only a single item will * result in on-heap equivalent forms of empty and single item sketch respectively. - * This is actually faster and consumes less overall memory.

            + * This is actually faster and consumes less overall space.

            * *

            For Update and Compact Sketches this method checks if the given expectedSeed was used to - * create the source Memory image. However, SerialVersion 1 sketches cannot be checked.

            + * create the source MemorySegment image. However, SerialVersion 1 sketches cannot be checked.

            * - * @param srcMem an image of a Sketch. - * See Memory - * @param expectedSeed the seed used to validate the given Memory image. + * @param srcSeg a MemorySegment with an image of a Sketch. + * @param expectedSeed the seed used to validate the given MemorySegment image. * See Update Hash Seed. - * @return a UpdateSketch backed by the given Memory except as above. + * @return a UpdateSketch backed by the given MemorySegment except as above. */ - public static Sketch wrap(final Memory srcMem, final long expectedSeed) { - final int preLongs = srcMem.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; - final int serVer = srcMem.getByte(SER_VER_BYTE) & 0XFF; - final int familyID = srcMem.getByte(FAMILY_BYTE) & 0XFF; + public static Sketch wrap(final MemorySegment srcSeg, final long expectedSeed) { + final int preLongs = srcSeg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; + final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE) & 0XFF; + final int familyID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF; final Family family = Family.idToFamily(familyID); if (family == Family.QUICKSELECT) { if (serVer == 3 && preLongs == 3) { - return DirectQuickSelectSketchR.readOnlyWrap(srcMem, expectedSeed); + return DirectQuickSelectSketchR.readOnlyWrap(srcSeg, expectedSeed); } else { throw new SketchesArgumentException( "Corrupted: " + family + " family image: must have SerVer = 3 and preLongs = 3"); } } if (family == Family.COMPACT) { - return CompactSketch.wrap(srcMem, expectedSeed); + return CompactSketch.wrap(srcSeg, expectedSeed); } throw new SketchesArgumentException( "Cannot wrap family: " + family + " as a Sketch"); @@ -199,7 +196,7 @@ public static Sketch wrap(final Memory srcMem, final long expectedSeed) { * *

            If this.isCompact() == true this method returns this, * otherwise, this method is equivalent to - * {@link #compact(boolean, WritableMemory) compact(true, null)}. + * {@link #compact(boolean, MemorySegment) compact(true, null)}. * *

            A CompactSketch is always immutable.

            * @@ -224,9 +221,9 @@ public CompactSketch compact() { *

            A CompactSketch is always immutable.

            * *

            A new CompactSketch object is created:

            - *
            • if dstMem != null
            • - *
            • if dstMem == null and this.hasMemory() == true
            • - *
            • if dstMem == null and this has more than 1 item and this.isOrdered() == false + *
              • if dstSeg!= null
              • + *
              • if dstSeg == null and this.hasMemorySegment() == true
              • + *
              • if dstSeg == null and this has more than 1 item and this.isOrdered() == false * and dstOrdered == true.
              • *
              * @@ -235,12 +232,12 @@ public CompactSketch compact() { * @param dstOrdered assumed true if this sketch is empty or has only one value * See Destination Ordered * - * @param dstMem - * See Destination Memory. + * @param dstSeg + * See Destination MemorySegment. * * @return this sketch as a CompactSketch. */ - public abstract CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem); + public abstract CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg); /** * Returns the number of storage bytes required for this Sketch if its current state were @@ -349,12 +346,12 @@ public int getRetainedEntries() { public abstract int getRetainedEntries(boolean valid); /** - * Returns the serialization version from the given Memory - * @param mem the sketch Memory - * @return the serialization version from the Memory + * Returns the serialization version from the given MemorySegment + * @param seg the sketch MemorySegment + * @return the serialization version from the MemorySegment */ - public static int getSerializationVersion(final Memory mem) { - return mem.getByte(SER_VER_BYTE); + public static int getSerializationVersion(final MemorySegment seg) { + return seg.get(JAVA_BYTE, SER_VER_BYTE); } /** @@ -538,19 +535,19 @@ public static String toString(final byte[] byteArr) { } /** - * Returns a human readable string of the preamble of a Memory image of a Theta Sketch. - * @param mem the given Memory object - * @return a human readable string of the preamble of a Memory image of a Theta Sketch. + * Returns a human readable string of the preamble of a MemorySegment image of a Theta Sketch. + * @param seg the given MemorySegment object + * @return a human readable string of the preamble of a MemorySegment image of a Theta Sketch. 
*/ - public static String toString(final Memory mem) { - return PreambleUtil.preambleToString(mem); + public static String toString(final MemorySegment seg) { + return PreambleUtil.preambleToString(seg); } //Restricted methods /** * Gets the internal cache array. For on-heap sketches this will return a reference to the actual - * cache array. For Memory-based sketches this returns a copy. + * cache array. For MemorySegment-based sketches this returns a copy. * @return the internal cache array. */ abstract long[] getCache(); @@ -575,10 +572,11 @@ public static String toString(final Memory mem) { abstract int getCurrentPreambleLongs(); /** - * Returns the backing Memory object if it exists, otherwise null. - * @return the backing Memory object if it exists, otherwise null. + * Returns the backing MemorySegment object if it exists, otherwise null. + * This is overridden where relevant. + * @return the backing MemorySegment object if it exists, otherwise null. */ - abstract Memory getMemory(); + MemorySegment getMemorySegment() { return null; } /** * Gets the 16-bit seed hash @@ -598,20 +596,20 @@ static final boolean isValidSketchID(final int id) { } /** - * Checks Ordered and Compact flags for integrity between sketch and Memory + * Checks Ordered and Compact flags for integrity between sketch and a MemorySegment * @param sketch the given sketch */ - static final void checkSketchAndMemoryFlags(final Sketch sketch) { - final Memory mem = sketch.getMemory(); - if (mem == null) { return; } - final int flags = PreambleUtil.extractFlags(mem); + static final void checkSketchAndMemorySegmentFlags(final Sketch sketch) { + final MemorySegment seg = sketch.getMemorySegment(); + if (seg == null) { return; } + final int flags = PreambleUtil.extractFlags(seg); if ((flags & COMPACT_FLAG_MASK) > 0 ^ sketch.isCompact()) { throw new SketchesArgumentException("Possible corruption: " - + "Memory Compact Flag inconsistent with Sketch"); + + "MemorySegment Compact Flag inconsistent with 
Sketch"); } if ((flags & ORDERED_FLAG_MASK) > 0 ^ sketch.isOrdered()) { throw new SketchesArgumentException("Possible corruption: " - + "Memory Ordered Flag inconsistent with Sketch"); + + "MemorySegment Ordered Flag inconsistent with Sketch"); } } @@ -636,32 +634,32 @@ private static final boolean estMode(final long thetaLong, final boolean empty) } /** - * Instantiates a Heap Update Sketch from Memory. Only SerVer3. SerVer 1 & 2 already handled. - * @param srcMem See Memory - * @param expectedSeed the seed used to validate the given Memory image. + * Instantiates a Heap Update Sketch from MemorySegment. Only SerVer3. SerVer 1 & 2 already handled. + * @param srcSeg the source MemorySegment + * @param expectedSeed the seed used to validate the given MemorySegment image. * See Update Hash Seed. * @return a Sketch */ - private static final Sketch heapifyUpdateFromMemory(final Memory srcMem, final long expectedSeed) { - final long cap = srcMem.getCapacity(); + private static final Sketch heapifyUpdateFromMemorySegment(final MemorySegment srcSeg, final long expectedSeed) { + final long cap = srcSeg.byteSize(); if (cap < 8) { throw new SketchesArgumentException( "Corrupted: valid sketch must be at least 8 bytes."); } - final byte familyID = srcMem.getByte(FAMILY_BYTE); + final byte familyID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE); final Family family = idToFamily(familyID); if (family == Family.ALPHA) { - final int flags = PreambleUtil.extractFlags(srcMem); + final int flags = PreambleUtil.extractFlags(srcSeg); final boolean compactFlag = (flags & COMPACT_FLAG_MASK) != 0; if (compactFlag) { throw new SketchesArgumentException( "Corrupted: ALPHA family image: cannot be compact"); } - return HeapAlphaSketch.heapifyInstance(srcMem, expectedSeed); + return HeapAlphaSketch.heapifyInstance(srcSeg, expectedSeed); } if (family == Family.QUICKSELECT) { - return HeapQuickSelectSketch.heapifyInstance(srcMem, expectedSeed); + return HeapQuickSelectSketch.heapifyInstance(srcSeg, 
expectedSeed); } throw new SketchesArgumentException( "Sketch cannot heapify family: " + family + " as a Sketch"); diff --git a/src/main/java/org/apache/datasketches/theta/Sketches.java b/src/main/java/org/apache/datasketches/theta/Sketches.java index 359a3eb87..40c7ccf86 100644 --- a/src/main/java/org/apache/datasketches/theta/Sketches.java +++ b/src/main/java/org/apache/datasketches/theta/Sketches.java @@ -19,6 +19,9 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static org.apache.datasketches.theta.PreambleUtil.EMPTY_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.FAMILY_BYTE; import static org.apache.datasketches.theta.PreambleUtil.FLAGS_BYTE; @@ -27,11 +30,11 @@ import static org.apache.datasketches.theta.PreambleUtil.SER_VER_BYTE; import static org.apache.datasketches.theta.PreambleUtil.THETA_LONG; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; /** * This class brings together the common sketch and set operation creation methods and @@ -44,31 +47,31 @@ public final class Sketches { private Sketches() {} /** - * Gets the unique count estimate from a valid memory image of a Sketch - * @param srcMem See Memory + * Gets the unique count estimate from a valid MemorySegment image of a Sketch + * @param srcSeg the source MemorySegment * @return the sketch's best estimate of the cardinality of the input stream. 
*/ - public static double getEstimate(final Memory srcMem) { - checkIfValidThetaSketch(srcMem); - return Sketch.estimate(getThetaLong(srcMem), getRetainedEntries(srcMem)); + public static double getEstimate(final MemorySegment srcSeg) { + checkIfValidThetaSketch(srcSeg); + return Sketch.estimate(getThetaLong(srcSeg), getRetainedEntries(srcSeg)); } /** - * Gets the approximate lower error bound from a valid memory image of a Sketch + * Gets the approximate lower error bound from a valid MemorySegment image of a Sketch * given the specified number of Standard Deviations. * This will return getEstimate() if isEmpty() is true. * * @param numStdDev * See Number of Standard Deviations - * @param srcMem See Memory + * @param srcSeg the source MemorySegment * @return the lower bound. */ - public static double getLowerBound(final int numStdDev, final Memory srcMem) { - return Sketch.lowerBound(getRetainedEntries(srcMem), getThetaLong(srcMem), numStdDev, getEmpty(srcMem)); + public static double getLowerBound(final int numStdDev, final MemorySegment srcSeg) { + return Sketch.lowerBound(getRetainedEntries(srcSeg), getThetaLong(srcSeg), numStdDev, getEmpty(srcSeg)); } /** - * Ref: {@link SetOperation#getMaxAnotBResultBytes(int)}. + * Convenience method, ref: {@link SetOperation#getMaxAnotBResultBytes(int)}. * Returns the maximum number of bytes for the returned CompactSketch, given the maximum * value of nomEntries of the first sketch A of AnotB. 
* @param maxNomEntries the given value @@ -102,7 +105,7 @@ public static int getCompactSketchMaxBytes(final int lgNomEntries) { } /** - * Ref: {@link SetOperation#getMaxIntersectionBytes(int)} + * Convenience method, ref: {@link SetOperation#getMaxIntersectionBytes(int)} * @param nomEntries Ref: {@link SetOperation#getMaxIntersectionBytes(int)}, {@code nomEntries} * @return Ref: {@link SetOperation#getMaxIntersectionBytes(int)} */ @@ -111,7 +114,7 @@ public static int getMaxIntersectionBytes(final int nomEntries) { } /** - * Ref: {@link SetOperation#getMaxUnionBytes(int)} + * Convenience method, ref: {@link SetOperation#getMaxUnionBytes(int)} * @param nomEntries Ref: {@link SetOperation#getMaxUnionBytes(int)}, {@code nomEntries} * @return Ref: {@link SetOperation#getMaxUnionBytes(int)} */ @@ -120,7 +123,7 @@ public static int getMaxUnionBytes(final int nomEntries) { } /** - * Ref: {@link Sketch#getMaxUpdateSketchBytes(int)} + * Convenience method, ref: {@link Sketch#getMaxUpdateSketchBytes(int)} * @param nomEntries Ref: {@link Sketch#getMaxUpdateSketchBytes(int)}, {@code nomEntries} * @return Ref: {@link Sketch#getMaxUpdateSketchBytes(int)} */ @@ -129,133 +132,132 @@ public static int getMaxUpdateSketchBytes(final int nomEntries) { } /** - * Ref: {@link Sketch#getSerializationVersion(Memory)} - * @param srcMem Ref: {@link Sketch#getSerializationVersion(Memory)}, {@code srcMem} - * @return Ref: {@link Sketch#getSerializationVersion(Memory)} + * Convenience method, ref: {@link Sketch#getSerializationVersion(MemorySegment)} + * @param srcSeg Ref: {@link Sketch#getSerializationVersion(MemorySegment)}, {@code srcSeg} + * @return Ref: {@link Sketch#getSerializationVersion(MemorySegment)} */ - public static int getSerializationVersion(final Memory srcMem) { - return Sketch.getSerializationVersion(srcMem); + public static int getSerializationVersion(final MemorySegment srcSeg) { + return Sketch.getSerializationVersion(srcSeg); } /** - * Gets the approximate upper error 
bound from a valid memory image of a Sketch + * Gets the approximate upper error bound from a valid MemorySegment image of a Sketch * given the specified number of Standard Deviations. * This will return getEstimate() if isEmpty() is true. * * @param numStdDev * See Number of Standard Deviations - * @param srcMem - * See Memory + * @param srcSeg the source MemorySegment * @return the upper bound. */ - public static double getUpperBound(final int numStdDev, final Memory srcMem) { - return Sketch.upperBound(getRetainedEntries(srcMem), getThetaLong(srcMem), numStdDev, getEmpty(srcMem)); + public static double getUpperBound(final int numStdDev, final MemorySegment srcSeg) { + return Sketch.upperBound(getRetainedEntries(srcSeg), getThetaLong(srcSeg), numStdDev, getEmpty(srcSeg)); } //Heapify Operations /** - * Ref: {@link CompactSketch#heapify(Memory) CompactSketch.heapify(Memory)} - * @param srcMem Ref: {@link CompactSketch#heapify(Memory) CompactSketch.heapify(Memory)}, {@code srcMem} + * Convenience method, ref: {@link CompactSketch#heapify(MemorySegment) CompactSketch.heapify(MemorySegment)} + * @param srcSeg Ref: {@link CompactSketch#heapify(MemorySegment) CompactSketch.heapify(MemorySegment)}, {@code srcSeg} * @return {@link CompactSketch CompactSketch} */ - public static CompactSketch heapifyCompactSketch(final Memory srcMem) { - return CompactSketch.heapify(srcMem); + public static CompactSketch heapifyCompactSketch(final MemorySegment srcSeg) { + return CompactSketch.heapify(srcSeg); } /** - * Ref: {@link CompactSketch#heapify(Memory, long) CompactSketch.heapify(Memory, long)} - * @param srcMem Ref: {@link CompactSketch#heapify(Memory, long) CompactSketch.heapify(Memory, long)}, {@code srcMem} - * @param expectedSeed Ref: {@link CompactSketch#heapify(Memory, long) CompactSketch.heapify(Memory, long)}, + * Convenience method, ref: {@link CompactSketch#heapify(MemorySegment, long) CompactSketch.heapify(MemorySegment, long)} + * @param srcSeg Ref: {@link 
CompactSketch#heapify(MemorySegment, long) CompactSketch.heapify(MemorySegment, long)}, {@code srcSeg} + * @param expectedSeed Ref: {@link CompactSketch#heapify(MemorySegment, long) CompactSketch.heapify(MemorySegment, long)}, * {@code expectedSeed} * @return {@link CompactSketch CompactSketch} */ - public static CompactSketch heapifyCompactSketch(final Memory srcMem, final long expectedSeed) { - return CompactSketch.heapify(srcMem, expectedSeed); + public static CompactSketch heapifyCompactSketch(final MemorySegment srcSeg, final long expectedSeed) { + return CompactSketch.heapify(srcSeg, expectedSeed); } /** - * Ref: {@link CompactSketch#wrap(Memory) CompactSketch.wrap(Memory)} - * @param srcMem Ref: {@link CompactSketch#wrap(Memory) CompactSketch.wrap(Memory)}, {@code srcMem} + * Convenience method, ref: {@link CompactSketch#wrap(MemorySegment) CompactSketch.wrap(MemorySegment)} + * @param srcSeg Ref: {@link CompactSketch#wrap(MemorySegment) CompactSketch.wrap(MemorySegment)}, {@code srcSeg} * @return {@link CompactSketch CompactSketch} */ - public static CompactSketch wrapCompactSketch(final Memory srcMem) { - return CompactSketch.wrap(srcMem); + public static CompactSketch wrapCompactSketch(final MemorySegment srcSeg) { + return CompactSketch.wrap(srcSeg); } /** - * Ref: {@link CompactSketch#wrap(Memory, long) CompactSketch.wrap(Memory, long)} - * @param srcMem Ref: {@link CompactSketch#wrap(Memory, long) CompactSketch.wrap(Memory, long)}, {@code srcMem} - * @param expectedSeed Ref: {@link CompactSketch#wrap(Memory, long) CompactSketch.wrap(Memory, long)}, + * Convenience method, ref: {@link CompactSketch#wrap(MemorySegment, long) CompactSketch.wrap(MemorySegment, long)} + * @param srcSeg Ref: {@link CompactSketch#wrap(MemorySegment, long) CompactSketch.wrap(MemorySegment, long)}, {@code srcSeg} + * @param expectedSeed Ref: {@link CompactSketch#wrap(MemorySegment, long) CompactSketch.wrap(MemorySegment, long)}, * {@code expectedSeed} * @return {@link 
CompactSketch CompactSketch} */ - public static CompactSketch wrapCompactSketch(final Memory srcMem, final long expectedSeed) { - return CompactSketch.wrap(srcMem, expectedSeed); + public static CompactSketch wrapCompactSketch(final MemorySegment srcSeg, final long expectedSeed) { + return CompactSketch.wrap(srcSeg, expectedSeed); } /** - * Ref: {@link SetOperation#heapify(Memory) SetOperation.heapify(Memory)} - * @param srcMem Ref: {@link SetOperation#heapify(Memory) SetOperation.heapify(Memory)}, {@code srcMem} + * Convenience method, ref: {@link SetOperation#heapify(MemorySegment) SetOperation.heapify(MemorySegment)} + * @param srcSeg Ref: {@link SetOperation#heapify(MemorySegment) SetOperation.heapify(MemorySegment)}, {@code srcSeg} * @return {@link SetOperation SetOperation} */ - public static SetOperation heapifySetOperation(final Memory srcMem) { - return SetOperation.heapify(srcMem); + public static SetOperation heapifySetOperation(final MemorySegment srcSeg) { + return SetOperation.heapify(srcSeg); } /** - * Ref: {@link SetOperation#heapify(Memory, long) SetOperation.heapify(Memory, long)} - * @param srcMem Ref: {@link SetOperation#heapify(Memory, long) SetOperation.heapify(Memory, long)}, - * {@code srcMem} - * @param expectedSeed the seed used to validate the given Memory image. - * Ref: {@link SetOperation#heapify(Memory, long) SetOperation.heapify(Memory, long)}, + * Convenience method, ref: {@link SetOperation#heapify(MemorySegment, long) SetOperation.heapify(MemorySegment, long)} + * @param srcSeg Ref: {@link SetOperation#heapify(MemorySegment, long) SetOperation.heapify(MemorySegment, long)}, + * {@code srcSeg} + * @param expectedSeed the seed used to validate the given MemorySegment image. 
+ * Ref: {@link SetOperation#heapify(MemorySegment, long) SetOperation.heapify(MemorySegment, long)}, * {@code expectedSeed} * @return {@link SetOperation SetOperation} */ - public static SetOperation heapifySetOperation(final Memory srcMem, final long expectedSeed) { - return SetOperation.heapify(srcMem, expectedSeed); + public static SetOperation heapifySetOperation(final MemorySegment srcSeg, final long expectedSeed) { + return SetOperation.heapify(srcSeg, expectedSeed); } /** - * Ref: {@link Sketch#heapify(Memory) Sketch.heapify(Memory)} - * @param srcMem Ref: {@link Sketch#heapify(Memory) Sketch.heapify(Memory)}, {@code srcMem} + * Convenience method, ref: {@link Sketch#heapify(MemorySegment) Sketch.heapify(MemorySegment)} + * @param srcSeg Ref: {@link Sketch#heapify(MemorySegment) Sketch.heapify(MemorySegment)}, {@code srcSeg} * @return {@link Sketch Sketch} */ - public static Sketch heapifySketch(final Memory srcMem) { - return Sketch.heapify(srcMem); + public static Sketch heapifySketch(final MemorySegment srcSeg) { + return Sketch.heapify(srcSeg); } /** - * Ref: {@link Sketch#heapify(Memory, long) Sketch.heapify(Memory, long)} - * @param srcMem Ref: {@link Sketch#heapify(Memory, long) Sketch.heapify(Memory, long)}, {@code srcMem} - * @param expectedSeed the seed used to validate the given Memory image. - * Ref: {@link Sketch#heapify(Memory, long) Sketch.heapify(Memory, long)}, {@code expectedSeed} + * Convenience method, ref: {@link Sketch#heapify(MemorySegment, long) Sketch.heapify(MemorySegment, long)} + * @param srcSeg Ref: {@link Sketch#heapify(MemorySegment, long) Sketch.heapify(MemorySegment, long)}, {@code srcSeg} + * @param expectedSeed the seed used to validate the given MemorySegment image. 
+ * Ref: {@link Sketch#heapify(MemorySegment, long) Sketch.heapify(MemorySegment, long)}, {@code expectedSeed} * @return {@link Sketch Sketch} */ - public static Sketch heapifySketch(final Memory srcMem, final long expectedSeed) { - return Sketch.heapify(srcMem, expectedSeed); + public static Sketch heapifySketch(final MemorySegment srcSeg, final long expectedSeed) { + return Sketch.heapify(srcSeg, expectedSeed); } /** - * Ref: {@link UpdateSketch#heapify(Memory) UpdateSketch.heapify(Memory)} - * @param srcMem Ref: {@link UpdateSketch#heapify(Memory) UpdateSketch.heapify(Memory)}, {@code srcMem} + * Convenience method, ref: {@link UpdateSketch#heapify(MemorySegment) UpdateSketch.heapify(MemorySegment)} + * @param srcSeg Ref: {@link UpdateSketch#heapify(MemorySegment) UpdateSketch.heapify(MemorySegment)}, {@code srcSeg} * @return {@link UpdateSketch UpdateSketch} */ - public static UpdateSketch heapifyUpdateSketch(final Memory srcMem) { - return UpdateSketch.heapify(srcMem); + public static UpdateSketch heapifyUpdateSketch(final MemorySegment srcSeg) { + return UpdateSketch.heapify(srcSeg); } /** - * Ref: {@link UpdateSketch#heapify(Memory, long) UpdateSketch.heapify(Memory, long)} - * @param srcMem Ref: {@link UpdateSketch#heapify(Memory, long) UpdateSketch.heapify(Memory, long)}, - * {@code srcMem} - * @param expectedSeed the seed used to validate the given Memory image. - * Ref: {@link UpdateSketch#heapify(Memory, long) UpdateSketch.heapify(Memory, long)}, + * Convenience method, ref: {@link UpdateSketch#heapify(MemorySegment, long) UpdateSketch.heapify(MemorySegment, long)} + * @param srcSeg Ref: {@link UpdateSketch#heapify(MemorySegment, long) UpdateSketch.heapify(MemorySegment, long)}, + * {@code srcSeg} + * @param expectedSeed the seed used to validate the given MemorySegment image. 
+ * Ref: {@link UpdateSketch#heapify(MemorySegment, long) UpdateSketch.heapify(MemorySegment, long)}, * {@code expectedSeed} * @return {@link UpdateSketch UpdateSketch} */ - public static UpdateSketch heapifyUpdateSketch(final Memory srcMem, final long expectedSeed) { - return UpdateSketch.heapify(srcMem, expectedSeed); + public static UpdateSketch heapifyUpdateSketch(final MemorySegment srcSeg, final long expectedSeed) { + return UpdateSketch.heapify(srcSeg, expectedSeed); } //Builders @@ -279,168 +281,126 @@ public static UpdateSketchBuilder updateSketchBuilder() { //Wrap operations /** - * Convenience method, calls {@link SetOperation#wrap(Memory)} and casts the result to a Intersection - * @param srcMem Ref: {@link SetOperation#wrap(Memory)}, {@code srcMem} - * @return a Intersection backed by the given Memory - */ - public static Intersection wrapIntersection(final Memory srcMem) { - return (Intersection) SetOperation.wrap(srcMem); - } - - /** - * Convenience method, calls {@link SetOperation#wrap(Memory)} and casts the result to a Intersection - * @param srcMem Ref: {@link SetOperation#wrap(Memory)}, {@code srcMem} - * @return a Intersection backed by the given Memory + * Convenience method, ref: {@link SetOperation#wrap(MemorySegment)} + * @param srcSeg Ref: {@link SetOperation#wrap(MemorySegment)}, {@code srcSeg} + * @return a Intersection backed by the given MemorySegment */ - public static Intersection wrapIntersection(final WritableMemory srcMem) { - return (Intersection) SetOperation.wrap(srcMem); + public static Intersection wrapIntersection(final MemorySegment srcSeg) { + return (Intersection) SetOperation.wrap(srcSeg); } /** - * Ref: {@link SetOperation#wrap(Memory) SetOperation.wrap(Memory)} - * @param srcMem Ref: {@link SetOperation#wrap(Memory) SetOperation.wrap(Memory)}, {@code srcMem} - * @return {@link SetOperation SetOperation} - */ - public static SetOperation wrapSetOperation(final Memory srcMem) { - return wrapSetOperation(srcMem, 
Util.DEFAULT_UPDATE_SEED); - } - - /** - * Ref: {@link SetOperation#wrap(Memory, long) SetOperation.wrap(Memory, long)} - * @param srcMem Ref: {@link SetOperation#wrap(Memory, long) SetOperation.wrap(Memory, long)}, - * {@code srcMem} - * @param expectedSeed the seed used to validate the given Memory image. - * Ref: {@link SetOperation#wrap(Memory, long) SetOperation.wrap(Memory, long)}, - * {@code expectedSeed} + * Convenience method, ref: {@link SetOperation#wrap(MemorySegment) SetOperation.wrap(MemorySegment)} + * @param srcSeg Ref: {@link SetOperation#wrap(MemorySegment) SetOperation.wrap(MemorySegment)}, {@code srcSeg} * @return {@link SetOperation SetOperation} */ - public static SetOperation wrapSetOperation(final Memory srcMem, final long expectedSeed) { - return SetOperation.wrap(srcMem, expectedSeed); + public static SetOperation wrapSetOperation(final MemorySegment srcSeg) { + return wrapSetOperation(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** - * Ref: {@link SetOperation#wrap(Memory) SetOperation.wrap(Memory)} - * @param srcMem Ref: {@link SetOperation#wrap(Memory) SetOperation.wrap(Memory)}, {@code srcMem} + * Convenience method, ref: {@link SetOperation#wrap(MemorySegment, long) SetOperation.wrap(MemorySegment, long)} + * @param srcSeg Ref: {@link SetOperation#wrap(MemorySegment, long) SetOperation.wrap(MemorySegment, long)}, {@code srcSeg} + * @param expectedSeed the seed used to validate the given MemorySegment image. 
+ * Ref: {@link SetOperation#wrap(MemorySegment, long) SetOperation.wrap(MemorySegment, long)}, {@code expectedSeed} * @return {@link SetOperation SetOperation} */ - public static SetOperation wrapSetOperation(final WritableMemory srcMem) { - return wrapSetOperation(srcMem, Util.DEFAULT_UPDATE_SEED); + public static SetOperation wrapSetOperation(final MemorySegment srcSeg, final long expectedSeed) { + return SetOperation.wrap(srcSeg, expectedSeed); } /** - * Ref: {@link SetOperation#wrap(Memory, long) SetOperation.wrap(Memory, long)} - * @param srcMem Ref: {@link SetOperation#wrap(Memory, long) SetOperation.wrap(Memory, long)}, - * {@code srcMem} - * @param expectedSeed the seed used to validate the given Memory image. - * Ref: {@link SetOperation#wrap(Memory, long) SetOperation.wrap(Memory, long)}, - * {@code expectedSeed} - * @return {@link SetOperation SetOperation} - */ - public static SetOperation wrapSetOperation(final WritableMemory srcMem, final long expectedSeed) { - return SetOperation.wrap(srcMem, expectedSeed); - } - - /** - * Ref: {@link Sketch#wrap(Memory) Sketch.wrap(Memory)} - * @param srcMem Ref: {@link Sketch#wrap(Memory) Sketch.wrap(Memory)}, {@code srcMem} + * Convenience method, ref: {@link Sketch#wrap(MemorySegment) Sketch.wrap(MemorySegment)} + * @param srcSeg Ref: {@link Sketch#wrap(MemorySegment) Sketch.wrap(MemorySegment)}, {@code srcSeg} * @return {@link Sketch Sketch} */ - public static Sketch wrapSketch(final Memory srcMem) { - return Sketch.wrap(srcMem); + public static Sketch wrapSketch(final MemorySegment srcSeg) { + return Sketch.wrap(srcSeg); } /** - * Ref: {@link Sketch#wrap(Memory, long) Sketch.wrap(Memory, long)} - * @param srcMem Ref: {@link Sketch#wrap(Memory, long) Sketch.wrap(Memory, long)}, {@code srcMem} - * @param expectedSeed the expectedSeed used to validate the given Memory image. 
- * Ref: {@link Sketch#wrap(Memory, long) Sketch.wrap(Memory, long)}, {@code expectedSeed} + * Convenience method, ref: {@link Sketch#wrap(MemorySegment, long) Sketch.wrap(MemorySegment, long)} + * @param srcSeg Ref: {@link Sketch#wrap(MemorySegment, long) Sketch.wrap(MemorySegment, long)}, {@code srcSeg} + * @param expectedSeed the expectedSeed used to validate the given MemorySegment image. + * Ref: {@link Sketch#wrap(MemorySegment, long) Sketch.wrap(MemorySegment, long)}, {@code expectedSeed} * @return {@link Sketch Sketch} */ - public static Sketch wrapSketch(final Memory srcMem, final long expectedSeed) { - return Sketch.wrap(srcMem, expectedSeed); - } - - /** - * Convenience method, calls {@link SetOperation#wrap(Memory)} and casts the result to a Union - * @param srcMem Ref: {@link SetOperation#wrap(Memory)}, {@code srcMem} - * @return a Union backed by the given Memory - */ - public static Union wrapUnion(final Memory srcMem) { - return (Union) SetOperation.wrap(srcMem); + public static Sketch wrapSketch(final MemorySegment srcSeg, final long expectedSeed) { + return Sketch.wrap(srcSeg, expectedSeed); } /** - * Convenience method, calls {@link SetOperation#wrap(Memory)} and casts the result to a Union - * @param srcMem Ref: {@link SetOperation#wrap(Memory)}, {@code srcMem} - * @return a Union backed by the given Memory + * Convenience method, ref: {@link SetOperation#wrap(MemorySegment)} and casts the result to a Union + * @param srcSeg Ref: {@link SetOperation#wrap(MemorySegment)}, {@code srcSeg} + * @return a Union backed by the given MemorySegment. 
*/ - public static Union wrapUnion(final WritableMemory srcMem) { - return (Union) SetOperation.wrap(srcMem); + public static Union wrapUnion(final MemorySegment srcSeg) { + return (Union) SetOperation.wrap(srcSeg); } /** - * Ref: {@link UpdateSketch#wrap(Memory) UpdateSketch.wrap(Memory)} - * @param srcMem Ref: {@link UpdateSketch#wrap(Memory) UpdateSketch.wrap(Memory)}, {@code srcMem} + * Convenience method, ref: {@link UpdateSketch#wrap(MemorySegment) UpdateSketch.wrap(MemorySegment)} + * @param srcSeg Ref: {@link UpdateSketch#wrap(MemorySegment) UpdateSketch.wrap(MemorySegment)}, {@code srcSeg} * @return {@link UpdateSketch UpdateSketch} */ - public static UpdateSketch wrapUpdateSketch(final WritableMemory srcMem) { - return wrapUpdateSketch(srcMem, Util.DEFAULT_UPDATE_SEED); + public static UpdateSketch wrapUpdateSketch(final MemorySegment srcSeg) { + return wrapUpdateSketch(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** - * Ref: {@link UpdateSketch#wrap(Memory, long) UpdateSketch.wrap(Memory, long)} - * @param srcMem Ref: {@link UpdateSketch#wrap(Memory, long) UpdateSketch.wrap(Memory, long)}, {@code srcMem} - * @param expectedSeed the seed used to validate the given Memory image. - * Ref: {@link UpdateSketch#wrap(Memory, long) UpdateSketch.wrap(Memory, long)}, {@code expectedSeed} + * Convenience method, ref: {@link UpdateSketch#wrap(MemorySegment, long) UpdateSketch.wrap(MemorySegment, long)} + * @param srcSeg Ref: {@link UpdateSketch#wrap(MemorySegment, long) UpdateSketch.wrap(MemorySegment, long)}, {@code srcSeg} + * @param expectedSeed the seed used to validate the given MemorySegment image. 
+ * Ref: {@link UpdateSketch#wrap(MemorySegment, long) UpdateSketch.wrap(MemorySegment, long)}, {@code expectedSeed} * @return {@link UpdateSketch UpdateSketch} */ - public static UpdateSketch wrapUpdateSketch(final WritableMemory srcMem, final long expectedSeed) { - return UpdateSketch.wrap(srcMem, expectedSeed); + public static UpdateSketch wrapUpdateSketch(final MemorySegment srcSeg, final long expectedSeed) { + return UpdateSketch.wrap(srcSeg, expectedSeed); } //Restricted static methods - static void checkIfValidThetaSketch(final Memory srcMem) { - final int fam = srcMem.getByte(FAMILY_BYTE); + static void checkIfValidThetaSketch(final MemorySegment srcSeg) { + final int fam = srcSeg.get(JAVA_BYTE, FAMILY_BYTE); if (!Sketch.isValidSketchID(fam)) { - throw new SketchesArgumentException("Source Memory not a valid Sketch. Family: " + throw new SketchesArgumentException("Source MemorySegment not a valid Sketch. Family: " + Family.idToFamily(fam).toString()); } } - static boolean getEmpty(final Memory srcMem) { - final int serVer = srcMem.getByte(SER_VER_BYTE); + static boolean getEmpty(final MemorySegment srcSeg) { + final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE); if (serVer == 1) { - return ((getThetaLong(srcMem) == Long.MAX_VALUE) && (getRetainedEntries(srcMem) == 0)); + return ((getThetaLong(srcSeg) == Long.MAX_VALUE) && (getRetainedEntries(srcSeg) == 0)); } - return (srcMem.getByte(FLAGS_BYTE) & EMPTY_FLAG_MASK) != 0; //for SerVer 2 & 3 + return (srcSeg.get(JAVA_BYTE, FLAGS_BYTE) & EMPTY_FLAG_MASK) != 0; //for SerVer 2 & 3 } - static int getPreambleLongs(final Memory srcMem) { - return srcMem.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; //for SerVer 1,2,3 + static int getPreambleLongs(final MemorySegment srcSeg) { + return srcSeg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; //for SerVer 1,2,3 } - static int getRetainedEntries(final Memory srcMem) { - final int serVer = srcMem.getByte(SER_VER_BYTE); + static int getRetainedEntries(final MemorySegment srcSeg) { + 
final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE); if (serVer == 1) { - final int entries = srcMem.getInt(RETAINED_ENTRIES_INT); - if ((getThetaLong(srcMem) == Long.MAX_VALUE) && (entries == 0)) { + final int entries = srcSeg.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT); + if ((getThetaLong(srcSeg) == Long.MAX_VALUE) && (entries == 0)) { return 0; } return entries; } //SerVer 2 or 3 - final int preLongs = getPreambleLongs(srcMem); - final boolean empty = (srcMem.getByte(FLAGS_BYTE) & EMPTY_FLAG_MASK) != 0; //for SerVer 2 & 3 + final int preLongs = getPreambleLongs(srcSeg); + final boolean empty = (srcSeg.get(JAVA_BYTE, FLAGS_BYTE) & EMPTY_FLAG_MASK) != 0; //for SerVer 2 & 3 if (preLongs == 1) { return empty ? 0 : 1; } //preLongs > 1 - return srcMem.getInt(RETAINED_ENTRIES_INT); //for SerVer 1,2,3 + return srcSeg.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT); //for SerVer 1,2,3 } - static long getThetaLong(final Memory srcMem) { - final int preLongs = getPreambleLongs(srcMem); - return (preLongs < 3) ? Long.MAX_VALUE : srcMem.getLong(THETA_LONG); //for SerVer 1,2,3 + static long getThetaLong(final MemorySegment srcSeg) { + final int preLongs = getPreambleLongs(srcSeg); + return (preLongs < 3) ? 
Long.MAX_VALUE : srcSeg.get(JAVA_LONG_UNALIGNED, THETA_LONG); //for SerVer 1,2,3 } } diff --git a/src/main/java/org/apache/datasketches/theta/Union.java b/src/main/java/org/apache/datasketches/theta/Union.java index 7a2ff9c5b..7503729cb 100644 --- a/src/main/java/org/apache/datasketches/theta/Union.java +++ b/src/main/java/org/apache/datasketches/theta/Union.java @@ -19,11 +19,15 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static org.apache.datasketches.theta.PreambleUtil.SER_VER_BYTE; + +import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; import org.apache.datasketches.common.Family; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; /** * Compute the union of two or more theta sketches. @@ -33,6 +37,62 @@ */ public abstract class Union extends SetOperation { + /** + * Wrap a Union object around a Union MemorySegment object containing data. + * This method assumes the Default Update Seed. + * This does NO validity checking of the given MemorySegment. + * If the given source MemorySegment is read-only, the returned Union object will also be read-only. + * @param srcSeg The source MemorySegment object. + * @return this class + */ + public static Union fastWrap(final MemorySegment srcSeg) { + return fastWrap(srcSeg, Util.DEFAULT_UPDATE_SEED); + } + + /** + * Wrap a Union object around a Union MemorySegment object containing data. + * This does NO validity checking of the given MemorySegment. + * If the given source MemorySegment is read-only, the returned Union object will also be read-only. + * @param srcSeg The source MemorySegment object. + * @param expectedSeed the seed used to validate the given MemorySegment image. 
+ * See seed + * @return this class + */ + public static Union fastWrap(final MemorySegment srcSeg, final long expectedSeed) { + final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE); + if (serVer != 3) { + throw new SketchesArgumentException("SerVer must be 3: " + serVer); + } + return UnionImpl.fastWrapInstance(srcSeg, expectedSeed); + } + + /** + * Wrap a Union object around a Union MemorySegment object containing data. + * This method assumes the Default Update Seed. + * If the given source MemorySegment is read-only, the returned Union object will also be read-only. + * @param srcSeg The source MemorySegment object. + * @return this class + */ + public static Union wrap(final MemorySegment srcSeg) { + return wrap(srcSeg, Util.DEFAULT_UPDATE_SEED); + } + + /** + * Wrap a Union object around a Union MemorySegment object containing data. + * If the given source MemorySegment is read-only, the returned Union object will also be read-only. + * @param srcSeg The source MemorySegment object. + * @param expectedSeed the seed used to validate the given MemorySegment image. + * See seed + * @return this class + */ + public static Union wrap(final MemorySegment srcSeg, final long expectedSeed) { + final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE); + if (serVer != 3) { + throw new SketchesArgumentException("SerVer must be 3: " + serVer); + } + return UnionImpl.wrapInstance(srcSeg, expectedSeed); + } + /** * Returns the number of storage bytes required for this union in its current state. * @@ -67,12 +127,11 @@ public Family getFamily() { * @param dstOrdered * See Destination Ordered * - * @param dstMem - * See Destination Memory. + * @param dstSeg destination MemorySegment * * @return the result of this operation as a CompactSketch of the chosen form */ - public abstract CompactSketch getResult(boolean dstOrdered, WritableMemory dstMem); + public abstract CompactSketch getResult(boolean dstOrdered, MemorySegment dstSeg); /** * Resets this Union. 
The seed remains intact, everything else reverts back to its virgin state. @@ -108,11 +167,11 @@ public CompactSketch union(final Sketch sketchA, final Sketch sketchB) { * @param sketchA The first argument * @param sketchB The second argument * @param dstOrdered If true, the returned CompactSketch will be ordered. - * @param dstMem If not null, the returned CompactSketch will be placed in this WritableMemory. + * @param dstSeg If not null, the returned CompactSketch will be placed in this MemorySegment. * @return the result CompactSketch. */ public abstract CompactSketch union(Sketch sketchA, Sketch sketchB, boolean dstOrdered, - WritableMemory dstMem); + MemorySegment dstSeg); /** * Perform a Union operation with this union and the given on-heap sketch of the Theta Family. @@ -127,7 +186,7 @@ public abstract CompactSketch union(Sketch sketchA, Sketch sketchB, boolean dstO public abstract void union(Sketch sketchIn); /** - * Perform a Union operation with this union and the given Memory image of any sketch of the + * Perform a Union operation with this union and the given MemorySegment image of any sketch of the * Theta Family. The input image may be from earlier versions of the Theta Compact Sketch, * called the SetSketch (circa 2014), which was prior to Open Source and are compact and ordered. * @@ -135,9 +194,9 @@ public abstract CompactSketch union(Sketch sketchA, Sketch sketchB, boolean dstO * *

              Nulls and empty sketches are ignored.

              * - * @param mem Memory image of sketch to be merged + * @param seg MemorySegment image of sketch to be merged */ - public abstract void union(Memory mem); + public abstract void union(MemorySegment seg); /** * Update this union with the given long data item. diff --git a/src/main/java/org/apache/datasketches/theta/UnionImpl.java b/src/main/java/org/apache/datasketches/theta/UnionImpl.java index 5cbae1dd3..387ee3455 100644 --- a/src/main/java/org/apache/datasketches/theta/UnionImpl.java +++ b/src/main/java/org/apache/datasketches/theta/UnionImpl.java @@ -20,6 +20,7 @@ package org.apache.datasketches.theta; import static java.lang.Math.min; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static org.apache.datasketches.common.QuickSelect.selectExcludingZeros; import static org.apache.datasketches.theta.PreambleUtil.UNION_THETA_LONG; import static org.apache.datasketches.theta.PreambleUtil.clearEmpty; @@ -27,14 +28,13 @@ import static org.apache.datasketches.theta.PreambleUtil.extractUnionThetaLong; import static org.apache.datasketches.theta.PreambleUtil.insertUnionThetaLong; +import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; +import java.util.Objects; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.MemoryRequestServer; -import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.HashOperations; /** @@ -88,15 +88,14 @@ static UnionImpl initNewHeapInstance( } /** - * Construct a new Direct Union in the off-heap destination Memory. + * Construct a new Direct Union in the destination MemorySegment. * Called by SetOperationBuilder. * * @param lgNomLongs See lgNomLongs. 
* @param seed See seed * @param p See Sampling Probability, p * @param rf See Resize Factor - * @param memReqSvr a given instance of a MemoryRequestServer - * @param dstMem the given Memory object destination. It will be cleared prior to use. + * @param dstSeg the given MemorySegment object destination. It will be cleared prior to use. * @return this class */ static UnionImpl initNewDirectInstance( @@ -104,10 +103,9 @@ static UnionImpl initNewDirectInstance( final long seed, final float p, final ResizeFactor rf, - final MemoryRequestServer memReqSvr, - final WritableMemory dstMem) { + final MemorySegment dstSeg) { final UpdateSketch gadget = //create with UNION family - new DirectQuickSelectSketch(lgNomLongs, seed, p, rf, memReqSvr, dstMem, true); + new DirectQuickSelectSketch(lgNomLongs, seed, p, rf, dstSeg, true); final UnionImpl unionImpl = new UnionImpl(gadget, seed); unionImpl.unionThetaLong_ = gadget.getThetaLong(); unionImpl.unionEmpty_ = gadget.isEmpty(); @@ -115,92 +113,58 @@ static UnionImpl initNewDirectInstance( } /** - * Heapify a Union from a Memory Union object containing data. + * Heapify a Union from a MemorySegment Union object containing data. * Called by SetOperation. - * @param srcMem The source Memory Union object. - * See Memory - * @param expectedSeed the seed used to validate the given Memory image. + * @param srcSeg The source MemorySegment Union object. + * @param expectedSeed the seed used to validate the given MemorySegment image. 
* See seed * @return this class */ - static UnionImpl heapifyInstance(final Memory srcMem, final long expectedSeed) { - Family.UNION.checkFamilyID(extractFamilyID(srcMem)); - final UpdateSketch gadget = HeapQuickSelectSketch.heapifyInstance(srcMem, expectedSeed); + static UnionImpl heapifyInstance(final MemorySegment srcSeg, final long expectedSeed) { + final MemorySegment srcSegRO = srcSeg.asReadOnly(); + Family.UNION.checkFamilyID(extractFamilyID(srcSegRO)); + final UpdateSketch gadget = HeapQuickSelectSketch.heapifyInstance(srcSegRO, expectedSeed); final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed); - unionImpl.unionThetaLong_ = extractUnionThetaLong(srcMem); - unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcMem); + unionImpl.unionThetaLong_ = extractUnionThetaLong(srcSegRO); + unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcSegRO); return unionImpl; } /** - * Fast-wrap a Union object around a Union Memory object containing data. - * This does NO validity checking of the given Memory. - * @param srcMem The source Memory object. - * See Memory - * @param expectedSeed the seed used to validate the given Memory image. + * Fast-wrap a Union object around a Union MemorySegment object containing data. + * This does NO validity checking of the given MemorySegment. + * @param srcSeg The source MemorySegment object. + * @param expectedSeed the seed used to validate the given MemorySegment image. * See seed * @return this class */ - static UnionImpl fastWrap(final Memory srcMem, final long expectedSeed) { - Family.UNION.checkFamilyID(extractFamilyID(srcMem)); - final UpdateSketch gadget = DirectQuickSelectSketchR.fastReadOnlyWrap(srcMem, expectedSeed); + static UnionImpl fastWrapInstance(final MemorySegment srcSeg, final long expectedSeed) { + Family.UNION.checkFamilyID(extractFamilyID(srcSeg)); + final UpdateSketch gadget = srcSeg.isReadOnly() + ? 
DirectQuickSelectSketchR.fastReadOnlyWrap(srcSeg, expectedSeed) + : DirectQuickSelectSketch.fastWritableWrap(srcSeg, expectedSeed); final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed); - unionImpl.unionThetaLong_ = extractUnionThetaLong(srcMem); - unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcMem); + unionImpl.unionThetaLong_ = extractUnionThetaLong(srcSeg); + unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcSeg); return unionImpl; } /** - * Fast-wrap a Union object around a Union WritableMemory object containing data. - * This does NO validity checking of the given Memory. - * @param srcMem The source Memory object. - * See Memory - * @param expectedSeed the seed used to validate the given Memory image. - * See seed - * @return this class - */ - static UnionImpl fastWrap(final WritableMemory srcMem, final long expectedSeed) { - Family.UNION.checkFamilyID(extractFamilyID(srcMem)); - final UpdateSketch gadget = DirectQuickSelectSketch.fastWritableWrap(srcMem, expectedSeed); - final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed); - unionImpl.unionThetaLong_ = extractUnionThetaLong(srcMem); - unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcMem); - return unionImpl; - } - - /** - * Wrap a Union object around a Union Memory object containing data. + * Wrap a Union object around a Union MemorySegment object containing data. * Called by SetOperation. - * @param srcMem The source Memory object. - * See Memory - * @param expectedSeed the seed used to validate the given Memory image. + * @param srcSeg The source MemorySegment object. + * @param expectedSeed the seed used to validate the given MemorySegment image. 
* See seed * @return this class */ - static UnionImpl wrapInstance(final Memory srcMem, final long expectedSeed) { - Family.UNION.checkFamilyID(extractFamilyID(srcMem)); - final UpdateSketch gadget = DirectQuickSelectSketchR.readOnlyWrap(srcMem, expectedSeed); + static UnionImpl wrapInstance(final MemorySegment srcSeg, final long expectedSeed) { + Family.UNION.checkFamilyID(extractFamilyID(srcSeg)); + final UpdateSketch gadget = srcSeg.isReadOnly() + ? DirectQuickSelectSketchR.readOnlyWrap(srcSeg, expectedSeed) + : DirectQuickSelectSketch.writableWrap(srcSeg, expectedSeed); final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed); - unionImpl.unionThetaLong_ = extractUnionThetaLong(srcMem); - unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcMem); - return unionImpl; - } - - /** - * Wrap a Union object around a Union WritableMemory object containing data. - * Called by SetOperation. - * @param srcMem The source Memory object. - * See Memory - * @param expectedSeed the seed used to validate the given Memory image. - * See seed - * @return this class - */ - static UnionImpl wrapInstance(final WritableMemory srcMem, final long expectedSeed) { - Family.UNION.checkFamilyID(extractFamilyID(srcMem)); - final UpdateSketch gadget = DirectQuickSelectSketch.writableWrap(srcMem, expectedSeed); - final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed); - unionImpl.unionThetaLong_ = extractUnionThetaLong(srcMem); - unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcMem); + unionImpl.unionThetaLong_ = extractUnionThetaLong(srcSeg); + unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcSeg); return unionImpl; } @@ -215,17 +179,22 @@ public int getMaxUnionBytes() { return (16 << lgK) + (Family.UNION.getMaxPreLongs() << 3); } + @Override + MemorySegment getMemorySegment() { + return hasMemorySegment() ? 
gadget_.getMemorySegment() : null; + } + @Override public CompactSketch getResult() { return getResult(true, null); } @Override - public CompactSketch getResult(final boolean dstOrdered, final WritableMemory dstMem) { + public CompactSketch getResult(final boolean dstOrdered, final MemorySegment dstSeg) { final int gadgetCurCount = gadget_.getRetainedEntries(true); final int k = 1 << gadget_.getLgNomLongs(); final long[] gadgetCacheCopy = - gadget_.hasMemory() ? gadget_.getCache() : gadget_.getCache().clone(); + gadget_.hasMemorySegment() ? gadget_.getCache() : gadget_.getCache().clone(); //Pull back to k final long curGadgetThetaLong = gadget_.getThetaLong(); @@ -233,8 +202,9 @@ public CompactSketch getResult(final boolean dstOrdered, final WritableMemory ds ? selectExcludingZeros(gadgetCacheCopy, gadgetCurCount, k + 1) : curGadgetThetaLong; //Finalize Theta and curCount - final long unionThetaLong = gadget_.hasMemory() - ? gadget_.getMemory().getLong(UNION_THETA_LONG) : unionThetaLong_; + final long unionThetaLong = gadget_.hasMemorySegment() + ? gadget_.getMemorySegment().get(JAVA_LONG_UNALIGNED, UNION_THETA_LONG) + : unionThetaLong_; final long minThetaLong = min(min(curGadgetThetaLong, adjGadgetThetaLong), unionThetaLong); final int curCountOut = minThetaLong < curGadgetThetaLong @@ -247,25 +217,22 @@ public CompactSketch getResult(final boolean dstOrdered, final WritableMemory ds final boolean empty = gadget_.isEmpty() && unionEmpty_; final short seedHash = gadget_.getSeedHash(); return CompactOperations.componentsToCompact( - minThetaLong, curCountOut, seedHash, empty, true, dstOrdered, dstOrdered, dstMem, compactCacheOut); + minThetaLong, curCountOut, seedHash, empty, true, dstOrdered, dstOrdered, dstSeg, compactCacheOut); } @Override - public boolean hasMemory() { - return gadget_ instanceof DirectQuickSelectSketchR - ? 
gadget_.hasMemory() : false; + public boolean hasMemorySegment() { + return gadget_.hasMemorySegment(); } @Override public boolean isDirect() { - return gadget_ instanceof DirectQuickSelectSketchR - ? gadget_.isDirect() : false; + return gadget_.isDirect(); } @Override - public boolean isSameResource(final Memory that) { - return gadget_ instanceof DirectQuickSelectSketchR - ? gadget_.isSameResource(that) : false; + public boolean isSameResource(final MemorySegment that) { + return gadget_.isSameResource(that); } @Override @@ -278,10 +245,10 @@ public void reset() { @Override public byte[] toByteArray() { final byte[] gadgetByteArr = gadget_.toByteArray(); - final WritableMemory mem = WritableMemory.writableWrap(gadgetByteArr); - insertUnionThetaLong(mem, unionThetaLong_); + final MemorySegment seg = MemorySegment.ofArray(gadgetByteArr); + insertUnionThetaLong(seg, unionThetaLong_); if (gadget_.isEmpty() != unionEmpty_) { - clearEmpty(mem); + clearEmpty(seg); unionEmpty_ = false; } return gadgetByteArr; @@ -289,11 +256,11 @@ public byte[] toByteArray() { @Override //Stateless Union public CompactSketch union(final Sketch sketchA, final Sketch sketchB, final boolean dstOrdered, - final WritableMemory dstMem) { + final MemorySegment dstSeg) { reset(); union(sketchA); union(sketchB); - final CompactSketch csk = getResult(dstOrdered, dstMem); + final CompactSketch csk = getResult(dstOrdered, dstSeg); reset(); return csk; } @@ -312,7 +279,7 @@ public void union(final Sketch sketchIn) { gadget_.hashUpdate(sketchIn.getCache()[0]); return; } - Sketch.checkSketchAndMemoryFlags(sketchIn); + Sketch.checkSketchAndMemorySegmentFlags(sketchIn); unionThetaLong_ = min(min(unionThetaLong_, sketchIn.getThetaLong()), gadget_.getThetaLong()); //Theta rule unionEmpty_ = false; @@ -327,18 +294,17 @@ public void union(final Sketch sketchIn) { } } unionThetaLong_ = min(unionThetaLong_, gadget_.getThetaLong()); //Theta rule with gadget - if (gadget_.hasMemory()) { - final WritableMemory 
wmem = (WritableMemory)gadget_.getMemory(); - PreambleUtil.insertUnionThetaLong(wmem, unionThetaLong_); - PreambleUtil.clearEmpty(wmem); + if (gadget_.hasMemorySegment()) { + final MemorySegment wseg = gadget_.getMemorySegment(); + PreambleUtil.insertUnionThetaLong(wseg, unionThetaLong_); + PreambleUtil.clearEmpty(wseg); } } @Override - public void union(final Memory skMem) { - if (skMem != null) { - union(Sketch.wrap(skMem)); - } + public void union(final MemorySegment seg) { + Objects.requireNonNull(seg, "MemorySegment must be non-null"); + union(Sketch.wrap(seg.asReadOnly())); } @Override diff --git a/src/main/java/org/apache/datasketches/theta/UpdateSketch.java b/src/main/java/org/apache/datasketches/theta/UpdateSketch.java index 011e4db9c..30d3b6f49 100644 --- a/src/main/java/org/apache/datasketches/theta/UpdateSketch.java +++ b/src/main/java/org/apache/datasketches/theta/UpdateSketch.java @@ -19,6 +19,7 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; import static org.apache.datasketches.common.Util.checkBounds; @@ -32,16 +33,17 @@ import static org.apache.datasketches.theta.PreambleUtil.READ_ONLY_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.SER_VER; import static org.apache.datasketches.theta.PreambleUtil.SER_VER_BYTE; -import static org.apache.datasketches.theta.PreambleUtil.checkMemorySeedHash; +import static org.apache.datasketches.theta.PreambleUtil.checkSegmentSeedHash; import static org.apache.datasketches.theta.PreambleUtil.extractFamilyID; import static org.apache.datasketches.theta.PreambleUtil.extractFlags; import static org.apache.datasketches.theta.PreambleUtil.extractLgResizeFactor; import static org.apache.datasketches.theta.PreambleUtil.extractP; import static org.apache.datasketches.theta.PreambleUtil.extractSerVer; import static 
org.apache.datasketches.theta.PreambleUtil.extractThetaLong; -import static org.apache.datasketches.theta.PreambleUtil.getMemBytes; +import static org.apache.datasketches.theta.PreambleUtil.getSegBytes; import static org.apache.datasketches.theta.UpdateReturnState.RejectedNullOrEmpty; +import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; import java.util.Objects; @@ -49,8 +51,6 @@ import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.ThetaUtil; /** @@ -65,47 +65,45 @@ public abstract class UpdateSketch extends Sketch { UpdateSketch() {} /** - * Wrap takes the sketch image in Memory and refers to it directly. There is no data copying onto + * Wrap takes the writable sketch image in MemorySegment and refers to it directly. There is no data copying onto * the java heap. Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have - * been explicitly stored as direct objects can be wrapped. This method assumes the + * been explicitly stored as writable, direct objects can be wrapped. This method assumes the * {@link org.apache.datasketches.common.Util#DEFAULT_UPDATE_SEED}. * Default Update Seed. - * @param srcMem an image of a Sketch where the image seed hash matches the default seed hash. + * @param srcWSeg an image of a writable sketch where the image seed hash matches the default seed hash. * It must have a size of at least 24 bytes. 
- * See Memory - * @return a Sketch backed by the given Memory + * @return an UpdateSketch backed by the given MemorySegment */ - public static UpdateSketch wrap(final WritableMemory srcMem) { - return wrap(srcMem, Util.DEFAULT_UPDATE_SEED); + public static UpdateSketch wrap(final MemorySegment srcWSeg) { + return wrap(srcWSeg, Util.DEFAULT_UPDATE_SEED); } /** - * Wrap takes the sketch image in Memory and refers to it directly. There is no data copying onto + * Wrap takes the sketch image in MemorySegment and refers to it directly. There is no data copying onto * the java heap. Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have - * been explicitly stored as direct objects can be wrapped. + * been explicitly stored as writable direct objects can be wrapped. * An attempt to "wrap" earlier version sketches will result in a "heapified", normal * Java Heap version of the sketch where all data will be copied to the heap. - * @param srcMem an image of a Sketch where the image seed hash matches the given seed hash. + * @param srcWSeg an image of a writable sketch where the image seed hash matches the given seed hash. * It must have a size of at least 24 bytes. - * See Memory - * @param expectedSeed the seed used to validate the given Memory image. + * @param expectedSeed the seed used to validate the given MemorySegment image. * See Update Hash Seed. * Compact sketches store a 16-bit hash of the seed, but not the seed itself. 
- * @return a UpdateSketch backed by the given Memory + * @return a UpdateSketch backed by the given MemorySegment */ - public static UpdateSketch wrap(final WritableMemory srcMem, final long expectedSeed) { - Objects.requireNonNull(srcMem, "Source Memory must not be null"); - checkBounds(0, 24, srcMem.getCapacity()); //need min 24 bytes - final int preLongs = srcMem.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; - final int serVer = srcMem.getByte(SER_VER_BYTE) & 0XFF; - final int familyID = srcMem.getByte(FAMILY_BYTE) & 0XFF; + public static UpdateSketch wrap(final MemorySegment srcWSeg, final long expectedSeed) { + Objects.requireNonNull(srcWSeg, "Source MemorySegment must not be null"); + checkBounds(0, 24, srcWSeg.byteSize()); //need min 24 bytes + final int preLongs = srcWSeg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; + final int serVer = srcWSeg.get(JAVA_BYTE, SER_VER_BYTE) & 0XFF; + final int familyID = srcWSeg.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF; final Family family = Family.idToFamily(familyID); if (family != Family.QUICKSELECT) { throw new SketchesArgumentException( "A " + family + " sketch cannot be wrapped as an UpdateSketch."); } if ((serVer == 3) && (preLongs == 3)) { - return DirectQuickSelectSketch.writableWrap(srcMem, expectedSeed); + return DirectQuickSelectSketch.writableWrap(srcWSeg, expectedSeed); } else { throw new SketchesArgumentException( "Corrupted: An UpdateSketch image must have SerVer = 3 and preLongs = 3"); @@ -113,40 +111,40 @@ public static UpdateSketch wrap(final WritableMemory srcMem, final long expected } /** - * Instantiates an on-heap UpdateSketch from Memory. This method assumes the + * Instantiates an on-heap UpdateSketch from a MemorySegment. This method assumes the * {@link org.apache.datasketches.common.Util#DEFAULT_UPDATE_SEED}. - * @param srcMem See Memory + * @param srcSeg the given MemorySegment with a sketch image. * It must have a size of at least 24 bytes.
* @return an UpdateSketch */ - public static UpdateSketch heapify(final Memory srcMem) { - return heapify(srcMem, Util.DEFAULT_UPDATE_SEED); + public static UpdateSketch heapify(final MemorySegment srcSeg) { + return heapify(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** - * Instantiates an on-heap UpdateSketch from Memory. - * @param srcMem See Memory + * Instantiates an on-heap UpdateSketch from a MemorySegment. + * @param srcSeg the given MemorySegment. * It must have a size of at least 24 bytes. - * @param expectedSeed the seed used to validate the given Memory image. + * @param expectedSeed the seed used to validate the given MemorySegment image. * See Update Hash Seed. * @return an UpdateSketch */ - public static UpdateSketch heapify(final Memory srcMem, final long expectedSeed) { - Objects.requireNonNull(srcMem, "Source Memory must not be null"); - checkBounds(0, 24, srcMem.getCapacity()); //need min 24 bytes - final Family family = Family.idToFamily(srcMem.getByte(FAMILY_BYTE)); + public static UpdateSketch heapify(final MemorySegment srcSeg, final long expectedSeed) { + Objects.requireNonNull(srcSeg, "Source MemorySegment must not be null"); + checkBounds(0, 24, srcSeg.byteSize()); //need min 24 bytes + final Family family = Family.idToFamily(srcSeg.get(JAVA_BYTE, FAMILY_BYTE)); if (family.equals(Family.ALPHA)) { - return HeapAlphaSketch.heapifyInstance(srcMem, expectedSeed); + return HeapAlphaSketch.heapifyInstance(srcSeg, expectedSeed); } - return HeapQuickSelectSketch.heapifyInstance(srcMem, expectedSeed); + return HeapQuickSelectSketch.heapifyInstance(srcSeg, expectedSeed); } //Sketch interface @Override - public CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem) { + public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstWSeg) { return componentsToCompact(getThetaLong(), getRetainedEntries(true), getSeedHash(), isEmpty(), - false, false, dstOrdered, dstMem, getCache()); + false, false, dstOrdered, dstWSeg, 
getCache()); } @Override @@ -161,16 +159,31 @@ int getCurrentDataLongs() { return 1 << getLgArrLongs(); } + @Override + public boolean hasMemorySegment() { + return (this instanceof DirectQuickSelectSketchR && ((DirectQuickSelectSketchR)this).hasMemorySegment()); + } + @Override public boolean isCompact() { return false; } + @Override + public boolean isDirect() { + return (this instanceof DirectQuickSelectSketchR && ((DirectQuickSelectSketchR)this).isDirect()); + } + @Override public boolean isOrdered() { return false; } + @Override + public boolean isSameResource(final MemorySegment that) { + return (this instanceof DirectQuickSelectSketchR && ((DirectQuickSelectSketchR)this).isSameResource(that)); + } + //UpdateSketch interface /** @@ -379,10 +392,10 @@ public UpdateReturnState update(final long[] data) { */ abstract boolean isOutOfSpace(int numEntries); - static void checkUnionQuickSelectFamily(final Memory mem, final int preambleLongs, + static void checkUnionQuickSelectFamily(final MemorySegment seg, final int preambleLongs, final int lgNomLongs) { //Check Family - final int familyID = extractFamilyID(mem); //byte 2 + final int familyID = extractFamilyID(seg); //byte 2 final Family family = Family.idToFamily(familyID); if (family.equals(Family.UNION)) { if (preambleLongs != Family.UNION.getMinPreLongs()) { @@ -403,45 +416,45 @@ else if (family.equals(Family.QUICKSELECT)) { //Check lgNomLongs if (lgNomLongs < ThetaUtil.MIN_LG_NOM_LONGS) { throw new SketchesArgumentException( - "Possible corruption: Current Memory lgNomLongs < min required size: " + "Possible corruption: Current MemorySegment lgNomLongs < min required size: " + lgNomLongs + " < " + ThetaUtil.MIN_LG_NOM_LONGS); } } - static void checkMemIntegrity(final Memory srcMem, final long expectedSeed, final int preambleLongs, + static void checkSegIntegrity(final MemorySegment srcSeg, final long expectedSeed, final int preambleLongs, final int lgNomLongs, final int lgArrLongs) { //Check SerVer - final int 
serVer = extractSerVer(srcMem); //byte 1 + final int serVer = extractSerVer(srcSeg); //byte 1 if (serVer != SER_VER) { throw new SketchesArgumentException( "Possible corruption: Invalid Serialization Version: " + serVer); } //Check flags - final int flags = extractFlags(srcMem); //byte 5 + final int flags = extractFlags(srcSeg); //byte 5 final int flagsMask = ORDERED_FLAG_MASK | COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | BIG_ENDIAN_FLAG_MASK; if ((flags & flagsMask) > 0) { throw new SketchesArgumentException( - "Possible corruption: Input srcMem cannot be: big-endian, compact, ordered, or read-only"); + "Possible corruption: Input srcSeg cannot be: big-endian, compact, ordered, nor read-only"); } //Check seed hashes - final short seedHash = checkMemorySeedHash(srcMem, expectedSeed); //byte 6,7 + final short seedHash = checkSegmentSeedHash(srcSeg, expectedSeed); //byte 6,7 Util.checkSeedHashes(seedHash, Util.computeSeedHash(expectedSeed)); - //Check mem capacity, lgArrLongs - final long curCapBytes = srcMem.getCapacity(); - final int minReqBytes = getMemBytes(lgArrLongs, preambleLongs); + //Check seg capacity, lgArrLongs + final long curCapBytes = srcSeg.byteSize(); + final int minReqBytes = getSegBytes(lgArrLongs, preambleLongs); if (curCapBytes < minReqBytes) { throw new SketchesArgumentException( - "Possible corruption: Current Memory size < min required size: " + "Possible corruption: Current MemorySegment size < min required size: " + curCapBytes + " < " + minReqBytes); } //check Theta, p - final float p = extractP(srcMem); //bytes 12-15 - final long thetaLong = extractThetaLong(srcMem); //bytes 16-23 + final float p = extractP(srcSeg); //bytes 12-15 + final long thetaLong = extractThetaLong(srcSeg); //bytes 16-23 final double theta = thetaLong / LONG_MAX_VALUE_AS_DOUBLE; //if (lgArrLongs <= lgNomLongs) the sketch is still resizing, thus theta cannot be < p. 
if ((lgArrLongs <= lgNomLongs) && (theta < p) ) { @@ -452,19 +465,19 @@ static void checkMemIntegrity(final Memory srcMem, final long expectedSeed, fina } /** - * This checks to see if the memory RF factor was set correctly as early versions may not + * This checks to see if the MemorySegment RF factor was set correctly as early versions may not * have set it. - * @param srcMem the source memory + * @param srcSeg the source MemorySegment * @param lgNomLongs the current lgNomLongs * @param lgArrLongs the current lgArrLongs - * @return true if the the memory RF factor is incorrect and the caller can either + * @return true if the MemorySegment RF factor is incorrect and the caller can either * correct it or throw an error. */ - static boolean isResizeFactorIncorrect(final Memory srcMem, final int lgNomLongs, + static boolean isResizeFactorIncorrect(final MemorySegment srcSeg, final int lgNomLongs, final int lgArrLongs) { final int lgT = lgNomLongs + 1; final int lgA = lgArrLongs; - final int lgR = extractLgResizeFactor(srcMem); + final int lgR = extractLgResizeFactor(srcSeg); if (lgR == 0) { return lgA != lgT; } return !(((lgT - lgA) % lgR) == 0); } diff --git a/src/main/java/org/apache/datasketches/theta/UpdateSketchBuilder.java b/src/main/java/org/apache/datasketches/theta/UpdateSketchBuilder.java index 789a35bc7..834778f87 100644 --- a/src/main/java/org/apache/datasketches/theta/UpdateSketchBuilder.java +++ b/src/main/java/org/apache/datasketches/theta/UpdateSketchBuilder.java @@ -23,15 +23,14 @@ import static org.apache.datasketches.common.Util.TAB; import static org.apache.datasketches.common.Util.ceilingPowerOf2; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; import org.apache.datasketches.common.SuppressFBWarnings; import
org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.DefaultMemoryRequestServer; -import org.apache.datasketches.memory.MemoryRequestServer; -import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.ThetaUtil; /** @@ -39,13 +38,12 @@ * * @author Lee Rhodes */ -public class UpdateSketchBuilder { +public final class UpdateSketchBuilder { private int bLgNomLongs; private long bSeed; private ResizeFactor bRF; private Family bFam; private float bP; - private MemoryRequestServer bMemReqSvr; //Fields for concurrent theta sketch private int bNumPoolThreads; @@ -62,17 +60,16 @@ public class UpdateSketchBuilder { *
            • Input Sampling Probability: 1.0
            • *
            • Family: {@link org.apache.datasketches.common.Family#QUICKSELECT}
            • *
            • Resize Factor: The default for sketches on the Java heap is {@link ResizeFactor#X8}. - * For direct sketches, which are targeted for native memory off the Java heap, this value will + * For direct sketches, which are targeted for off-heap, this value will * be fixed at either {@link ResizeFactor#X1} or {@link ResizeFactor#X2}.
            • - *
            • MemoryRequestServer (Direct only): - * {@link org.apache.datasketches.memory.DefaultMemoryRequestServer}.
            • *
            * Parameters unique to the concurrent sketches only: *
              - *
            • Number of local Nominal Entries: 4
            • *
            • Concurrent NumPoolThreads: 3
            • + *
            • Number of local Nominal Entries: 4
            • *
            • Concurrent PropagateOrderedCompact: true
            • *
            • Concurrent MaxConcurrencyError: 0
            • + *
            • Concurrent MaxNumLocalThreads: 1
            • *
            */ public UpdateSketchBuilder() { @@ -81,7 +78,6 @@ public UpdateSketchBuilder() { bP = (float) 1.0; bRF = ResizeFactor.X8; bFam = Family.QUICKSELECT; - bMemReqSvr = new DefaultMemoryRequestServer(); // Default values for concurrent sketch bNumPoolThreads = ConcurrentPropagationService.NUM_POOL_THREADS; bLocalLgNomLongs = 4; //default is smallest legal QS sketch @@ -250,24 +246,6 @@ public Family getFamily() { return bFam; } - /** - * Set the MemoryRequestServer - * @param memReqSvr the given MemoryRequestServer - * @return this UpdateSketchBuilder - */ - public UpdateSketchBuilder setMemoryRequestServer(final MemoryRequestServer memReqSvr) { - bMemReqSvr = memReqSvr; - return this; - } - - /** - * Returns the MemoryRequestServer - * @return the MemoryRequestServer - */ - public MemoryRequestServer getMemoryRequestServer() { - return bMemReqSvr; - } - /** * Sets the number of pool threads used for background propagation in the concurrent sketches. * @param numPoolThreads the given number of pool threads @@ -348,30 +326,30 @@ public UpdateSketch build() { /** * Returns an UpdateSketch with the current configuration of this Builder - * with the specified backing destination Memory store. + * with the specified backing destination MemorySegment store. * Note: this cannot be used with the Alpha Family of sketches. - * @param dstMem The destination Memory. + * @param dstSeg The destination MemorySegment. 
* @return an UpdateSketch */ - public UpdateSketch build(final WritableMemory dstMem) { + public UpdateSketch build(final MemorySegment dstSeg) { UpdateSketch sketch = null; switch (bFam) { case ALPHA: { - if (dstMem == null) { + if (dstSeg == null) { sketch = HeapAlphaSketch.newHeapInstance(bLgNomLongs, bSeed, bP, bRF); } else { - throw new SketchesArgumentException("AlphaSketch cannot be made Direct to Memory."); + throw new SketchesArgumentException("AlphaSketch cannot be backed by a MemorySegment."); } break; } case QUICKSELECT: { - if (dstMem == null) { + if (dstSeg == null) { sketch = new HeapQuickSelectSketch(bLgNomLongs, bSeed, bP, bRF, false); } else { sketch = new DirectQuickSelectSketch( - bLgNomLongs, bSeed, bP, bRF, bMemReqSvr, dstMem, false); + bLgNomLongs, bSeed, bP, bRF, dstSeg, false); } break; } @@ -405,9 +383,9 @@ public UpdateSketch buildShared() { } /** - * Returns a direct (potentially off-heap) concurrent shared UpdateSketch with the current - * configuration of the Builder and the given destination WritableMemory. If the destination - * WritableMemory is null, this defaults to an on-heap concurrent shared UpdateSketch. + * Returns a concurrent shared UpdateSketch with the current + * configuration of the Builder and the given destination MemorySegment. If the destination + * MemorySegment is null, this defaults to an on-heap concurrent shared UpdateSketch. * *

            The parameters unique to the shared concurrent sketch are: *

              @@ -418,28 +396,28 @@ public UpdateSketch buildShared() { *

              Key parameters that are in common with other Theta sketches: *

                *
              • Nominal Entries or Log Nominal Entries (for the shared concurrent sketch)
              • - *
              • Destination Writable Memory (if not null, returned sketch is Direct. Default is null.)
              • + *
              • Destination MemorySegment (if not null, returned sketch is Direct. Default is null.)
              • *
              * - * @param dstMem the given WritableMemory for Direct, otherwise null. + * @param dstSeg the given MemorySegment for Direct, otherwise null. * @return a concurrent UpdateSketch with the current configuration of the Builder - * and the given destination WritableMemory. + * and the given destination MemorySegment. */ @SuppressFBWarnings(value = "ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD", justification = "Harmless in Builder, fix later") - public UpdateSketch buildShared(final WritableMemory dstMem) { + public UpdateSketch buildShared(final MemorySegment dstSeg) { ConcurrentPropagationService.NUM_POOL_THREADS = bNumPoolThreads; - if (dstMem == null) { + if (dstSeg == null) { return new ConcurrentHeapQuickSelectSketch(bLgNomLongs, bSeed, bMaxConcurrencyError); } else { - return new ConcurrentDirectQuickSelectSketch(bLgNomLongs, bSeed, bMaxConcurrencyError, dstMem); + return new ConcurrentDirectQuickSelectSketch(bLgNomLongs, bSeed, bMaxConcurrencyError, dstSeg); } } /** * Returns a direct (potentially off-heap) concurrent shared UpdateSketch with the current * configuration of the Builder, the data from the given sketch, and the given destination - * WritableMemory. If the destination WritableMemory is null, this defaults to an on-heap + * MemorySegment. If the destination MemorySegment is null, this defaults to an on-heap * concurrent shared UpdateSketch. * *

              The parameters unique to the shared concurrent sketch are: @@ -451,23 +429,23 @@ public UpdateSketch buildShared(final WritableMemory dstMem) { *

              Key parameters that are in common with other Theta sketches: *

                *
              • Nominal Entries or Log Nominal Entries (for the shared concurrent sketch)
              • - *
              • Destination Writable Memory (if not null, returned sketch is Direct. Default is null.)
              • + *
              • Destination MemorySegment (if not null, returned sketch is Direct. Default is null.)
              • *
              * * @param sketch a given UpdateSketch from which the data is used to initialize the returned * shared sketch. - * @param dstMem the given WritableMemory for Direct, otherwise null. + * @param dstSeg the given MemorySegment for Direct, otherwise null. * @return a concurrent UpdateSketch with the current configuration of the Builder - * and the given destination WritableMemory. + * and the given destination MemorySegment. */ @SuppressFBWarnings(value = "ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD", justification = "Harmless in Builder, fix later") - public UpdateSketch buildSharedFromSketch(final UpdateSketch sketch, final WritableMemory dstMem) { + public UpdateSketch buildSharedFromSketch(final UpdateSketch sketch, final MemorySegment dstSeg) { ConcurrentPropagationService.NUM_POOL_THREADS = bNumPoolThreads; - if (dstMem == null) { + if (dstSeg == null) { return new ConcurrentHeapQuickSelectSketch(sketch, bSeed, bMaxConcurrencyError); } else { - return new ConcurrentDirectQuickSelectSketch(sketch, bSeed, bMaxConcurrencyError, dstMem); + return new ConcurrentDirectQuickSelectSketch(sketch, bSeed, bMaxConcurrencyError, dstSeg); } } @@ -505,8 +483,6 @@ public String toString() { sb.append("p:").append(TAB).append(bP).append(LS); sb.append("ResizeFactor:").append(TAB).append(bRF).append(LS); sb.append("Family:").append(TAB).append(bFam).append(LS); - final String mrsStr = bMemReqSvr.getClass().getSimpleName(); - sb.append("MemoryRequestServer:").append(TAB).append(mrsStr).append(LS); sb.append("Propagate Ordered Compact").append(TAB).append(bPropagateOrderedCompact).append(LS); sb.append("NumPoolThreads").append(TAB).append(bNumPoolThreads).append(LS); sb.append("MaxConcurrencyError").append(TAB).append(bMaxConcurrencyError).append(LS); diff --git a/src/main/java/org/apache/datasketches/theta/WrappedCompactCompressedSketch.java b/src/main/java/org/apache/datasketches/theta/WrappedCompactCompressedSketch.java index c9c38ca61..e9a952ab4 100644 --- 
a/src/main/java/org/apache/datasketches/theta/WrappedCompactCompressedSketch.java +++ b/src/main/java/org/apache/datasketches/theta/WrappedCompactCompressedSketch.java @@ -19,10 +19,10 @@ package org.apache.datasketches.theta; -import static org.apache.datasketches.theta.PreambleUtil.wholeBytesToHoldBits; import static org.apache.datasketches.theta.PreambleUtil.ENTRY_BITS_BYTE_V4; import static org.apache.datasketches.theta.PreambleUtil.NUM_ENTRIES_BYTES_BYTE_V4; import static org.apache.datasketches.theta.PreambleUtil.PREAMBLE_LONGS_BYTE; +import static org.apache.datasketches.theta.PreambleUtil.wholeBytesToHoldBits; import org.apache.datasketches.common.ByteArrayUtil; import org.apache.datasketches.common.Util; @@ -32,8 +32,8 @@ * *

              This sketch can only be associated with a Serialization Version 4 format binary image.

              */ -class WrappedCompactCompressedSketch extends WrappedCompactSketch { - +final class WrappedCompactCompressedSketch extends WrappedCompactSketch { + /** * Construct this sketch with the given bytes. * @param bytes containing serialized compact compressed sketch. @@ -66,7 +66,7 @@ public int getCurrentBytes() { private static final int START_PACKED_DATA_EXACT_MODE = 8; private static final int START_PACKED_DATA_ESTIMATION_MODE = 16; - + @Override public int getRetainedEntries(final boolean valid) { //compact is always valid // number of entries is stored using variable length encoding diff --git a/src/main/java/org/apache/datasketches/theta/WrappedCompactSketch.java b/src/main/java/org/apache/datasketches/theta/WrappedCompactSketch.java index 633bcecce..a5b67363f 100644 --- a/src/main/java/org/apache/datasketches/theta/WrappedCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta/WrappedCompactSketch.java @@ -22,20 +22,19 @@ import static org.apache.datasketches.common.ByteArrayUtil.getIntLE; import static org.apache.datasketches.common.ByteArrayUtil.getLongLE; import static org.apache.datasketches.common.ByteArrayUtil.getShortLE; -import static org.apache.datasketches.theta.CompactOperations.memoryToCompact; +import static org.apache.datasketches.theta.CompactOperations.segmentToCompact; import static org.apache.datasketches.theta.PreambleUtil.EMPTY_FLAG_MASK; -import static org.apache.datasketches.theta.PreambleUtil.ORDERED_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.FLAGS_BYTE; -import static org.apache.datasketches.theta.PreambleUtil.RETAINED_ENTRIES_INT; +import static org.apache.datasketches.theta.PreambleUtil.ORDERED_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.PREAMBLE_LONGS_BYTE; -import static org.apache.datasketches.theta.PreambleUtil.THETA_LONG; +import static org.apache.datasketches.theta.PreambleUtil.RETAINED_ENTRIES_INT; import static 
org.apache.datasketches.theta.PreambleUtil.SEED_HASH_SHORT; +import static org.apache.datasketches.theta.PreambleUtil.THETA_LONG; +import java.lang.foreign.MemorySegment; import java.util.Arrays; import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; /** * Wrapper around a serialized compact read-only sketch. It is not empty, not a single item. @@ -54,7 +53,7 @@ class WrappedCompactSketch extends CompactSketch { } /** - * Wraps the given Memory, which must be a SerVer 3 CompactSketch image. + * Wraps the given byteArray, which must be a SerVer 3 CompactSketch image. * @param bytes representation of serialized compressed compact sketch. * @param seedHash The update seedHash. * See Seed Hash. @@ -68,8 +67,8 @@ static WrappedCompactSketch wrapInstance(final byte[] bytes, final short seedHas //Sketch Overrides @Override - public CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem) { - return memoryToCompact(Memory.wrap(bytes_), dstOrdered, dstMem); + public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) { + return segmentToCompact(MemorySegment.ofArray(bytes_), dstOrdered, dstSeg); } @Override @@ -91,16 +90,6 @@ public long getThetaLong() { return (preLongs > 2) ? 
getLongLE(bytes_, THETA_LONG) : Long.MAX_VALUE; } - @Override - public boolean hasMemory() { - return false; - } - - @Override - public boolean isDirect() { - return false; - } - @Override public boolean isEmpty() { return (bytes_[FLAGS_BYTE] & EMPTY_FLAG_MASK) > 0; @@ -111,11 +100,6 @@ public boolean isOrdered() { return (bytes_[FLAGS_BYTE] & ORDERED_FLAG_MASK) > 0; } - @Override - public boolean isSameResource(final Memory that) { - return false; - } - @Override public HashIterator iterator() { return new BytesCompactHashIterator( @@ -153,11 +137,6 @@ int getCurrentPreambleLongs() { return bytes_[PREAMBLE_LONGS_BYTE]; } - @Override - Memory getMemory() { - return null; - } - @Override short getSeedHash() { return getShortLE(bytes_, SEED_HASH_SHORT); diff --git a/src/main/java/org/apache/datasketches/theta2/AnotB.java b/src/main/java/org/apache/datasketches/theta2/AnotB.java deleted file mode 100644 index 5e6efe16b..000000000 --- a/src/main/java/org/apache/datasketches/theta2/AnotB.java +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.Family; - -/** - * Computes a set difference, A-AND-NOT-B, of two theta sketches. - * This class includes both stateful and stateless operations. - * - *

              The stateful operation is as follows:

              - *
              
              - * AnotB anotb = SetOperationBuilder.buildAnotB();
              - *
              - * anotb.setA(Sketch skA); //The first argument.
              - * anotb.notB(Sketch skB); //The second (subtraction) argument.
              - * anotb.notB(Sketch skC); // ...any number of additional subtractions...
              - * anotb.getResult(false); //Get an interim result.
              - * anotb.notB(Sketch skD); //Additional subtractions.
              - * anotb.getResult(true);  //Final result and resets the AnotB operator.
              - * 
              - * - *

              The stateless operation is as follows:

              - *
              
              - * AnotB anotb = SetOperationBuilder.buildAnotB();
              - *
              - * CompactSketch csk = anotb.aNotB(Sketch skA, Sketch skB);
              - * 
              - * - *

              Calling the setA operation a second time essentially clears the internal state and loads - * the new sketch.

              - * - *

              The stateless and stateful operations are independent of each other with the exception of - * sharing the same update hash seed loaded as the default seed or specified by the user as an - * argument to the builder.

              - * - * @author Lee Rhodes - */ -public abstract class AnotB extends SetOperation { - - /** - * Constructor - */ - AnotB() {} - - @Override - public Family getFamily() { - return Family.A_NOT_B; - } - - /** - * This is part of a multistep, stateful AnotB operation and sets the given Theta sketch as the - * first argument A of A-AND-NOT-B. This overwrites the internal state of this - * AnotB operator with the contents of the given sketch. - * This sets the stage for multiple following notB steps. - * - *

              An input argument of null will throw an exception.

              - * - *

              Rationale: In mathematics a "null set" is a set with no members, which we call an empty set. - * That is distinctly different from the java null, which represents a nonexistent object. - * In most cases it is a programming error due to some object that was not properly initialized. - * With a null as the first argument, we cannot know what the user's intent is. - * Since it is very likely that a null is a programming error, we throw a an exception.

              - * - *

              An empty input argument will set the internal state to empty.

              - * - *

              Rationale: An empty set is a mathematically legal concept. Although it makes any subsequent, - * valid argument for B irrelevant, we must allow this and assume the user knows what they are - * doing.

              - * - *

              Performing {@link #getResult(boolean)} just after this step will return a compact form of - * the given argument.

              - * - * @param skA The incoming sketch for the first argument, A. - */ - public abstract void setA(Sketch skA); - - /** - * This is part of a multistep, stateful AnotB operation and sets the given Theta sketch as the - * second (or n+1th) argument B of A-AND-NOT-B. - * Performs an AND NOT operation with the existing internal state of this AnotB operator. - * - *

              An input argument of null or empty is ignored.

              - * - *

              Rationale: A null for the second or following arguments is more tolerable because - * A NOT null is still A even if we don't know exactly what the null represents. It - * clearly does not have any content that overlaps with A. Also, because this can be part of - * a multistep operation with multiple notB steps. Other following steps can still produce - * a valid result.

              - * - *

              Use {@link #getResult(boolean)} to obtain the result.

              - * - * @param skB The incoming Theta sketch for the second (or following) argument B. - */ - public abstract void notB(Sketch skB); - - /** - * Gets the result of the multistep, stateful operation AnotB that have been executed with calls - * to {@link #setA(Sketch)} and ({@link #notB(Sketch)} or - * {@link #notB(org.apache.datasketches.theta2.Sketch)}). - * - * @param reset If true, clears this operator to the empty state after this result is - * returned. Set this to false if you wish to obtain an intermediate result. - * - * @return the result of this operation as an ordered, on-heap {@link CompactSketch}. - */ - public abstract CompactSketch getResult(boolean reset); - - /** - * Gets the result of the multistep, stateful operation AnotB that have been executed with calls - * to {@link #setA(Sketch)} and ({@link #notB(Sketch)} or - * {@link #notB(org.apache.datasketches.theta2.Sketch)}). - * - * @param dstOrdered If true, the result will be an ordered {@link CompactSketch}. - * See Destination Ordered. - * - * @param dstSeg if not null the given MemorySegment will be the target location of the result. - * - * @param reset If true, clears this operator to the empty state after this result is - * returned. Set this to false if you wish to obtain an intermediate result. - * - * @return the result of this operation as a {@link CompactSketch} in the given dstSeg. - */ - public abstract CompactSketch getResult(boolean dstOrdered, MemorySegment dstSeg, boolean reset); - - /** - * Perform A-and-not-B set operation on the two given sketches and return the result as an - * ordered CompactSketch on the heap. - * - *

              This a stateless operation and has no impact on the internal state of this operator. - * Thus, this is not an accumulating update and does not interact with the {@link #setA(Sketch)}, - * {@link #notB(Sketch)}, {@link #getResult(boolean)}, or - * {@link #getResult(boolean, MemorySegment, boolean)} methods.

              - * - *

              If either argument is null an exception is thrown.

              - * - *

              Rationale: In mathematics a "null set" is a set with no members, which we call an empty set. - * That is distinctly different from the java null, which represents a nonexistent object. - * In most cases null is a programming error due to a non-initialized object.

              - * - *

              With a null as the first argument we cannot know what the user's intent is and throw an - * exception. With a null as the second argument for this method we must return a result and - * there is no following possible viable arguments for the second argument so we thrown an - * exception.

              - * - * @param skA The incoming sketch for the first argument. It must not be null. - * @param skB The incoming sketch for the second argument. It must not be null. - * @return an ordered CompactSketch on the heap - */ - public CompactSketch aNotB(final Sketch skA, final Sketch skB) { - return aNotB(skA, skB, true, null); - } - - /** - * Perform A-and-not-B set operation on the two given sketches and return the result as a - * CompactSketch. - * - *

              This a stateless operation and has no impact on the internal state of this operator. - * Thus, this is not an accumulating update and does not interact with the {@link #setA(Sketch)}, - * {@link #notB(Sketch)}, {@link #getResult(boolean)}, or - * {@link #getResult(boolean, MemorySegment, boolean)} methods.

              - * - *

              If either argument is null an exception is thrown.

              - * - *

              Rationale: In mathematics a "null set" is a set with no members, which we call an empty set. - * That is distinctly different from the java null, which represents a nonexistent object. - * In most cases null is a programming error due to a non-initialized object.

              - * - *

              With a null as the first argument we cannot know what the user's intent is and throw an - * exception. With a null as the second argument for this method we must return a result and - * there is no following possible viable arguments for the second argument so we thrown an - * exception.

              - * - * @param skA The incoming sketch for the first argument. It must not be null. - * @param skB The incoming sketch for the second argument. It must not be null. - * @param dstOrdered - * See Destination Ordered. - * @param dstSeg the destination MemorySegment - * @return the result as a CompactSketch. - */ - public abstract CompactSketch aNotB(Sketch skA, Sketch skB, boolean dstOrdered, - MemorySegment dstSeg); - -} diff --git a/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java b/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java deleted file mode 100644 index 85db27af6..000000000 --- a/src/main/java/org/apache/datasketches/theta2/AnotBimpl.java +++ /dev/null @@ -1,251 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import static org.apache.datasketches.common.Util.exactLog2OfLong; -import static org.apache.datasketches.thetacommon2.HashOperations.checkThetaCorruption; -import static org.apache.datasketches.thetacommon2.HashOperations.continueCondition; -import static org.apache.datasketches.thetacommon2.HashOperations.hashSearch; -import static org.apache.datasketches.thetacommon2.HashOperations.hashSearchOrInsert; -import static org.apache.datasketches.thetacommon2.HashOperations.minLgHashTableSize; - -import java.lang.foreign.MemorySegment; -import java.util.Arrays; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.ThetaUtil; - -/** - * Implements the A-and-not-B operations. - * @author Lee Rhodes - * @author Kevin Lang - */ -final class AnotBimpl extends AnotB { - private final short seedHash_; - private boolean empty_; - private long thetaLong_; - private long[] hashArr_ = new long[0]; //compact array w curCount_ entries - private int curCount_; - - /** - * Construct a new AnotB SetOperation on the java heap. Called by SetOperation.Builder. - * - * @param seed See seed - */ - AnotBimpl(final long seed) { - this(Util.computeSeedHash(seed)); - } - - /** - * Construct a new AnotB SetOperation on the java heap. - * - * @param seedHash 16 bit hash of the chosen update seed. 
- */ - private AnotBimpl(final short seedHash) { - seedHash_ = seedHash; - reset(); - } - - @Override - public void setA(final Sketch skA) { - if (skA == null) { - reset(); - throw new SketchesArgumentException("The input argument A must not be null"); - } - if (skA.isEmpty()) { - reset(); - return; - } - //skA is not empty - Util.checkSeedHashes(seedHash_, skA.getSeedHash()); - - //process A - hashArr_ = getHashArrA(skA); - empty_ = false; - thetaLong_ = skA.getThetaLong(); - curCount_ = hashArr_.length; - } - - @Override - public void notB(final Sketch skB) { - if (empty_ || skB == null || skB.isEmpty()) { return; } - //local and skB is not empty - Util.checkSeedHashes(seedHash_, skB.getSeedHash()); - - thetaLong_ = Math.min(thetaLong_, skB.getThetaLong()); - - //process B - hashArr_ = getResultHashArr(thetaLong_, curCount_, hashArr_, skB); - curCount_ = hashArr_.length; - empty_ = curCount_ == 0 && thetaLong_ == Long.MAX_VALUE; - } - - @Override - public CompactSketch getResult(final boolean reset) { - return getResult(true, null, reset); - } - - @Override - public CompactSketch getResult(final boolean dstOrdered, final MemorySegment dstSeg, - final boolean reset) { - final CompactSketch result = CompactOperations.componentsToCompact( - thetaLong_, curCount_, seedHash_, empty_, true, false, dstOrdered, dstSeg, hashArr_.clone()); - if (reset) { reset(); } - return result; - } - - @Override - public CompactSketch aNotB(final Sketch skA, final Sketch skB, final boolean dstOrdered, - final MemorySegment dstSeg) { - if (skA == null || skB == null) { - throw new SketchesArgumentException("Neither argument may be null"); - } - //Both skA & skB are not null - - final long minThetaLong = Math.min(skA.getThetaLong(), skB.getThetaLong()); - - if (skA.isEmpty()) { return skA.compact(dstOrdered, dstSeg); } - //A is not Empty - Util.checkSeedHashes(skA.getSeedHash(), seedHash_); - - if (skB.isEmpty()) { - return skA.compact(dstOrdered, dstSeg); - } - 
Util.checkSeedHashes(skB.getSeedHash(), seedHash_); - //Both skA & skB are not empty - - //process A - final long[] hashArrA = getHashArrA(skA); - final int countA = hashArrA.length; - - //process B - final long[] hashArrOut = getResultHashArr(minThetaLong, countA, hashArrA, skB); //out is clone - final int countOut = hashArrOut.length; - final boolean empty = countOut == 0 && minThetaLong == Long.MAX_VALUE; - - final CompactSketch result = CompactOperations.componentsToCompact( - minThetaLong, countOut, seedHash_, empty, true, false, dstOrdered, dstSeg, hashArrOut); - return result; - } - - @Override - int getRetainedEntries() { - return curCount_; - } - - //restricted - - private static long[] getHashArrA(final Sketch skA) { //returns a new array - //Get skA cache as array - final CompactSketch cskA = skA.compact(false, null); //sorting not required - final long[] hashArrA = cskA.getCache().clone(); - return hashArrA; - } - - private static long[] getResultHashArr( //returns a new array - final long minThetaLong, - final int countA, - final long[] hashArrA, - final Sketch skB) { - - // Rebuild or get hashtable of skB - final long[] hashTableB; //read only - if (skB instanceof CompactSketch) { - hashTableB = convertToHashTable(skB, minThetaLong, ThetaUtil.REBUILD_THRESHOLD); - } else { - hashTableB = skB.getCache(); - } - - //build temporary result arrays of skA - final long[] tmpHashArrA = new long[countA]; - - //search for non matches and build temp arrays - final int lgHTBLen = exactLog2OfLong(hashTableB.length); - int nonMatches = 0; - for (int i = 0; i < countA; i++) { - final long hash = hashArrA[i]; - if (hash != 0 && hash < minThetaLong) { //only allows hashes of A < minTheta - final int index = hashSearch(hashTableB, lgHTBLen, hash); - if (index == -1) { - tmpHashArrA[nonMatches] = hash; - nonMatches++; - } - } - } - return Arrays.copyOfRange(tmpHashArrA, 0, nonMatches); - } - - private static long[] convertToHashTable( - final Sketch sketch, - final long 
thetaLong, - final double rebuildThreshold) { - final int lgArrLongs = minLgHashTableSize(sketch.getRetainedEntries(true), rebuildThreshold); - final int arrLongs = 1 << lgArrLongs; - final long[] hashTable = new long[arrLongs]; - checkThetaCorruption(thetaLong); - final HashIterator it = sketch.iterator(); - while (it.next()) { - final long hash = it.get(); - if (continueCondition(thetaLong, hash) ) { - continue; - } - hashSearchOrInsert(hashTable, lgArrLongs, hash); - } - return hashTable; - } - - private void reset() { - thetaLong_ = Long.MAX_VALUE; - empty_ = true; - hashArr_ = new long[0]; - curCount_ = 0; - } - - @Override - long[] getCache() { - return hashArr_.clone(); - } - - @Override - short getSeedHash() { - return seedHash_; - } - - @Override - long getThetaLong() { - return thetaLong_; - } - - @Override - public boolean hasMemorySegment() { return false; } - - @Override - public boolean isDirect() { return false; } - - @Override - public boolean isSameResource( final MemorySegment that) { return false; } - - @Override - boolean isEmpty() { - return empty_; - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/BitPacking.java b/src/main/java/org/apache/datasketches/theta2/BitPacking.java deleted file mode 100644 index 66d5a245e..000000000 --- a/src/main/java/org/apache/datasketches/theta2/BitPacking.java +++ /dev/null @@ -1,6294 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import org.apache.datasketches.common.SketchesArgumentException; - -/** - * Used as part of Theta compression. - */ -public final class BitPacking { - - private BitPacking() { } - - /** - * The bit packing operation - * @param value the value to pack - * @param bits number of bits to pack - * @param buffer the output byte array buffer - * @param bufOffset the byte offset in the buffer - * @param bitOffset the bit offset - */ - public static void packBits(final long value, int bits, final byte[] buffer, int bufOffset, final int bitOffset) { - if (bitOffset > 0) { - final int chunkBits = 8 - bitOffset; - final int mask = (1 << chunkBits) - 1; - if (bits < chunkBits) { - buffer[bufOffset] |= (value << (chunkBits - bits)) & mask; - return; - } - buffer[bufOffset++] |= (value >>> (bits - chunkBits)) & mask; - bits -= chunkBits; - } - while (bits >= 8) { - buffer[bufOffset++] = (byte)(value >>> (bits - 8)); - bits -= 8; - } - if (bits > 0) { - buffer[bufOffset] = (byte)(value << (8 - bits)); - } - } - - /** - * The unpacking operation - * @param value the output array - * @param index index of the value array - * @param bits the number of bits to unpack - * @param buffer the input packed buffer - * @param bufOffset the buffer offset - * @param bitOffset the bit offset - */ - public static void unpackBits(final long[] value, final int index, int bits, final byte[] buffer, - int bufOffset,final int bitOffset) { - final int availBits = 8 - bitOffset; - final int chunkBits = availBits <= bits ? 
availBits : bits; - final int mask = (1 << chunkBits) - 1; - value[index] = (buffer[bufOffset] >>> (availBits - chunkBits)) & mask; - bufOffset += availBits == chunkBits ? 1 : 0; - bits -= chunkBits; - while (bits >= 8) { - value[index] <<= 8; - value[index] |= (Byte.toUnsignedLong(buffer[bufOffset++])); - bits -= 8; - } - if (bits > 0) { - value[index] <<= bits; - value[index] |= Byte.toUnsignedLong(buffer[bufOffset]) >>> (8 - bits); - } - } - - // pack given number of bits from a block of 8 64-bit values into bytes - // we don't need 0 and 64 bits - // we assume that higher bits (which we are not packing) are zeros - // this assumption allows to avoid masking operations - - static void packBitsBlock8(final long[] values, final int i, final byte[] buf, final int off, final int bits) { - switch (bits) { - case 1: packBits1(values, i, buf, off); break; - case 2: packBits2(values, i, buf, off); break; - case 3: packBits3(values, i, buf, off); break; - case 4: packBits4(values, i, buf, off); break; - case 5: packBits5(values, i, buf, off); break; - case 6: packBits6(values, i, buf, off); break; - case 7: packBits7(values, i, buf, off); break; - case 8: packBits8(values, i, buf, off); break; - case 9: packBits9(values, i, buf, off); break; - case 10: packBits10(values, i, buf, off); break; - case 11: packBits11(values, i, buf, off); break; - case 12: packBits12(values, i, buf, off); break; - case 13: packBits13(values, i, buf, off); break; - case 14: packBits14(values, i, buf, off); break; - case 15: packBits15(values, i, buf, off); break; - case 16: packBits16(values, i, buf, off); break; - case 17: packBits17(values, i, buf, off); break; - case 18: packBits18(values, i, buf, off); break; - case 19: packBits19(values, i, buf, off); break; - case 20: packBits20(values, i, buf, off); break; - case 21: packBits21(values, i, buf, off); break; - case 22: packBits22(values, i, buf, off); break; - case 23: packBits23(values, i, buf, off); break; - case 24: packBits24(values, 
i, buf, off); break; - case 25: packBits25(values, i, buf, off); break; - case 26: packBits26(values, i, buf, off); break; - case 27: packBits27(values, i, buf, off); break; - case 28: packBits28(values, i, buf, off); break; - case 29: packBits29(values, i, buf, off); break; - case 30: packBits30(values, i, buf, off); break; - case 31: packBits31(values, i, buf, off); break; - case 32: packBits32(values, i, buf, off); break; - case 33: packBits33(values, i, buf, off); break; - case 34: packBits34(values, i, buf, off); break; - case 35: packBits35(values, i, buf, off); break; - case 36: packBits36(values, i, buf, off); break; - case 37: packBits37(values, i, buf, off); break; - case 38: packBits38(values, i, buf, off); break; - case 39: packBits39(values, i, buf, off); break; - case 40: packBits40(values, i, buf, off); break; - case 41: packBits41(values, i, buf, off); break; - case 42: packBits42(values, i, buf, off); break; - case 43: packBits43(values, i, buf, off); break; - case 44: packBits44(values, i, buf, off); break; - case 45: packBits45(values, i, buf, off); break; - case 46: packBits46(values, i, buf, off); break; - case 47: packBits47(values, i, buf, off); break; - case 48: packBits48(values, i, buf, off); break; - case 49: packBits49(values, i, buf, off); break; - case 50: packBits50(values, i, buf, off); break; - case 51: packBits51(values, i, buf, off); break; - case 52: packBits52(values, i, buf, off); break; - case 53: packBits53(values, i, buf, off); break; - case 54: packBits54(values, i, buf, off); break; - case 55: packBits55(values, i, buf, off); break; - case 56: packBits56(values, i, buf, off); break; - case 57: packBits57(values, i, buf, off); break; - case 58: packBits58(values, i, buf, off); break; - case 59: packBits59(values, i, buf, off); break; - case 60: packBits60(values, i, buf, off); break; - case 61: packBits61(values, i, buf, off); break; - case 62: packBits62(values, i, buf, off); break; - case 63: packBits63(values, i, buf, 
off); break; - default: throw new SketchesArgumentException("wrong number of bits in packBitsBlock8: " + bits); - } - } - - static void unpackBitsBlock8(final long[] values, final int i, final byte[] buf, final int off, final int bits) { - switch (bits) { - case 1: unpackBits1(values, i, buf, off); break; - case 2: unpackBits2(values, i, buf, off); break; - case 3: unpackBits3(values, i, buf, off); break; - case 4: unpackBits4(values, i, buf, off); break; - case 5: unpackBits5(values, i, buf, off); break; - case 6: unpackBits6(values, i, buf, off); break; - case 7: unpackBits7(values, i, buf, off); break; - case 8: unpackBits8(values, i, buf, off); break; - case 9: unpackBits9(values, i, buf, off); break; - case 10: unpackBits10(values, i, buf, off); break; - case 11: unpackBits11(values, i, buf, off); break; - case 12: unpackBits12(values, i, buf, off); break; - case 13: unpackBits13(values, i, buf, off); break; - case 14: unpackBits14(values, i, buf, off); break; - case 15: unpackBits15(values, i, buf, off); break; - case 16: unpackBits16(values, i, buf, off); break; - case 17: unpackBits17(values, i, buf, off); break; - case 18: unpackBits18(values, i, buf, off); break; - case 19: unpackBits19(values, i, buf, off); break; - case 20: unpackBits20(values, i, buf, off); break; - case 21: unpackBits21(values, i, buf, off); break; - case 22: unpackBits22(values, i, buf, off); break; - case 23: unpackBits23(values, i, buf, off); break; - case 24: unpackBits24(values, i, buf, off); break; - case 25: unpackBits25(values, i, buf, off); break; - case 26: unpackBits26(values, i, buf, off); break; - case 27: unpackBits27(values, i, buf, off); break; - case 28: unpackBits28(values, i, buf, off); break; - case 29: unpackBits29(values, i, buf, off); break; - case 30: unpackBits30(values, i, buf, off); break; - case 31: unpackBits31(values, i, buf, off); break; - case 32: unpackBits32(values, i, buf, off); break; - case 33: unpackBits33(values, i, buf, off); break; - case 34: 
unpackBits34(values, i, buf, off); break; - case 35: unpackBits35(values, i, buf, off); break; - case 36: unpackBits36(values, i, buf, off); break; - case 37: unpackBits37(values, i, buf, off); break; - case 38: unpackBits38(values, i, buf, off); break; - case 39: unpackBits39(values, i, buf, off); break; - case 40: unpackBits40(values, i, buf, off); break; - case 41: unpackBits41(values, i, buf, off); break; - case 42: unpackBits42(values, i, buf, off); break; - case 43: unpackBits43(values, i, buf, off); break; - case 44: unpackBits44(values, i, buf, off); break; - case 45: unpackBits45(values, i, buf, off); break; - case 46: unpackBits46(values, i, buf, off); break; - case 47: unpackBits47(values, i, buf, off); break; - case 48: unpackBits48(values, i, buf, off); break; - case 49: unpackBits49(values, i, buf, off); break; - case 50: unpackBits50(values, i, buf, off); break; - case 51: unpackBits51(values, i, buf, off); break; - case 52: unpackBits52(values, i, buf, off); break; - case 53: unpackBits53(values, i, buf, off); break; - case 54: unpackBits54(values, i, buf, off); break; - case 55: unpackBits55(values, i, buf, off); break; - case 56: unpackBits56(values, i, buf, off); break; - case 57: unpackBits57(values, i, buf, off); break; - case 58: unpackBits58(values, i, buf, off); break; - case 59: unpackBits59(values, i, buf, off); break; - case 60: unpackBits60(values, i, buf, off); break; - case 61: unpackBits61(values, i, buf, off); break; - case 62: unpackBits62(values, i, buf, off); break; - case 63: unpackBits63(values, i, buf, off); break; - default: throw new SketchesArgumentException("wrong number of bits unpackBitsBlock8: " + bits); - } - } - - static void packBits1(final long[] values, final int i, final byte[] buf, final int off) { - buf[off] = (byte) (values[i + 0] << 7); - buf[off] |= values[i + 1] << 6; - buf[off] |= values[i + 2] << 5; - buf[off] |= values[i + 3] << 4; - buf[off] |= values[i + 4] << 3; - buf[off] |= values[i + 5] << 2; - 
buf[off] |= values[i + 6] << 1; - buf[off] |= values[i + 7]; - } - - static void packBits2(final long[] values, final int i, final byte[] buf, int off) { - buf[off] = (byte) (values[i + 0] << 6); - buf[off] |= values[i + 1] << 4; - buf[off] |= values[i + 2] << 2; - buf[off++] |= values[i + 3]; - - buf[off] = (byte) (values[i + 4] << 6); - buf[off] |= values[i + 5] << 4; - buf[off] |= values[i + 6] << 2; - buf[off] |= values[i + 7]; - } - - static void packBits3(final long[] values, final int i, final byte[] buf, int off) { - buf[off] = (byte) (values[i + 0] << 5); - buf[off] |= values[i + 1] << 2; - buf[off++] |= values[i + 2] >>> 1; - - buf[off] = (byte) (values[i + 2] << 7); - buf[off] |= values[i + 3] << 4; - buf[off] |= values[i + 4] << 1; - buf[off++] |= values[i + 5] >>> 2; - - buf[off] = (byte) (values[i + 5] << 6); - buf[off] |= values[i + 6] << 3; - buf[off] |= values[i + 7]; - } - - static void packBits4(final long[] values, final int i, final byte[] buf, int off) { - buf[off] = (byte) (values[i + 0] << 4); - buf[off++] |= values[i + 1]; - - buf[off] = (byte) (values[i + 2] << 4); - buf[off++] |= values[i + 3]; - - buf[off] = (byte) (values[i + 4] << 4); - buf[off++] |= values[i + 5]; - - buf[off] = (byte) (values[i + 6] << 4); - buf[off] |= values[i + 7]; - } - - static void packBits5(final long[] values, final int i, final byte[] buf, int off) { - buf[off] = (byte) (values[i + 0] << 3); - buf[off++] |= values[i + 1] >>> 2; - - buf[off] = (byte) (values[i + 1] << 6); - buf[off] |= values[i + 2] << 1; - buf[off++] |= values[i + 3] >>> 4; - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 1; - - buf[off] = (byte) (values[i + 4] << 7); - buf[off] |= values[i + 5] << 2; - buf[off++] |= values[i + 6] >>> 3; - - buf[off] = (byte) (values[i + 6] << 5); - buf[off] |= values[i + 7]; - } - - static void packBits6(final long[] values, final int i, final byte[] buf, int off) { - buf[off] = (byte) (values[i + 0] << 2); - buf[off++] |= 
values[i + 1] >>> 4; - - buf[off] = (byte) (values[i + 1] << 4); - buf[off++] |= values[i + 2] >>> 2; - - buf[off] = (byte) (values[i + 2] << 6); - buf[off++] |= values[i + 3]; - - buf[off] = (byte) (values[i + 4] << 2); - buf[off++] |= values[i + 5] >>> 4; - - buf[off] = (byte) (values[i + 5] << 4); - buf[off++] |= values[i + 6] >>> 2; - - buf[off] = (byte) (values[i + 6] << 6); - buf[off] |= values[i + 7]; - } - - static void packBits7(final long[] values, final int i, final byte[] buf, int off) { - buf[off] = (byte) (values[i + 0] << 1); - buf[off++] |= values[i + 1] >>> 6; - - buf[off] = (byte) (values[i + 1] << 2); - buf[off++] |= values[i + 2] >>> 5; - - buf[off] = (byte) (values[i + 2] << 3); - buf[off++] |= values[i + 3] >>> 4; - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 3; - - buf[off] = (byte) (values[i + 4] << 5); - buf[off++] |= values[i + 5] >>> 2; - - buf[off] = (byte) (values[i + 5] << 6); - buf[off++] |= values[i + 6] >>> 1; - - buf[off] = (byte) (values[i + 6] << 7); - buf[off] |= values[i + 7]; - } - - static void packBits8(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0]); - buf[off++] = (byte) (values[i + 1]); - buf[off++] = (byte) (values[i + 2]); - buf[off++] = (byte) (values[i + 3]); - buf[off++] = (byte) (values[i + 4]); - buf[off++] = (byte) (values[i + 5]); - buf[off++] = (byte) (values[i + 6]); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits9(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 1); - - buf[off] = (byte) (values[i + 0] << 7); - buf[off++] |= values[i + 1] >>> 2; - - buf[off] = (byte) (values[i + 1] << 6); - buf[off++] |= values[i + 2] >>> 3; - - buf[off] = (byte) (values[i + 2] << 5); - buf[off++] |= values[i + 3] >>> 4; - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 5; - - buf[off] = (byte) (values[i + 4] << 3); - buf[off++] |= values[i + 
5] >>> 6; - - buf[off] = (byte) (values[i + 5] << 2); - buf[off++] |= values[i + 6] >>> 7; - - buf[off] = (byte) (values[i + 6] << 1); - buf[off++] |= values[i + 7] >>> 8; - - buf[off] = (byte) (values[i + 7]); - } - - static void packBits10(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 2); - - buf[off] = (byte) (values[i + 0] << 6); - buf[off++] |= values[i + 1] >>> 4; - - buf[off] = (byte) (values[i + 1] << 4); - buf[off++] |= values[i + 2] >>> 6; - - buf[off] = (byte) (values[i + 2] << 2); - buf[off++] |= values[i + 3] >>> 8; - - buf[off++] = (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 2); - - buf[off] = (byte) (values[i + 4] << 6); - buf[off++] |= values[i + 5] >>> 4; - - buf[off] = (byte) (values[i + 5] << 4); - buf[off++] |= values[i + 6] >>> 6; - - buf[off] = (byte) (values[i + 6] << 2); - buf[off++] |= values[i + 7] >>> 8; - - buf[off] = (byte) (values[i + 7]); - } - - static void packBits11(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 3); - - buf[off] = (byte) (values[i + 0] << 5); - buf[off++] |= values[i + 1] >>> 6; - - buf[off] = (byte) (values[i + 1] << 2); - buf[off++] |= values[i + 2] >>> 9; - - buf[off++] = (byte) (values[i + 2] >>> 1); - - buf[off] = (byte) (values[i + 2] << 7); - buf[off++] |= values[i + 3] >>> 4; - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 7; - - buf[off] = (byte) (values[i + 4] << 1); - buf[off++] |= values[i + 5] >>> 10; - - buf[off++] = (byte) (values[i + 5] >>> 2); - - buf[off] = (byte) (values[i + 5] << 6); - buf[off++] |= values[i + 6] >>> 5; - - buf[off] = (byte) (values[i + 6] << 3); - buf[off++] |= values[i + 7] >>> 8; - - buf[off] = (byte) (values[i + 7]); - } - - static void packBits12(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 4); - - buf[off] = (byte) (values[i + 0] << 4); - buf[off++] 
|= values[i + 1] >>> 8; - - buf[off++] = (byte) (values[i + 1]); - - buf[off++] = (byte) (values[i + 2] >>> 4); - - buf[off] = (byte) (values[i + 2] << 4); - buf[off++] |= values[i + 3] >>> 8; - - buf[off++] = (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 4); - - buf[off] = (byte) (values[i + 4] << 4); - buf[off++] |= values[i + 5] >>> 8; - - buf[off++] = (byte) (values[i + 5]); - - buf[off++] = (byte) (values[i + 6] >>> 4); - - buf[off] = (byte) (values[i + 6] << 4); - buf[off++] |= values[i + 7] >>> 8; - - buf[off] = (byte) (values[i + 7]); - } - - static void packBits13(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 5); - - buf[off] = (byte) (values[i + 0] << 3); - buf[off++] |= values[i + 1] >>> 10; - - buf[off++] = (byte) (values[i + 1] >>> 2); - - buf[off] = (byte) (values[i + 1] << 6); - buf[off++] |= values[i + 2] >>> 7; - - buf[off] = (byte) (values[i + 2] << 1); - buf[off++] |= values[i + 3] >>> 12; - - buf[off++] = (byte) (values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 9; - - buf[off++] = (byte) (values[i + 4] >>> 1); - - buf[off] = (byte) (values[i + 4] << 7); - buf[off++] |= values[i + 5] >>> 6; - - buf[off] = (byte) (values[i + 5] << 2); - buf[off++] |= values[i + 6] >>> 11; - - buf[off++] = (byte) (values[i + 6] >>> 3); - - buf[off] = (byte) (values[i + 6] << 5); - buf[off++] |= values[i + 7] >>> 8; - - buf[off] = (byte) (values[i + 7]); - } - - static void packBits14(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 6); - - buf[off] = (byte) (values[i + 0] << 2); - buf[off++] |= values[i + 1] >>> 12; - - buf[off++] = (byte) (values[i + 1] >>> 4); - - buf[off] = (byte) (values[i + 1] << 4); - buf[off++] |= values[i + 2] >>> 10; - - buf[off++] = (byte) (values[i + 2] >>> 2); - - buf[off] = (byte) (values[i + 2] << 6); - buf[off++] |= values[i + 3] >>> 8; - - buf[off++] 
= (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 6); - - buf[off] = (byte) (values[i + 4] << 2); - buf[off++] |= values[i + 5] >>> 12; - - buf[off++] = (byte) (values[i + 5] >>> 4); - - buf[off] = (byte) (values[i + 5] << 4); - buf[off++] |= values[i + 6] >>> 10; - - buf[off++] = (byte) (values[i + 6] >>> 2); - - buf[off] = (byte) (values[i + 6] << 6); - buf[off++] |= values[i + 7] >>> 8; - - buf[off] = (byte) (values[i + 7]); - } - - static void packBits15(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 7); - - buf[off] = (byte) (values[i + 0] << 1); - buf[off++] |= values[i + 1] >>> 14; - - buf[off++] = (byte) (values[i + 1] >>> 6); - - buf[off] = (byte) (values[i + 1] << 2); - buf[off++] |= values[i + 2] >>> 13; - - buf[off++] = (byte) (values[i + 2] >>> 5); - - buf[off] = (byte) (values[i + 2] << 3); - buf[off++] |= values[i + 3] >>> 12; - - buf[off++] = (byte) (values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 11; - - buf[off++] = (byte) (values[i + 4] >>> 3); - - buf[off] = (byte) (values[i + 4] << 5); - buf[off++] |= values[i + 5] >>> 10; - - buf[off++] = (byte) (values[i + 5] >>> 2); - - buf[off] = (byte) (values[i + 5] << 6); - buf[off++] |= values[i + 6] >>> 9; - - buf[off++] = (byte) (values[i + 6] >>> 1); - - buf[off] = (byte) (values[i + 6] << 7); - buf[off++] |= values[i + 7] >>> 8; - - buf[off] = (byte) (values[i + 7]); - } - - static void packBits16(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 8); - buf[off++] = (byte) (values[i + 0]); - - buf[off++] = (byte) (values[i + 1] >>> 8); - buf[off++] = (byte) (values[i + 1]); - - buf[off++] = (byte) (values[i + 2] >>> 8); - buf[off++] = (byte) (values[i + 2]); - - buf[off++] = (byte) (values[i + 3] >>> 8); - buf[off++] = (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 8); - buf[off++] = (byte) (values[i + 
4]); - - buf[off++] = (byte) (values[i + 5] >>> 8); - buf[off++] = (byte) (values[i + 5]); - - buf[off++] = (byte) (values[i + 6] >>> 8); - buf[off++] = (byte) (values[i + 6]); - - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits17(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 9); - - buf[off++] = (byte) (values[i + 0] >>> 1); - - buf[off] = (byte) (values[i + 0] << 7); - buf[off++] |= values[i + 1] >>> 10; - - buf[off++] = (byte) (values[i + 1] >>> 2); - - buf[off] = (byte) (values[i + 1] << 6); - buf[off++] |= values[i + 2] >>> 11; - - buf[off++] = (byte) (values[i + 2] >>> 3); - - buf[off] = (byte) (values[i + 2] << 5); - buf[off++] |= values[i + 3] >>> 12; - - buf[off++] = (byte) (values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 13; - - buf[off++] = (byte) (values[i + 4] >>> 5); - - buf[off] = (byte) (values[i + 4] << 3); - buf[off++] |= values[i + 5] >>> 14; - - buf[off++] = (byte) (values[i + 5] >>> 6); - - buf[off] = (byte) (values[i + 5] << 2); - buf[off++] |= values[i + 6] >>> 15; - - buf[off++] = (byte) (values[i + 6] >>> 7); - - buf[off] = (byte) (values[i + 6] << 1); - buf[off++] |= values[i + 7] >>> 16; - - buf[off++] = (byte) (values[i + 7] >>> 8); - - buf[off] = (byte) (values[i + 7]); - } - - static void packBits18(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 10); - - buf[off++] = (byte) (values[i + 0] >>> 2); - - buf[off] = (byte) (values[i + 0] << 6); - buf[off++] |= values[i + 1] >>> 12; - - buf[off++] = (byte) (values[i + 1] >>> 4); - - buf[off] = (byte) (values[i + 1] << 4); - buf[off++] |= values[i + 2] >>> 14; - - buf[off++] = (byte) (values[i + 2] >>> 6); - - buf[off] = (byte) (values[i + 2] << 2); - buf[off++] |= values[i + 3] >>> 16; - - buf[off++] = (byte) (values[i + 3] >>> 8); - - buf[off++] = (byte) (values[i + 
3]); - - buf[off++] = (byte) (values[i + 4] >>> 10); - - buf[off++] = (byte) (values[i + 4] >>> 2); - - buf[off] = (byte) (values[i + 4] << 6); - buf[off++] |= values[i + 5] >>> 12; - - buf[off++] = (byte) (values[i + 5] >>> 4); - - buf[off] = (byte) (values[i + 5] << 4); - buf[off++] |= values[i + 6] >>> 14; - - buf[off++] = (byte) (values[i + 6] >>> 6); - - buf[off] = (byte) (values[i + 6] << 2); - buf[off++] |= values[i + 7] >>> 16; - - buf[off++] = (byte) (values[i + 7] >>> 8); - - buf[off] = (byte) (values[i + 7]); - } - - static void packBits19(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 11); - - buf[off++] = (byte) (values[i + 0] >>> 3); - - buf[off] = (byte) (values[i + 0] << 5); - buf[off++] |= values[i + 1] >>> 14; - - buf[off++] = (byte) (values[i + 1] >>> 6); - - buf[off] = (byte) (values[i + 1] << 2); - buf[off++] |= values[i + 2] >>> 17; - - buf[off++] = (byte) (values[i + 2] >>> 9); - - buf[off++] = (byte) (values[i + 2] >>> 1); - - buf[off] = (byte) (values[i + 2] << 7); - buf[off++] |= values[i + 3] >>> 12; - - buf[off++] = (byte) (values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 15; - - buf[off++] |= values[i + 4] >>> 7; - - buf[off] = (byte) (values[i + 4] << 1); - buf[off++] |= values[i + 5] >>> 18; - - buf[off++] = (byte) (values[i + 5] >>> 10); - - buf[off++] = (byte) (values[i + 5] >>> 2); - - buf[off] = (byte) (values[i + 5] << 6); - buf[off++] |= values[i + 6] >>> 13; - - buf[off++] = (byte) (values[i + 6] >>> 5); - - buf[off] = (byte) (values[i + 6] << 3); - buf[off++] |= values[i + 7] >>> 16; - - buf[off++] = (byte) (values[i + 7] >>> 8); - - buf[off] = (byte) (values[i + 7]); - } - - static void packBits20(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 12); - - buf[off++] = (byte) (values[i + 0] >>> 4); - - buf[off] = (byte) (values[i + 0] << 4); - buf[off++] |= values[i + 
1] >>> 16; - - buf[off++] = (byte) (values[i + 1] >>> 8); - - buf[off++] = (byte) (values[i + 1]); - - buf[off++] = (byte) (values[i + 2] >>> 12); - - buf[off++] = (byte) (values[i + 2] >>> 4); - - buf[off] = (byte) (values[i + 2] << 4); - buf[off++] |= values[i + 3] >>> 16; - - buf[off++] = (byte) (values[i + 3] >>> 8); - - buf[off++] = (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 12); - - buf[off++] = (byte) (values[i + 4] >>> 4); - - buf[off] = (byte) (values[i + 4] << 4); - buf[off++] |= values[i + 5] >>> 16; - - buf[off++] = (byte) (values[i + 5] >>> 8); - - buf[off++] = (byte) (values[i + 5]); - - buf[off++] = (byte) (values[i + 6] >>> 12); - - buf[off++] = (byte) (values[i + 6] >>> 4); - - buf[off] = (byte) (values[i + 6] << 4); - buf[off++] |= values[i + 7] >>> 16; - - buf[off++] = (byte) (values[i + 7] >>> 8); - - buf[off] = (byte) (values[i + 7]); - } - - static void packBits21(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 13); - - buf[off++] = (byte) (values[i + 0] >>> 5); - - buf[off] = (byte) (values[i + 0] << 3); - buf[off++] |= values[i + 1] >>> 18; - - buf[off++] = (byte) (values[i + 1] >>> 10); - - buf[off++] = (byte) (values[i + 1] >>> 2); - - buf[off] = (byte) (values[i + 1] << 6); - buf[off++] |= values[i + 2] >>> 15; - - buf[off++] = (byte) (values[i + 2] >>> 7); - - buf[off] = (byte) (values[i + 2] << 1); - buf[off++] |= values[i + 3] >>> 20; - - buf[off++] = (byte) (values[i + 3] >>> 12); - - buf[off++] = (byte) (values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 17; - - buf[off++] = (byte) (values[i + 4] >>> 9); - - buf[off++] = (byte) (values[i + 4] >>> 1); - - buf[off] = (byte) (values[i + 4] << 7); - buf[off++] |= values[i + 5] >>> 14; - - buf[off++] = (byte) (values[i + 5] >>> 6); - - buf[off] = (byte) (values[i + 5] << 2); - buf[off++] |= values[i + 6] >>> 19; - - buf[off++] = (byte) (values[i + 6] >>> 11); - 
- buf[off++] = (byte) (values[i + 6] >>> 3); - - buf[off] = (byte) (values[i + 6] << 5); - buf[off++] |= values[i + 7] >>> 16; - - buf[off++] = (byte) (values[i + 7] >>> 8); - - buf[off] = (byte) (values[i + 7]); - } - - static void packBits22(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 14); - - buf[off++] = (byte) (values[i + 0] >>> 6); - - buf[off] = (byte) (values[i + 0] << 2); - buf[off++] |= values[i + 1] >>> 20; - - buf[off++] = (byte) (values[i + 1] >>> 12); - - buf[off++] = (byte) (values[i + 1] >>> 4); - - buf[off] = (byte) (values[i + 1] << 4); - buf[off++] |= values[i + 2] >>> 18; - - buf[off++] = (byte) (values[i + 2] >>> 10); - - buf[off++] = (byte) (values[i + 2] >>> 2); - - buf[off] = (byte) (values[i + 2] << 6); - buf[off++] |= values[i + 3] >>> 16; - - buf[off++] = (byte) (values[i + 3] >>> 8); - - buf[off++] = (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 14); - - buf[off++] = (byte) (values[i + 4] >>> 6); - - buf[off] = (byte) (values[i + 4] << 2); - buf[off++] |= values[i + 5] >>> 20; - - buf[off++] = (byte) (values[i + 5] >>> 12); - - buf[off++] = (byte) (values[i + 5] >>> 4); - - buf[off] = (byte) (values[i + 5] << 4); - buf[off++] |= values[i + 6] >>> 18; - - buf[off++] = (byte) (values[i + 6] >>> 10); - - buf[off++] = (byte) (values[i + 6] >>> 2); - - buf[off] = (byte) (values[i + 6] << 6); - buf[off++] |= values[i + 7] >>> 16; - - buf[off++] = (byte) (values[i + 7] >>> 8); - - buf[off] = (byte) (values[i + 7]); - } - - static void packBits23(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 15); - - buf[off++] = (byte) (values[i + 0] >>> 7); - - buf[off] = (byte) (values[i + 0] << 1); - buf[off++] |= values[i + 1] >>> 22; - - buf[off++] = (byte) (values[i + 1] >>> 14); - - buf[off++] = (byte) (values[i + 1] >>> 6); - - buf[off] = (byte) (values[i + 1] << 2); - buf[off++] |= values[i + 2] >>> 21; - - 
buf[off++] = (byte) (values[i + 2] >>> 13); - - buf[off++] = (byte) (values[i + 2] >>> 5); - - buf[off] = (byte) (values[i + 2] << 3); - buf[off++] |= values[i + 3] >>> 20; - - buf[off++] = (byte) (values[i + 3] >>> 12); - - buf[off++] = (byte) (values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 19; - - buf[off++] = (byte) (values[i + 4] >>> 11); - - buf[off++] = (byte) (values[i + 4] >>> 3); - - buf[off] = (byte) (values[i + 4] << 5); - buf[off++] |= values[i + 5] >>> 18; - - buf[off++] = (byte) (values[i + 5] >>> 10); - - buf[off++] = (byte) (values[i + 5] >>> 2); - - buf[off] = (byte) (values[i + 5] << 6); - buf[off++] |= values[i + 6] >>> 17; - - buf[off++] = (byte) (values[i + 6] >>> 9); - - buf[off++] = (byte) (values[i + 6] >>> 1); - - buf[off] = (byte) (values[i + 6] << 7); - buf[off++] |= values[i + 7] >>> 16; - - buf[off++] = (byte) (values[i + 7] >>> 8); - - buf[off] = (byte) (values[i + 7]); - } - - static void packBits24(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 16); - buf[off++] = (byte) (values[i + 0] >>> 8); - buf[off++] = (byte) (values[i + 0]); - - buf[off++] = (byte) (values[i + 1] >>> 16); - buf[off++] = (byte) (values[i + 1] >>> 8); - buf[off++] = (byte) (values[i + 1]); - - buf[off++] = (byte) (values[i + 2] >>> 16); - buf[off++] = (byte) (values[i + 2] >>> 8); - buf[off++] = (byte) (values[i + 2]); - - buf[off++] = (byte) (values[i + 3] >>> 16); - buf[off++] = (byte) (values[i + 3] >>> 8); - buf[off++] = (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 16); - buf[off++] = (byte) (values[i + 4] >>> 8); - buf[off++] = (byte) (values[i + 4]); - - buf[off++] = (byte) (values[i + 5] >>> 16); - buf[off++] = (byte) (values[i + 5] >>> 8); - buf[off++] = (byte) (values[i + 5]); - - buf[off++] = (byte) (values[i + 6] >>> 16); - buf[off++] = (byte) (values[i + 6] >>> 8); - buf[off++] = (byte) (values[i + 6]); - - buf[off++] = 
(byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits25(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 17); - - buf[off++] = (byte) (values[i + 0] >>> 9); - - buf[off++] = (byte) (values[i + 0] >>> 1); - - buf[off] = (byte) (values[i + 0] << 7); - buf[off++] |= values[i + 1] >>> 18; - - buf[off++] = (byte) (values[i + 1] >>> 10); - - buf[off++] = (byte) (values[i + 1] >>> 2); - - buf[off] = (byte) (values[i + 1] << 6); - buf[off++] |= values[i + 2] >>> 19; - - buf[off++] = (byte) (values[i + 2] >>> 11); - - buf[off++] = (byte) (values[i + 2] >>> 3); - - buf[off] = (byte) (values[i + 2] << 5); - buf[off++] |= values[i + 3] >>> 20; - - buf[off++] = (byte) (values[i + 3] >>> 12); - - buf[off++] = (byte) (values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 21; - - buf[off++] = (byte) (values[i + 4] >>> 13); - - buf[off++] = (byte) (values[i + 4] >>> 5); - - buf[off] = (byte) (values[i + 4] << 3); - buf[off++] |= values[i + 5] >>> 22; - - buf[off++] = (byte) (values[i + 5] >>> 14); - - buf[off++] = (byte) (values[i + 5] >>> 6); - - buf[off] = (byte) (values[i + 5] << 2); - buf[off++] |= values[i + 6] >>> 23; - - buf[off++] = (byte) (values[i + 6] >>> 15); - - buf[off++] = (byte) (values[i + 6] >>> 7); - - buf[off] = (byte) (values[i + 6] << 1); - buf[off++] |= values[i + 7] >>> 24; - - buf[off++] = (byte) (values[i + 7] >>> 16); - - buf[off++] = (byte) (values[i + 7] >>> 8); - - buf[off] = (byte) (values[i + 7]); - } - - static void packBits26(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 18); - - buf[off++] = (byte) (values[i + 0] >>> 10); - - buf[off++] = (byte) (values[i + 0] >>> 2); - - buf[off] = (byte) (values[i + 0] << 6); - buf[off++] |= values[i + 1] >>> 20; - - buf[off++] = (byte) (values[i + 1] >>> 12); - - 
buf[off++] = (byte) (values[i + 1] >>> 4); - - buf[off] = (byte) (values[i + 1] << 4); - buf[off++] |= values[i + 2] >>> 22; - - buf[off++] = (byte) (values[i + 2] >>> 14); - - buf[off++] = (byte) (values[i + 2] >>> 6); - - buf[off] = (byte) (values[i + 2] << 2); - buf[off++] |= values[i + 3] >>> 24; - - buf[off++] = (byte) (values[i + 3] >>> 16); - - buf[off++] = (byte) (values[i + 3] >>> 8); - - buf[off++] = (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 18); - - buf[off++] = (byte) (values[i + 4] >>> 10); - - buf[off++] = (byte) (values[i + 4] >>> 2); - - buf[off] = (byte) (values[i + 4] << 6); - buf[off++] |= values[i + 5] >>> 20; - - buf[off++] = (byte) (values[i + 5] >>> 12); - - buf[off++] = (byte) (values[i + 5] >>> 4); - - buf[off] = (byte) (values[i + 5] << 4); - buf[off++] |= values[i + 6] >>> 22; - - buf[off++] = (byte) (values[i + 6] >>> 14); - - buf[off++] = (byte) (values[i + 6] >>> 6); - - buf[off] = (byte) (values[i + 6] << 2); - buf[off++] |= values[i + 7] >>> 24; - - buf[off++] = (byte) (values[i + 7] >>> 16); - - buf[off++] = (byte) (values[i + 7] >>> 8); - - buf[off] = (byte) (values[i + 7]); - } - - static void packBits27(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 19); - - buf[off++] = (byte) (values[i + 0] >>> 11); - - buf[off++] = (byte) (values[i + 0] >>> 3); - - buf[off] = (byte) (values[i + 0] << 5); - buf[off++] |= values[i + 1] >>> 22; - - buf[off++] = (byte) (values[i + 1] >>> 14); - - buf[off++] = (byte) (values[i + 1] >>> 6); - - buf[off] = (byte) (values[i + 1] << 2); - buf[off++] |= values[i + 2] >>> 25; - - buf[off++] = (byte) (values[i + 2] >>> 17); - - buf[off++] = (byte) (values[i + 2] >>> 9); - - buf[off++] = (byte) (values[i + 2] >>> 1); - - buf[off] = (byte) (values[i + 2] << 7); - buf[off++] |= values[i + 3] >>> 20; - - buf[off++] = (byte) (values[i + 3] >>> 12); - - buf[off++] = (byte) (values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 
3] << 4); - buf[off++] |= values[i + 4] >>> 23; - - buf[off++] = (byte) (values[i + 4] >>> 15); - - buf[off++] = (byte) (values[i + 4] >>> 7); - - buf[off] = (byte) (values[i + 4] << 1); - buf[off++] |= values[i + 5] >>> 26; - - buf[off++] = (byte) (values[i + 5] >>> 18); - - buf[off++] = (byte) (values[i + 5] >>> 10); - - buf[off++] = (byte) (values[i + 5] >>> 2); - - buf[off] = (byte) (values[i + 5] << 6); - buf[off++] |= values[i + 6] >>> 21; - - buf[off++] = (byte) (values[i + 6] >>> 13); - - buf[off++] = (byte) (values[i + 6] >>> 5); - - buf[off] = (byte) (values[i + 6] << 3); - buf[off++] |= values[i + 7] >>> 24; - - buf[off++] = (byte) (values[i + 7] >>> 16); - - buf[off++] = (byte) (values[i + 7] >>> 8); - - buf[off] = (byte) (values[i + 7]); - } - - static void packBits28(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 20); - buf[off++] = (byte) (values[i + 0] >>> 12); - buf[off++] = (byte) (values[i + 0] >>> 4); - buf[off] = (byte) (values[i + 0] << 4); - buf[off++] |= values[i + 1] >>> 24; - buf[off++] = (byte) (values[i + 1] >>> 16); - buf[off++] = (byte) (values[i + 1] >>> 8); - buf[off++] = (byte) (values[i + 1]); - buf[off++] = (byte) (values[i + 2] >>> 20); - buf[off++] = (byte) (values[i + 2] >>> 12); - buf[off++] = (byte) (values[i + 2] >>> 4); - buf[off] = (byte) (values[i + 2] << 4); - buf[off++] |= values[i + 3] >>> 24; - buf[off++] = (byte) (values[i + 3] >>> 16); - buf[off++] = (byte) (values[i + 3] >>> 8); - buf[off++] = (byte) (values[i + 3]); - buf[off++] = (byte) (values[i + 4] >>> 20); - buf[off++] = (byte) (values[i + 4] >>> 12); - buf[off++] = (byte) (values[i + 4] >>> 4); - buf[off] = (byte) (values[i + 4] << 4); - buf[off++] |= values[i + 5] >>> 24; - buf[off++] = (byte) (values[i + 5] >>> 16); - buf[off++] = (byte) (values[i + 5] >>> 8); - buf[off++] = (byte) (values[i + 5]); - buf[off++] = (byte) (values[i + 6] >>> 20); - buf[off++] = (byte) (values[i + 6] >>> 12); - 
buf[off++] = (byte) (values[i + 6] >>> 4); - buf[off] = (byte) (values[i + 6] << 4); - buf[off++] |= values[i + 7] >>> 24; - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits29(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 21); - - buf[off++] = (byte) (values[i + 0] >>> 13); - - buf[off++] = (byte) (values[i + 0] >>> 5); - - buf[off] = (byte) (values[i + 0] << 3); - buf[off++] |= values[i + 1] >>> 26; - - buf[off++] = (byte) (values[i + 1] >>> 18); - - buf[off++] = (byte) (values[i + 1] >>> 10); - - buf[off++] = (byte) (values[i + 1] >>> 2); - - buf[off] = (byte) (values[i + 1] << 6); - buf[off++] |= values[i + 2] >>> 23; - - buf[off++] = (byte) (values[i + 2] >>> 15); - - buf[off++] = (byte) (values[i + 2] >>> 7); - - buf[off] = (byte) (values[i + 2] << 1); - buf[off++] |= values[i + 3] >>> 28; - - buf[off++] = (byte) (values[i + 3] >>> 20); - - buf[off++] = (byte) (values[i + 3] >>> 12); - - buf[off++] = (byte) (values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 25; - - buf[off++] = (byte) (values[i + 4] >>> 17); - - buf[off++] = (byte) (values[i + 4] >>> 9); - - buf[off++] = (byte) (values[i + 4] >>> 1); - - buf[off] = (byte) (values[i + 4] << 7); - buf[off++] |= values[i + 5] >>> 22; - - buf[off++] = (byte) (values[i + 5] >>> 14); - - buf[off++] = (byte) (values[i + 5] >>> 6); - - buf[off] = (byte) (values[i + 5] << 2); - buf[off++] |= values[i + 6] >>> 27; - - buf[off++] = (byte) (values[i + 6] >>> 19); - - buf[off++] = (byte) (values[i + 6] >>> 11); - - buf[off++] = (byte) (values[i + 6] >>> 3); - - buf[off] = (byte) (values[i + 6] << 5); - buf[off++] |= values[i + 7] >>> 24; - - buf[off++] = (byte) (values[i + 7] >>> 16); - - buf[off++] = (byte) (values[i + 7] >>> 8); - - buf[off] = (byte) (values[i + 7]); - } - - static void packBits30(final long[] 
values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 22); - buf[off++] = (byte) (values[i + 0] >>> 14); - buf[off++] = (byte) (values[i + 0] >>> 6); - - buf[off] = (byte) (values[i + 0] << 2); - buf[off++] |= values[i + 1] >>> 28; - buf[off++] = (byte) (values[i + 1] >>> 20); - buf[off++] = (byte) (values[i + 1] >>> 12); - buf[off++] = (byte) (values[i + 1] >>> 4); - - buf[off] = (byte) (values[i + 1] << 4); - buf[off++] |= values[i + 2] >>> 26; - buf[off++] = (byte) (values[i + 2] >>> 18); - buf[off++] = (byte) (values[i + 2] >>> 10); - buf[off++] = (byte) (values[i + 2] >>> 2); - - buf[off] = (byte) (values[i + 2] << 6); - buf[off++] |= values[i + 3] >>> 24; - buf[off++] = (byte) (values[i + 3] >>> 16); - buf[off++] = (byte) (values[i + 3] >>> 8); - buf[off++] = (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 22); - buf[off++] = (byte) (values[i + 4] >>> 14); - buf[off++] = (byte) (values[i + 4] >>> 6); - - buf[off] = (byte) (values[i + 4] << 2); - buf[off++] |= values[i + 5] >>> 28; - buf[off++] = (byte) (values[i + 5] >>> 20); - buf[off++] = (byte) (values[i + 5] >>> 12); - buf[off++] = (byte) (values[i + 5] >>> 4); - - buf[off] = (byte) (values[i + 5] << 4); - buf[off++] |= values[i + 6] >>> 26; - buf[off++] = (byte) (values[i + 6] >>> 18); - buf[off++] = (byte) (values[i + 6] >>> 10); - buf[off++] = (byte) (values[i + 6] >>> 2); - - buf[off] = (byte) (values[i + 6] << 6); - buf[off++] |= values[i + 7] >>> 24; - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits31(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 23); - buf[off++] = (byte) (values[i + 0] >>> 15); - buf[off++] = (byte) (values[i + 0] >>> 7); - - buf[off] = (byte) (values[i + 0] << 1); - buf[off++] |= values[i + 1] >>> 30; - buf[off++] = (byte) (values[i + 1] >>> 22); - buf[off++] = 
(byte) (values[i + 1] >>> 14); - buf[off++] = (byte) (values[i + 1] >>> 6); - - buf[off] = (byte) (values[i + 1] << 2); - buf[off++] |= values[i + 2] >>> 29; - buf[off++] = (byte) (values[i + 2] >>> 21); - buf[off++] = (byte) (values[i + 2] >>> 13); - buf[off++] = (byte) (values[i + 2] >>> 5); - - buf[off] = (byte) (values[i + 2] << 3); - buf[off++] |= values[i + 3] >>> 28; - buf[off++] = (byte) (values[i + 3] >>> 20); - buf[off++] = (byte) (values[i + 3] >>> 12); - buf[off++] = (byte) (values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 27; - buf[off++] = (byte) (values[i + 4] >>> 19); - buf[off++] = (byte) (values[i + 4] >>> 11); - buf[off++] = (byte) (values[i + 4] >>> 3); - - buf[off] = (byte) (values[i + 4] << 5); - buf[off++] |= values[i + 5] >>> 26; - buf[off++] = (byte) (values[i + 5] >>> 18); - buf[off++] = (byte) (values[i + 5] >>> 10); - buf[off++] = (byte) (values[i + 5] >>> 2); - - buf[off] = (byte) (values[i + 5] << 6); - buf[off++] |= values[i + 6] >>> 25; - buf[off++] = (byte) (values[i + 6] >>> 17); - buf[off++] = (byte) (values[i + 6] >>> 9); - buf[off++] = (byte) (values[i + 6] >>> 1); - - buf[off] = (byte) (values[i + 6] << 7); - buf[off++] |= values[i + 7] >>> 24; - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits32(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 24); - buf[off++] = (byte) (values[i + 0] >>> 16); - buf[off++] = (byte) (values[i + 0] >>> 8); - buf[off++] = (byte) (values[i + 0]); - - buf[off++] = (byte) (values[i + 1] >>> 24); - buf[off++] = (byte) (values[i + 1] >>> 16); - buf[off++] = (byte) (values[i + 1] >>> 8); - buf[off++] = (byte) (values[i + 1]); - - buf[off++] = (byte) (values[i + 2] >>> 24); - buf[off++] = (byte) (values[i + 2] >>> 16); - buf[off++] = (byte) (values[i + 2] >>> 8); - buf[off++] = (byte) (values[i 
+ 2]); - - buf[off++] = (byte) (values[i + 3] >>> 24); - buf[off++] = (byte) (values[i + 3] >>> 16); - buf[off++] = (byte) (values[i + 3] >>> 8); - buf[off++] = (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 24); - buf[off++] = (byte) (values[i + 4] >>> 16); - buf[off++] = (byte) (values[i + 4] >>> 8); - buf[off++] = (byte) (values[i + 4]); - - buf[off++] = (byte) (values[i + 5] >>> 24); - buf[off++] = (byte) (values[i + 5] >>> 16); - buf[off++] = (byte) (values[i + 5] >>> 8); - buf[off++] = (byte) (values[i + 5]); - - buf[off++] = (byte) (values[i + 6] >>> 24); - buf[off++] = (byte) (values[i + 6] >>> 16); - buf[off++] = (byte) (values[i + 6] >>> 8); - buf[off++] = (byte) (values[i + 6]); - - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits33(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 25); - buf[off++] = (byte) (values[i + 0] >>> 17); - buf[off++] = (byte) (values[i + 0] >>> 9); - buf[off++] = (byte) (values[i + 0] >>> 1); - - buf[off] = (byte) (values[i + 0] << 7); - buf[off++] |= values[i + 1] >>> 26; - buf[off++] = (byte) (values[i + 1] >>> 18); - buf[off++] = (byte) (values[i + 1] >>> 10); - buf[off++] = (byte) (values[i + 1] >>> 2); - - buf[off] = (byte) (values[i + 1] << 6); - buf[off++] |= values[i + 2] >>> 27; - buf[off++] = (byte) (values[i + 2] >>> 19); - buf[off++] = (byte) (values[i + 2] >>> 11); - buf[off++] = (byte) (values[i + 2] >>> 3); - - buf[off] = (byte) (values[i + 2] << 5); - buf[off++] |= values[i + 3] >>> 28; - buf[off++] = (byte) (values[i + 3] >>> 20); - buf[off++] = (byte) (values[i + 3] >>> 12); - buf[off++] = (byte) (values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 29; - buf[off++] = (byte) (values[i + 4] >>> 21); - buf[off++] = (byte) (values[i + 4] >>> 
13); - buf[off++] = (byte) (values[i + 4] >>> 5); - - buf[off] = (byte) (values[i + 4] << 3); - buf[off++] |= values[i + 5] >>> 30; - buf[off++] = (byte) (values[i + 5] >>> 22); - buf[off++] = (byte) (values[i + 5] >>> 14); - buf[off++] = (byte) (values[i + 5] >>> 6); - - buf[off] = (byte) (values[i + 5] << 2); - buf[off++] |= values[i + 6] >>> 31; - buf[off++] = (byte) (values[i + 6] >>> 23); - buf[off++] = (byte) (values[i + 6] >>> 15); - buf[off++] = (byte) (values[i + 6] >>> 7); - - buf[off] = (byte) (values[i + 6] << 1); - buf[off++] |= values[i + 7] >>> 32; - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits34(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 26); - buf[off++] = (byte) (values[i + 0] >>> 18); - buf[off++] = (byte) (values[i + 0] >>> 10); - buf[off++] = (byte) (values[i + 0] >>> 2); - - buf[off] = (byte) (values[i + 0] << 6); - buf[off++] |= values[i + 1] >>> 28; - buf[off++] = (byte) (values[i + 1] >>> 20); - buf[off++] = (byte) (values[i + 1] >>> 12); - buf[off++] = (byte) (values[i + 1] >>> 4); - - buf[off] = (byte) (values[i + 1] << 4); - buf[off++] |= values[i + 2] >>> 30; - buf[off++] = (byte) (values[i + 2] >>> 22); - buf[off++] = (byte) (values[i + 2] >>> 14); - buf[off++] = (byte) (values[i + 2] >>> 6); - - buf[off] = (byte) (values[i + 2] << 2); - buf[off++] |= values[i + 3] >>> 32; - buf[off++] = (byte) (values[i + 3] >>> 24); - buf[off++] = (byte) (values[i + 3] >>> 16); - buf[off++] = (byte) (values[i + 3] >>> 8); - buf[off++] = (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 26); - buf[off++] = (byte) (values[i + 4] >>> 18); - buf[off++] = (byte) (values[i + 4] >>> 10); - buf[off++] = (byte) (values[i + 4] >>> 2); - - buf[off] = (byte) (values[i + 4] << 6); - buf[off++] |= values[i + 5] >>> 28; - buf[off++] = 
(byte) (values[i + 5] >>> 20); - buf[off++] = (byte) (values[i + 5] >>> 12); - buf[off++] = (byte) (values[i + 5] >>> 4); - - buf[off] = (byte) (values[i + 5] << 4); - buf[off++] |= values[i + 6] >>> 30; - buf[off++] = (byte) (values[i + 6] >>> 22); - buf[off++] = (byte) (values[i + 6] >>> 14); - buf[off++] = (byte) (values[i + 6] >>> 6); - - buf[off] = (byte) (values[i + 6] << 2); - buf[off++] |= values[i + 7] >>> 32; - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits35(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 27); - buf[off++] = (byte) (values[i + 0] >>> 19); - buf[off++] = (byte) (values[i + 0] >>> 11); - buf[off++] = (byte) (values[i + 0] >>> 3); - - buf[off] = (byte) (values[i + 0] << 5); - buf[off++] |= values[i + 1] >>> 30; - buf[off++] = (byte) (values[i + 1] >>> 22); - buf[off++] = (byte) (values[i + 1] >>> 14); - buf[off++] = (byte) (values[i + 1] >>> 6); - - buf[off] = (byte) (values[i + 1] << 2); - buf[off++] |= values[i + 2] >>> 33; - buf[off++] = (byte) (values[i + 2] >>> 25); - buf[off++] = (byte) (values[i + 2] >>> 17); - buf[off++] = (byte) (values[i + 2] >>> 9); - buf[off++] = (byte) (values[i + 2] >>> 1); - - buf[off] = (byte) (values[i + 2] << 7); - buf[off++] |= values[i + 3] >>> 28; - buf[off++] = (byte) (values[i + 3] >>> 20); - buf[off++] = (byte) (values[i + 3] >>> 12); - buf[off++] = (byte) (values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 31; - buf[off++] = (byte) (values[i + 4] >>> 23); - buf[off++] = (byte) (values[i + 4] >>> 15); - buf[off++] = (byte) (values[i + 4] >>> 7); - - buf[off] = (byte) (values[i + 4] << 1); - buf[off++] |= values[i + 5] >>> 34; - buf[off++] = (byte) (values[i + 5] >>> 26); - buf[off++] = (byte) (values[i + 5] >>> 18); - buf[off++] = (byte) 
(values[i + 5] >>> 10); - buf[off++] = (byte) (values[i + 5] >>> 2); - - buf[off] = (byte) (values[i + 5] << 6); - buf[off++] |= values[i + 6] >>> 29; - buf[off++] = (byte) (values[i + 6] >>> 21); - buf[off++] = (byte) (values[i + 6] >>> 13); - buf[off++] = (byte) (values[i + 6] >>> 5); - - buf[off] = (byte) (values[i + 6] << 3); - buf[off++] |= values[i + 7] >>> 32; - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits36(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 28); - buf[off++] = (byte) (values[i + 0] >>> 20); - buf[off++] = (byte) (values[i + 0] >>> 12); - buf[off++] = (byte) (values[i + 0] >>> 4); - - buf[off] = (byte) (values[i + 0] << 4); - buf[off++] |= values[i + 1] >>> 32; - buf[off++] = (byte) (values[i + 1] >>> 24); - buf[off++] = (byte) (values[i + 1] >>> 16); - buf[off++] = (byte) (values[i + 1] >>> 8); - buf[off++] = (byte) (values[i + 1]); - - buf[off++] = (byte) (values[i + 2] >>> 28); - buf[off++] = (byte) (values[i + 2] >>> 20); - buf[off++] = (byte) (values[i + 2] >>> 12); - buf[off++] = (byte) (values[i + 2] >>> 4); - - buf[off] = (byte) (values[i + 2] << 4); - buf[off++] |= values[i + 3] >>> 32; - buf[off++] = (byte) (values[i + 3] >>> 24); - buf[off++] = (byte) (values[i + 3] >>> 16); - buf[off++] = (byte) (values[i + 3] >>> 8); - buf[off++] = (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 28); - buf[off++] = (byte) (values[i + 4] >>> 20); - buf[off++] = (byte) (values[i + 4] >>> 12); - buf[off++] = (byte) (values[i + 4] >>> 4); - - buf[off] = (byte) (values[i + 4] << 4); - buf[off++] |= values[i + 5] >>> 32; - buf[off++] = (byte) (values[i + 5] >>> 24); - buf[off++] = (byte) (values[i + 5] >>> 16); - buf[off++] = (byte) (values[i + 5] >>> 8); - buf[off++] = (byte) (values[i + 5]); - - buf[off++] = (byte) (values[i 
+ 6] >>> 28); - buf[off++] = (byte) (values[i + 6] >>> 20); - buf[off++] = (byte) (values[i + 6] >>> 12); - buf[off++] = (byte) (values[i + 6] >>> 4); - - buf[off] = (byte) (values[i + 6] << 4); - buf[off++] |= values[i + 7] >>> 32; - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits37(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 29); - buf[off++] = (byte) (values[i + 0] >>> 21); - buf[off++] = (byte) (values[i + 0] >>> 13); - buf[off++] = (byte) (values[i + 0] >>> 5); - - buf[off] = (byte) (values[i + 0] << 3); - buf[off++] |= values[i + 1] >>> 34; - buf[off++] = (byte) (values[i + 1] >>> 26); - buf[off++] = (byte) (values[i + 1] >>> 18); - buf[off++] = (byte) (values[i + 1] >>> 10); - buf[off++] = (byte) (values[i + 1] >>> 2); - - buf[off] = (byte) (values[i + 1] << 6); - buf[off++] |= values[i + 2] >>> 31; - buf[off++] = (byte) (values[i + 2] >>> 23); - buf[off++] = (byte) (values[i + 2] >>> 15); - buf[off++] = (byte) (values[i + 2] >>> 7); - - buf[off] = (byte) (values[i + 2] << 1); - buf[off++] |= values[i + 3] >>> 36; - buf[off++] = (byte) (values[i + 3] >>> 28); - buf[off++] = (byte) (values[i + 3] >>> 20); - buf[off++] = (byte) (values[i + 3] >>> 12); - buf[off++] = (byte) (values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 33; - buf[off++] = (byte) (values[i + 4] >>> 25); - buf[off++] = (byte) (values[i + 4] >>> 17); - buf[off++] = (byte) (values[i + 4] >>> 9); - buf[off++] = (byte) (values[i + 4] >>> 1); - - buf[off] = (byte) (values[i + 4] << 7); - buf[off++] |= values[i + 5] >>> 30; - buf[off++] = (byte) (values[i + 5] >>> 22); - buf[off++] = (byte) (values[i + 5] >>> 14); - buf[off++] = (byte) (values[i + 5] >>> 6); - - buf[off] = (byte) (values[i + 5] << 2); - buf[off++] |= values[i + 6] >>> 35; - 
buf[off++] = (byte) (values[i + 6] >>> 27); - buf[off++] = (byte) (values[i + 6] >>> 19); - buf[off++] = (byte) (values[i + 6] >>> 11); - buf[off++] = (byte) (values[i + 6] >>> 3); - - buf[off] = (byte) (values[i + 6] << 5); - buf[off++] |= values[i + 7] >>> 32; - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits38(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 30); - buf[off++] = (byte) (values[i + 0] >>> 22); - buf[off++] = (byte) (values[i + 0] >>> 14); - buf[off++] = (byte) (values[i + 0] >>> 6); - - buf[off] = (byte) (values[i + 0] << 2); - buf[off++] |= values[i + 1] >>> 36; - buf[off++] = (byte) (values[i + 1] >>> 28); - buf[off++] = (byte) (values[i + 1] >>> 20); - buf[off++] = (byte) (values[i + 1] >>> 12); - buf[off++] = (byte) (values[i + 1] >>> 4); - - buf[off] = (byte) (values[i + 1] << 4); - buf[off++] |= values[i + 2] >>> 34; - buf[off++] = (byte) (values[i + 2] >>> 26); - buf[off++] = (byte) (values[i + 2] >>> 18); - buf[off++] = (byte) (values[i + 2] >>> 10); - buf[off++] = (byte) (values[i + 2] >>> 2); - - buf[off] = (byte) (values[i + 2] << 6); - buf[off++] |= values[i + 3] >>> 32; - buf[off++] = (byte) (values[i + 3] >>> 24); - buf[off++] = (byte) (values[i + 3] >>> 16); - buf[off++] = (byte) (values[i + 3] >>> 8); - buf[off++] = (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 30); - buf[off++] = (byte) (values[i + 4] >>> 22); - buf[off++] = (byte) (values[i + 4] >>> 14); - buf[off++] = (byte) (values[i + 4] >>> 6); - - buf[off] = (byte) (values[i + 4] << 2); - buf[off++] |= values[i + 5] >>> 36; - buf[off++] = (byte) (values[i + 5] >>> 28); - buf[off++] = (byte) (values[i + 5] >>> 20); - buf[off++] = (byte) (values[i + 5] >>> 12); - buf[off++] = (byte) (values[i + 5] >>> 4); - - buf[off] = (byte) (values[i + 5] << 4); - 
buf[off++] |= values[i + 6] >>> 34; - buf[off++] = (byte) (values[i + 6] >>> 26); - buf[off++] = (byte) (values[i + 6] >>> 18); - buf[off++] = (byte) (values[i + 6] >>> 10); - buf[off++] = (byte) (values[i + 6] >>> 2); - - buf[off] = (byte) (values[i + 6] << 6); - buf[off++] |= values[i + 7] >>> 32; - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits39(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 31); - buf[off++] = (byte) (values[i + 0] >>> 23); - buf[off++] = (byte) (values[i + 0] >>> 15); - buf[off++] = (byte) (values[i + 0] >>> 7); - - buf[off] = (byte) (values[i + 0] << 1); - buf[off++] |= values[i + 1] >>> 38; - buf[off++] = (byte) (values[i + 1] >>> 30); - buf[off++] = (byte) (values[i + 1] >>> 22); - buf[off++] = (byte) (values[i + 1] >>> 14); - buf[off++] = (byte) (values[i + 1] >>> 6); - - buf[off] = (byte) (values[i + 1] << 2); - buf[off++] |= values[i + 2] >>> 37; - buf[off++] = (byte) (values[i + 2] >>> 29); - buf[off++] = (byte) (values[i + 2] >>> 21); - buf[off++] = (byte) (values[i + 2] >>> 13); - buf[off++] = (byte) (values[i + 2] >>> 5); - - buf[off] = (byte) (values[i + 2] << 3); - buf[off++] |= values[i + 3] >>> 36; - buf[off++] = (byte) (values[i + 3] >>> 28); - buf[off++] = (byte) (values[i + 3] >>> 20); - buf[off++] = (byte) (values[i + 3] >>> 12); - buf[off++] = (byte) (values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 35; - buf[off++] = (byte) (values[i + 4] >>> 27); - buf[off++] = (byte) (values[i + 4] >>> 19); - buf[off++] = (byte) (values[i + 4] >>> 11); - buf[off++] = (byte) (values[i + 4] >>> 3); - - buf[off] = (byte) (values[i + 4] << 5); - buf[off++] |= values[i + 5] >>> 34; - buf[off++] = (byte) (values[i + 5] >>> 26); - buf[off++] = (byte) (values[i + 5] >>> 18); - buf[off++] 
= (byte) (values[i + 5] >>> 10); - buf[off++] = (byte) (values[i + 5] >>> 2); - - buf[off] = (byte) (values[i + 5] << 6); - buf[off++] |= values[i + 6] >>> 33; - buf[off++] = (byte) (values[i + 6] >>> 25); - buf[off++] = (byte) (values[i + 6] >>> 17); - buf[off++] = (byte) (values[i + 6] >>> 9); - buf[off++] = (byte) (values[i + 6] >>> 1); - - buf[off] = (byte) (values[i + 6] << 7); - buf[off++] |= values[i + 7] >>> 32; - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits40(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 32); - buf[off++] = (byte) (values[i + 0] >>> 24); - buf[off++] = (byte) (values[i + 0] >>> 16); - buf[off++] = (byte) (values[i + 0] >>> 8); - buf[off++] = (byte) (values[i + 0]); - - buf[off++] = (byte) (values[i + 1] >>> 32); - buf[off++] = (byte) (values[i + 1] >>> 24); - buf[off++] = (byte) (values[i + 1] >>> 16); - buf[off++] = (byte) (values[i + 1] >>> 8); - buf[off++] = (byte) (values[i + 1]); - - buf[off++] = (byte) (values[i + 2] >>> 32); - buf[off++] = (byte) (values[i + 2] >>> 24); - buf[off++] = (byte) (values[i + 2] >>> 16); - buf[off++] = (byte) (values[i + 2] >>> 8); - buf[off++] = (byte) (values[i + 2]); - - buf[off++] = (byte) (values[i + 3] >>> 32); - buf[off++] = (byte) (values[i + 3] >>> 24); - buf[off++] = (byte) (values[i + 3] >>> 16); - buf[off++] = (byte) (values[i + 3] >>> 8); - buf[off++] = (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 32); - buf[off++] = (byte) (values[i + 4] >>> 24); - buf[off++] = (byte) (values[i + 4] >>> 16); - buf[off++] = (byte) (values[i + 4] >>> 8); - buf[off++] = (byte) (values[i + 4]); - - buf[off++] = (byte) (values[i + 5] >>> 32); - buf[off++] = (byte) (values[i + 5] >>> 24); - buf[off++] = (byte) (values[i + 5] >>> 16); - buf[off++] = (byte) (values[i + 5] >>> 8); - 
buf[off++] = (byte) (values[i + 5]); - - buf[off++] = (byte) (values[i + 6] >>> 32); - buf[off++] = (byte) (values[i + 6] >>> 24); - buf[off++] = (byte) (values[i + 6] >>> 16); - buf[off++] = (byte) (values[i + 6] >>> 8); - buf[off++] = (byte) (values[i + 6]); - - buf[off++] = (byte) (values[i + 7] >>> 32); - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits41(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 33); - buf[off++] = (byte) (values[i + 0] >>> 25); - buf[off++] = (byte) (values[i + 0] >>> 17); - buf[off++] = (byte) (values[i + 0] >>> 9); - buf[off++] = (byte) (values[i + 0] >>> 1); - - buf[off] = (byte) (values[i + 0] << 7); - buf[off++] |= values[i + 1] >>> 34; - buf[off++] = (byte) (values[i + 1] >>> 26); - buf[off++] = (byte) (values[i + 1] >>> 18); - buf[off++] = (byte) (values[i + 1] >>> 10); - buf[off++] = (byte) (values[i + 1] >>> 2); - - buf[off] = (byte) (values[i + 1] << 6); - buf[off++] |= values[i + 2] >>> 35; - buf[off++] = (byte) (values[i + 2] >>> 27); - buf[off++] = (byte) (values[i + 2] >>> 19); - buf[off++] = (byte) (values[i + 2] >>> 11); - buf[off++] = (byte) (values[i + 2] >>> 3); - - buf[off] = (byte) (values[i + 2] << 5); - buf[off++] |= values[i + 3] >>> 36; - buf[off++] = (byte) (values[i + 3] >>> 28); - buf[off++] = (byte) (values[i + 3] >>> 20); - buf[off++] = (byte) (values[i + 3] >>> 12); - buf[off++] = (byte) (values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 37; - buf[off++] = (byte) (values[i + 4] >>> 29); - buf[off++] = (byte) (values[i + 4] >>> 21); - buf[off++] = (byte) (values[i + 4] >>> 13); - buf[off++] = (byte) (values[i + 4] >>> 5); - - buf[off] = (byte) (values[i + 4] << 3); - buf[off++] |= values[i + 5] >>> 38; - buf[off++] = (byte) (values[i + 5] >>> 30); - 
buf[off++] = (byte) (values[i + 5] >>> 22); - buf[off++] = (byte) (values[i + 5] >>> 14); - buf[off++] = (byte) (values[i + 5] >>> 6); - - buf[off] = (byte) (values[i + 5] << 2); - buf[off++] |= values[i + 6] >>> 39; - buf[off++] = (byte) (values[i + 6] >>> 31); - buf[off++] = (byte) (values[i + 6] >>> 23); - buf[off++] = (byte) (values[i + 6] >>> 15); - buf[off++] = (byte) (values[i + 6] >>> 7); - - buf[off] = (byte) (values[i + 6] << 1); - buf[off++] |= values[i + 7] >>> 40; - buf[off++] = (byte) (values[i + 7] >>> 32); - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits42(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 34); - buf[off++] = (byte) (values[i + 0] >>> 26); - buf[off++] = (byte) (values[i + 0] >>> 18); - buf[off++] = (byte) (values[i + 0] >>> 10); - buf[off++] = (byte) (values[i + 0] >>> 2); - - buf[off] = (byte) (values[i + 0] << 6); - buf[off++] |= values[i + 1] >>> 36; - buf[off++] = (byte) (values[i + 1] >>> 28); - buf[off++] = (byte) (values[i + 1] >>> 20); - buf[off++] = (byte) (values[i + 1] >>> 12); - buf[off++] = (byte) (values[i + 1] >>> 4); - - buf[off] = (byte) (values[i + 1] << 4); - buf[off++] |= values[i + 2] >>> 38; - buf[off++] = (byte) (values[i + 2] >>> 30); - buf[off++] = (byte) (values[i + 2] >>> 22); - buf[off++] = (byte) (values[i + 2] >>> 14); - buf[off++] = (byte) (values[i + 2] >>> 6); - - buf[off] = (byte) (values[i + 2] << 2); - buf[off++] |= values[i + 3] >>> 40; - buf[off++] = (byte) (values[i + 3] >>> 32); - buf[off++] = (byte) (values[i + 3] >>> 24); - buf[off++] = (byte) (values[i + 3] >>> 16); - buf[off++] = (byte) (values[i + 3] >>> 8); - buf[off++] = (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 34); - buf[off++] = (byte) (values[i + 4] >>> 26); - buf[off++] = (byte) (values[i + 4] >>> 18); 
- buf[off++] = (byte) (values[i + 4] >>> 10); - buf[off++] = (byte) (values[i + 4] >>> 2); - - buf[off] = (byte) (values[i + 4] << 6); - buf[off++] |= values[i + 5] >>> 36; - buf[off++] = (byte) (values[i + 5] >>> 28); - buf[off++] = (byte) (values[i + 5] >>> 20); - buf[off++] = (byte) (values[i + 5] >>> 12); - buf[off++] = (byte) (values[i + 5] >>> 4); - - buf[off] = (byte) (values[i + 5] << 4); - buf[off++] |= values[i + 6] >>> 38; - buf[off++] = (byte) (values[i + 6] >>> 30); - buf[off++] = (byte) (values[i + 6] >>> 22); - buf[off++] = (byte) (values[i + 6] >>> 14); - buf[off++] = (byte) (values[i + 6] >>> 6); - - buf[off] = (byte) (values[i + 6] << 2); - buf[off++] |= values[i + 7] >>> 40; - buf[off++] = (byte) (values[i + 7] >>> 32); - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits43(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 35); - buf[off++] = (byte) (values[i + 0] >>> 27); - buf[off++] = (byte) (values[i + 0] >>> 19); - buf[off++] = (byte) (values[i + 0] >>> 11); - buf[off++] = (byte) (values[i + 0] >>> 3); - - buf[off] = (byte) (values[i + 0] << 5); - buf[off++] |= values[i + 1] >>> 38; - buf[off++] = (byte) (values[i + 1] >>> 30); - buf[off++] = (byte) (values[i + 1] >>> 22); - buf[off++] = (byte) (values[i + 1] >>> 14); - buf[off++] = (byte) (values[i + 1] >>> 6); - - buf[off] = (byte) (values[i + 1] << 2); - buf[off++] |= values[i + 2] >>> 41; - buf[off++] = (byte) (values[i + 2] >>> 33); - buf[off++] = (byte) (values[i + 2] >>> 25); - buf[off++] = (byte) (values[i + 2] >>> 17); - buf[off++] = (byte) (values[i + 2] >>> 9); - buf[off++] = (byte) (values[i + 2] >>> 1); - - buf[off] = (byte) (values[i + 2] << 7); - buf[off++] |= values[i + 3] >>> 36; - buf[off++] = (byte) (values[i + 3] >>> 28); - buf[off++] = (byte) (values[i + 3] >>> 20); - 
buf[off++] = (byte) (values[i + 3] >>> 12); - buf[off++] = (byte) (values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 39; - buf[off++] = (byte) (values[i + 4] >>> 31); - buf[off++] = (byte) (values[i + 4] >>> 23); - buf[off++] = (byte) (values[i + 4] >>> 15); - buf[off++] = (byte) (values[i + 4] >>> 7); - - buf[off] = (byte) (values[i + 4] << 1); - buf[off++] |= values[i + 5] >>> 42; - buf[off++] = (byte) (values[i + 5] >>> 34); - buf[off++] = (byte) (values[i + 5] >>> 26); - buf[off++] = (byte) (values[i + 5] >>> 18); - buf[off++] = (byte) (values[i + 5] >>> 10); - buf[off++] = (byte) (values[i + 5] >>> 2); - - buf[off] = (byte) (values[i + 5] << 6); - buf[off++] |= values[i + 6] >>> 37; - buf[off++] = (byte) (values[i + 6] >>> 29); - buf[off++] = (byte) (values[i + 6] >>> 21); - buf[off++] = (byte) (values[i + 6] >>> 13); - buf[off++] = (byte) (values[i + 6] >>> 5); - - buf[off] = (byte) (values[i + 6] << 3); - buf[off++] |= values[i + 7] >>> 40; - buf[off++] = (byte) (values[i + 7] >>> 32); - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits44(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 36); - buf[off++] = (byte) (values[i + 0] >>> 28); - buf[off++] = (byte) (values[i + 0] >>> 20); - buf[off++] = (byte) (values[i + 0] >>> 12); - buf[off++] = (byte) (values[i + 0] >>> 4); - - buf[off] = (byte) (values[i + 0] << 4); - buf[off++] |= values[i + 1] >>> 40; - buf[off++] = (byte) (values[i + 1] >>> 32); - buf[off++] = (byte) (values[i + 1] >>> 24); - buf[off++] = (byte) (values[i + 1] >>> 16); - buf[off++] = (byte) (values[i + 1] >>> 8); - buf[off++] = (byte) (values[i + 1]); - - buf[off++] = (byte) (values[i + 2] >>> 36); - buf[off++] = (byte) (values[i + 2] >>> 28); - buf[off++] = (byte) (values[i + 2] >>> 20); 
- buf[off++] = (byte) (values[i + 2] >>> 12); - buf[off++] = (byte) (values[i + 2] >>> 4); - - buf[off] = (byte) (values[i + 2] << 4); - buf[off++] |= values[i + 3] >>> 40; - buf[off++] = (byte) (values[i + 3] >>> 32); - buf[off++] = (byte) (values[i + 3] >>> 24); - buf[off++] = (byte) (values[i + 3] >>> 16); - buf[off++] = (byte) (values[i + 3] >>> 8); - buf[off++] = (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 36); - buf[off++] = (byte) (values[i + 4] >>> 28); - buf[off++] = (byte) (values[i + 4] >>> 20); - buf[off++] = (byte) (values[i + 4] >>> 12); - buf[off++] = (byte) (values[i + 4] >>> 4); - - buf[off] = (byte) (values[i + 4] << 4); - buf[off++] |= values[i + 5] >>> 40; - buf[off++] = (byte) (values[i + 5] >>> 32); - buf[off++] = (byte) (values[i + 5] >>> 24); - buf[off++] = (byte) (values[i + 5] >>> 16); - buf[off++] = (byte) (values[i + 5] >>> 8); - buf[off++] = (byte) (values[i + 5]); - - buf[off++] = (byte) (values[i + 6] >>> 36); - buf[off++] = (byte) (values[i + 6] >>> 28); - buf[off++] = (byte) (values[i + 6] >>> 20); - buf[off++] = (byte) (values[i + 6] >>> 12); - buf[off++] = (byte) (values[i + 6] >>> 4); - - buf[off] = (byte) (values[i + 6] << 4); - buf[off++] |= values[i + 7] >>> 40; - buf[off++] = (byte) (values[i + 7] >>> 32); - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits45(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 37); - buf[off++] = (byte) (values[i + 0] >>> 29); - buf[off++] = (byte) (values[i + 0] >>> 21); - buf[off++] = (byte) (values[i + 0] >>> 13); - buf[off++] = (byte) (values[i + 0] >>> 5); - - buf[off] = (byte) (values[i + 0] << 3); - buf[off++] |= values[i + 1] >>> 42; - buf[off++] = (byte) (values[i + 1] >>> 34); - buf[off++] = (byte) (values[i + 1] >>> 26); - buf[off++] = (byte) (values[i + 1] 
>>> 18); - buf[off++] = (byte) (values[i + 1] >>> 10); - buf[off++] = (byte) (values[i + 1] >>> 2); - - buf[off] = (byte) (values[i + 1] << 6); - buf[off++] |= values[i + 2] >>> 39; - buf[off++] = (byte) (values[i + 2] >>> 31); - buf[off++] = (byte) (values[i + 2] >>> 23); - buf[off++] = (byte) (values[i + 2] >>> 15); - buf[off++] = (byte) (values[i + 2] >>> 7); - - buf[off] = (byte) (values[i + 2] << 1); - buf[off++] |= values[i + 3] >>> 44; - buf[off++] = (byte) (values[i + 3] >>> 36); - buf[off++] = (byte) (values[i + 3] >>> 28); - buf[off++] = (byte) (values[i + 3] >>> 20); - buf[off++] = (byte) (values[i + 3] >>> 12); - buf[off++] = (byte) (values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 41; - buf[off++] = (byte) (values[i + 4] >>> 33); - buf[off++] = (byte) (values[i + 4] >>> 25); - buf[off++] = (byte) (values[i + 4] >>> 17); - buf[off++] = (byte) (values[i + 4] >>> 9); - buf[off++] = (byte) (values[i + 4] >>> 1); - - buf[off] = (byte) (values[i + 4] << 7); - buf[off++] |= values[i + 5] >>> 38; - buf[off++] = (byte) (values[i + 5] >>> 30); - buf[off++] = (byte) (values[i + 5] >>> 22); - buf[off++] = (byte) (values[i + 5] >>> 14); - buf[off++] = (byte) (values[i + 5] >>> 6); - - buf[off] = (byte) (values[i + 5] << 2); - buf[off++] |= values[i + 6] >>> 43; - buf[off++] = (byte) (values[i + 6] >>> 35); - buf[off++] = (byte) (values[i + 6] >>> 27); - buf[off++] = (byte) (values[i + 6] >>> 19); - buf[off++] = (byte) (values[i + 6] >>> 11); - buf[off++] = (byte) (values[i + 6] >>> 3); - - buf[off] = (byte) (values[i + 6] << 5); - buf[off++] |= values[i + 7] >>> 40; - buf[off++] = (byte) (values[i + 7] >>> 32); - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits46(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 
38); - buf[off++] = (byte) (values[i + 0] >>> 30); - buf[off++] = (byte) (values[i + 0] >>> 22); - buf[off++] = (byte) (values[i + 0] >>> 14); - buf[off++] = (byte) (values[i + 0] >>> 6); - - buf[off] = (byte) (values[i + 0] << 2); - buf[off++] |= values[i + 1] >>> 44; - buf[off++] = (byte) (values[i + 1] >>> 36); - buf[off++] = (byte) (values[i + 1] >>> 28); - buf[off++] = (byte) (values[i + 1] >>> 20); - buf[off++] = (byte) (values[i + 1] >>> 12); - buf[off++] = (byte) (values[i + 1] >>> 4); - - buf[off] = (byte) (values[i + 1] << 4); - buf[off++] |= values[i + 2] >>> 42; - buf[off++] = (byte) (values[i + 2] >>> 34); - buf[off++] = (byte) (values[i + 2] >>> 26); - buf[off++] = (byte) (values[i + 2] >>> 18); - buf[off++] = (byte) (values[i + 2] >>> 10); - buf[off++] = (byte) (values[i + 2] >>> 2); - - buf[off] = (byte) (values[i + 2] << 6); - buf[off++] |= values[i + 3] >>> 40; - buf[off++] = (byte) (values[i + 3] >>> 32); - buf[off++] = (byte) (values[i + 3] >>> 24); - buf[off++] = (byte) (values[i + 3] >>> 16); - buf[off++] = (byte) (values[i + 3] >>> 8); - buf[off++] = (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 38); - buf[off++] = (byte) (values[i + 4] >>> 30); - buf[off++] = (byte) (values[i + 4] >>> 22); - buf[off++] = (byte) (values[i + 4] >>> 14); - buf[off++] = (byte) (values[i + 4] >>> 6); - - buf[off] = (byte) (values[i + 4] << 2); - buf[off++] |= values[i + 5] >>> 44; - buf[off++] = (byte) (values[i + 5] >>> 36); - buf[off++] = (byte) (values[i + 5] >>> 28); - buf[off++] = (byte) (values[i + 5] >>> 20); - buf[off++] = (byte) (values[i + 5] >>> 12); - buf[off++] = (byte) (values[i + 5] >>> 4); - - buf[off] = (byte) (values[i + 5] << 4); - buf[off++] |= values[i + 6] >>> 42; - buf[off++] = (byte) (values[i + 6] >>> 34); - buf[off++] = (byte) (values[i + 6] >>> 26); - buf[off++] = (byte) (values[i + 6] >>> 18); - buf[off++] = (byte) (values[i + 6] >>> 10); - buf[off++] = (byte) (values[i + 6] >>> 2); - - buf[off] = (byte) (values[i 
+ 6] << 6); - buf[off++] |= values[i + 7] >>> 40; - buf[off++] = (byte) (values[i + 7] >>> 32); - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits47(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 39); - buf[off++] = (byte) (values[i + 0] >>> 31); - buf[off++] = (byte) (values[i + 0] >>> 23); - buf[off++] = (byte) (values[i + 0] >>> 15); - buf[off++] = (byte) (values[i + 0] >>> 7); - - buf[off] = (byte) (values[i + 0] << 1); - buf[off++] |= values[i + 1] >>> 46; - buf[off++] = (byte) (values[i + 1] >>> 38); - buf[off++] = (byte) (values[i + 1] >>> 30); - buf[off++] = (byte) (values[i + 1] >>> 22); - buf[off++] = (byte) (values[i + 1] >>> 14); - buf[off++] = (byte) (values[i + 1] >>> 6); - - buf[off] = (byte) (values[i + 1] << 2); - buf[off++] |= values[i + 2] >>> 45; - buf[off++] = (byte) (values[i + 2] >>> 37); - buf[off++] = (byte) (values[i + 2] >>> 29); - buf[off++] = (byte) (values[i + 2] >>> 21); - buf[off++] = (byte) (values[i + 2] >>> 13); - buf[off++] = (byte) (values[i + 2] >>> 5); - - buf[off] = (byte) (values[i + 2] << 3); - buf[off++] |= values[i + 3] >>> 44; - buf[off++] = (byte) (values[i + 3] >>> 36); - buf[off++] = (byte) (values[i + 3] >>> 28); - buf[off++] = (byte) (values[i + 3] >>> 20); - buf[off++] = (byte) (values[i + 3] >>> 12); - buf[off++] = (byte) (values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 43; - buf[off++] = (byte) (values[i + 4] >>> 35); - buf[off++] = (byte) (values[i + 4] >>> 27); - buf[off++] = (byte) (values[i + 4] >>> 19); - buf[off++] = (byte) (values[i + 4] >>> 11); - buf[off++] = (byte) (values[i + 4] >>> 3); - - buf[off] = (byte) (values[i + 4] << 5); - buf[off++] |= values[i + 5] >>> 42; - buf[off++] = (byte) (values[i + 5] >>> 34); - buf[off++] = (byte) (values[i + 
5] >>> 26); - buf[off++] = (byte) (values[i + 5] >>> 18); - buf[off++] = (byte) (values[i + 5] >>> 10); - buf[off++] = (byte) (values[i + 5] >>> 2); - - buf[off] = (byte) (values[i + 5] << 6); - buf[off++] |= values[i + 6] >>> 41; - buf[off++] = (byte) (values[i + 6] >>> 33); - buf[off++] = (byte) (values[i + 6] >>> 25); - buf[off++] = (byte) (values[i + 6] >>> 17); - buf[off++] = (byte) (values[i + 6] >>> 9); - buf[off++] = (byte) (values[i + 6] >>> 1); - - buf[off] = (byte) (values[i + 6] << 7); - buf[off++] |= values[i + 7] >>> 40; - buf[off++] = (byte) (values[i + 7] >>> 32); - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits48(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 40); - buf[off++] = (byte) (values[i + 0] >>> 32); - buf[off++] = (byte) (values[i + 0] >>> 24); - buf[off++] = (byte) (values[i + 0] >>> 16); - buf[off++] = (byte) (values[i + 0] >>> 8); - buf[off++] = (byte) (values[i + 0]); - - buf[off++] = (byte) (values[i + 1] >>> 40); - buf[off++] = (byte) (values[i + 1] >>> 32); - buf[off++] = (byte) (values[i + 1] >>> 24); - buf[off++] = (byte) (values[i + 1] >>> 16); - buf[off++] = (byte) (values[i + 1] >>> 8); - buf[off++] = (byte) (values[i + 1]); - - buf[off++] = (byte) (values[i + 2] >>> 40); - buf[off++] = (byte) (values[i + 2] >>> 32); - buf[off++] = (byte) (values[i + 2] >>> 24); - buf[off++] = (byte) (values[i + 2] >>> 16); - buf[off++] = (byte) (values[i + 2] >>> 8); - buf[off++] = (byte) (values[i + 2]); - - buf[off++] = (byte) (values[i + 3] >>> 40); - buf[off++] = (byte) (values[i + 3] >>> 32); - buf[off++] = (byte) (values[i + 3] >>> 24); - buf[off++] = (byte) (values[i + 3] >>> 16); - buf[off++] = (byte) (values[i + 3] >>> 8); - buf[off++] = (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 40); - buf[off++] = 
(byte) (values[i + 4] >>> 32); - buf[off++] = (byte) (values[i + 4] >>> 24); - buf[off++] = (byte) (values[i + 4] >>> 16); - buf[off++] = (byte) (values[i + 4] >>> 8); - buf[off++] = (byte) (values[i + 4]); - - buf[off++] = (byte) (values[i + 5] >>> 40); - buf[off++] = (byte) (values[i + 5] >>> 32); - buf[off++] = (byte) (values[i + 5] >>> 24); - buf[off++] = (byte) (values[i + 5] >>> 16); - buf[off++] = (byte) (values[i + 5] >>> 8); - buf[off++] = (byte) (values[i + 5]); - - buf[off++] = (byte) (values[i + 6] >>> 40); - buf[off++] = (byte) (values[i + 6] >>> 32); - buf[off++] = (byte) (values[i + 6] >>> 24); - buf[off++] = (byte) (values[i + 6] >>> 16); - buf[off++] = (byte) (values[i + 6] >>> 8); - buf[off++] = (byte) (values[i + 6]); - - buf[off++] = (byte) (values[i + 7] >>> 40); - buf[off++] = (byte) (values[i + 7] >>> 32); - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits49(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 41); - buf[off++] = (byte) (values[i + 0] >>> 33); - buf[off++] = (byte) (values[i + 0] >>> 25); - buf[off++] = (byte) (values[i + 0] >>> 17); - buf[off++] = (byte) (values[i + 0] >>> 9); - buf[off++] = (byte) (values[i + 0] >>> 1); - - buf[off] = (byte) (values[i + 0] << 7); - buf[off++] |= values[i + 1] >>> 42; - buf[off++] = (byte) (values[i + 1] >>> 34); - buf[off++] = (byte) (values[i + 1] >>> 26); - buf[off++] = (byte) (values[i + 1] >>> 18); - buf[off++] = (byte) (values[i + 1] >>> 10); - buf[off++] = (byte) (values[i + 1] >>> 2); - - buf[off] = (byte) (values[i + 1] << 6); - buf[off++] |= values[i + 2] >>> 43; - buf[off++] = (byte) (values[i + 2] >>> 35); - buf[off++] = (byte) (values[i + 2] >>> 27); - buf[off++] = (byte) (values[i + 2] >>> 19); - buf[off++] = (byte) (values[i + 2] >>> 11); - buf[off++] = (byte) (values[i + 2] 
>>> 3); - - buf[off] = (byte) (values[i + 2] << 5); - buf[off++] |= values[i + 3] >>> 44; - buf[off++] = (byte) (values[i + 3] >>> 36); - buf[off++] = (byte) (values[i + 3] >>> 28); - buf[off++] = (byte) (values[i + 3] >>> 20); - buf[off++] = (byte) (values[i + 3] >>> 12); - buf[off++] = (byte) (values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 45; - buf[off++] = (byte) (values[i + 4] >>> 37); - buf[off++] = (byte) (values[i + 4] >>> 29); - buf[off++] = (byte) (values[i + 4] >>> 21); - buf[off++] = (byte) (values[i + 4] >>> 13); - buf[off++] = (byte) (values[i + 4] >>> 5); - - buf[off] = (byte) (values[i + 4] << 3); - buf[off++] |= values[i + 5] >>> 46; - buf[off++] = (byte) (values[i + 5] >>> 38); - buf[off++] = (byte) (values[i + 5] >>> 30); - buf[off++] = (byte) (values[i + 5] >>> 22); - buf[off++] = (byte) (values[i + 5] >>> 14); - buf[off++] = (byte) (values[i + 5] >>> 6); - - buf[off] = (byte) (values[i + 5] << 2); - buf[off++] |= values[i + 6] >>> 47; - buf[off++] = (byte) (values[i + 6] >>> 39); - buf[off++] = (byte) (values[i + 6] >>> 31); - buf[off++] = (byte) (values[i + 6] >>> 23); - buf[off++] = (byte) (values[i + 6] >>> 15); - buf[off++] = (byte) (values[i + 6] >>> 7); - - buf[off] = (byte) (values[i + 6] << 1); - buf[off++] |= values[i + 7] >>> 48; - buf[off++] = (byte) (values[i + 7] >>> 40); - buf[off++] = (byte) (values[i + 7] >>> 32); - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits50(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 42); - buf[off++] = (byte) (values[i + 0] >>> 34); - buf[off++] = (byte) (values[i + 0] >>> 26); - buf[off++] = (byte) (values[i + 0] >>> 18); - buf[off++] = (byte) (values[i + 0] >>> 10); - buf[off++] = (byte) (values[i + 0] >>> 2); - - buf[off] = (byte) 
(values[i + 0] << 6); - buf[off++] |= values[i + 1] >>> 44; - buf[off++] = (byte) (values[i + 1] >>> 36); - buf[off++] = (byte) (values[i + 1] >>> 28); - buf[off++] = (byte) (values[i + 1] >>> 20); - buf[off++] = (byte) (values[i + 1] >>> 12); - buf[off++] = (byte) (values[i + 1] >>> 4); - - buf[off] = (byte) (values[i + 1] << 4); - buf[off++] |= values[i + 2] >>> 46; - buf[off++] = (byte) (values[i + 2] >>> 38); - buf[off++] = (byte) (values[i + 2] >>> 30); - buf[off++] = (byte) (values[i + 2] >>> 22); - buf[off++] = (byte) (values[i + 2] >>> 14); - buf[off++] = (byte) (values[i + 2] >>> 6); - - buf[off] = (byte) (values[i + 2] << 2); - buf[off++] |= values[i + 3] >>> 48; - buf[off++] = (byte) (values[i + 3] >>> 40); - buf[off++] = (byte) (values[i + 3] >>> 32); - buf[off++] = (byte) (values[i + 3] >>> 24); - buf[off++] = (byte) (values[i + 3] >>> 16); - buf[off++] = (byte) (values[i + 3] >>> 8); - buf[off++] = (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 42); - buf[off++] = (byte) (values[i + 4] >>> 34); - buf[off++] = (byte) (values[i + 4] >>> 26); - buf[off++] = (byte) (values[i + 4] >>> 18); - buf[off++] = (byte) (values[i + 4] >>> 10); - buf[off++] = (byte) (values[i + 4] >>> 2); - - buf[off] = (byte) (values[i + 4] << 6); - buf[off++] |= values[i + 5] >>> 44; - buf[off++] = (byte) (values[i + 5] >>> 36); - buf[off++] = (byte) (values[i + 5] >>> 28); - buf[off++] = (byte) (values[i + 5] >>> 20); - buf[off++] = (byte) (values[i + 5] >>> 12); - buf[off++] = (byte) (values[i + 5] >>> 4); - - buf[off] = (byte) (values[i + 5] << 4); - buf[off++] |= values[i + 6] >>> 46; - buf[off++] = (byte) (values[i + 6] >>> 38); - buf[off++] = (byte) (values[i + 6] >>> 30); - buf[off++] = (byte) (values[i + 6] >>> 22); - buf[off++] = (byte) (values[i + 6] >>> 14); - buf[off++] = (byte) (values[i + 6] >>> 6); - - buf[off] = (byte) (values[i + 6] << 2); - buf[off++] |= values[i + 7] >>> 48; - buf[off++] = (byte) (values[i + 7] >>> 40); - buf[off++] = (byte) 
(values[i + 7] >>> 32); - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits51(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 43); - buf[off++] = (byte) (values[i + 0] >>> 35); - buf[off++] = (byte) (values[i + 0] >>> 27); - buf[off++] = (byte) (values[i + 0] >>> 19); - buf[off++] = (byte) (values[i + 0] >>> 11); - buf[off++] = (byte) (values[i + 0] >>> 3); - - buf[off] = (byte) (values[i + 0] << 5); - buf[off++] |= values[i + 1] >>> 46; - buf[off++] = (byte) (values[i + 1] >>> 38); - buf[off++] = (byte) (values[i + 1] >>> 30); - buf[off++] = (byte) (values[i + 1] >>> 22); - buf[off++] = (byte) (values[i + 1] >>> 14); - buf[off++] = (byte) (values[i + 1] >>> 6); - - buf[off] = (byte) (values[i + 1] << 2); - buf[off++] |= values[i + 2] >>> 49; - buf[off++] = (byte) (values[i + 2] >>> 41); - buf[off++] = (byte) (values[i + 2] >>> 33); - buf[off++] = (byte) (values[i + 2] >>> 25); - buf[off++] = (byte) (values[i + 2] >>> 17); - buf[off++] = (byte) (values[i + 2] >>> 9); - buf[off++] = (byte) (values[i + 2] >>> 1); - - buf[off] = (byte) (values[i + 2] << 7); - buf[off++] |= values[i + 3] >>> 44; - buf[off++] = (byte) (values[i + 3] >>> 36); - buf[off++] = (byte) (values[i + 3] >>> 28); - buf[off++] = (byte) (values[i + 3] >>> 20); - buf[off++] = (byte) (values[i + 3] >>> 12); - buf[off++] = (byte) (values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 47; - buf[off++] = (byte) (values[i + 4] >>> 39); - buf[off++] = (byte) (values[i + 4] >>> 31); - buf[off++] = (byte) (values[i + 4] >>> 23); - buf[off++] = (byte) (values[i + 4] >>> 15); - buf[off++] = (byte) (values[i + 4] >>> 7); - - buf[off] = (byte) (values[i + 4] << 1); - buf[off++] |= values[i + 5] >>> 50; - buf[off++] = (byte) (values[i + 5] >>> 42); - buf[off++] = 
(byte) (values[i + 5] >>> 34); - buf[off++] = (byte) (values[i + 5] >>> 26); - buf[off++] = (byte) (values[i + 5] >>> 18); - buf[off++] = (byte) (values[i + 5] >>> 10); - buf[off++] = (byte) (values[i + 5] >>> 2); - - buf[off] = (byte) (values[i + 5] << 6); - buf[off++] |= values[i + 6] >>> 45; - buf[off++] = (byte) (values[i + 6] >>> 37); - buf[off++] = (byte) (values[i + 6] >>> 29); - buf[off++] = (byte) (values[i + 6] >>> 21); - buf[off++] = (byte) (values[i + 6] >>> 13); - buf[off++] = (byte) (values[i + 6] >>> 5); - - buf[off] = (byte) (values[i + 6] << 3); - buf[off++] |= values[i + 7] >>> 48; - buf[off++] = (byte) (values[i + 7] >>> 40); - buf[off++] = (byte) (values[i + 7] >>> 32); - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits52(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 44); - buf[off++] = (byte) (values[i + 0] >>> 36); - buf[off++] = (byte) (values[i + 0] >>> 28); - buf[off++] = (byte) (values[i + 0] >>> 20); - buf[off++] = (byte) (values[i + 0] >>> 12); - buf[off++] = (byte) (values[i + 0] >>> 4); - - buf[off] = (byte) (values[i + 0] << 4); - buf[off++] |= values[i + 1] >>> 48; - buf[off++] = (byte) (values[i + 1] >>> 40); - buf[off++] = (byte) (values[i + 1] >>> 32); - buf[off++] = (byte) (values[i + 1] >>> 24); - buf[off++] = (byte) (values[i + 1] >>> 16); - buf[off++] = (byte) (values[i + 1] >>> 8); - buf[off++] = (byte) (values[i + 1]); - - buf[off++] = (byte) (values[i + 2] >>> 44); - buf[off++] = (byte) (values[i + 2] >>> 36); - buf[off++] = (byte) (values[i + 2] >>> 28); - buf[off++] = (byte) (values[i + 2] >>> 20); - buf[off++] = (byte) (values[i + 2] >>> 12); - buf[off++] = (byte) (values[i + 2] >>> 4); - - buf[off] = (byte) (values[i + 2] << 4); - buf[off++] |= values[i + 3] >>> 48; - buf[off++] = (byte) (values[i + 3] >>> 40); - 
buf[off++] = (byte) (values[i + 3] >>> 32); - buf[off++] = (byte) (values[i + 3] >>> 24); - buf[off++] = (byte) (values[i + 3] >>> 16); - buf[off++] = (byte) (values[i + 3] >>> 8); - buf[off++] = (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 44); - buf[off++] = (byte) (values[i + 4] >>> 36); - buf[off++] = (byte) (values[i + 4] >>> 28); - buf[off++] = (byte) (values[i + 4] >>> 20); - buf[off++] = (byte) (values[i + 4] >>> 12); - buf[off++] = (byte) (values[i + 4] >>> 4); - - buf[off] = (byte) (values[i + 4] << 4); - buf[off++] |= values[i + 5] >>> 48; - buf[off++] = (byte) (values[i + 5] >>> 40); - buf[off++] = (byte) (values[i + 5] >>> 32); - buf[off++] = (byte) (values[i + 5] >>> 24); - buf[off++] = (byte) (values[i + 5] >>> 16); - buf[off++] = (byte) (values[i + 5] >>> 8); - buf[off++] = (byte) (values[i + 5]); - - buf[off++] = (byte) (values[i + 6] >>> 44); - buf[off++] = (byte) (values[i + 6] >>> 36); - buf[off++] = (byte) (values[i + 6] >>> 28); - buf[off++] = (byte) (values[i + 6] >>> 20); - buf[off++] = (byte) (values[i + 6] >>> 12); - buf[off++] = (byte) (values[i + 6] >>> 4); - - buf[off] = (byte) (values[i + 6] << 4); - buf[off++] |= values[i + 7] >>> 48; - buf[off++] = (byte) (values[i + 7] >>> 40); - buf[off++] = (byte) (values[i + 7] >>> 32); - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits53(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 45); - buf[off++] = (byte) (values[i + 0] >>> 37); - buf[off++] = (byte) (values[i + 0] >>> 29); - buf[off++] = (byte) (values[i + 0] >>> 21); - buf[off++] = (byte) (values[i + 0] >>> 13); - buf[off++] = (byte) (values[i + 0] >>> 5); - - buf[off] = (byte) (values[i + 0] << 3); - buf[off++] |= values[i + 1] >>> 50; - buf[off++] = (byte) (values[i + 1] >>> 42); - buf[off++] = (byte) 
(values[i + 1] >>> 34); - buf[off++] = (byte) (values[i + 1] >>> 26); - buf[off++] = (byte) (values[i + 1] >>> 18); - buf[off++] = (byte) (values[i + 1] >>> 10); - buf[off++] = (byte) (values[i + 1] >>> 2); - - buf[off] = (byte) (values[i + 1] << 6); - buf[off++] |= values[i + 2] >>> 47; - buf[off++] = (byte) (values[i + 2] >>> 39); - buf[off++] = (byte) (values[i + 2] >>> 31); - buf[off++] = (byte) (values[i + 2] >>> 23); - buf[off++] = (byte) (values[i + 2] >>> 15); - buf[off++] = (byte) (values[i + 2] >>> 7); - - buf[off] = (byte) (values[i + 2] << 1); - buf[off++] |= values[i + 3] >>> 52; - buf[off++] = (byte) (values[i + 3] >>> 44); - buf[off++] = (byte) (values[i + 3] >>> 36); - buf[off++] = (byte) (values[i + 3] >>> 28); - buf[off++] = (byte) (values[i + 3] >>> 20); - buf[off++] = (byte) (values[i + 3] >>> 12); - buf[off++] = (byte) (values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 49; - buf[off++] = (byte) (values[i + 4] >>> 41); - buf[off++] = (byte) (values[i + 4] >>> 33); - buf[off++] = (byte) (values[i + 4] >>> 25); - buf[off++] = (byte) (values[i + 4] >>> 17); - buf[off++] = (byte) (values[i + 4] >>> 9); - buf[off++] = (byte) (values[i + 4] >>> 1); - - buf[off] = (byte) (values[i + 4] << 7); - buf[off++] |= values[i + 5] >>> 46; - buf[off++] = (byte) (values[i + 5] >>> 38); - buf[off++] = (byte) (values[i + 5] >>> 30); - buf[off++] = (byte) (values[i + 5] >>> 22); - buf[off++] = (byte) (values[i + 5] >>> 14); - buf[off++] = (byte) (values[i + 5] >>> 6); - - buf[off] = (byte) (values[i + 5] << 2); - buf[off++] |= values[i + 6] >>> 51; - buf[off++] = (byte) (values[i + 6] >>> 43); - buf[off++] = (byte) (values[i + 6] >>> 35); - buf[off++] = (byte) (values[i + 6] >>> 27); - buf[off++] = (byte) (values[i + 6] >>> 19); - buf[off++] = (byte) (values[i + 6] >>> 11); - buf[off++] = (byte) (values[i + 6] >>> 3); - - buf[off] = (byte) (values[i + 6] << 5); - buf[off++] |= values[i + 7] >>> 48; - buf[off++] = 
(byte) (values[i + 7] >>> 40); - buf[off++] = (byte) (values[i + 7] >>> 32); - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits54(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 46); - buf[off++] = (byte) (values[i + 0] >>> 38); - buf[off++] = (byte) (values[i + 0] >>> 30); - buf[off++] = (byte) (values[i + 0] >>> 22); - buf[off++] = (byte) (values[i + 0] >>> 14); - buf[off++] = (byte) (values[i + 0] >>> 6); - - buf[off] = (byte) (values[i + 0] << 2); - buf[off++] |= values[i + 1] >>> 52; - buf[off++] = (byte) (values[i + 1] >>> 44); - buf[off++] = (byte) (values[i + 1] >>> 36); - buf[off++] = (byte) (values[i + 1] >>> 28); - buf[off++] = (byte) (values[i + 1] >>> 20); - buf[off++] = (byte) (values[i + 1] >>> 12); - buf[off++] = (byte) (values[i + 1] >>> 4); - - buf[off] = (byte) (values[i + 1] << 4); - buf[off++] |= values[i + 2] >>> 50; - buf[off++] = (byte) (values[i + 2] >>> 42); - buf[off++] = (byte) (values[i + 2] >>> 34); - buf[off++] = (byte) (values[i + 2] >>> 26); - buf[off++] = (byte) (values[i + 2] >>> 18); - buf[off++] = (byte) (values[i + 2] >>> 10); - buf[off++] = (byte) (values[i + 2] >>> 2); - - buf[off] = (byte) (values[i + 2] << 6); - buf[off++] |= values[i + 3] >>> 48; - buf[off++] = (byte) (values[i + 3] >>> 40); - buf[off++] = (byte) (values[i + 3] >>> 32); - buf[off++] = (byte) (values[i + 3] >>> 24); - buf[off++] = (byte) (values[i + 3] >>> 16); - buf[off++] = (byte) (values[i + 3] >>> 8); - buf[off++] = (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 46); - buf[off++] = (byte) (values[i + 4] >>> 38); - buf[off++] = (byte) (values[i + 4] >>> 30); - buf[off++] = (byte) (values[i + 4] >>> 22); - buf[off++] = (byte) (values[i + 4] >>> 14); - buf[off++] = (byte) (values[i + 4] >>> 6); - - buf[off] = (byte) (values[i + 4] 
<< 2); - buf[off++] |= values[i + 5] >>> 52; - buf[off++] = (byte) (values[i + 5] >>> 44); - buf[off++] = (byte) (values[i + 5] >>> 36); - buf[off++] = (byte) (values[i + 5] >>> 28); - buf[off++] = (byte) (values[i + 5] >>> 20); - buf[off++] = (byte) (values[i + 5] >>> 12); - buf[off++] = (byte) (values[i + 5] >>> 4); - - buf[off] = (byte) (values[i + 5] << 4); - buf[off++] |= values[i + 6] >>> 50; - buf[off++] = (byte) (values[i + 6] >>> 42); - buf[off++] = (byte) (values[i + 6] >>> 34); - buf[off++] = (byte) (values[i + 6] >>> 26); - buf[off++] = (byte) (values[i + 6] >>> 18); - buf[off++] = (byte) (values[i + 6] >>> 10); - buf[off++] = (byte) (values[i + 6] >>> 2); - - buf[off] = (byte) (values[i + 6] << 6); - buf[off++] |= values[i + 7] >>> 48; - buf[off++] = (byte) (values[i + 7] >>> 40); - buf[off++] = (byte) (values[i + 7] >>> 32); - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits55(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 47); - buf[off++] = (byte) (values[i + 0] >>> 39); - buf[off++] = (byte) (values[i + 0] >>> 31); - buf[off++] = (byte) (values[i + 0] >>> 23); - buf[off++] = (byte) (values[i + 0] >>> 15); - buf[off++] = (byte) (values[i + 0] >>> 7); - - buf[off] = (byte) (values[i + 0] << 1); - buf[off++] |= values[i + 1] >>> 54; - buf[off++] = (byte) (values[i + 1] >>> 46); - buf[off++] = (byte) (values[i + 1] >>> 38); - buf[off++] = (byte) (values[i + 1] >>> 30); - buf[off++] = (byte) (values[i + 1] >>> 22); - buf[off++] = (byte) (values[i + 1] >>> 14); - buf[off++] = (byte) (values[i + 1] >>> 6); - - buf[off] = (byte) (values[i + 1] << 2); - buf[off++] |= values[i + 2] >>> 53; - buf[off++] = (byte) (values[i + 2] >>> 45); - buf[off++] = (byte) (values[i + 2] >>> 37); - buf[off++] = (byte) (values[i + 2] >>> 29); - buf[off++] = (byte) 
(values[i + 2] >>> 21); - buf[off++] = (byte) (values[i + 2] >>> 13); - buf[off++] = (byte) (values[i + 2] >>> 5); - - buf[off] = (byte) (values[i + 2] << 3); - buf[off++] |= values[i + 3] >>> 52; - buf[off++] = (byte) (values[i + 3] >>> 44); - buf[off++] = (byte) (values[i + 3] >>> 36); - buf[off++] = (byte) (values[i + 3] >>> 28); - buf[off++] = (byte) (values[i + 3] >>> 20); - buf[off++] = (byte) (values[i + 3] >>> 12); - buf[off++] = (byte) (values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 51; - buf[off++] = (byte) (values[i + 4] >>> 43); - buf[off++] = (byte) (values[i + 4] >>> 35); - buf[off++] = (byte) (values[i + 4] >>> 27); - buf[off++] = (byte) (values[i + 4] >>> 19); - buf[off++] = (byte) (values[i + 4] >>> 11); - buf[off++] = (byte) (values[i + 4] >>> 3); - - buf[off] = (byte) (values[i + 4] << 5); - buf[off++] |= values[i + 5] >>> 50; - buf[off++] = (byte) (values[i + 5] >>> 42); - buf[off++] = (byte) (values[i + 5] >>> 34); - buf[off++] = (byte) (values[i + 5] >>> 26); - buf[off++] = (byte) (values[i + 5] >>> 18); - buf[off++] = (byte) (values[i + 5] >>> 10); - buf[off++] = (byte) (values[i + 5] >>> 2); - - buf[off] = (byte) (values[i + 5] << 6); - buf[off++] |= values[i + 6] >>> 49; - buf[off++] = (byte) (values[i + 6] >>> 41); - buf[off++] = (byte) (values[i + 6] >>> 33); - buf[off++] = (byte) (values[i + 6] >>> 25); - buf[off++] = (byte) (values[i + 6] >>> 17); - buf[off++] = (byte) (values[i + 6] >>> 9); - buf[off++] = (byte) (values[i + 6] >>> 1); - - buf[off] = (byte) (values[i + 6] << 7); - buf[off++] |= values[i + 7] >>> 48; - buf[off++] = (byte) (values[i + 7] >>> 40); - buf[off++] = (byte) (values[i + 7] >>> 32); - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits56(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = 
(byte) (values[i + 0] >>> 48); - buf[off++] = (byte) (values[i + 0] >>> 40); - buf[off++] = (byte) (values[i + 0] >>> 32); - buf[off++] = (byte) (values[i + 0] >>> 24); - buf[off++] = (byte) (values[i + 0] >>> 16); - buf[off++] = (byte) (values[i + 0] >>> 8); - buf[off++] = (byte) (values[i + 0]); - - buf[off++] = (byte) (values[i + 1] >>> 48); - buf[off++] = (byte) (values[i + 1] >>> 40); - buf[off++] = (byte) (values[i + 1] >>> 32); - buf[off++] = (byte) (values[i + 1] >>> 24); - buf[off++] = (byte) (values[i + 1] >>> 16); - buf[off++] = (byte) (values[i + 1] >>> 8); - buf[off++] = (byte) (values[i + 1]); - - buf[off++] = (byte) (values[i + 2] >>> 48); - buf[off++] = (byte) (values[i + 2] >>> 40); - buf[off++] = (byte) (values[i + 2] >>> 32); - buf[off++] = (byte) (values[i + 2] >>> 24); - buf[off++] = (byte) (values[i + 2] >>> 16); - buf[off++] = (byte) (values[i + 2] >>> 8); - buf[off++] = (byte) (values[i + 2]); - - buf[off++] = (byte) (values[i + 3] >>> 48); - buf[off++] = (byte) (values[i + 3] >>> 40); - buf[off++] = (byte) (values[i + 3] >>> 32); - buf[off++] = (byte) (values[i + 3] >>> 24); - buf[off++] = (byte) (values[i + 3] >>> 16); - buf[off++] = (byte) (values[i + 3] >>> 8); - buf[off++] = (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 48); - buf[off++] = (byte) (values[i + 4] >>> 40); - buf[off++] = (byte) (values[i + 4] >>> 32); - buf[off++] = (byte) (values[i + 4] >>> 24); - buf[off++] = (byte) (values[i + 4] >>> 16); - buf[off++] = (byte) (values[i + 4] >>> 8); - buf[off++] = (byte) (values[i + 4]); - - buf[off++] = (byte) (values[i + 5] >>> 48); - buf[off++] = (byte) (values[i + 5] >>> 40); - buf[off++] = (byte) (values[i + 5] >>> 32); - buf[off++] = (byte) (values[i + 5] >>> 24); - buf[off++] = (byte) (values[i + 5] >>> 16); - buf[off++] = (byte) (values[i + 5] >>> 8); - buf[off++] = (byte) (values[i + 5]); - - buf[off++] = (byte) (values[i + 6] >>> 48); - buf[off++] = (byte) (values[i + 6] >>> 40); - buf[off++] = (byte) 
(values[i + 6] >>> 32); - buf[off++] = (byte) (values[i + 6] >>> 24); - buf[off++] = (byte) (values[i + 6] >>> 16); - buf[off++] = (byte) (values[i + 6] >>> 8); - buf[off++] = (byte) (values[i + 6]); - - buf[off++] = (byte) (values[i + 7] >>> 48); - buf[off++] = (byte) (values[i + 7] >>> 40); - buf[off++] = (byte) (values[i + 7] >>> 32); - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits57(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 49); - buf[off++] = (byte) (values[i + 0] >>> 41); - buf[off++] = (byte) (values[i + 0] >>> 33); - buf[off++] = (byte) (values[i + 0] >>> 25); - buf[off++] = (byte) (values[i + 0] >>> 17); - buf[off++] = (byte) (values[i + 0] >>> 9); - buf[off++] = (byte) (values[i + 0] >>> 1); - - buf[off] = (byte) (values[i + 0] << 7); - buf[off++] |= values[i + 1] >>> 50; - buf[off++] = (byte) (values[i + 1] >>> 42); - buf[off++] = (byte) (values[i + 1] >>> 34); - buf[off++] = (byte) (values[i + 1] >>> 26); - buf[off++] = (byte) (values[i + 1] >>> 18); - buf[off++] = (byte) (values[i + 1] >>> 10); - buf[off++] = (byte) (values[i + 1] >>> 2); - - buf[off] = (byte) (values[i + 1] << 6); - buf[off++] |= values[i + 2] >>> 51; - buf[off++] = (byte) (values[i + 2] >>> 43); - buf[off++] = (byte) (values[i + 2] >>> 35); - buf[off++] = (byte) (values[i + 2] >>> 27); - buf[off++] = (byte) (values[i + 2] >>> 19); - buf[off++] = (byte) (values[i + 2] >>> 11); - buf[off++] = (byte) (values[i + 2] >>> 3); - - buf[off] = (byte) (values[i + 2] << 5); - buf[off++] |= values[i + 3] >>> 52; - buf[off++] = (byte) (values[i + 3] >>> 44); - buf[off++] = (byte) (values[i + 3] >>> 36); - buf[off++] = (byte) (values[i + 3] >>> 28); - buf[off++] = (byte) (values[i + 3] >>> 20); - buf[off++] = (byte) (values[i + 3] >>> 12); - buf[off++] = (byte) (values[i + 3] >>> 4); 
- - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 53; - buf[off++] = (byte) (values[i + 4] >>> 45); - buf[off++] = (byte) (values[i + 4] >>> 37); - buf[off++] = (byte) (values[i + 4] >>> 29); - buf[off++] = (byte) (values[i + 4] >>> 21); - buf[off++] = (byte) (values[i + 4] >>> 13); - buf[off++] = (byte) (values[i + 4] >>> 5); - - buf[off] = (byte) (values[i + 4] << 3); - buf[off++] |= values[i + 5] >>> 54; - buf[off++] = (byte) (values[i + 5] >>> 46); - buf[off++] = (byte) (values[i + 5] >>> 38); - buf[off++] = (byte) (values[i + 5] >>> 30); - buf[off++] = (byte) (values[i + 5] >>> 22); - buf[off++] = (byte) (values[i + 5] >>> 14); - buf[off++] = (byte) (values[i + 5] >>> 6); - - buf[off] = (byte) (values[i + 5] << 2); - buf[off++] |= values[i + 6] >>> 55; - buf[off++] = (byte) (values[i + 6] >>> 47); - buf[off++] = (byte) (values[i + 6] >>> 39); - buf[off++] = (byte) (values[i + 6] >>> 31); - buf[off++] = (byte) (values[i + 6] >>> 23); - buf[off++] = (byte) (values[i + 6] >>> 15); - buf[off++] = (byte) (values[i + 6] >>> 7); - - buf[off] = (byte) (values[i + 6] << 1); - buf[off++] |= values[i + 7] >>> 56; - buf[off++] = (byte) (values[i + 7] >>> 48); - buf[off++] = (byte) (values[i + 7] >>> 40); - buf[off++] = (byte) (values[i + 7] >>> 32); - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits58(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 50); - buf[off++] = (byte) (values[i + 0] >>> 42); - buf[off++] = (byte) (values[i + 0] >>> 34); - buf[off++] = (byte) (values[i + 0] >>> 26); - buf[off++] = (byte) (values[i + 0] >>> 18); - buf[off++] = (byte) (values[i + 0] >>> 10); - buf[off++] = (byte) (values[i + 0] >>> 2); - - buf[off] = (byte) (values[i + 0] << 6); - buf[off++] |= values[i + 1] >>> 52; - buf[off++] = (byte) (values[i + 1] 
>>> 44); - buf[off++] = (byte) (values[i + 1] >>> 36); - buf[off++] = (byte) (values[i + 1] >>> 28); - buf[off++] = (byte) (values[i + 1] >>> 20); - buf[off++] = (byte) (values[i + 1] >>> 12); - buf[off++] = (byte) (values[i + 1] >>> 4); - - buf[off] = (byte) (values[i + 1] << 4); - buf[off++] |= values[i + 2] >>> 54; - buf[off++] = (byte) (values[i + 2] >>> 46); - buf[off++] = (byte) (values[i + 2] >>> 38); - buf[off++] = (byte) (values[i + 2] >>> 30); - buf[off++] = (byte) (values[i + 2] >>> 22); - buf[off++] = (byte) (values[i + 2] >>> 14); - buf[off++] = (byte) (values[i + 2] >>> 6); - - buf[off] = (byte) (values[i + 2] << 2); - buf[off++] |= values[i + 3] >>> 56; - buf[off++] = (byte) (values[i + 3] >>> 48); - buf[off++] = (byte) (values[i + 3] >>> 40); - buf[off++] = (byte) (values[i + 3] >>> 32); - buf[off++] = (byte) (values[i + 3] >>> 24); - buf[off++] = (byte) (values[i + 3] >>> 16); - buf[off++] = (byte) (values[i + 3] >>> 8); - buf[off++] = (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 50); - buf[off++] = (byte) (values[i + 4] >>> 42); - buf[off++] = (byte) (values[i + 4] >>> 34); - buf[off++] = (byte) (values[i + 4] >>> 26); - buf[off++] = (byte) (values[i + 4] >>> 18); - buf[off++] = (byte) (values[i + 4] >>> 10); - buf[off++] = (byte) (values[i + 4] >>> 2); - - buf[off] = (byte) (values[i + 4] << 6); - buf[off++] |= values[i + 5] >>> 52; - buf[off++] = (byte) (values[i + 5] >>> 44); - buf[off++] = (byte) (values[i + 5] >>> 36); - buf[off++] = (byte) (values[i + 5] >>> 28); - buf[off++] = (byte) (values[i + 5] >>> 20); - buf[off++] = (byte) (values[i + 5] >>> 12); - buf[off++] = (byte) (values[i + 5] >>> 4); - - buf[off] = (byte) (values[i + 5] << 4); - buf[off++] |= values[i + 6] >>> 54; - buf[off++] = (byte) (values[i + 6] >>> 46); - buf[off++] = (byte) (values[i + 6] >>> 38); - buf[off++] = (byte) (values[i + 6] >>> 30); - buf[off++] = (byte) (values[i + 6] >>> 22); - buf[off++] = (byte) (values[i + 6] >>> 14); - buf[off++] = 
(byte) (values[i + 6] >>> 6); - - buf[off] = (byte) (values[i + 6] << 2); - buf[off++] |= values[i + 7] >>> 56; - buf[off++] = (byte) (values[i + 7] >>> 48); - buf[off++] = (byte) (values[i + 7] >>> 40); - buf[off++] = (byte) (values[i + 7] >>> 32); - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits59(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 51); - buf[off++] = (byte) (values[i + 0] >>> 43); - buf[off++] = (byte) (values[i + 0] >>> 35); - buf[off++] = (byte) (values[i + 0] >>> 27); - buf[off++] = (byte) (values[i + 0] >>> 19); - buf[off++] = (byte) (values[i + 0] >>> 11); - buf[off++] = (byte) (values[i + 0] >>> 3); - - buf[off] = (byte) (values[i + 0] << 5); - buf[off++] |= values[i + 1] >>> 54; - buf[off++] = (byte) (values[i + 1] >>> 46); - buf[off++] = (byte) (values[i + 1] >>> 38); - buf[off++] = (byte) (values[i + 1] >>> 30); - buf[off++] = (byte) (values[i + 1] >>> 22); - buf[off++] = (byte) (values[i + 1] >>> 14); - buf[off++] = (byte) (values[i + 1] >>> 6); - - buf[off] = (byte) (values[i + 1] << 2); - buf[off++] |= values[i + 2] >>> 57; - buf[off++] = (byte) (values[i + 2] >>> 49); - buf[off++] = (byte) (values[i + 2] >>> 41); - buf[off++] = (byte) (values[i + 2] >>> 33); - buf[off++] = (byte) (values[i + 2] >>> 25); - buf[off++] = (byte) (values[i + 2] >>> 17); - buf[off++] = (byte) (values[i + 2] >>> 9); - buf[off++] = (byte) (values[i + 2] >>> 1); - - buf[off] = (byte) (values[i + 2] << 7); - buf[off++] |= values[i + 3] >>> 52; - buf[off++] = (byte) (values[i + 3] >>> 44); - buf[off++] = (byte) (values[i + 3] >>> 36); - buf[off++] = (byte) (values[i + 3] >>> 28); - buf[off++] = (byte) (values[i + 3] >>> 20); - buf[off++] = (byte) (values[i + 3] >>> 12); - buf[off++] = (byte) (values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 3] << 4); 
- buf[off++] |= values[i + 4] >>> 55; - buf[off++] = (byte) (values[i + 4] >>> 47); - buf[off++] = (byte) (values[i + 4] >>> 39); - buf[off++] = (byte) (values[i + 4] >>> 31); - buf[off++] = (byte) (values[i + 4] >>> 23); - buf[off++] = (byte) (values[i + 4] >>> 15); - buf[off++] = (byte) (values[i + 4] >>> 7); - - buf[off] = (byte) (values[i + 4] << 1); - buf[off++] |= values[i + 5] >>> 58; - buf[off++] = (byte) (values[i + 5] >>> 50); - buf[off++] = (byte) (values[i + 5] >>> 42); - buf[off++] = (byte) (values[i + 5] >>> 34); - buf[off++] = (byte) (values[i + 5] >>> 26); - buf[off++] = (byte) (values[i + 5] >>> 18); - buf[off++] = (byte) (values[i + 5] >>> 10); - buf[off++] = (byte) (values[i + 5] >>> 2); - - buf[off] = (byte) (values[i + 5] << 6); - buf[off++] |= values[i + 6] >>> 53; - buf[off++] = (byte) (values[i + 6] >>> 45); - buf[off++] = (byte) (values[i + 6] >>> 37); - buf[off++] = (byte) (values[i + 6] >>> 29); - buf[off++] = (byte) (values[i + 6] >>> 21); - buf[off++] = (byte) (values[i + 6] >>> 13); - buf[off++] = (byte) (values[i + 6] >>> 5); - - buf[off] = (byte) (values[i + 6] << 3); - buf[off++] |= values[i + 7] >>> 56; - buf[off++] = (byte) (values[i + 7] >>> 48); - buf[off++] = (byte) (values[i + 7] >>> 40); - buf[off++] = (byte) (values[i + 7] >>> 32); - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits60(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 52); - buf[off++] = (byte) (values[i + 0] >>> 44); - buf[off++] = (byte) (values[i + 0] >>> 36); - buf[off++] = (byte) (values[i + 0] >>> 28); - buf[off++] = (byte) (values[i + 0] >>> 20); - buf[off++] = (byte) (values[i + 0] >>> 12); - buf[off++] = (byte) (values[i + 0] >>> 4); - - buf[off] = (byte) (values[i + 0] << 4); - buf[off++] |= values[i + 1] >>> 56; - buf[off++] = (byte) (values[i + 
1] >>> 48); - buf[off++] = (byte) (values[i + 1] >>> 40); - buf[off++] = (byte) (values[i + 1] >>> 32); - buf[off++] = (byte) (values[i + 1] >>> 24); - buf[off++] = (byte) (values[i + 1] >>> 16); - buf[off++] = (byte) (values[i + 1] >>> 8); - buf[off++] = (byte) (values[i + 1]); - - buf[off++] = (byte) (values[i + 2] >>> 52); - buf[off++] = (byte) (values[i + 2] >>> 44); - buf[off++] = (byte) (values[i + 2] >>> 36); - buf[off++] = (byte) (values[i + 2] >>> 28); - buf[off++] = (byte) (values[i + 2] >>> 20); - buf[off++] = (byte) (values[i + 2] >>> 12); - buf[off++] = (byte) (values[i + 2] >>> 4); - - buf[off] = (byte) (values[i + 2] << 4); - buf[off++] |= values[i + 3] >>> 56; - buf[off++] = (byte) (values[i + 3] >>> 48); - buf[off++] = (byte) (values[i + 3] >>> 40); - buf[off++] = (byte) (values[i + 3] >>> 32); - buf[off++] = (byte) (values[i + 3] >>> 24); - buf[off++] = (byte) (values[i + 3] >>> 16); - buf[off++] = (byte) (values[i + 3] >>> 8); - buf[off++] = (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 52); - buf[off++] = (byte) (values[i + 4] >>> 44); - buf[off++] = (byte) (values[i + 4] >>> 36); - buf[off++] = (byte) (values[i + 4] >>> 28); - buf[off++] = (byte) (values[i + 4] >>> 20); - buf[off++] = (byte) (values[i + 4] >>> 12); - buf[off++] = (byte) (values[i + 4] >>> 4); - - buf[off] = (byte) (values[i + 4] << 4); - buf[off++] |= values[i + 5] >>> 56; - buf[off++] = (byte) (values[i + 5] >>> 48); - buf[off++] = (byte) (values[i + 5] >>> 40); - buf[off++] = (byte) (values[i + 5] >>> 32); - buf[off++] = (byte) (values[i + 5] >>> 24); - buf[off++] = (byte) (values[i + 5] >>> 16); - buf[off++] = (byte) (values[i + 5] >>> 8); - buf[off++] = (byte) (values[i + 5]); - - buf[off++] = (byte) (values[i + 6] >>> 52); - buf[off++] = (byte) (values[i + 6] >>> 44); - buf[off++] = (byte) (values[i + 6] >>> 36); - buf[off++] = (byte) (values[i + 6] >>> 28); - buf[off++] = (byte) (values[i + 6] >>> 20); - buf[off++] = (byte) (values[i + 6] >>> 12); - 
buf[off++] = (byte) (values[i + 6] >>> 4); - - buf[off] = (byte) (values[i + 6] << 4); - buf[off++] |= values[i + 7] >>> 56; - buf[off++] = (byte) (values[i + 7] >>> 48); - buf[off++] = (byte) (values[i + 7] >>> 40); - buf[off++] = (byte) (values[i + 7] >>> 32); - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits61(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 53); - buf[off++] = (byte) (values[i + 0] >>> 45); - buf[off++] = (byte) (values[i + 0] >>> 37); - buf[off++] = (byte) (values[i + 0] >>> 29); - buf[off++] = (byte) (values[i + 0] >>> 21); - buf[off++] = (byte) (values[i + 0] >>> 13); - buf[off++] = (byte) (values[i + 0] >>> 5); - - buf[off] = (byte) (values[i + 0] << 3); - buf[off++] |= values[i + 1] >>> 58; - buf[off++] = (byte) (values[i + 1] >>> 50); - buf[off++] = (byte) (values[i + 1] >>> 42); - buf[off++] = (byte) (values[i + 1] >>> 34); - buf[off++] = (byte) (values[i + 1] >>> 26); - buf[off++] = (byte) (values[i + 1] >>> 18); - buf[off++] = (byte) (values[i + 1] >>> 10); - buf[off++] = (byte) (values[i + 1] >>> 2); - - buf[off] = (byte) (values[i + 1] << 6); - buf[off++] |= values[i + 2] >>> 55; - buf[off++] = (byte) (values[i + 2] >>> 47); - buf[off++] = (byte) (values[i + 2] >>> 39); - buf[off++] = (byte) (values[i + 2] >>> 31); - buf[off++] = (byte) (values[i + 2] >>> 23); - buf[off++] = (byte) (values[i + 2] >>> 15); - buf[off++] = (byte) (values[i + 2] >>> 7); - - buf[off] = (byte) (values[i + 2] << 1); - buf[off++] |= values[i + 3] >>> 60; - buf[off++] = (byte) (values[i + 3] >>> 52); - buf[off++] = (byte) (values[i + 3] >>> 44); - buf[off++] = (byte) (values[i + 3] >>> 36); - buf[off++] = (byte) (values[i + 3] >>> 28); - buf[off++] = (byte) (values[i + 3] >>> 20); - buf[off++] = (byte) (values[i + 3] >>> 12); - buf[off++] = (byte) 
(values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 57; - buf[off++] = (byte) (values[i + 4] >>> 49); - buf[off++] = (byte) (values[i + 4] >>> 41); - buf[off++] = (byte) (values[i + 4] >>> 33); - buf[off++] = (byte) (values[i + 4] >>> 25); - buf[off++] = (byte) (values[i + 4] >>> 17); - buf[off++] = (byte) (values[i + 4] >>> 9); - buf[off++] = (byte) (values[i + 4] >>> 1); - - buf[off] = (byte) (values[i + 4] << 7); - buf[off++] |= values[i + 5] >>> 54; - buf[off++] = (byte) (values[i + 5] >>> 46); - buf[off++] = (byte) (values[i + 5] >>> 38); - buf[off++] = (byte) (values[i + 5] >>> 30); - buf[off++] = (byte) (values[i + 5] >>> 22); - buf[off++] = (byte) (values[i + 5] >>> 14); - buf[off++] = (byte) (values[i + 5] >>> 6); - - buf[off] = (byte) (values[i + 5] << 2); - buf[off++] |= values[i + 6] >>> 59; - buf[off++] = (byte) (values[i + 6] >>> 51); - buf[off++] = (byte) (values[i + 6] >>> 43); - buf[off++] = (byte) (values[i + 6] >>> 35); - buf[off++] = (byte) (values[i + 6] >>> 27); - buf[off++] = (byte) (values[i + 6] >>> 19); - buf[off++] = (byte) (values[i + 6] >>> 11); - buf[off++] = (byte) (values[i + 6] >>> 3); - - buf[off] = (byte) (values[i + 6] << 5); - buf[off++] |= values[i + 7] >>> 56; - buf[off++] = (byte) (values[i + 7] >>> 48); - buf[off++] = (byte) (values[i + 7] >>> 40); - buf[off++] = (byte) (values[i + 7] >>> 32); - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits62(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 54); - buf[off++] = (byte) (values[i + 0] >>> 46); - buf[off++] = (byte) (values[i + 0] >>> 38); - buf[off++] = (byte) (values[i + 0] >>> 30); - buf[off++] = (byte) (values[i + 0] >>> 22); - buf[off++] = (byte) (values[i + 0] >>> 14); - buf[off++] = (byte) (values[i + 0] >>> 6); - - 
buf[off] = (byte) (values[i + 0] << 2); - buf[off++] |= values[i + 1] >>> 60; - buf[off++] = (byte) (values[i + 1] >>> 52); - buf[off++] = (byte) (values[i + 1] >>> 44); - buf[off++] = (byte) (values[i + 1] >>> 36); - buf[off++] = (byte) (values[i + 1] >>> 28); - buf[off++] = (byte) (values[i + 1] >>> 20); - buf[off++] = (byte) (values[i + 1] >>> 12); - buf[off++] = (byte) (values[i + 1] >>> 4); - - buf[off] = (byte) (values[i + 1] << 4); - buf[off++] |= values[i + 2] >>> 58; - buf[off++] = (byte) (values[i + 2] >>> 50); - buf[off++] = (byte) (values[i + 2] >>> 42); - buf[off++] = (byte) (values[i + 2] >>> 34); - buf[off++] = (byte) (values[i + 2] >>> 26); - buf[off++] = (byte) (values[i + 2] >>> 18); - buf[off++] = (byte) (values[i + 2] >>> 10); - buf[off++] = (byte) (values[i + 2] >>> 2); - - buf[off] = (byte) (values[i + 2] << 6); - buf[off++] |= values[i + 3] >>> 56; - buf[off++] = (byte) (values[i + 3] >>> 48); - buf[off++] = (byte) (values[i + 3] >>> 40); - buf[off++] = (byte) (values[i + 3] >>> 32); - buf[off++] = (byte) (values[i + 3] >>> 24); - buf[off++] = (byte) (values[i + 3] >>> 16); - buf[off++] = (byte) (values[i + 3] >>> 8); - buf[off++] = (byte) (values[i + 3]); - - buf[off++] = (byte) (values[i + 4] >>> 54); - buf[off++] = (byte) (values[i + 4] >>> 46); - buf[off++] = (byte) (values[i + 4] >>> 38); - buf[off++] = (byte) (values[i + 4] >>> 30); - buf[off++] = (byte) (values[i + 4] >>> 22); - buf[off++] = (byte) (values[i + 4] >>> 14); - buf[off++] = (byte) (values[i + 4] >>> 6); - - buf[off] = (byte) (values[i + 4] << 2); - buf[off++] |= values[i + 5] >>> 60; - buf[off++] = (byte) (values[i + 5] >>> 52); - buf[off++] = (byte) (values[i + 5] >>> 44); - buf[off++] = (byte) (values[i + 5] >>> 36); - buf[off++] = (byte) (values[i + 5] >>> 28); - buf[off++] = (byte) (values[i + 5] >>> 20); - buf[off++] = (byte) (values[i + 5] >>> 12); - buf[off++] = (byte) (values[i + 5] >>> 4); - - buf[off] = (byte) (values[i + 5] << 4); - buf[off++] |= values[i + 6] 
>>> 58; - buf[off++] = (byte) (values[i + 6] >>> 50); - buf[off++] = (byte) (values[i + 6] >>> 42); - buf[off++] = (byte) (values[i + 6] >>> 34); - buf[off++] = (byte) (values[i + 6] >>> 26); - buf[off++] = (byte) (values[i + 6] >>> 18); - buf[off++] = (byte) (values[i + 6] >>> 10); - buf[off++] = (byte) (values[i + 6] >>> 2); - - buf[off] = (byte) (values[i + 6] << 6); - buf[off++] |= values[i + 7] >>> 56; - buf[off++] = (byte) (values[i + 7] >>> 48); - buf[off++] = (byte) (values[i + 7] >>> 40); - buf[off++] = (byte) (values[i + 7] >>> 32); - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) (values[i + 7]); - } - - static void packBits63(final long[] values, final int i, final byte[] buf, int off) { - buf[off++] = (byte) (values[i + 0] >>> 55); - buf[off++] = (byte) (values[i + 0] >>> 47); - buf[off++] = (byte) (values[i + 0] >>> 39); - buf[off++] = (byte) (values[i + 0] >>> 31); - buf[off++] = (byte) (values[i + 0] >>> 23); - buf[off++] = (byte) (values[i + 0] >>> 15); - buf[off++] = (byte) (values[i + 0] >>> 7); - - buf[off] = (byte) (values[i + 0] << 1); - buf[off++] |= values[i + 1] >>> 62; - buf[off++] = (byte) (values[i + 1] >>> 54); - buf[off++] = (byte) (values[i + 1] >>> 46); - buf[off++] = (byte) (values[i + 1] >>> 38); - buf[off++] = (byte) (values[i + 1] >>> 30); - buf[off++] = (byte) (values[i + 1] >>> 22); - buf[off++] = (byte) (values[i + 1] >>> 14); - buf[off++] = (byte) (values[i + 1] >>> 6); - - buf[off] = (byte) (values[i + 1] << 2); - buf[off++] |= values[i + 2] >>> 61; - buf[off++] = (byte) (values[i + 2] >>> 53); - buf[off++] = (byte) (values[i + 2] >>> 45); - buf[off++] = (byte) (values[i + 2] >>> 37); - buf[off++] = (byte) (values[i + 2] >>> 29); - buf[off++] = (byte) (values[i + 2] >>> 21); - buf[off++] = (byte) (values[i + 2] >>> 13); - buf[off++] = (byte) (values[i + 2] >>> 5); - - buf[off] = (byte) (values[i + 2] << 3); - 
buf[off++] |= values[i + 3] >>> 60; - buf[off++] = (byte) (values[i + 3] >>> 52); - buf[off++] = (byte) (values[i + 3] >>> 44); - buf[off++] = (byte) (values[i + 3] >>> 36); - buf[off++] = (byte) (values[i + 3] >>> 28); - buf[off++] = (byte) (values[i + 3] >>> 20); - buf[off++] = (byte) (values[i + 3] >>> 12); - buf[off++] = (byte) (values[i + 3] >>> 4); - - buf[off] = (byte) (values[i + 3] << 4); - buf[off++] |= values[i + 4] >>> 59; - buf[off++] = (byte) (values[i + 4] >>> 51); - buf[off++] = (byte) (values[i + 4] >>> 43); - buf[off++] = (byte) (values[i + 4] >>> 35); - buf[off++] = (byte) (values[i + 4] >>> 27); - buf[off++] = (byte) (values[i + 4] >>> 19); - buf[off++] = (byte) (values[i + 4] >>> 11); - buf[off++] = (byte) (values[i + 4] >>> 3); - - buf[off] = (byte) (values[i + 4] << 5); - buf[off++] |= values[i + 5] >>> 58; - buf[off++] = (byte) (values[i + 5] >>> 50); - buf[off++] = (byte) (values[i + 5] >>> 42); - buf[off++] = (byte) (values[i + 5] >>> 34); - buf[off++] = (byte) (values[i + 5] >>> 26); - buf[off++] = (byte) (values[i + 5] >>> 18); - buf[off++] = (byte) (values[i + 5] >>> 10); - buf[off++] = (byte) (values[i + 5] >>> 2); - - buf[off] = (byte) (values[i + 5] << 6); - buf[off++] |= values[i + 6] >>> 57; - buf[off++] = (byte) (values[i + 6] >>> 49); - buf[off++] = (byte) (values[i + 6] >>> 41); - buf[off++] = (byte) (values[i + 6] >>> 33); - buf[off++] = (byte) (values[i + 6] >>> 25); - buf[off++] = (byte) (values[i + 6] >>> 17); - buf[off++] = (byte) (values[i + 6] >>> 9); - buf[off++] = (byte) (values[i + 6] >>> 1); - - buf[off] = (byte) (values[i + 6] << 7); - buf[off++] |= values[i + 7] >>> 56; - buf[off++] = (byte) (values[i + 7] >>> 48); - buf[off++] = (byte) (values[i + 7] >>> 40); - buf[off++] = (byte) (values[i + 7] >>> 32); - buf[off++] = (byte) (values[i + 7] >>> 24); - buf[off++] = (byte) (values[i + 7] >>> 16); - buf[off++] = (byte) (values[i + 7] >>> 8); - buf[off] = (byte) values[i + 7]; - } - - static void unpackBits1(final 
long[] values, final int i, final byte[] buf, final int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off]) >>> 7) & 1; - values[i + 1] = (Byte.toUnsignedLong(buf[off]) >>> 6) & 1; - values[i + 2] = (Byte.toUnsignedLong(buf[off]) >>> 5) & 1; - values[i + 3] = (Byte.toUnsignedLong(buf[off]) >>> 4) & 1; - values[i + 4] = (Byte.toUnsignedLong(buf[off]) >>> 3) & 1; - values[i + 5] = (Byte.toUnsignedLong(buf[off]) >>> 2) & 1; - values[i + 6] = (Byte.toUnsignedLong(buf[off]) >>> 1) & 1; - values[i + 7] = Byte.toUnsignedLong(buf[off]) & 1; - } - - static void unpackBits2(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off]) >>> 6) & 3; - values[i + 1] = (Byte.toUnsignedLong(buf[off]) >>> 4) & 3; - values[i + 2] = (Byte.toUnsignedLong(buf[off]) >>> 2) & 3; - values[i + 3] = Byte.toUnsignedLong(buf[off++]) & 3; - values[i + 4] = (Byte.toUnsignedLong(buf[off]) >>> 6) & 3; - values[i + 5] = (Byte.toUnsignedLong(buf[off]) >>> 4) & 3; - values[i + 6] = (Byte.toUnsignedLong(buf[off]) >>> 2) & 3; - values[i + 7] = Byte.toUnsignedLong(buf[off]) & 3; - } - - static void unpackBits3(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off]) >>> 5; - values[i + 1] = (Byte.toUnsignedLong(buf[off]) >>> 2) & 7; - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 1; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 7; - values[i + 3] = (Byte.toUnsignedLong(buf[off]) >>> 4) & 7; - values[i + 4] = (Byte.toUnsignedLong(buf[off]) >>> 1) & 7; - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 1) << 2; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; - values[i + 6] = (Byte.toUnsignedLong(buf[off]) >>> 3) & 7; - values[i + 7] = Byte.toUnsignedLong(buf[off]) & 7; - } - - static void unpackBits4(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off]) >>> 4; - values[i + 1] = Byte.toUnsignedLong(buf[off++]) & 
0xf; - values[i + 2] = Byte.toUnsignedLong(buf[off]) >>> 4; - values[i + 3] = Byte.toUnsignedLong(buf[off++]) & 0xf; - values[i + 4] = Byte.toUnsignedLong(buf[off]) >>> 4; - values[i + 5] = Byte.toUnsignedLong(buf[off++]) & 0xf; - values[i + 6] = Byte.toUnsignedLong(buf[off]) >>> 4; - values[i + 7] = Byte.toUnsignedLong(buf[off]) & 0xf; - } - - static void unpackBits5(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 7) << 2; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 2] = (Byte.toUnsignedLong(buf[off]) >>> 1) & 0x1f; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 1) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 1; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 5] = (Byte.toUnsignedLong(buf[off]) >>> 2) & 0x1f; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 3; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 7] = Byte.toUnsignedLong(buf[off]) & 0x1f; - } - - static void unpackBits6(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 3) << 4; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 2; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 3] = Byte.toUnsignedLong(buf[off++]) & 0x3f; - - values[i + 4] = Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 3) << 4; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 2; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 7] = Byte.toUnsignedLong(buf[off]) & 0x3f; - } - - static void 
unpackBits7(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 1) << 6; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 5; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 7) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 3; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 2; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 1; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 7] = Byte.toUnsignedLong(buf[off]) & 0x7f; - } - - static void unpackBits8(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off++]); - values[i + 1] = Byte.toUnsignedLong(buf[off++]); - values[i + 2] = Byte.toUnsignedLong(buf[off++]); - values[i + 3] = Byte.toUnsignedLong(buf[off++]); - values[i + 4] = Byte.toUnsignedLong(buf[off++]); - values[i + 5] = Byte.toUnsignedLong(buf[off++]); - values[i + 6] = Byte.toUnsignedLong(buf[off++]); - values[i + 7] = Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits9(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 2; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 3; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - 
values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 5; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 7) << 6; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 7; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 1) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits10(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 4; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 6; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 3) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 4; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 6; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 3) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits11(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 6; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 9; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 2] |= 
Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 7; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 1) << 10; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 5; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 7) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits12(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 8; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 2] = Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 8; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 6] = Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits13(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 7) << 10; - 
values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 7; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 1) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 9; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 6; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 11; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits14(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 3) << 12; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 10; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 3) << 12; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i 
+ 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 10; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits15(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 1) << 14; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 13; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 7) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 11; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 10; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 9; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits16(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]); - values[i + 1] = Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]); - 
values[i + 2] = Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]); - values[i + 3] = Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]); - values[i + 5] = Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]); - values[i + 6] = Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]); - values[i + 7] = Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits17(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 9; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 10; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 11; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 13; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 7) << 14; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 15; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - 
values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 1) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits18(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 12; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 14; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 3) << 16; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 12; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 14; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 3) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits19(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 11; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 
0x1f) << 14; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 17; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 9; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 15; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 1) << 18; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 13; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 7) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits20(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 16; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 2] = Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 16; - 
values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 16; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 6] = Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits21(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 13; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 7) << 18; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 15; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 1) << 20; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 17; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 9; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 5] = 
(Byte.toUnsignedLong(buf[off++]) & 0x7f) << 14; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 19; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 11; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits22(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 3) << 20; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 18; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 16; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 3) << 20; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 18; - values[i + 6] |= 
Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits23(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 15; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 1) << 22; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 21; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 13; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 7) << 20; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 19; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 11; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 18; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 17; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 9; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 6] |= 
Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits24(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]); - values[i + 1] = Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]); - values[i + 2] = Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]); - values[i + 3] = Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]); - values[i + 5] = Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]); - values[i + 6] = Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]); - values[i + 7] = Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits25(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 17; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 9; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 1] = 
(Byte.toUnsignedLong(buf[off++]) & 0x7f) << 18; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 19; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 11; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 20; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 21; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 13; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 7) << 22; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 23; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 15; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 1) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits26(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 1] = 
(Byte.toUnsignedLong(buf[off++]) & 0x3f) << 20; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 22; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 3) << 24; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 20; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 22; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 3) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits27(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 19; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 11; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 1] = 
(Byte.toUnsignedLong(buf[off++]) & 0x1f) << 22; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 25; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 17; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 9; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 20; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 23; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 15; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 1) << 26; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 21; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 13; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 7) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits28(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 0] |= 
Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 24; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 2] = Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 24; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 24; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 6] = Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits29(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 21; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 13; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 
0] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 7) << 26; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 23; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 15; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 1) << 28; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 25; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 17; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 9; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 22; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 27; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 19; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 11; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits30(final long[] values, final int i, 
final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 3) << 28; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 26; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 24; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 4] = Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 3) << 28; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 26; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 24; - values[i + 7] |= 
Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits31(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = Byte.toUnsignedLong(buf[off++]) << 23; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 15; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 1) << 30; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 29; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 21; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 13; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 7) << 28; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 27; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 19; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 11; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 26; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 
0x3f) << 25; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 17; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 9; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits32(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]); - values[i + 1] = (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]); - values[i + 2] = (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]); - values[i + 3] = (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]); - values[i + 5] = (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]); - values[i + 6] = (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 
6] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]); - values[i + 7] = (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits33(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 25; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 17; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 9; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 26; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 27; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 19; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 11; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 28; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 29; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 21; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 13; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 5] = 
(Byte.toUnsignedLong(buf[off++]) & 7) << 30; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 31; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 23; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 15; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 1) << 32; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits34(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 28; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 30; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 3) << 32; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 3] |= 
Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 28; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 30; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 3) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]); - } - - static void unpackBits35(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 27; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 19; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 11; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 30; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) 
>>> 2; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 33; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 25; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 17; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 9; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 28; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 31; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 23; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 15; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 1) << 34; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 29; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 21; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 13; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 7) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits36(final long[] values, final int i, final byte[] buf, int off) { - 
values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 32; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 2] = (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 32; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 32; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 6] = (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 6] 
|= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits37(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 29; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 21; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 13; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 7) << 34; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 31; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 23; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 15; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 1) << 36; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 33; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 25; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 17; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 9; - values[i + 4] |= 
Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 30; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 22; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 35; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 27; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 19; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 11; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits38(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 3) << 36; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 34; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 2] |= 
Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 32; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 3) << 36; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 34; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits39(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 31; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 23; - values[i + 0] |= 
Byte.toUnsignedLong(buf[off++]) << 15; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 1) << 38; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 37; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 29; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 21; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 13; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 7) << 36; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 35; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 27; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 19; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 11; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 34; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 33; - 
values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 25; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 17; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 9; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits40(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]); - values[i + 1] = (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]); - values[i + 2] = (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]); - values[i + 3] = (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 16; - 
values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]); - values[i + 5] = (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]); - values[i + 6] = (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]); - values[i + 7] = (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits41(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 33; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 25; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 17; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 9; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 34; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 35; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 27; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 19; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) 
<< 11; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 36; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 37; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 29; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 21; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 13; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 7) << 38; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 39; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 31; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 23; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 15; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 1) << 40; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits42(final long[] values, final int i, final byte[] buf, int off) { - values[i + 
0] = (Byte.toUnsignedLong(buf[off++])) << 34; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 36; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 38; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 3) << 40; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 34; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 36; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12; - 
values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 38; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 3) << 40; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits43(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 35; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 27; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 19; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 11; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 38; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 41; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 33; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 25; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 17; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 9; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 2] |= 
Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 36; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 39; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 31; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 23; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 15; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 1) << 42; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 34; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 37; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 29; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 21; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 13; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 7) << 40; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits44(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = 
(Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 40; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 2] = (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 40; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 40; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 5] 
|= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 6] = (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 40; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits45(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 37; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 29; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 21; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 13; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 7) << 42; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 34; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 39; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 31; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 23; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 15; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 2] |= 
Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 1) << 44; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 41; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 33; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 25; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 17; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 9; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 38; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 22; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 43; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 35; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 27; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 19; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 11; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 40; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - 
static void unpackBits46(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 38; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 3) << 44; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 42; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 34; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 40; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 38; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 5] = 
(Byte.toUnsignedLong(buf[off++]) & 3) << 44; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 42; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 34; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 40; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits47(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 39; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 31; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 23; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 15; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 1) << 46; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 38; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 1] |= 
Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 45; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 37; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 29; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 21; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 13; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 7) << 44; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 43; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 35; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 27; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 19; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 11; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 42; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 34; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 41; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 33; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 25; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 17; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 
9; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 40; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits48(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]); - values[i + 1] = (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]); - values[i + 2] = (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]); - values[i + 3] = (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - values[i + 4] = 
(Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]); - values[i + 5] = (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]); - values[i + 6] = (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]); - values[i + 7] = (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits49(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 41; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 33; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 25; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 17; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 9; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 42; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 34; - 
values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 43; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 35; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 27; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 19; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 11; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 44; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 45; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 37; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 29; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 21; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 13; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 7) << 46; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 38; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 6] = 
(Byte.toUnsignedLong(buf[off++]) & 3) << 47; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 39; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 31; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 23; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 15; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 1) << 48; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits50(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 42; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 34; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 44; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 46; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 38; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 2] |= 
Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 3) << 48; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 42; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 34; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 44; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 46; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 38; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 3) << 48; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; - 
values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits51(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 43; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 35; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 27; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 19; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 11; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 46; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 38; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 49; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 41; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 33; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 25; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 17; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 9; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 44; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= 
Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 47; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 39; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 31; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 23; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 15; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 1) << 50; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 42; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 34; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 45; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 37; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 29; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 21; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 13; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 7) << 48; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits52(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 44; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 0] |= 
(Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 48; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 2] = (Byte.toUnsignedLong(buf[off++])) << 44; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 48; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 44; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 48; - values[i + 
5] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 6] = (Byte.toUnsignedLong(buf[off++])) << 44; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 48; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits53(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 45; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 37; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 29; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 21; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 13; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 7) << 50; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 42; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 34; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 1] |= 
Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 47; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 39; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 31; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 23; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 15; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 1) << 52; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 44; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 49; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 41; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 33; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 25; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 17; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 9; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 46; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 38; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 
51; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 43; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 35; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 27; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 19; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 11; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 48; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits54(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 46; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 38; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 3) << 52; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 44; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 50; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 42; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 34; - 
values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 48; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 46; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 38; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 3) << 52; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 44; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 50; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 42; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 34; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) 
<< 2; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 48; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]); - } - - static void unpackBits55(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 47; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 39; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 31; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 23; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 15; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 1) << 54; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 46; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 38; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 53; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 45; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 37; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 29; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 21; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 13; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 7) << 52; - 
values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 44; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 51; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 43; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 35; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 27; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 19; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 11; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 50; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 42; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 34; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 49; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 41; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 33; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 25; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 17; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 9; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 48; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 7] |= 
(Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits56(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 48; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]); - values[i + 1] = (Byte.toUnsignedLong(buf[off++])) << 48; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]); - values[i + 2] = (Byte.toUnsignedLong(buf[off++])) << 48; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]); - values[i + 3] = (Byte.toUnsignedLong(buf[off++])) << 48; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - values[i + 
4] = (Byte.toUnsignedLong(buf[off++])) << 48; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]); - values[i + 5] = (Byte.toUnsignedLong(buf[off++])) << 48; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]); - values[i + 6] = (Byte.toUnsignedLong(buf[off++])) << 48; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]); - values[i + 7] = (Byte.toUnsignedLong(buf[off++])) << 48; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits57(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 49; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 41; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 33; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 25; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 
17; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 9; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 50; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 42; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 34; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 51; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 43; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 35; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 27; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 19; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 11; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 52; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 44; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 53; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 45; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 37; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 29; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 21; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 13; - values[i + 4] |= 
Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 7) << 54; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 46; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 38; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 55; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 47; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 39; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 31; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 23; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 15; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 1) << 56; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 48; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits58(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 50; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 42; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 34; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 0] |= 
Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 52; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 44; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 54; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 46; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 38; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 3) << 56; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 48; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 50; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 42; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 34; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - 
values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 52; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 44; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 54; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 46; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 38; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 3) << 56; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 48; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]); - } - - static void unpackBits59(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 51; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 43; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 35; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 27; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 19; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 11; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 
1] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 54; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 46; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 38; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 57; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 49; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 41; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 33; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 25; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 17; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 9; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 52; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 44; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 55; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 47; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 39; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 31; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 23; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 15; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 5] = 
(Byte.toUnsignedLong(buf[off++]) & 1) << 58; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 50; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 42; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 34; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 53; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 45; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 37; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 29; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 21; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 13; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 7) << 56; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 48; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits60(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 52; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 44; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 0] |= 
Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 56; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 48; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 2] = (Byte.toUnsignedLong(buf[off++])) << 52; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 44; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 56; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 48; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 52; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 44; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 56; - 
values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 48; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 6] = (Byte.toUnsignedLong(buf[off++])) << 52; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 44; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 56; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 48; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits61(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 53; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 45; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 37; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 29; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 21; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 13; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 7) << 58; - values[i + 1] |= 
(Byte.toUnsignedLong(buf[off++])) << 50; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 42; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 34; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 55; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 47; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 39; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 31; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 23; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 15; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 1) << 60; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 52; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 44; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 57; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 49; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 41; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 33; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 25; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 17; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 9; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - 
values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 54; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 46; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 38; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 3) << 59; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 51; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 43; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 35; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 27; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 19; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 11; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 56; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 48; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits62(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 54; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 46; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 38; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i 
+ 0] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 3) << 60; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 52; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 44; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 58; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 50; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 42; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 34; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 56; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 48; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]); - - values[i + 4] = (Byte.toUnsignedLong(buf[off++])) << 54; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 46; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 38; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) 
<< 6; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 3) << 60; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 52; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 44; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 58; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 50; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 42; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 34; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 56; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 48; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - - static void unpackBits63(final long[] values, final int i, final byte[] buf, int off) { - values[i + 0] = (Byte.toUnsignedLong(buf[off++])) << 55; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 47; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 39; - values[i + 0] |= (Byte.toUnsignedLong(buf[off++])) << 31; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 23; - 
values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 15; - values[i + 0] |= Byte.toUnsignedLong(buf[off++]) << 7; - values[i + 0] |= Byte.toUnsignedLong(buf[off]) >>> 1; - - values[i + 1] = (Byte.toUnsignedLong(buf[off++]) & 1) << 62; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 54; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 46; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 38; - values[i + 1] |= (Byte.toUnsignedLong(buf[off++])) << 30; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 22; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 14; - values[i + 1] |= Byte.toUnsignedLong(buf[off++]) << 6; - values[i + 1] |= Byte.toUnsignedLong(buf[off]) >>> 2; - - values[i + 2] = (Byte.toUnsignedLong(buf[off++]) & 3) << 61; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 53; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 45; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 37; - values[i + 2] |= (Byte.toUnsignedLong(buf[off++])) << 29; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 21; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 13; - values[i + 2] |= Byte.toUnsignedLong(buf[off++]) << 5; - values[i + 2] |= Byte.toUnsignedLong(buf[off]) >>> 3; - - values[i + 3] = (Byte.toUnsignedLong(buf[off++]) & 7) << 60; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 52; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 44; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 36; - values[i + 3] |= (Byte.toUnsignedLong(buf[off++])) << 28; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 20; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 12; - values[i + 3] |= Byte.toUnsignedLong(buf[off++]) << 4; - values[i + 3] |= Byte.toUnsignedLong(buf[off]) >>> 4; - - values[i + 4] = (Byte.toUnsignedLong(buf[off++]) & 0xf) << 59; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 51; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 43; - values[i + 4] |= 
(Byte.toUnsignedLong(buf[off++])) << 35; - values[i + 4] |= (Byte.toUnsignedLong(buf[off++])) << 27; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 19; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 11; - values[i + 4] |= Byte.toUnsignedLong(buf[off++]) << 3; - values[i + 4] |= Byte.toUnsignedLong(buf[off]) >>> 5; - - values[i + 5] = (Byte.toUnsignedLong(buf[off++]) & 0x1f) << 58; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 50; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 42; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 34; - values[i + 5] |= (Byte.toUnsignedLong(buf[off++])) << 26; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 18; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 10; - values[i + 5] |= Byte.toUnsignedLong(buf[off++]) << 2; - values[i + 5] |= Byte.toUnsignedLong(buf[off]) >>> 6; - - values[i + 6] = (Byte.toUnsignedLong(buf[off++]) & 0x3f) << 57; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 49; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 41; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 33; - values[i + 6] |= (Byte.toUnsignedLong(buf[off++])) << 25; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 17; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 9; - values[i + 6] |= Byte.toUnsignedLong(buf[off++]) << 1; - values[i + 6] |= Byte.toUnsignedLong(buf[off]) >>> 7; - - values[i + 7] = (Byte.toUnsignedLong(buf[off++]) & 0x7f) << 56; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 48; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 40; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 32; - values[i + 7] |= (Byte.toUnsignedLong(buf[off++])) << 24; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 16; - values[i + 7] |= Byte.toUnsignedLong(buf[off++]) << 8; - values[i + 7] |= Byte.toUnsignedLong(buf[off]); - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/BytesCompactCompressedHashIterator.java 
b/src/main/java/org/apache/datasketches/theta2/BytesCompactCompressedHashIterator.java deleted file mode 100644 index 6a2ddddd7..000000000 --- a/src/main/java/org/apache/datasketches/theta2/BytesCompactCompressedHashIterator.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -/* - * This is to uncompress serial version 4 sketch incrementally - */ -final class BytesCompactCompressedHashIterator implements HashIterator { - private byte[] bytes; - private int offset; - private int entryBits; - private int numEntries; - private int index; - private long previous; - private int offsetBits; - private long[] buffer; - private boolean isBlockMode; - - BytesCompactCompressedHashIterator( - final byte[] bytes, - final int offset, - final int entryBits, - final int numEntries - ) { - this.bytes = bytes; - this.offset = offset; - this.entryBits = entryBits; - this.numEntries = numEntries; - index = -1; - previous = 0; - offsetBits = 0; - buffer = new long[8]; - isBlockMode = numEntries >= 8; - } - - @Override - public long get() { - return buffer[index & 7]; - } - - @Override - public boolean next() { - if (++index == numEntries) { return false; } - if (isBlockMode) { - if ((index & 7) == 0) { - if (numEntries - index >= 8) { - unpack8(); - } else { - isBlockMode = false; - unpack1(); - } - } - } else { - unpack1(); - } - return true; - } - - private void unpack1() { - final int i = index & 7; - BitPacking.unpackBits(buffer, i, entryBits, bytes, offset, offsetBits); - offset += (offsetBits + entryBits) >>> 3; - offsetBits = (offsetBits + entryBits) & 7; - buffer[i] += previous; - previous = buffer[i]; - } - - private void unpack8() { - BitPacking.unpackBitsBlock8(buffer, 0, bytes, offset, entryBits); - offset += entryBits; - for (int i = 0; i < 8; i++) { - buffer[i] += previous; - previous = buffer[i]; - } - } -} diff --git a/src/main/java/org/apache/datasketches/theta2/BytesCompactHashIterator.java b/src/main/java/org/apache/datasketches/theta2/BytesCompactHashIterator.java deleted file mode 100644 index 3586f54c4..000000000 --- a/src/main/java/org/apache/datasketches/theta2/BytesCompactHashIterator.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under 
one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import org.apache.datasketches.common.ByteArrayUtil; - -/* - * This is to iterate over serial version 3 sketch representation - */ -final class BytesCompactHashIterator implements HashIterator { - final private byte[] bytes; - final private int offset; - final private int numEntries; - private int index; - - BytesCompactHashIterator( - final byte[] bytes, - final int offset, - final int numEntries - ) { - this.bytes = bytes; - this.offset = offset; - this.numEntries = numEntries; - index = -1; - } - - @Override - public long get() { - return ByteArrayUtil.getLongLE(bytes, offset + index * Long.BYTES); - } - - @Override - public boolean next() { - return ++index < numEntries; - } -} diff --git a/src/main/java/org/apache/datasketches/theta2/CompactOperations.java b/src/main/java/org/apache/datasketches/theta2/CompactOperations.java deleted file mode 100644 index 719b4b19c..000000000 --- a/src/main/java/org/apache/datasketches/theta2/CompactOperations.java +++ /dev/null @@ -1,388 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; -import static org.apache.datasketches.theta2.PreambleUtil.COMPACT_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.LG_NOM_LONGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.READ_ONLY_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.SER_VER; -import static org.apache.datasketches.theta2.PreambleUtil.SINGLEITEM_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount; -import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID; -import static org.apache.datasketches.theta2.PreambleUtil.extractFlags; -import static org.apache.datasketches.theta2.PreambleUtil.extractLgArrLongs; -import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs; -import static org.apache.datasketches.theta2.PreambleUtil.extractSeedHash; -import static 
org.apache.datasketches.theta2.PreambleUtil.extractSerVer; -import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong; -import static org.apache.datasketches.theta2.PreambleUtil.insertCurCount; -import static org.apache.datasketches.theta2.PreambleUtil.insertFamilyID; -import static org.apache.datasketches.theta2.PreambleUtil.insertFlags; -import static org.apache.datasketches.theta2.PreambleUtil.insertP; -import static org.apache.datasketches.theta2.PreambleUtil.insertPreLongs; -import static org.apache.datasketches.theta2.PreambleUtil.insertSeedHash; -import static org.apache.datasketches.theta2.PreambleUtil.insertSerVer; -import static org.apache.datasketches.theta2.PreambleUtil.insertThetaLong; - -import java.lang.foreign.MemorySegment; -import java.util.Arrays; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.SketchesStateException; - -/** - * @author Lee Rhodes - */ -final class CompactOperations { - - private CompactOperations() {} - - static CompactSketch componentsToCompact( //No error checking - final long thetaLong, - final int curCount, - final short seedHash, - final boolean srcEmpty, - final boolean srcCompact, - final boolean srcOrdered, - final boolean dstOrdered, - final MemorySegment dstWSeg, - final long[] hashArr) //may not be compacted, ordered or unordered, may be null - { - final boolean direct = dstWSeg != null; - final boolean empty = srcEmpty || ((curCount == 0) && (thetaLong == Long.MAX_VALUE)); - final boolean single = (curCount == 1) && (thetaLong == Long.MAX_VALUE); - final long[] hashArrOut; - if (!srcCompact) { - hashArrOut = CompactOperations.compactCache(hashArr, curCount, thetaLong, dstOrdered); - } else { - hashArrOut = hashArr; - } - if (!srcOrdered && dstOrdered && !empty && !single) { - Arrays.sort(hashArrOut); - } - //Note: for empty or single we always output the ordered form. 
- final boolean dstOrderedOut = (empty || single) ? true : dstOrdered; - if (direct) { - final int preLongs = computeCompactPreLongs(empty, curCount, thetaLong); - int flags = READ_ONLY_FLAG_MASK | COMPACT_FLAG_MASK; //always LE - flags |= empty ? EMPTY_FLAG_MASK : 0; - flags |= dstOrderedOut ? ORDERED_FLAG_MASK : 0; - flags |= single ? SINGLEITEM_FLAG_MASK : 0; - - final MemorySegment seg = - loadCompactMemorySegment(hashArrOut, seedHash, curCount, thetaLong, dstWSeg, (byte)flags, preLongs); - return new DirectCompactSketch(seg); - - } else { //Heap - if (empty) { - return EmptyCompactSketch.getInstance(); - } - if (single) { - return new SingleItemSketch(hashArrOut[0], seedHash); - } - return new HeapCompactSketch(hashArrOut, empty, seedHash, curCount, thetaLong, dstOrderedOut); - } - } - - /** - * Heapify or convert a source Theta Sketch MemorySegment image into a heap or target MemorySegment CompactSketch. - * This assumes hashSeed is OK; serVer = 3. - * @param srcSeg the given input source MemorySegment image. Can be Read Only. - * @param dstOrdered the desired ordering of the resulting CompactSketch - * @param dstWSeg Used for the target CompactSketch if it is MemorySegment-based. Must be Writable. - * @return a CompactSketch of the correct form. 
- */ - @SuppressWarnings("unused") - static CompactSketch segmentToCompact( - final MemorySegment srcSeg, - final boolean dstOrdered, - final MemorySegment dstWSeg) - { - //extract Pre0 fields and Flags from srcMem - final int srcPreLongs = extractPreLongs(srcSeg); - final int srcSerVer = extractSerVer(srcSeg); //not used - final int srcFamId = extractFamilyID(srcSeg); - final int srcLgArrLongs = extractLgArrLongs(srcSeg); - final int srcFlags = extractFlags(srcSeg); - final short srcSeedHash = (short) extractSeedHash(srcSeg); - - //srcFlags - final boolean srcReadOnlyFlag = (srcFlags & READ_ONLY_FLAG_MASK) > 0; - final boolean srcEmptyFlag = (srcFlags & EMPTY_FLAG_MASK) > 0; - final boolean srcCompactFlag = (srcFlags & COMPACT_FLAG_MASK) > 0; - final boolean srcOrderedFlag = (srcFlags & ORDERED_FLAG_MASK) > 0; - final boolean srcSingleFlag = (srcFlags & SINGLEITEM_FLAG_MASK) > 0; - - final boolean single = srcSingleFlag - || SingleItemSketch.otherCheckForSingleItem(srcPreLongs, srcSerVer, srcFamId, srcFlags); - - //extract pre1 and pre2 fields - final int curCount = single ? 1 : (srcPreLongs > 1) ? extractCurCount(srcSeg) : 0; - final long thetaLong = (srcPreLongs > 2) ? extractThetaLong(srcSeg) : Long.MAX_VALUE; - - //do some basic checks ... - if (srcEmptyFlag) { assert (curCount == 0) && (thetaLong == Long.MAX_VALUE); } - if (single) { assert (curCount == 1) && (thetaLong == Long.MAX_VALUE); } - checkFamilyAndFlags(srcFamId, srcCompactFlag, srcReadOnlyFlag); - - //dispatch empty and single cases - //Note: for empty and single we always output the ordered form. - final boolean dstOrderedOut = (srcEmptyFlag || single) ? 
true : dstOrdered; - if (srcEmptyFlag) { - if (dstWSeg != null) { - MemorySegment.copy(EmptyCompactSketch.EMPTY_COMPACT_SKETCH_ARR, 0, dstWSeg, JAVA_BYTE, 0, 8); - return new DirectCompactSketch(dstWSeg); - } else { - return EmptyCompactSketch.getInstance(); - } - } - if (single) { - final long hash = srcSeg.get(JAVA_LONG_UNALIGNED, srcPreLongs << 3); - final SingleItemSketch sis = new SingleItemSketch(hash, srcSeedHash); - if (dstWSeg != null) { - MemorySegment.copy(sis.toByteArray(), 0, dstWSeg, JAVA_BYTE, 0, 16); - return new DirectCompactSketch(dstWSeg); - } else { //heap - return sis; - } - } - - //extract hashArr > 1 - final long[] hashArr; - if (srcCompactFlag) { - hashArr = new long[curCount]; - MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, srcPreLongs << 3, hashArr, 0, curCount); - } else { //update sketch, thus hashTable form - final int srcCacheLen = 1 << srcLgArrLongs; - final long[] tempHashArr = new long[srcCacheLen]; - MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, srcPreLongs << 3, tempHashArr, 0, srcCacheLen); - hashArr = compactCache(tempHashArr, curCount, thetaLong, dstOrderedOut); - } - - final int flagsOut = READ_ONLY_FLAG_MASK | COMPACT_FLAG_MASK - | ((dstOrderedOut) ? ORDERED_FLAG_MASK : 0); - - //load the destination. 
- if (dstWSeg != null) { - final MemorySegment tgtSeg = loadCompactMemorySegment(hashArr, srcSeedHash, curCount, thetaLong, dstWSeg, - (byte)flagsOut, srcPreLongs); - return new DirectCompactSketch(tgtSeg); - } else { //heap - return new HeapCompactSketch(hashArr, srcEmptyFlag, srcSeedHash, curCount, thetaLong, - dstOrderedOut); - } - } - - private static final void checkFamilyAndFlags( - final int srcFamId, - final boolean srcCompactFlag, - final boolean srcReadOnlyFlag) { - final Family srcFamily = Family.idToFamily(srcFamId); - if (srcCompactFlag) { - if ((srcFamily == Family.COMPACT) && srcReadOnlyFlag) { return; } - } else { - if (srcFamily == Family.ALPHA) { return; } - if (srcFamily == Family.QUICKSELECT) { return; } - } - throw new SketchesArgumentException( - "Possible Corruption: Family does not match flags: Family: " - + srcFamily.toString() - + ", Compact Flag: " + srcCompactFlag - + ", ReadOnly Flag: " + srcReadOnlyFlag); - } - - //All arguments must be valid and correct including flags. - // Used as helper to create byte arrays as well as loading MemorySegment for direct compact sketches - //Input must be writable, return can be Read Only - static final MemorySegment loadCompactMemorySegment( - final long[] compactHashArr, - final short seedHash, - final int curCount, - final long thetaLong, - final MemorySegment dstWSeg, - final byte flags, - final int preLongs) - { - assert (dstWSeg != null) && (compactHashArr != null); - final int outLongs = preLongs + curCount; - final int outBytes = outLongs << 3; - final int dstBytes = (int) dstWSeg.byteSize(); - if (outBytes > dstBytes) { - throw new SketchesArgumentException("Insufficient Space in MemorySegment: " + dstBytes - + ", Need: " + outBytes); - } - final byte famID = (byte) Family.COMPACT.getID(); - - //Caution: The following loads directly into a MemorySegment without creating a heap byte[] first, - // which would act as a pre-clearing, initialization mechanism. 
So it is important to make sure - // that all fields are initialized, even those that are not used by the CompactSketch. - // Otherwise, uninitialized fields could be filled with off-heap garbage, which could cause - // other problems downstream if those fields are not filtered out first. - // As written below, all fields are initialized avoiding an extra copy. - - //The first 8 bytes (pre0) - insertPreLongs(dstWSeg, preLongs); //RF not used = 0 - insertSerVer(dstWSeg, SER_VER); - insertFamilyID(dstWSeg, famID); - //The following initializes the lgNomLongs and lgArrLongs to 0. - //They are not used in CompactSketches. - dstWSeg.set(JAVA_SHORT_UNALIGNED, LG_NOM_LONGS_BYTE, (short)0); - insertFlags(dstWSeg, flags); - insertSeedHash(dstWSeg, seedHash); - - if ((preLongs == 1) && (curCount == 1)) { //singleItem, theta = 1.0 - dstWSeg.set(JAVA_LONG_UNALIGNED, 8, compactHashArr[0]); - return dstWSeg; - } - if (preLongs > 1) { - insertCurCount(dstWSeg, curCount); - insertP(dstWSeg, (float) 1.0); - } - if (preLongs > 2) { - insertThetaLong(dstWSeg, thetaLong); - } - if (curCount > 0) { //theta could be < 1.0. - //dstWSeg.putLongArray(preLongs << 3, compactHashArr, 0, curCount); - MemorySegment.copy(compactHashArr, 0, dstWSeg, JAVA_LONG_UNALIGNED, preLongs << 3, curCount); - } - return dstWSeg; //if prelongs == 3 & curCount == 0, theta could be < 1.0. This can be RO - } - - /** - * Copies then compacts, cleans, and may sort the resulting array. - * The source cache can be a hash table with interstitial zeros or - * "dirty" values, which are hash values greater than theta. - * These can be generated by the Alpha sketch. - * @param srcCache anything - * @param curCount must be correct - * @param thetaLong The correct - * thetaLong. - * @param dstOrdered true if output array must be sorted - * @return the compacted array. 
- */ - static final long[] compactCache(final long[] srcCache, final int curCount, - final long thetaLong, final boolean dstOrdered) { - if (curCount == 0) { - return new long[0]; - } - final long[] cacheOut = new long[curCount]; - final int len = srcCache.length; - int j = 0; - for (int i = 0; i < len; i++) { //scan the full srcCache - final long v = srcCache[i]; - if ((v <= 0L) || (v >= thetaLong) ) { continue; } //ignoring zeros or dirty values - cacheOut[j++] = v; - } - if (j < curCount) { - throw new SketchesStateException( - "Possible Corruption: curCount parameter is incorrect."); - } - if (dstOrdered && (curCount > 1)) { - Arrays.sort(cacheOut); - } - return cacheOut; - } - - /* - * The truth table for empty, curCount and theta when compacting is as follows: - *
              -   * Num Theta CurCount Empty State    Name, Comments
              -   *  0    1.0     0      T     OK     EMPTY: The Normal Empty State
              -   *  1    1.0     0      F   Internal This can occur internally as the result of an intersection of two exact,
              -   *                                   disjoint sets, or AnotB of two exact, identical sets. There is no probability
              -   *                                   distribution, so this is converted internally to EMPTY {1.0, 0, T}.
              -   *                                   This is handled in SetOperation.createCompactSketch().
              -   *  2    1.0    !0      T   Error    Empty=T and curCount !0 should never coexist.
              -   *                                   This is checked in all compacting operations.
              -   *  3    1.0    !0      F     OK     EXACT: This corresponds to a sketch in exact mode
              -   *  4   <1.0     0      T   Internal This can be an initial UpdateSketch state if p < 1.0,
              -   *                                   so change theta to 1.0. Return {Th = 1.0, 0, T}.
              -   *                                   This is handled in UpdateSketch.compact() and toByteArray().
              -   *  5   <1.0     0      F     OK     This can result from set operations
              -   *  6   <1.0    !0      T   Error    Empty=T and curCount !0 should never coexist.
              -   *                                   This is checked in all compacting operations.
              -   *  7   <1.0    !0      F     OK     This corresponds to a sketch in estimation mode
              -   * 
              - * #4 is handled by correctThetaOnCompat(boolean, int) (below). - * #2 & #6 handled by checkIllegalCurCountAndEmpty(boolean, int) - */ - - /** - * This corrects a temporary anomalous condition where compact() is called on an UpdateSketch - * that was initialized with p < 1.0 and update() was never called. In this case Theta < 1.0, - * curCount = 0, and empty = true. The correction is to change Theta to 1.0, which makes the - * returning sketch empty. This should only be used in the compaction or serialization of an - * UpdateSketch. - * @param empty the given empty state - * @param curCount the given curCount - * @param thetaLong the given thetaLong - * @return thetaLong - */ - static final long correctThetaOnCompact(final boolean empty, final int curCount, - final long thetaLong) { //handles #4 above - return (empty && (curCount == 0)) ? Long.MAX_VALUE : thetaLong; - } - - /** - * This checks for the illegal condition where curCount > 0 and the state of - * empty = true. This check can be used anywhere a sketch is returned or a sketch is created - * from complete arguments. - * @param empty the given empty state - * @param curCount the given current count - */ //This handles #2 and #6 above - static final void checkIllegalCurCountAndEmpty(final boolean empty, final int curCount) { - if (empty && (curCount != 0)) { //this handles #2 and #6 above - throw new SketchesStateException("Illegal State: Empty=true and Current Count != 0."); - } - } - - /** - * This compute number of preamble longs for a compact sketch based on empty, - * curCount and thetaLong. - * This also accommodates for EmptyCompactSketch and SingleItemSketch. 
- * @param empty The given empty state - * @param curCount The given current count (retained entries) - * @param thetaLong the current thetaLong - * @return the number of preamble longs - */ - static final int computeCompactPreLongs(final boolean empty, final int curCount, - final long thetaLong) { - return (thetaLong < Long.MAX_VALUE) ? 3 : empty ? 1 : (curCount > 1) ? 2 : 1; - } - - /** - * This checks for the singleItem Compact Sketch. - * @param empty the given empty state - * @param curCount the given curCount - * @param thetaLong the given thetaLong - * @return true if notEmpty, curCount = 1 and theta = 1.0; - */ - static final boolean isSingleItem(final boolean empty, final int curCount, - final long thetaLong) { - return !empty && (curCount == 1) && (thetaLong == Long.MAX_VALUE); - } -} - diff --git a/src/main/java/org/apache/datasketches/theta2/CompactSketch.java b/src/main/java/org/apache/datasketches/theta2/CompactSketch.java deleted file mode 100644 index 38fdd29cc..000000000 --- a/src/main/java/org/apache/datasketches/theta2/CompactSketch.java +++ /dev/null @@ -1,492 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; -import static org.apache.datasketches.common.ByteArrayUtil.getShortLE; -import static org.apache.datasketches.common.Family.idToFamily; -import static org.apache.datasketches.theta2.PreambleUtil.COMPACT_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.FLAGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.READ_ONLY_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.SEED_HASH_SHORT; -import static org.apache.datasketches.theta2.PreambleUtil.extractEntryBitsV4; -import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID; -import static org.apache.datasketches.theta2.PreambleUtil.extractFlags; -import static org.apache.datasketches.theta2.PreambleUtil.extractNumEntriesBytesV4; -import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs; -import static org.apache.datasketches.theta2.PreambleUtil.extractSeedHash; -import static org.apache.datasketches.theta2.PreambleUtil.extractSerVer; -import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLongV4; -import static org.apache.datasketches.theta2.PreambleUtil.wholeBytesToHoldBits; -import static org.apache.datasketches.theta2.SingleItemSketch.otherCheckForSingleItem; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; - -/** - * The parent class of all the CompactSketches. 
CompactSketches are never created directly. - * They are created as a result of the compact() method of an UpdateSketch, a result of a - * getResult() of a SetOperation, or from a heapify method. - * - *

              A CompactSketch is the simplest form of a Theta Sketch. It consists of a compact list - * (i.e., no intervening spaces) of hash values, which may be ordered or not, a value for theta - * and a seed hash. A CompactSketch is immutable (read-only), - * and the space required when stored is only the space required for the hash values and 8 to 24 - * bytes of preamble. An empty CompactSketch consumes only 8 bytes.

              - * - * @author Lee Rhodes - */ -public abstract class CompactSketch extends Sketch { - - /** - * Heapify takes a CompactSketch image in a MemorySegment and instantiates an on-heap CompactSketch. - * - *

              The resulting sketch will not retain any link to the source MemorySegment and all of its data will be - * copied to the heap CompactSketch.

              - * - *

              This method assumes that the sketch image was created with the correct hash seed, so it is not checked. - * The resulting on-heap CompactSketch will be given the seedHash derived from the given sketch image. - * However, Serial Version 1 sketch images do not have a seedHash field, - * so the resulting heapified CompactSketch will be given the hash of the DEFAULT_UPDATE_SEED.

              - * - * @param srcSeg an image of a CompactSketch. - * @return a CompactSketch on the heap. - */ - public static CompactSketch heapify(final MemorySegment srcSeg) { - return heapify(srcSeg, Util.DEFAULT_UPDATE_SEED, false); - } - - /** - * Heapify takes a CompactSketch image in a MemorySegment and instantiates an on-heap CompactSketch. - * - *

              The resulting sketch will not retain any link to the source MemorySegment and all of its data will be - * copied to the heap CompactSketch.

              - * - *

              This method checks if the given expectedSeed was used to create the source MemorySegment image. - * However, SerialVersion 1 sketch images cannot be checked as they don't have a seedHash field, - * so the resulting heapified CompactSketch will be given the hash of the expectedSeed.

              - * - * @param srcSeg an image of a CompactSketch that was created using the given expectedSeed. - * @param expectedSeed the seed used to validate the given MemorySegment image. - * See Update Hash Seed. - * @return a CompactSketch on the heap. - */ - public static CompactSketch heapify(final MemorySegment srcSeg, final long expectedSeed) { - return heapify(srcSeg, expectedSeed, true); - } - - private static CompactSketch heapify(final MemorySegment srcSeg, final long seed, final boolean enforceSeed) { - final int serVer = extractSerVer(srcSeg); - final int familyID = extractFamilyID(srcSeg); - final Family family = idToFamily(familyID); - if (family != Family.COMPACT) { - throw new IllegalArgumentException("Corrupted: " + family + " is not Compact!"); - } - if (serVer == 4) { - return heapifyV4(srcSeg, seed, enforceSeed); - } - if (serVer == 3) { - final int flags = extractFlags(srcSeg); - final boolean srcOrdered = (flags & ORDERED_FLAG_MASK) != 0; - final boolean empty = (flags & EMPTY_FLAG_MASK) != 0; - if (enforceSeed && !empty) { PreambleUtil.checkSegmentSeedHash(srcSeg, seed); } - return CompactOperations.segmentToCompact(srcSeg, srcOrdered, null); - } - //not SerVer 3, assume compact stored form - final short seedHash = Util.computeSeedHash(seed); - if (serVer == 1) { - return ForwardCompatibility.heapify1to3(srcSeg, seedHash); - } - if (serVer == 2) { - return ForwardCompatibility.heapify2to3(srcSeg, - enforceSeed ? seedHash : (short) extractSeedHash(srcSeg)); - } - throw new SketchesArgumentException("Unknown Serialization Version: " + serVer); - } - - /** - * Wrap takes the CompactSketch image in given MemorySegment and refers to it directly. - * There is no data copying onto the java heap. - * The wrap operation enables fast read-only merging and access to all the public read-only API. - * - *

              Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have - * been explicitly stored as direct sketches can be wrapped. - * Wrapping earlier serial version sketches will result in a heapify operation. - * These early versions were never designed to "wrap".

              - * - *

              Wrapping any subclass of this class that is empty or contains only a single item will - * result in heapified forms of empty and single item sketch respectively. - * This is actually faster and consumes less overall space.

              - * - *

              This method assumes that the sketch image was created with the correct hash seed, so it is not checked. - * However, Serial Version 1 sketch images do not have a seedHash field, - * so the resulting on-heap CompactSketch will be given the hash of the DEFAULT_UPDATE_SEED.

              - * - * @param srcSeg an image of a Sketch. - * @return a CompactSketch backed by the given MemorySegment except as above. - */ - public static CompactSketch wrap(final MemorySegment srcSeg) { - return wrap(srcSeg, Util.DEFAULT_UPDATE_SEED, false); - } - - /** - * Wrap takes the sketch image in the given MemorySegment and refers to it directly. - * There is no data copying onto the java heap. - * The wrap operation enables fast read-only merging and access to all the public read-only API. - * - *

              Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have - * been explicitly stored as direct sketches can be wrapped. - * Wrapping earlier serial version sketches will result in a heapify operation. - * These early versions were never designed to "wrap".

              - * - *

              Wrapping any subclass of this class that is empty or contains only a single item will - * result in heapified forms of empty and single item sketch respectively. - * This is actually faster and consumes less overall space.

              - * - *

              This method checks if the given expectedSeed was used to create the source MemorySegment image. - * However, SerialVersion 1 sketches cannot be checked as they don't have a seedHash field, - * so the resulting heapified CompactSketch will be given the hash of the expectedSeed.

              - * - * @param srcSeg an image of a Sketch that was created using the given expectedSeed. - * @param expectedSeed the seed used to validate the given MemorySegment image. - * See Update Hash Seed. - * @return a CompactSketch backed by the given MemorySegment except as above. - */ - public static CompactSketch wrap(final MemorySegment srcSeg, final long expectedSeed) { - return wrap(srcSeg, expectedSeed, true); - } - - private static CompactSketch wrap(final MemorySegment srcSeg, final long seed, final boolean enforceSeed) { - final int serVer = extractSerVer(srcSeg); - final int familyID = extractFamilyID(srcSeg); - final Family family = Family.idToFamily(familyID); - if (family != Family.COMPACT) { - throw new IllegalArgumentException("Corrupted: " + family + " is not Compact!"); - } - final short seedHash = Util.computeSeedHash(seed); - - if (serVer == 4) { - return DirectCompactCompressedSketch.wrapInstance(srcSeg, - enforceSeed ? seedHash : (short) extractSeedHash(srcSeg)); - } - else if (serVer == 3) { - if (PreambleUtil.isEmptyFlag(srcSeg)) { - return EmptyCompactSketch.getHeapInstance(srcSeg); - } - if (otherCheckForSingleItem(srcSeg)) { - return SingleItemSketch.heapify(srcSeg, enforceSeed ? seedHash : (short) extractSeedHash(srcSeg)); - } - //not empty & not singleItem - final int flags = extractFlags(srcSeg); - final boolean compactFlag = (flags & COMPACT_FLAG_MASK) > 0; - if (!compactFlag) { - throw new SketchesArgumentException( - "Corrupted: COMPACT family sketch image must have compact flag set"); - } - final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0; - if (!readOnly) { - throw new SketchesArgumentException( - "Corrupted: COMPACT family sketch image must have Read-Only flag set"); - } - return DirectCompactSketch.wrapInstance(srcSeg, - enforceSeed ? 
seedHash : (short) extractSeedHash(srcSeg)); - } //end of serVer 3 - else if (serVer == 1) { - return ForwardCompatibility.heapify1to3(srcSeg, seedHash); - } - else if (serVer == 2) { - return ForwardCompatibility.heapify2to3(srcSeg, - enforceSeed ? seedHash : (short) extractSeedHash(srcSeg)); - } - throw new SketchesArgumentException( - "Corrupted: Serialization Version " + serVer + " not recognized."); - } - - /** - * Wrap takes the sketch image in the given MemorySegment and refers to it directly. - * There is no data copying onto the java heap. - * The wrap operation enables fast read-only merging and access to all the public read-only API. - * - *

              Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have - * been explicitly stored as direct sketches can be wrapped. - * Wrapping earlier serial version sketches will result in a heapify operation. - * These early versions were never designed to "wrap".

              - * - *

              Wrapping any subclass of this class that is empty or contains only a single item will - * result in heapified forms of empty and single item sketch respectively. - * This is actually faster and consumes less overall space.

              - * - *

              This method checks if the DEFAULT_UPDATE_SEED was used to create the source MemorySegment image. - * Note that SerialVersion 1 sketches cannot be checked as they don't have a seedHash field, - * so the resulting heapified CompactSketch will be given the hash of DEFAULT_UPDATE_SEED.

              - * - * @param bytes a byte array image of a Sketch that was created using the DEFAULT_UPDATE_SEED. - * - * @return a CompactSketch backed by the given MemorySegment except as above. - */ - public static CompactSketch wrap(final byte[] bytes) { - return wrap(bytes, Util.DEFAULT_UPDATE_SEED, false); - } - - /** - * Wrap takes the sketch image in the given MemorySegment and refers to it directly. - * There is no data copying onto the java heap. - * The wrap operation enables fast read-only merging and access to all the public read-only API. - * - *

              Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have - * been explicitly stored as direct sketches can be wrapped. - * Wrapping earlier serial version sketches will result in a heapify operation. - * These early versions were never designed to "wrap".

              - * - *

              Wrapping any subclass of this class that is empty or contains only a single item will - * result in heapified forms of empty and single item sketch respectively. - * This is actually faster and consumes less overall space.

              - * - *

              This method checks if the given expectedSeed was used to create the source MemorySegment image. - * Note that SerialVersion 1 sketches cannot be checked as they don't have a seedHash field, - * so the resulting heapified CompactSketch will be given the hash of the expectedSeed.

              - * - * @param bytes a byte array image of a Sketch that was created using the given expectedSeed. - * @param expectedSeed the seed used to validate the given MemorySegment image. - * See Update Hash Seed. - * @return a CompactSketch backed by the given MemorySegment except as above. - */ - public static CompactSketch wrap(final byte[] bytes, final long expectedSeed) { - return wrap(bytes, expectedSeed, true); - } - - private static CompactSketch wrap(final byte[] bytes, final long seed, final boolean enforceSeed) { - final int serVer = bytes[PreambleUtil.SER_VER_BYTE]; - final int familyId = bytes[PreambleUtil.FAMILY_BYTE]; - final Family family = Family.idToFamily(familyId); - if (family != Family.COMPACT) { - throw new IllegalArgumentException("Corrupted: " + family + " is not Compact!"); - } - final short seedHash = Util.computeSeedHash(seed); - if (serVer == 4) { - return WrappedCompactCompressedSketch.wrapInstance(bytes, seedHash); - } else if (serVer == 3) { - final int flags = bytes[FLAGS_BYTE]; - if ((flags & EMPTY_FLAG_MASK) > 0) { - return EmptyCompactSketch.getHeapInstance(MemorySegment.ofArray(bytes)); - } - final int preLongs = bytes[PREAMBLE_LONGS_BYTE]; - if (otherCheckForSingleItem(preLongs, serVer, familyId, flags)) { - return SingleItemSketch.heapify(MemorySegment.ofArray(bytes), enforceSeed ? seedHash : getShortLE(bytes, SEED_HASH_SHORT)); - } - //not empty & not singleItem - final boolean compactFlag = (flags & COMPACT_FLAG_MASK) > 0; - if (!compactFlag) { - throw new SketchesArgumentException( - "Corrupted: COMPACT family sketch image must have compact flag set"); - } - final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0; - if (!readOnly) { - throw new SketchesArgumentException( - "Corrupted: COMPACT family sketch image must have Read-Only flag set"); - } - return WrappedCompactSketch.wrapInstance(bytes, - enforceSeed ? 
seedHash : getShortLE(bytes, SEED_HASH_SHORT)); - } else if (serVer == 1) { - return ForwardCompatibility.heapify1to3(MemorySegment.ofArray(bytes), seedHash); - } else if (serVer == 2) { - return ForwardCompatibility.heapify2to3(MemorySegment.ofArray(bytes), - enforceSeed ? seedHash : getShortLE(bytes, SEED_HASH_SHORT)); - } - throw new SketchesArgumentException( - "Corrupted: Serialization Version " + serVer + " not recognized."); - } - - //Sketch Overrides - - @Override - public abstract CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg); - - @Override - public int getCompactBytes() { - return getCurrentBytes(); - } - - @Override - int getCurrentDataLongs() { - return getRetainedEntries(true); - } - - @Override - public Family getFamily() { - return Family.COMPACT; - } - - @Override - public boolean hasMemorySegment() { - return (this instanceof DirectCompactSketch && ((DirectCompactSketch)this).hasMemorySegment()); - } - - @Override - public boolean isCompact() { - return true; - } - - @Override - public boolean isDirect() { - return (this instanceof DirectCompactSketch && ((DirectCompactSketch)this).isDirect()); - } - - @Override - public boolean isSameResource(final MemorySegment that) { - return (this instanceof DirectCompactSketch && ((DirectCompactSketch)this).isSameResource(that)); - } - - @Override - public double getEstimate() { - return Sketch.estimate(getThetaLong(), getRetainedEntries()); - } - - /** - * gets the sketch as a compressed byte array - * @return the sketch as a compressed byte array - */ - public byte[] toByteArrayCompressed() { - if (!isOrdered() || getRetainedEntries() == 0 || (getRetainedEntries() == 1 && !isEstimationMode())) { - return toByteArray(); - } - return toByteArrayV4(); - } - - private int computeMinLeadingZeros() { - // compression is based on leading zeros in deltas between ordered hash values - // assumes ordered sketch - long previous = 0; - long ored = 0; - final HashIterator it = iterator(); - 
while (it.next()) { - final long delta = it.get() - previous; - ored |= delta; - previous = it.get(); - } - return Long.numberOfLeadingZeros(ored); - } - - private byte[] toByteArrayV4() { - final int preambleLongs = isEstimationMode() ? 2 : 1; - final int entryBits = 64 - computeMinLeadingZeros(); - final int compressedBits = entryBits * getRetainedEntries(); - - // store num_entries as whole bytes since whole-byte blocks will follow (most probably) - final int numEntriesBytes = wholeBytesToHoldBits(32 - Integer.numberOfLeadingZeros(getRetainedEntries())); - - final int sizeBytes = preambleLongs * Long.BYTES + numEntriesBytes + wholeBytesToHoldBits(compressedBits); - final byte[] bytes = new byte[sizeBytes]; - final MemorySegment wseg = MemorySegment.ofArray(bytes); - int offsetBytes = 0; - wseg.set(JAVA_BYTE, offsetBytes++, (byte) preambleLongs); - wseg.set(JAVA_BYTE, offsetBytes++, (byte) 4); // to do: add constant - wseg.set(JAVA_BYTE, offsetBytes++, (byte) Family.COMPACT.getID()); - wseg.set(JAVA_BYTE, offsetBytes++, (byte) entryBits); - wseg.set(JAVA_BYTE, offsetBytes++, (byte) numEntriesBytes); - wseg.set(JAVA_BYTE, offsetBytes++, (byte) (COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | ORDERED_FLAG_MASK)); - wseg.set(JAVA_SHORT_UNALIGNED, offsetBytes, getSeedHash()); - offsetBytes += Short.BYTES; - if (isEstimationMode()) { - wseg.set(JAVA_LONG_UNALIGNED, offsetBytes, getThetaLong()); - offsetBytes += Long.BYTES; - } - int numEntries = getRetainedEntries(); - for (int i = 0; i < numEntriesBytes; i++) { - wseg.set(JAVA_BYTE, offsetBytes++, (byte) (numEntries & 0xff)); - numEntries >>>= 8; - } - long previous = 0; - final long[] deltas = new long[8]; - final HashIterator it = iterator(); - int i; - for (i = 0; i + 7 < getRetainedEntries(); i += 8) { - for (int j = 0; j < 8; j++) { - it.next(); - deltas[j] = it.get() - previous; - previous = it.get(); - } - BitPacking.packBitsBlock8(deltas, 0, bytes, offsetBytes, entryBits); - offsetBytes += entryBits; - } - int 
offsetBits = 0; - for (; i < getRetainedEntries(); i++) { - it.next(); - final long delta = it.get() - previous; - previous = it.get(); - BitPacking.packBits(delta, entryBits, bytes, offsetBytes, offsetBits); - offsetBytes += (offsetBits + entryBits) >>> 3; - offsetBits = (offsetBits + entryBits) & 7; - } - return bytes; - } - - private static CompactSketch heapifyV4(final MemorySegment srcSeg, final long seed, final boolean enforceSeed) { - final int preLongs = extractPreLongs(srcSeg); - final int entryBits = extractEntryBitsV4(srcSeg); - final int numEntriesBytes = extractNumEntriesBytesV4(srcSeg); - final short seedHash = (short) extractSeedHash(srcSeg); - if (enforceSeed) { PreambleUtil.checkSegmentSeedHash(srcSeg, seed); } - int offsetBytes = 8; - long theta = Long.MAX_VALUE; - if (preLongs > 1) { - theta = extractThetaLongV4(srcSeg); - offsetBytes += Long.BYTES; - } - int numEntries = 0; - for (int i = 0; i < numEntriesBytes; i++) { - numEntries |= Byte.toUnsignedInt(srcSeg.get(JAVA_BYTE, offsetBytes++)) << (i << 3); - } - final long[] entries = new long[numEntries]; - final byte[] bytes = new byte[entryBits]; // temporary buffer for unpacking - int i; - for (i = 0; i + 7 < numEntries; i += 8) { - MemorySegment.copy(srcSeg, JAVA_BYTE, offsetBytes, bytes, 0, entryBits); - BitPacking.unpackBitsBlock8(entries, i, bytes, 0, entryBits); - offsetBytes += entryBits; - } - if (i < numEntries) { - MemorySegment.copy(srcSeg, JAVA_BYTE, offsetBytes, bytes, 0, wholeBytesToHoldBits((numEntries - i) * entryBits)); - int offsetBits = 0; - offsetBytes = 0; - for (; i < numEntries; i++) { - BitPacking.unpackBits(entries, i, entryBits, bytes, offsetBytes, offsetBits); - offsetBytes += (offsetBits + entryBits) >>> 3; - offsetBits = (offsetBits + entryBits) & 7; - } - } - // undo deltas - long previous = 0; - for (i = 0; i < numEntries; i++) { - entries[i] += previous; - previous = entries[i]; - } - return new HeapCompactSketch(entries, false, seedHash, numEntries, theta, true); 
- } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentBackgroundThetaPropagation.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentBackgroundThetaPropagation.java deleted file mode 100644 index f578dc6a1..000000000 --- a/src/main/java/org/apache/datasketches/theta2/ConcurrentBackgroundThetaPropagation.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import java.util.concurrent.atomic.AtomicBoolean; - -/** - * Background propagation thread. Propagates a given sketch or a hash value from local threads - * buffers into the shared sketch which stores the most up-to-date estimation of number of unique - * items. This propagation is done at the background by dedicated threads, which allows - * application threads to continue updating their local buffer. - * - * @author eshcar - */ -final class ConcurrentBackgroundThetaPropagation implements Runnable { - - // Shared sketch to absorb the data - private final ConcurrentSharedThetaSketch sharedThetaSketch; - - // Propagation flag of local buffer that is being processed. 
- // It is the synchronization primitive to coordinate the work of the propagation with the - // local buffer. Updated when the propagation completes. - private final AtomicBoolean localPropagationInProgress; - - // Sketch to be propagated to shared sketch. Can be null if only a single hash is propagated - private final Sketch sketchIn; - - // Hash of the datum to be propagated to shared sketch. Can be ConcurrentSharedThetaSketch.NOT_SINGLE_HASH - // if the data is propagated through a sketch. - private final long singleHash; - - // The propagation epoch. The data can be propagated only within the context of this epoch. - // The data should not be propagated if this epoch is not equal to the - // shared sketch epoch. - private final long epoch; - - ConcurrentBackgroundThetaPropagation(final ConcurrentSharedThetaSketch sharedThetaSketch, - final AtomicBoolean localPropagationInProgress, final Sketch sketchIn, final long singleHash, - final long epoch) { - this.sharedThetaSketch = sharedThetaSketch; - this.localPropagationInProgress = localPropagationInProgress; - this.sketchIn = sketchIn; - this.singleHash = singleHash; - this.epoch = epoch; - } - - /** - * Propagation protocol: - * 1) validate propagation is executed at the context of the right epoch, otherwise abort - * 2) handle propagation: either of a single hash or of a sketch - * 3) complete propagation: ping local buffer - */ - @Override - public void run() { - // 1) validate propagation is executed at the context of the right epoch, otherwise abort - if (!sharedThetaSketch.validateEpoch(epoch)) { - // invalid epoch - should not propagate - sharedThetaSketch.endPropagation(null, false); - return; - } - - // 2) handle propagation: either of a single hash or of a sketch - if (singleHash != ConcurrentSharedThetaSketch.NOT_SINGLE_HASH) { - sharedThetaSketch.propagate(singleHash); - } else if (sketchIn != null) { - final long volTheta = sharedThetaSketch.getVolatileTheta(); - assert volTheta <= 
sketchIn.getThetaLong() : - "volTheta = " + volTheta + ", bufTheta = " + sketchIn.getThetaLong(); - - // propagate values from input sketch one by one - final long[] cacheIn = sketchIn.getCache(); - - if (sketchIn.isOrdered()) { //Ordered compact, Use early stop - for (final long hashIn : cacheIn) { - if (hashIn >= volTheta) { - break; //early stop - } - sharedThetaSketch.propagate(hashIn); - } - } else { //not ordered, also may have zeros (gaps) in the array. - for (final long hashIn : cacheIn) { - if (hashIn > 0) { - sharedThetaSketch.propagate(hashIn); - } - } - } - } - - // 3) complete propagation: ping local buffer - sharedThetaSketch.endPropagation(localPropagationInProgress, false); - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketch.java deleted file mode 100644 index 317db8180..000000000 --- a/src/main/java/org/apache/datasketches/theta2/ConcurrentDirectQuickSelectSketch.java +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static org.apache.datasketches.theta2.PreambleUtil.THETA_LONG; - -import java.lang.foreign.MemorySegment; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; - -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.common.SuppressFBWarnings; - -/** - * A concurrent shared sketch that is based on DirectQuickSelectSketch. - * It reflects all data processed by a single or multiple update threads, and can serve queries at - * any time. - * Background propagation threads are used to propagate data from thread local buffers into this - * sketch which stores the most up-to-date estimation of number of unique items. - * - * @author eshcar - * @author Lee Rhodes - */ -final class ConcurrentDirectQuickSelectSketch extends DirectQuickSelectSketch - implements ConcurrentSharedThetaSketch { - - // The propagation thread - private ExecutorService executorService_; - - // A flag to coordinate between several eager propagation threads - private final AtomicBoolean sharedPropagationInProgress_; - - // Theta value of concurrent sketch - private volatile long volatileThetaLong_; - - // A snapshot of the estimated number of unique entries - private volatile double volatileEstimate_; - - // Num of retained entries in which the sketch toggles from sync (exact) mode to async - // propagation mode - private final long exactLimit_; - - // An epoch defines an interval between two resets. A propagation invoked at epoch i cannot - // affect the sketch at epoch j > i. - private volatile long epoch_; - - /** - * Construct a new sketch instance and initialize the given MemorySegment as its backing store. - * - * @param lgNomLongs See lgNomLongs. - * @param seed See Update Hash Seed. 
- * @param maxConcurrencyError the max error value including error induced by concurrency. - * @param dstSeg the given MemorySegment object destination. It cannot be null. - */ - ConcurrentDirectQuickSelectSketch(final int lgNomLongs, final long seed, - final double maxConcurrencyError, final MemorySegment dstSeg) { - super(lgNomLongs, seed, 1.0F, //p - ResizeFactor.X1, //rf, - dstSeg, false); //unionGadget - - volatileThetaLong_ = Long.MAX_VALUE; - volatileEstimate_ = 0; - exactLimit_ = ConcurrentSharedThetaSketch.computeExactLimit(1L << getLgNomLongs(), - maxConcurrencyError); - sharedPropagationInProgress_ = new AtomicBoolean(false); - epoch_ = 0; - initBgPropagationService(); - } - - ConcurrentDirectQuickSelectSketch(final UpdateSketch sketch, final long seed, - final double maxConcurrencyError, final MemorySegment dstSeg) { - super(sketch.getLgNomLongs(), seed, 1.0F, //p - ResizeFactor.X1, //rf, - dstSeg, - false); //unionGadget - - exactLimit_ = ConcurrentSharedThetaSketch.computeExactLimit(1L << getLgNomLongs(), - maxConcurrencyError); - sharedPropagationInProgress_ = new AtomicBoolean(false); - epoch_ = 0; - initBgPropagationService(); - for (final long hashIn : sketch.getCache()) { - propagate(hashIn); - } - wseg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, sketch.getThetaLong()); - updateVolatileTheta(); - updateEstimationSnapshot(); - } - - //Sketch overrides - - @Override - public double getEstimate() { - return volatileEstimate_; - } - - @Override - public boolean isEstimationMode() { - return (getRetainedEntries(false) > exactLimit_) || super.isEstimationMode(); - } - - @Override - public byte[] toByteArray() { - while (!sharedPropagationInProgress_.compareAndSet(false, true)) { } //busy wait till free - final byte[] res = super.toByteArray(); - sharedPropagationInProgress_.set(false); - return res; - } - - //UpdateSketch overrides - - @Override - public UpdateSketch rebuild() { - super.rebuild(); - updateEstimationSnapshot(); - return this; - } - - /** - * 
{@inheritDoc} - * Takes care of mutual exclusion with propagation thread. - */ - @Override - public void reset() { - advanceEpoch(); - super.reset(); - volatileThetaLong_ = Long.MAX_VALUE; - volatileEstimate_ = 0; - } - - @Override - UpdateReturnState hashUpdate(final long hash) { - final String msg = "No update method should be called directly to a shared theta sketch." - + " Updating the shared sketch is only permitted through propagation from local sketches."; - throw new UnsupportedOperationException(msg); - } - - //ConcurrentSharedThetaSketch declarations - - @Override - public long getExactLimit() { - return exactLimit_; - } - - @Override - public boolean startEagerPropagation() { - while (!sharedPropagationInProgress_.compareAndSet(false, true)) { } //busy wait till free - return (!isEstimationMode());// no eager propagation is allowed in estimation mode - } - - @Override - public void endPropagation(final AtomicBoolean localPropagationInProgress, final boolean isEager) { - //update volatile theta, uniques estimate and propagation flag - updateVolatileTheta(); - updateEstimationSnapshot(); - if (isEager) { - sharedPropagationInProgress_.set(false); - } - if (localPropagationInProgress != null) { - localPropagationInProgress.set(false); //clear local propagation flag - } - } - - @Override - public long getVolatileTheta() { - return volatileThetaLong_; - } - - @Override - public void awaitBgPropagationTermination() { - try { - executorService_.shutdown(); - while (!executorService_.awaitTermination(1, TimeUnit.MILLISECONDS)) { - Thread.sleep(1); - } - } catch (final InterruptedException e) { - e.printStackTrace(); - } - } - - @Override - public final void initBgPropagationService() { - executorService_ = ConcurrentPropagationService.getExecutorService(Thread.currentThread().threadId()); - } - - @Override - public boolean propagate(final AtomicBoolean localPropagationInProgress, - final Sketch sketchIn, final long singleHash) { - final long epoch = epoch_; - if 
((singleHash != NOT_SINGLE_HASH) // namely, is a single hash and - && (getRetainedEntries(false) < exactLimit_)) { // a small sketch then propagate myself (blocking) - if (!startEagerPropagation()) { - endPropagation(localPropagationInProgress, true); - return false; - } - if (!validateEpoch(epoch)) { - endPropagation(null, true); // do not change local flag - return true; - } - propagate(singleHash); - endPropagation(localPropagationInProgress, true); - return true; - } - // otherwise, be nonblocking, let background thread do the work - final ConcurrentBackgroundThetaPropagation job = new ConcurrentBackgroundThetaPropagation( - this, localPropagationInProgress, sketchIn, singleHash, epoch); - executorService_.execute(job); - return true; - } - - @Override - public void propagate(final long singleHash) { - super.hashUpdate(singleHash); - } - - @Override - public void updateEstimationSnapshot() { - volatileEstimate_ = super.getEstimate(); - } - - @Override - public void updateVolatileTheta() { - volatileThetaLong_ = getThetaLong(); - } - - @Override - public boolean validateEpoch(final long epoch) { - return epoch_ == epoch; - } - - //Restricted - - /** - * Advances the epoch while there is no background propagation - * This ensures a propagation invoked before the reset cannot affect the sketch after the reset - * is completed. Ignore VO_VOLATILE_INCREMENT findbugs warning, it is False Positive. - */ - @SuppressFBWarnings(value = "VO_VOLATILE_INCREMENT", justification = "Likely False Positive, Fix Later") - private void advanceEpoch() { - awaitBgPropagationTermination(); - startEagerPropagation(); - ConcurrentPropagationService.resetExecutorService(Thread.currentThread().threadId()); - //no inspection NonAtomicOperationOnVolatileField - // this increment of a volatile field is done within the scope of the propagation - // synchronization and hence is done by a single thread. 
- epoch_++; - endPropagation(null, true); - initBgPropagationService(); - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketch.java deleted file mode 100644 index 56e254b51..000000000 --- a/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketch.java +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import java.util.concurrent.ExecutorService; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; - -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.common.SuppressFBWarnings; - -/** - * A concurrent shared sketch that is based on HeapQuickSelectSketch. - * It reflects all data processed by a single or multiple update threads, and can serve queries at - * any time. - * Background propagation threads are used to propagate data from thread local buffers into this - * sketch which stores the most up-to-date estimation of number of unique items. 
- * - * @author eshcar - * @author Lee Rhodes - */ -final class ConcurrentHeapQuickSelectSketch extends HeapQuickSelectSketch - implements ConcurrentSharedThetaSketch { - - // The propagation thread - private volatile ExecutorService executorService_; - - //A flag to coordinate between several eager propagation threads - private final AtomicBoolean sharedPropagationInProgress_; - - // Theta value of concurrent sketch - private volatile long volatileThetaLong_; - - // A snapshot of the estimated number of unique entries - private volatile double volatileEstimate_; - - // Num of retained entries in which the sketch toggles from sync (exact) mode to async - // propagation mode - private final long exactLimit_; - - // An epoch defines an interval between two resets. A propagation invoked at epoch i cannot - // affect the sketch at epoch j > i. - private volatile long epoch_; - - /** - * Construct a new sketch instance on the java heap. - * - * @param lgNomLongs See lgNomLongs. - * @param seed See seed - * @param maxConcurrencyError the max error value including error induced by concurrency - * - */ - ConcurrentHeapQuickSelectSketch(final int lgNomLongs, final long seed, - final double maxConcurrencyError) { - super(lgNomLongs, seed, 1.0F, //p - ResizeFactor.X1, //rf, - false); //unionGadget - - volatileThetaLong_ = Long.MAX_VALUE; - volatileEstimate_ = 0; - exactLimit_ = ConcurrentSharedThetaSketch.computeExactLimit(1L << getLgNomLongs(), - maxConcurrencyError); - sharedPropagationInProgress_ = new AtomicBoolean(false); - epoch_ = 0; - initBgPropagationService(); - } - - ConcurrentHeapQuickSelectSketch(final UpdateSketch sketch, final long seed, - final double maxConcurrencyError) { - super(sketch.getLgNomLongs(), seed, 1.0F, //p - ResizeFactor.X1, //rf, - false); //unionGadget - - exactLimit_ = ConcurrentSharedThetaSketch.computeExactLimit(1L << getLgNomLongs(), - maxConcurrencyError); - sharedPropagationInProgress_ = new AtomicBoolean(false); - epoch_ = 0; - 
initBgPropagationService(); - for (final long hashIn : sketch.getCache()) { - propagate(hashIn); - } - thetaLong_ = sketch.getThetaLong(); - updateVolatileTheta(); - updateEstimationSnapshot(); - } - - //Sketch overrides - - @Override - public double getEstimate() { - return volatileEstimate_; - } - - @Override - public boolean isEstimationMode() { - return (getRetainedEntries(false) > exactLimit_) || super.isEstimationMode(); - } - - @Override - public byte[] toByteArray() { - while (!sharedPropagationInProgress_.compareAndSet(false, true)) { } //busy wait till free - final byte[] res = super.toByteArray(); - sharedPropagationInProgress_.set(false); - return res; - } - - //UpdateSketch overrides - - @Override - public UpdateSketch rebuild() { - super.rebuild(); - updateEstimationSnapshot(); - return this; - } - - /** - * {@inheritDoc} - * Takes care of mutual exclusion with propagation thread. - */ - @Override - public void reset() { - advanceEpoch(); - super.reset(); - volatileThetaLong_ = Long.MAX_VALUE; - volatileEstimate_ = 0; - } - - @Override - UpdateReturnState hashUpdate(final long hash) { - final String msg = "No update method should be called directly to a shared theta sketch." 
- + " Updating the shared sketch is only permitted through propagation from local sketches."; - throw new UnsupportedOperationException(msg); - } - - //ConcurrentSharedThetaSketch declarations - - @Override - public long getExactLimit() { - return exactLimit_; - } - - @Override - public boolean startEagerPropagation() { - while (!sharedPropagationInProgress_.compareAndSet(false, true)) { } //busy wait till free - return (!isEstimationMode());// no eager propagation is allowed in estimation mode - } - - @Override - public void endPropagation(final AtomicBoolean localPropagationInProgress, final boolean isEager) { - //update volatile theta, uniques estimate and propagation flag - updateVolatileTheta(); - updateEstimationSnapshot(); - if (isEager) { - sharedPropagationInProgress_.set(false); - } - if (localPropagationInProgress != null) { - localPropagationInProgress.set(false); //clear local propagation flag - } - } - - @Override - public long getVolatileTheta() { - return volatileThetaLong_; - } - - @Override - public void awaitBgPropagationTermination() { - try { - executorService_.shutdown(); - while (!executorService_.awaitTermination(1, TimeUnit.MILLISECONDS)) { - Thread.sleep(1); - } - } catch (final InterruptedException e) { - e.printStackTrace(); - } - } - - @Override - public void initBgPropagationService() { - executorService_ = ConcurrentPropagationService.getExecutorService(Thread.currentThread().threadId()); - } - - @Override - public boolean propagate(final AtomicBoolean localPropagationInProgress, - final Sketch sketchIn, final long singleHash) { - final long epoch = epoch_; - if ((singleHash != NOT_SINGLE_HASH) //namely, is a single hash and - && (getRetainedEntries(false) < exactLimit_)) { //a small sketch then propagate myself (blocking) - if (!startEagerPropagation()) { - endPropagation(localPropagationInProgress, true); - return false; - } - if (!validateEpoch(epoch)) { - endPropagation(null, true); // do not change local flag - return true; - } - 
propagate(singleHash); - endPropagation(localPropagationInProgress, true); - return true; - } - // otherwise, be nonblocking, let background thread do the work - final ConcurrentBackgroundThetaPropagation job = new ConcurrentBackgroundThetaPropagation( - this, localPropagationInProgress, sketchIn, singleHash, epoch); - executorService_.execute(job); - return true; - } - - @Override - public void propagate(final long singleHash) { - super.hashUpdate(singleHash); - } - - @Override - public void updateEstimationSnapshot() { - volatileEstimate_ = super.getEstimate(); - } - - @Override - public void updateVolatileTheta() { - volatileThetaLong_ = getThetaLong(); - } - - @Override - public boolean validateEpoch(final long epoch) { - return epoch_ == epoch; - } - - //Restricted - - /** - * Advances the epoch while there is no background propagation - * This ensures a propagation invoked before the reset cannot affect the sketch after the reset - * is completed. - */ - @SuppressFBWarnings(value = "VO_VOLATILE_INCREMENT", justification = "Likely False Positive, Fix Later") - private void advanceEpoch() { - awaitBgPropagationTermination(); - startEagerPropagation(); - ConcurrentPropagationService.resetExecutorService(Thread.currentThread().threadId()); - //no inspection NonAtomicOperationOnVolatileField - // this increment of a volatile field is done within the scope of the propagation - // synchronization and hence is done by a single thread - // Ignore a FindBugs warning - epoch_++; - endPropagation(null, true); - initBgPropagationService(); - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java deleted file mode 100644 index b2867b5a5..000000000 --- a/src/main/java/org/apache/datasketches/theta2/ConcurrentHeapThetaBuffer.java +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license 
agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import static org.apache.datasketches.theta2.UpdateReturnState.ConcurrentBufferInserted; -import static org.apache.datasketches.theta2.UpdateReturnState.ConcurrentPropagated; -import static org.apache.datasketches.theta2.UpdateReturnState.RejectedOverTheta; - -import java.lang.foreign.MemorySegment; -import java.util.concurrent.atomic.AtomicBoolean; - -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.thetacommon2.HashOperations; - -/** - * This is a theta filtering, bounded size buffer that operates in the context of a single writing - * thread. When the buffer becomes full its content is propagated into the shared sketch, which - * may be on a different thread. The limit on the buffer size is configurable. A bound of size 1 - * allows the combination of buffers and shared sketch to maintain an error bound in real-time - * that is close to the error bound of a sequential theta sketch. Allowing larger buffer sizes - * enables amortization of the cost propagations and substantially improves overall system throughput. - * The error caused by the buffering is essentially a perspective of time and synchronization - * and not really a true error. 
At the end of a stream, after all the buffers have synchronized with - * the shared sketch, there is no additional error. - * Propagation is done either synchronously by the updating thread, or asynchronously by a - * background propagation thread. - * - *

              This is a buffer, not a sketch, and it extends the HeapQuickSelectSketch - * in order to leverage some of the sketch machinery to make its work simple. However, if this - * buffer receives a query, like getEstimate(), the correct answer does not come from the super - * HeapQuickSelectSketch, which knows nothing about the concurrency relationship to the - * shared concurrent sketch, it must come from the shared concurrent sketch. As a result nearly all - * of the inherited sketch methods are redirected to the shared concurrent sketch. - * - * @author eshcar - * @author Lee Rhodes - */ -final class ConcurrentHeapThetaBuffer extends HeapQuickSelectSketch { - - // Shared sketch consisting of the global sample set and theta value. - private final ConcurrentSharedThetaSketch shared; - - // A flag indicating whether the shared sketch is in shared mode and requires eager propagation - // Initially this is true. Once it is set to false (estimation mode) it never flips back. - private boolean isExactMode; - - // A flag to indicate if we expect the propagated data to be ordered - private final boolean propagateOrderedCompact; - - // Propagation flag is set to true while propagation is in progress (or pending). - // It is the synchronization primitive to coordinate the work with the propagation thread. 
- private final AtomicBoolean localPropagationInProgress; - - ConcurrentHeapThetaBuffer(final int lgNomLongs, final long seed, - final ConcurrentSharedThetaSketch shared, final boolean propagateOrderedCompact, - final int maxNumLocalThreads) { - super(computeLogBufferSize(lgNomLongs, shared.getExactLimit(), maxNumLocalThreads), - seed, 1.0F, //p - ResizeFactor.X1, //rf - false); //not a union gadget - - this.shared = shared; - isExactMode = true; - this.propagateOrderedCompact = propagateOrderedCompact; - localPropagationInProgress = new AtomicBoolean(false); - } - - private static int computeLogBufferSize(final int lgNomLongs, final long exactSize, - final int maxNumLocalBuffers) { - return Math.min(lgNomLongs, (int)Math.log(Math.sqrt(exactSize) / (2 * maxNumLocalBuffers))); - } - - //concurrent restricted methods - - /** - * Propagates a single hash value to the shared sketch - * - * @param hash to be propagated - */ - private boolean propagateToSharedSketch(final long hash) { - //no inspection StatementWithEmptyBody - while (localPropagationInProgress.get()) { - } //busy wait until previous propagation completed - localPropagationInProgress.set(true); - final boolean res = shared.propagate(localPropagationInProgress, null, hash); - //in this case the parent empty_ and curCount_ were not touched - thetaLong_ = shared.getVolatileTheta(); - return res; - } - - /** - * Propagates the content of the buffer as a sketch to the shared sketch - */ - private void propagateToSharedSketch() { - //no inspection StatementWithEmptyBody - while (localPropagationInProgress.get()) { - } //busy wait until previous propagation completed - - final CompactSketch compactSketch = compact(propagateOrderedCompact, null); - localPropagationInProgress.set(true); - shared.propagate(localPropagationInProgress, compactSketch, - ConcurrentSharedThetaSketch.NOT_SINGLE_HASH); - super.reset(); - thetaLong_ = shared.getVolatileTheta(); - } - - //Public Sketch overrides proxies to shared concurrent 
sketch - - @Override - public int getCompactBytes() { - return shared.getCompactBytes(); - } - - @Override - public int getCurrentBytes() { - return shared.getCurrentBytes(); - } - - @Override - public double getEstimate() { - return shared.getEstimate(); - } - - @Override - public double getLowerBound(final int numStdDev) { - return shared.getLowerBound(numStdDev); - } - - @Override - public double getUpperBound(final int numStdDev) { - return shared.getUpperBound(numStdDev); - } - - @Override - public boolean hasMemorySegment() { - return shared.hasMemorySegment(); - } - - @Override - public boolean isDirect() { - return shared.isDirect(); - } - - @Override - public boolean isEmpty() { - return shared.isEmpty(); - } - - @Override - public boolean isEstimationMode() { - return shared.isEstimationMode(); - } - - @Override - public boolean isSameResource(final MemorySegment that) { - return shared.isSameResource(that); - } - - //End of proxies - - @Override - public byte[] toByteArray() { - throw new UnsupportedOperationException("Local theta buffer need not be serialized"); - } - - //Public UpdateSketch overrides - - @Override - public void reset() { - super.reset(); - isExactMode = true; - localPropagationInProgress.set(false); - } - - //Restricted UpdateSketch overrides - - /** - * Updates buffer with given hash value. - * Triggers propagation to shared sketch if buffer is full. - * - * @param hash the given input hash value. A hash of zero or Long.MAX_VALUE is ignored. - * A negative hash value will throw an exception. 
- * @return - * See Update Return State - */ - @Override - UpdateReturnState hashUpdate(final long hash) { - if (isExactMode) { - isExactMode = !shared.isEstimationMode(); - } - HashOperations.checkHashCorruption(hash); - if ((getHashTableThreshold() == 0) || isExactMode ) { - //The over-theta and zero test - if (HashOperations.continueCondition(getThetaLong(), hash)) { - return RejectedOverTheta; //signal that hash was rejected due to theta or zero. - } - if (propagateToSharedSketch(hash)) { - return ConcurrentPropagated; - } - } - final UpdateReturnState state = super.hashUpdate(hash); - if (isOutOfSpace(getRetainedEntries(true) + 1)) { - propagateToSharedSketch(); - return ConcurrentPropagated; - } - if (state == UpdateReturnState.InsertedCountIncremented) { - return ConcurrentBufferInserted; - } - return state; - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentPropagationService.java b/src/main/java/org/apache/datasketches/theta2/ConcurrentPropagationService.java deleted file mode 100644 index 92ca954fa..000000000 --- a/src/main/java/org/apache/datasketches/theta2/ConcurrentPropagationService.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; - -import org.apache.datasketches.common.SuppressFBWarnings; - -/** - * Pool of threads to serve all propagation tasks in the system. - * - * @author Eshcar Hillel - */ -final class ConcurrentPropagationService { - - static int NUM_POOL_THREADS = 3; // Default: 3 threads - private static volatile ConcurrentPropagationService instance = null; // Singleton - private static ExecutorService[] propagationExecutorService = null; - - @SuppressFBWarnings(value = "ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD", justification = "Fix later") - private ConcurrentPropagationService() { - propagationExecutorService = new ExecutorService[NUM_POOL_THREADS]; - } - - //Factory: Get the singleton - @SuppressFBWarnings(value = "SSD_DO_NOT_USE_INSTANCE_LOCK_ON_SHARED_STATIC_DATA", justification = "Fix later") - private static ConcurrentPropagationService getInstance() { - if (instance == null) { - synchronized (ConcurrentPropagationService.class) { - if (instance == null) { - instance = new ConcurrentPropagationService(); //SpotBugs: SSD_DO_NOT_USE_INSTANCE_LOCK_ON_SHARED_STATIC_DATA - } - } - } - return instance; - } - - public static ExecutorService getExecutorService(final long id) { - return getInstance().initExecutorService((int) id % NUM_POOL_THREADS); - } - - @SuppressWarnings("static-access") - public static ExecutorService resetExecutorService(final long id) { - return getInstance().propagationExecutorService[(int) id % NUM_POOL_THREADS] = null; - } - - @SuppressWarnings("static-method") - private ExecutorService initExecutorService(final int i) { - if (propagationExecutorService[i] == null) { - propagationExecutorService[i] = Executors.newSingleThreadExecutor(); - } - return propagationExecutorService[i]; - } -} diff --git a/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java 
b/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java deleted file mode 100644 index 5c89b3e68..000000000 --- a/src/main/java/org/apache/datasketches/theta2/ConcurrentSharedThetaSketch.java +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import java.lang.foreign.MemorySegment; -import java.util.concurrent.atomic.AtomicBoolean; - -import org.apache.datasketches.common.MemorySegmentStatus; - -/** - * An internal interface to define the API of a concurrent shared theta sketch. - * It reflects all data processed by a single or multiple update threads, and can serve queries at - * any time. - * - * @author eshcar - */ -interface ConcurrentSharedThetaSketch extends MemorySegmentStatus { - - long NOT_SINGLE_HASH = -1L; - double MIN_ERROR = 0.0000001; - - static long computeExactLimit(final long k, final double error) { - return 2 * Math.min(k, (long) Math.ceil(1.0 / Math.pow(Math.max(error,MIN_ERROR), 2.0))); - } - - /** - * Returns flip point (number of updates) from exact to estimate mode. - * @return flip point from exact to estimate mode - */ - long getExactLimit(); - - /** - * Ensures mutual exclusion. 
No other thread can update the shared sketch while propagation is - * in progress - * @return true if eager propagation was started - */ - boolean startEagerPropagation(); - - /** - * Completes the propagation: end mutual exclusion block. - * Notifies the local thread the propagation is completed - * - * @param localPropagationInProgress the synchronization primitive through which propagator - * notifies local thread the propagation is completed - * @param isEager true if the propagation is in eager mode - */ - void endPropagation(AtomicBoolean localPropagationInProgress, boolean isEager); - - /** - * Returns the value of the volatile theta managed by the shared sketch - * @return the value of the volatile theta managed by the shared sketch - */ - long getVolatileTheta(); - - /** - * Awaits termination of background (lazy) propagation tasks - */ - void awaitBgPropagationTermination(); - - /** - * Init background (lazy) propagation service - */ - void initBgPropagationService(); - - /** - * (Eager) Propagates the given sketch or hash value into this sketch - * @param localPropagationInProgress the flag to be updated when propagation is done - * @param sketchIn any Theta sketch with the data - * @param singleHash a single hash value - * @return true if propagation successfully started - */ - boolean propagate(final AtomicBoolean localPropagationInProgress, final Sketch sketchIn, - final long singleHash); - - /** - * (Lazy/Eager) Propagates the given hash value into this sketch - * @param singleHash a single hash value - */ - void propagate(final long singleHash); - - /** - * Updates the estimation of the number of unique entries by capturing a snapshot of the sketch - * data, namely, volatile theta and the num of valid entries in the sketch - */ - void updateEstimationSnapshot(); - - /** - * Updates the value of the volatile theta by extracting it from the underlying sketch managed - * by the shared sketch - */ - void updateVolatileTheta(); - - /** - * Validates the 
shared sketch is in the context of the given epoch - * - * @param epoch the epoch number to be validates - * @return true iff the shared sketch is in the context of the given epoch - */ - boolean validateEpoch(long epoch); - - //The following mirrors are public methods that already exist on the "extends" side of the dual - // inheritance. They are provided here to allow casts to this interface access - // to these methods without having to cast back to the extended parent class. - // - //This allows an internal class to cast either the Concurrent Direct or Concurrent Heap - //shared class to this interface and have access to the above special concurrent methods as - //well as the methods below. - // - //For the external user all of the below methods can be obtained by casting the shared - //sketch to UpdateSketch. However, these methods here also act as an alias so that an - //attempt to access these methods from the local buffer will be diverted to the shared - //sketch. - - //From Sketch and MemoryStatus - - int getCompactBytes(); - - int getCurrentBytes(); - - double getEstimate(); - - double getLowerBound(int numStdDev); - - double getUpperBound(int numStdDev); - - boolean isEmpty(); - - boolean isEstimationMode(); - - byte[] toByteArray(); - - int getRetainedEntries(boolean valid); - - CompactSketch compact(); - - CompactSketch compact(boolean ordered, MemorySegment wseg); - - UpdateSketch rebuild(); - - void reset(); -} - diff --git a/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java b/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java deleted file mode 100644 index 288f5ba42..000000000 --- a/src/main/java/org/apache/datasketches/theta2/DirectCompactCompressedSketch.java +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.extractEntryBitsV4; -import static org.apache.datasketches.theta2.PreambleUtil.extractNumEntriesBytesV4; -import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs; -import static org.apache.datasketches.theta2.PreambleUtil.extractSeedHash; -import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLongV4; -import static org.apache.datasketches.theta2.PreambleUtil.wholeBytesToHoldBits; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.Util; - -/** - * An off-heap (Direct), compact, compressed, read-only sketch. It is not empty, not a single item and ordered. - * - *

              This sketch can only be associated with a Serialization Version 4 format binary image.

              - * - *

              This implementation uses data in a given MemorySegment that is owned and managed by the caller. - * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for - * the JVM to perform garbage collection.

              - */ -final class DirectCompactCompressedSketch extends DirectCompactSketch { - /** - * Construct this sketch with the given MemorySegment. - * @param seg Read-only MemorySegment object. - */ - DirectCompactCompressedSketch(final MemorySegment seg) { - super(seg); - } - - /** - * Wraps the given MemorySegment, which must be a SerVer 4 compressed CompactSketch image. - * Must check the validity of the MemorySegment before calling. - * @param srcSeg The source MemorySegment - * @param seedHash The update seedHash. - * See Seed Hash. - * @return this sketch - */ - static DirectCompactCompressedSketch wrapInstance(final MemorySegment srcSeg, final short seedHash) { - Util.checkSeedHashes((short) extractSeedHash(srcSeg), seedHash); - return new DirectCompactCompressedSketch(srcSeg); - } - - //Sketch Overrides - - @Override - public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) { - if (dstSeg != null) { - MemorySegment.copy(seg_, 0, dstSeg, 0, getCurrentBytes()); - return new DirectCompactSketch(dstSeg); - } - return CompactSketch.heapify(seg_); - } - - @Override - public int getCurrentBytes() { - final int preLongs = extractPreLongs(seg_); - final int entryBits = extractEntryBitsV4(seg_); - final int numEntriesBytes = extractNumEntriesBytesV4(seg_); - return preLongs * Long.BYTES + numEntriesBytes + wholeBytesToHoldBits(getRetainedEntries() * entryBits); - } - - private static final int START_PACKED_DATA_EXACT_MODE = 8; - private static final int START_PACKED_DATA_ESTIMATION_MODE = 16; - - @Override - public int getRetainedEntries(final boolean valid) { //compact is always valid - // number of entries is stored using variable length encoding - // most significant bytes with all zeros are not stored - // one byte in the preamble has the number of non-zero bytes used - final int preLongs = extractPreLongs(seg_); // if > 1 then the second long has theta - final int numEntriesBytes = extractNumEntriesBytesV4(seg_); - int 
offsetBytes = preLongs > 1 ? START_PACKED_DATA_ESTIMATION_MODE : START_PACKED_DATA_EXACT_MODE; - int numEntries = 0; - for (int i = 0; i < numEntriesBytes; i++) { - numEntries |= Byte.toUnsignedInt(seg_.get(JAVA_BYTE, offsetBytes++)) << (i << 3); - } - return numEntries; - } - - @Override - public long getThetaLong() { - final int preLongs = extractPreLongs(seg_); - return (preLongs > 1) ? extractThetaLongV4(seg_) : Long.MAX_VALUE; - } - - @Override - public boolean isEmpty() { - return false; - } - - @Override - public boolean isOrdered() { - return true; - } - - @Override - public HashIterator iterator() { - return new MemorySegmentCompactCompressedHashIterator( - seg_, - (extractPreLongs(seg_) > 1 ? START_PACKED_DATA_ESTIMATION_MODE : START_PACKED_DATA_EXACT_MODE) - + extractNumEntriesBytesV4(seg_), - extractEntryBitsV4(seg_), - getRetainedEntries() - ); - } - - //restricted methods - - @Override - long[] getCache() { - final int numEntries = getRetainedEntries(); - final long[] cache = new long[numEntries]; - int i = 0; - final HashIterator it = iterator(); - while (it.next()) { - cache[i++] = it.get(); - } - return cache; - } -} diff --git a/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java deleted file mode 100644 index 689166a06..000000000 --- a/src/main/java/org/apache/datasketches/theta2/DirectCompactSketch.java +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static org.apache.datasketches.theta2.CompactOperations.checkIllegalCurCountAndEmpty; -import static org.apache.datasketches.theta2.CompactOperations.segmentToCompact; -import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount; -import static org.apache.datasketches.theta2.PreambleUtil.extractFlags; -import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs; -import static org.apache.datasketches.theta2.PreambleUtil.extractSeedHash; -import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong; -import static org.apache.datasketches.theta2.SingleItemSketch.otherCheckForSingleItem; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.Util; - -/** - * An off-heap (Direct), compact, read-only sketch. The internal hash array can be either ordered - * or unordered. It is not empty, not a single item. - * - *

              This sketch can only be associated with a Serialization Version 3 format binary image.

              - * - *

              This implementation uses data in a given MemorySegment that is owned and managed by the caller. - * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for - * the JVM to perform garbage collection.

              - * - * @author Lee Rhodes - */ -class DirectCompactSketch extends CompactSketch { - final MemorySegment seg_; - - /** - * Construct this sketch with the given MemorySegment. - * @param seg Read-only MemorySegment object with the order bit properly set. - */ - DirectCompactSketch(final MemorySegment seg) { - seg_ = seg; - } - - /** - * Wraps the given MemorySegment, which must be a SerVer 3, CompactSketch image. - * Must check the validity of the MemorySegment before calling. The order bit must be set properly. - * @param srcSeg the given MemorySegment - * @param seedHash The update seedHash. - * See Seed Hash. - * @return this sketch - */ - static DirectCompactSketch wrapInstance(final MemorySegment srcSeg, final short seedHash) { - Util.checkSeedHashes((short) extractSeedHash(srcSeg), seedHash); - return new DirectCompactSketch(srcSeg); - } - - //Sketch Overrides - - @Override - public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) { - return segmentToCompact(seg_, dstOrdered, dstSeg); - } - - @Override - public int getCurrentBytes() { - if (otherCheckForSingleItem(seg_)) { return 16; } - final int preLongs = extractPreLongs(seg_); - final int curCount = (preLongs == 1) ? 0 : extractCurCount(seg_); - return (preLongs + curCount) << 3; - } - - @Override - public int getRetainedEntries(final boolean valid) { //compact is always valid - if (otherCheckForSingleItem(seg_)) { return 1; } - final int preLongs = extractPreLongs(seg_); - final int curCount = (preLongs == 1) ? 0 : extractCurCount(seg_); - return curCount; - } - - @Override - public long getThetaLong() { - final int preLongs = extractPreLongs(seg_); - return (preLongs > 2) ? 
extractThetaLong(seg_) : Long.MAX_VALUE; - } - - @Override - public boolean hasMemorySegment() { - return seg_ != null && seg_.scope().isAlive(); - } - - @Override - public boolean isDirect() { - return hasMemorySegment() && seg_.isNative(); - } - - @Override - public boolean isEmpty() { - final boolean emptyFlag = PreambleUtil.isEmptyFlag(seg_); - final long thetaLong = getThetaLong(); - final int curCount = getRetainedEntries(true); - return emptyFlag || ((curCount == 0) && (thetaLong == Long.MAX_VALUE)); - } - - @Override - public boolean isOrdered() { - return (extractFlags(seg_) & ORDERED_FLAG_MASK) > 0; - } - - @Override - public boolean isSameResource(final MemorySegment that) { - return hasMemorySegment() && Util.isSameResource(seg_, that); - - } - - @Override - public HashIterator iterator() { - return new MemorySegmentHashIterator(seg_, getRetainedEntries(true), getThetaLong()); - } - - @Override - public byte[] toByteArray() { - checkIllegalCurCountAndEmpty(isEmpty(), getRetainedEntries()); - final int outBytes = getCurrentBytes(); - final byte[] byteArrOut = new byte[outBytes]; - MemorySegment.copy(seg_, JAVA_BYTE, 0, byteArrOut, 0, outBytes); - return byteArrOut; - } - - //restricted methods - - @Override - long[] getCache() { - if (otherCheckForSingleItem(seg_)) { return new long[] { seg_.get(JAVA_LONG_UNALIGNED, 8) }; } - final int preLongs = extractPreLongs(seg_); - final int curCount = (preLongs == 1) ? 
0 : extractCurCount(seg_); - if (curCount > 0) { - final long[] cache = new long[curCount]; - MemorySegment.copy(seg_, JAVA_LONG_UNALIGNED, preLongs << 3, cache, 0, curCount); - return cache; - } - return new long[0]; - } - - @Override - int getCompactPreambleLongs() { - return extractPreLongs(seg_); - } - - @Override - int getCurrentPreambleLongs() { - return extractPreLongs(seg_); - } - - @Override - MemorySegment getMemorySegment() { - return seg_; - } - - @Override - short getSeedHash() { - return (short) extractSeedHash(seg_); - } -} diff --git a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java deleted file mode 100644 index 29e60a180..000000000 --- a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketch.java +++ /dev/null @@ -1,329 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; -import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.FLAGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.P_FLOAT; -import static org.apache.datasketches.theta2.PreambleUtil.RETAINED_ENTRIES_INT; -import static org.apache.datasketches.theta2.PreambleUtil.SER_VER; -import static org.apache.datasketches.theta2.PreambleUtil.THETA_LONG; -import static org.apache.datasketches.theta2.PreambleUtil.extractLgArrLongs; -import static org.apache.datasketches.theta2.PreambleUtil.extractLgNomLongs; -import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs; -import static org.apache.datasketches.theta2.PreambleUtil.getSegBytes; -import static org.apache.datasketches.theta2.PreambleUtil.insertCurCount; -import static org.apache.datasketches.theta2.PreambleUtil.insertFamilyID; -import static org.apache.datasketches.theta2.PreambleUtil.insertFlags; -import static org.apache.datasketches.theta2.PreambleUtil.insertLgArrLongs; -import static org.apache.datasketches.theta2.PreambleUtil.insertLgNomLongs; -import static org.apache.datasketches.theta2.PreambleUtil.insertLgResizeFactor; -import static org.apache.datasketches.theta2.PreambleUtil.insertP; -import static org.apache.datasketches.theta2.PreambleUtil.insertPreLongs; -import static org.apache.datasketches.theta2.PreambleUtil.insertSeedHash; -import static org.apache.datasketches.theta2.PreambleUtil.insertSerVer; -import static 
org.apache.datasketches.theta2.PreambleUtil.insertThetaLong; -import static org.apache.datasketches.theta2.PreambleUtil.insertUnionThetaLong; -import static org.apache.datasketches.theta2.Rebuilder.actLgResizeFactor; -import static org.apache.datasketches.theta2.Rebuilder.moveAndResize; -import static org.apache.datasketches.theta2.Rebuilder.quickSelectAndRebuild; -import static org.apache.datasketches.theta2.Rebuilder.resize; -import static org.apache.datasketches.theta2.UpdateReturnState.InsertedCountIncremented; -import static org.apache.datasketches.theta2.UpdateReturnState.InsertedCountIncrementedRebuilt; -import static org.apache.datasketches.theta2.UpdateReturnState.InsertedCountIncrementedResized; -import static org.apache.datasketches.theta2.UpdateReturnState.RejectedDuplicate; -import static org.apache.datasketches.theta2.UpdateReturnState.RejectedOverTheta; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.HashOperations; -import org.apache.datasketches.thetacommon2.ThetaUtil; - -/** - * The default Theta Sketch using the QuickSelect algorithm. - * This subclass implements methods, which affect the state (update, rebuild, reset) - * - *

              This implementation uses data in a given MemorySegment that is owned and managed by the caller. - * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for - * the JVM to perform garbage collection.

              - * - * @author Lee Rhodes - * @author Kevin Lang - */ -class DirectQuickSelectSketch extends DirectQuickSelectSketchR { - - private DirectQuickSelectSketch( - final long seed, - final MemorySegment wseg) { - super(seed, wseg); - } - - /** - * Construct a new sketch instance and initialize the given MemorySegment as its backing store. - * - * @param lgNomLongs See lgNomLongs. - * @param seed See Update Hash Seed. - * @param p - * See Sampling Probability, p - * @param rf Resize Factor - * See Resize Factor - * @param dstSeg the given MemorySegment object destination. It cannot be null. - * It will be cleared prior to use. - * @param unionGadget true if this sketch is implementing the Union gadget function. - * Otherwise, it is behaving as a normal QuickSelectSketch. - */ - DirectQuickSelectSketch( - final int lgNomLongs, - final long seed, - final float p, - final ResizeFactor rf, - final MemorySegment dstSeg, - final boolean unionGadget) { - this( - checkSegSize(lgNomLongs, rf, dstSeg, unionGadget), - //SpotBugs CT_CONSTRUCTOR_THROW is false positive. - //this construction scheme is compliant with SEI CERT Oracle Coding Standard for Java / OBJ11-J - lgNomLongs, - seed, - p, - rf, - dstSeg, - unionGadget); - } - - private DirectQuickSelectSketch( - final boolean secure, //required part of Finalizer Attack prevention - final int lgNomLongs, - final long seed, - final float p, - final ResizeFactor rf, - final MemorySegment dstSeg, - final boolean unionGadget) { - super(seed, dstSeg); - //Choose family, preambleLongs - final Family family; - final int preambleLongs; - if (unionGadget) { - preambleLongs = Family.UNION.getMinPreLongs(); - family = Family.UNION; - } - else { - preambleLongs = Family.QUICKSELECT.getMinPreLongs(); - family = Family.QUICKSELECT; - } - - //Choose RF, minReqBytes, lgArrLongs. - final int lgRF = rf.lg(); - final int lgArrLongs = (lgRF == 0) ? 
lgNomLongs + 1 : ThetaUtil.MIN_LG_ARR_LONGS; - - //@formatter:off - //Build preamble - insertPreLongs(dstSeg, preambleLongs); //byte 0 - insertLgResizeFactor(dstSeg, lgRF); //byte 0 - insertSerVer(dstSeg, SER_VER); //byte 1 - insertFamilyID(dstSeg, family.getID()); //byte 2 - insertLgNomLongs(dstSeg, lgNomLongs); //byte 3 - insertLgArrLongs(dstSeg, lgArrLongs); //byte 4 - //flags: bigEndian = readOnly = compact = ordered = false; empty = true : 00100 = 4 - insertFlags(dstSeg, EMPTY_FLAG_MASK); //byte 5 - insertSeedHash(dstSeg, Util.computeSeedHash(seed)); //bytes 6,7 - insertCurCount(dstSeg, 0); //bytes 8-11 - insertP(dstSeg, p); //bytes 12-15 - final long thetaLong = (long)(p * LONG_MAX_VALUE_AS_DOUBLE); - insertThetaLong(dstSeg, thetaLong); //bytes 16-23 - if (unionGadget) { - insertUnionThetaLong(dstSeg, thetaLong); - } - //@formatter:on - - //clear hash table area - dstSeg.asSlice(preambleLongs << 3, Long.BYTES << lgArrLongs).fill((byte)0); - - hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); - } - - private static final boolean checkSegSize( - final int lgNomLongs, final ResizeFactor rf, final MemorySegment dstSeg, final boolean unionGadget) { - final int preambleLongs = (unionGadget) ? Family.UNION.getMinPreLongs() : Family.QUICKSELECT.getMinPreLongs(); - final int lgRF = rf.lg(); - final int lgArrLongs = (lgRF == 0) ? lgNomLongs + 1 : ThetaUtil.MIN_LG_ARR_LONGS; - final int minReqBytes = getSegBytes(lgArrLongs, preambleLongs); - final long curSegCapBytes = dstSeg.byteSize(); - if (curSegCapBytes < minReqBytes) { - throw new SketchesArgumentException( - "MemorySegment capacity is too small: " + curSegCapBytes + " < " + minReqBytes); - } - return true; - } - - /** - * Wrap a sketch around the given source MemorySegment containing sketch data that originated from - * this sketch. - * @param srcSeg The given MemorySegment object must be in hash table form and not read only. 
- * @param seed See Update Hash Seed - * @return instance of this sketch - */ - static DirectQuickSelectSketch writableWrap(final MemorySegment srcSeg, final long seed) { - final int preambleLongs = extractPreLongs(srcSeg); //byte 0 - final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3 - final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4 - - UpdateSketch.checkUnionQuickSelectFamily(srcSeg, preambleLongs, lgNomLongs); - checkSegIntegrity(srcSeg, seed, preambleLongs, lgNomLongs, lgArrLongs); - - if (isResizeFactorIncorrect(srcSeg, lgNomLongs, lgArrLongs)) { - //If incorrect it sets it to X2 which always works. - insertLgResizeFactor(srcSeg, ResizeFactor.X2.lg()); - } - - final DirectQuickSelectSketch dqss = - new DirectQuickSelectSketch(seed, srcSeg); - dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); - return dqss; - } - - /** - * Fast-wrap a sketch around the given source MemorySegment containing sketch data that originated from - * this sketch. This does NO validity checking of the given MemorySegment. - * @param srcSeg The given MemorySegment must be in hash table form and not read only. 
- * @param seed See Update Hash Seed - * @return instance of this sketch - */ - static DirectQuickSelectSketch fastWritableWrap(final MemorySegment srcSeg, final long seed) { - final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3 - final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4 - - final DirectQuickSelectSketch dqss = - new DirectQuickSelectSketch(seed, srcSeg); - dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); - return dqss; - } - - //Sketch - - //UpdateSketch - - @Override - public UpdateSketch rebuild() { - final int lgNomLongs = getLgNomLongs(); - final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; - if (getRetainedEntries(true) > (1 << lgNomLongs)) { - quickSelectAndRebuild(wseg_, preambleLongs, lgNomLongs); - } - return this; - } - - @Override - public void reset() { - //clear hash table - //hash table size and hashTableThreshold stays the same - //lgArrLongs stays the same - //thetaLongs resets to p - final int arrLongs = 1 << getLgArrLongs(); - final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; - final int preBytes = preambleLongs << 3; - wseg_.asSlice(preBytes, arrLongs * 8L).fill((byte)0); - //flags: bigEndian = readOnly = compact = ordered = false; empty = true. 
- wseg_.set(JAVA_BYTE, FLAGS_BYTE, (byte) EMPTY_FLAG_MASK); - wseg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, 0); - final float p = wseg_.get(JAVA_FLOAT_UNALIGNED, P_FLOAT); - final long thetaLong = (long) (p * LONG_MAX_VALUE_AS_DOUBLE); - wseg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong); - } - - //restricted methods - - @Override - UpdateReturnState hashUpdate(final long hash) { - HashOperations.checkHashCorruption(hash); - - wseg_.set(JAVA_BYTE, FLAGS_BYTE, (byte) (wseg_.get(JAVA_BYTE, FLAGS_BYTE) & ~EMPTY_FLAG_MASK)); - final long thetaLong = getThetaLong(); - final int lgNomLongs = getLgNomLongs(); - //The over-theta test - if (HashOperations.continueCondition(thetaLong, hash)) { - return RejectedOverTheta; //signal that hash was rejected due to theta or zero. - } - - final int lgArrLongs = getLgArrLongs(); - final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; - - //The duplicate test - final int index = - HashOperations.hashSearchOrInsertMemorySegment(wseg_, lgArrLongs, hash, preambleLongs << 3); - if (index >= 0) { - return RejectedDuplicate; //Duplicate, not inserted - } - //insertion occurred, increment curCount - final int curCount = getRetainedEntries(true) + 1; - wseg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, curCount); //update curCount - - if (isOutOfSpace(curCount)) { //we need to do something, we are out of space - - if (lgArrLongs > lgNomLongs) { //at full size, rebuild - //Assumes no dirty values, changes thetaLong, curCount_ - assert (lgArrLongs == (lgNomLongs + 1)) : "lgArr: " + lgArrLongs + ", lgNom: " + lgNomLongs; - //rebuild, refresh curCount based on # values in the hashtable. - quickSelectAndRebuild(wseg_, preambleLongs, lgNomLongs); - return InsertedCountIncrementedRebuilt; - } //end of rebuild, exit - - else { //Not at full size, resize. Should not get here if lgRF = 0 and segCap is too small. 
- final int lgRF = getLgRF(); - final int actLgRF = actLgResizeFactor(wseg_.byteSize(), lgArrLongs, preambleLongs, lgRF); - int tgtLgArrLongs = Math.min(lgArrLongs + actLgRF, lgNomLongs + 1); - - if (actLgRF > 0) { //Expand in current MemorySegment - //lgArrLongs will change; thetaLong, curCount will not - resize(wseg_, preambleLongs, lgArrLongs, tgtLgArrLongs); - hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, tgtLgArrLongs); - return InsertedCountIncrementedResized; - } //end of Expand in current MemorySegment, exit. - - else { - //Request more space, then resize. lgArrLongs will change; thetaLong, curCount will not - final int preBytes = preambleLongs << 3; - tgtLgArrLongs = Math.min(lgArrLongs + lgRF, lgNomLongs + 1); - final int tgtArrBytes = 8 << tgtLgArrLongs; - final int reqBytes = tgtArrBytes + preBytes; - final MemorySegment newDstSeg = MemorySegment.ofArray(new byte[reqBytes]); - - moveAndResize(wseg_, preambleLongs, lgArrLongs, newDstSeg, tgtLgArrLongs, thetaLong); - wseg_ = newDstSeg; - - hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, tgtLgArrLongs); - return InsertedCountIncrementedResized; - } //end of Request more space to resize - } //end of resize - } //end of isOutOfSpace - return InsertedCountIncremented; - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java deleted file mode 100644 index 483cbbed6..000000000 --- a/src/main/java/org/apache/datasketches/theta2/DirectQuickSelectSketchR.java +++ /dev/null @@ -1,288 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static org.apache.datasketches.theta2.CompactOperations.checkIllegalCurCountAndEmpty; -import static org.apache.datasketches.theta2.CompactOperations.computeCompactPreLongs; -import static org.apache.datasketches.theta2.CompactOperations.correctThetaOnCompact; -import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.LG_ARR_LONGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.LG_NOM_LONGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.LG_RESIZE_FACTOR_BIT; -import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.P_FLOAT; -import static org.apache.datasketches.theta2.PreambleUtil.RETAINED_ENTRIES_INT; -import static org.apache.datasketches.theta2.PreambleUtil.THETA_LONG; -import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount; -import static org.apache.datasketches.theta2.PreambleUtil.extractLgArrLongs; -import static 
org.apache.datasketches.theta2.PreambleUtil.extractLgNomLongs; -import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs; -import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong; -import static org.apache.datasketches.theta2.PreambleUtil.insertThetaLong; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.common.SketchesReadOnlyException; -import org.apache.datasketches.common.SuppressFBWarnings; -import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.ThetaUtil; - -/** - * The default Theta Sketch using the QuickSelect algorithm. - * This is the read-only implementation with non-functional methods, which affect the state. - * - *

              This implementation uses data in a given MemorySegment that is owned and managed by the caller. - * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for - * the JVM to perform garbage collection.

              - * - * @author Lee Rhodes - * @author Kevin Lang - */ -class DirectQuickSelectSketchR extends UpdateSketch { - static final double DQS_RESIZE_THRESHOLD = 15.0 / 16.0; //tuned for space - final long seed_; //provided, kept only on heap, never serialized. - int hashTableThreshold_; //computed, kept only on heap, never serialized. - MemorySegment wseg_; //A MemorySegment for child class, but no write methods here - - //only called by DirectQuickSelectSketch and below - DirectQuickSelectSketchR(final long seed, final MemorySegment wseg) { - seed_ = seed; - wseg_ = wseg; - } - - /** - * Wrap a sketch around the given source MemorySegment containing sketch data that originated from - * this sketch. - * @param srcSeg the source MemorySegment. - * The given MemorySegment object must be in hash table form and not read only. - * @param seed See Update Hash Seed - * @return instance of this sketch - */ - static DirectQuickSelectSketchR readOnlyWrap(final MemorySegment srcSeg, final long seed) { - final int preambleLongs = extractPreLongs(srcSeg); //byte 0 - final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3 - final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4 - - UpdateSketch.checkUnionQuickSelectFamily(srcSeg, preambleLongs, lgNomLongs); - checkSegIntegrity(srcSeg, seed, preambleLongs, lgNomLongs, lgArrLongs); - - final DirectQuickSelectSketchR dqssr = - new DirectQuickSelectSketchR(seed, srcSeg); - dqssr.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); - return dqssr; - } - - /** - * Fast-wrap a sketch around the given source MemorySegment containing sketch data that originated from - * this sketch. This does NO validity checking of the given MemorySegment. - * @param srcSeg The given MemorySegment object must be in hash table form and not read only. 
- * @param seed See Update Hash Seed - * @return instance of this sketch - */ - static DirectQuickSelectSketchR fastReadOnlyWrap(final MemorySegment srcSeg, final long seed) { - final int lgNomLongs = srcSeg.get(JAVA_BYTE, LG_NOM_LONGS_BYTE) & 0XFF; - final int lgArrLongs = srcSeg.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF; - - final DirectQuickSelectSketchR dqss = new DirectQuickSelectSketchR(seed, srcSeg); - dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); - return dqss; - } - - //Sketch - - @Override - public int getCurrentBytes() { - //not compact - final byte lgArrLongs = wseg_.get(JAVA_BYTE, LG_ARR_LONGS_BYTE); - final int preLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; - final int lengthBytes = (preLongs + (1 << lgArrLongs)) << 3; - return lengthBytes; - } - - @Override - public double getEstimate() { - final int curCount = extractCurCount(wseg_); - final long thetaLong = extractThetaLong(wseg_); - return Sketch.estimate(thetaLong, curCount); - } - - @Override - public Family getFamily() { - final int familyID = wseg_.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF; - return Family.idToFamily(familyID); - } - - @Override - public int getRetainedEntries(final boolean valid) { //always valid - return wseg_.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT); - } - - @Override - public long getThetaLong() { - return isEmpty() ? 
Long.MAX_VALUE : wseg_.get(JAVA_LONG_UNALIGNED, THETA_LONG); - } - - @Override - public boolean hasMemorySegment() { - return wseg_ != null && wseg_.scope().isAlive(); - } - - @Override - public boolean isDirect() { - return hasMemorySegment() && wseg_.isNative(); - } - - @Override - public boolean isEmpty() { - return PreambleUtil.isEmptyFlag(wseg_); - } - - @Override - public boolean isSameResource(final MemorySegment that) { - return hasMemorySegment() && Util.isSameResource(wseg_, that); - } - - @Override - public HashIterator iterator() { - return new MemorySegmentHashIterator(wseg_, 1 << getLgArrLongs(), getThetaLong()); - } - - @Override - public byte[] toByteArray() { //MY_FAMILY is stored in wseg_ - checkIllegalCurCountAndEmpty(isEmpty(), extractCurCount(wseg_)); - final int lengthBytes = getCurrentBytes(); - final byte[] byteArray = new byte[lengthBytes]; - final MemorySegment seg = MemorySegment.ofArray(byteArray); - MemorySegment.copy(wseg_, 0, seg, 0, lengthBytes); - final long thetaLong = - correctThetaOnCompact(isEmpty(), extractCurCount(wseg_), extractThetaLong(wseg_)); - insertThetaLong(wseg_, thetaLong); - return byteArray; - } - - //UpdateSketch - - @Override - public final int getLgNomLongs() { - return PreambleUtil.extractLgNomLongs(wseg_); - } - - @Override - float getP() { - return wseg_.get(JAVA_FLOAT_UNALIGNED, P_FLOAT); - } - - @Override - public ResizeFactor getResizeFactor() { - return ResizeFactor.getRF(getLgRF()); - } - - @Override - long getSeed() { - return seed_; - } - - @Override - public UpdateSketch rebuild() { - throw new SketchesReadOnlyException(); - } - - @Override - public void reset() { - throw new SketchesReadOnlyException(); - } - - //restricted methods - - @Override - long[] getCache() { - final long lgArrLongs = wseg_.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF; - final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; - final long[] cacheArr = new long[1 << lgArrLongs]; - MemorySegment.copy(wseg_, 
JAVA_LONG_UNALIGNED, preambleLongs << 3, cacheArr, 0, 1 << lgArrLongs); - return cacheArr; - } - - @Override - int getCompactPreambleLongs() { - return computeCompactPreLongs(isEmpty(), getRetainedEntries(true), getThetaLong()); - } - - @Override - int getCurrentPreambleLongs() { - return PreambleUtil.extractPreLongs(wseg_); - } - - @Override - MemorySegment getMemorySegment() { - return wseg_; - } - - @Override - short getSeedHash() { - return (short) PreambleUtil.extractSeedHash(wseg_); - } - - @Override - boolean isDirty() { - return false; //Always false for QuickSelectSketch - } - - @Override - boolean isOutOfSpace(final int numEntries) { - return numEntries > hashTableThreshold_; - } - - @Override - int getLgArrLongs() { - return wseg_.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF; - } - - int getLgRF() { //only Direct needs this - return (wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) >>> LG_RESIZE_FACTOR_BIT) & 0X3; - } - - @Override - UpdateReturnState hashUpdate(final long hash) { - throw new SketchesReadOnlyException(); - } - - /** - * Returns the cardinality limit given the current size of the hash table array. - * - * @param lgNomLongs See lgNomLongs. - * @param lgArrLongs See lgArrLongs. - * @return the hash table threshold - */ - @SuppressFBWarnings(value = "DB_DUPLICATE_BRANCHES", justification = "False Positive, see the code comments") - protected static final int getOffHeapHashTableThreshold(final int lgNomLongs, final int lgArrLongs) { - //SpotBugs may complain (DB_DUPLICATE_BRANCHES) if DQS_RESIZE_THRESHOLD == REBUILD_THRESHOLD, - //but this allows us to tune these constants for different sketches. - final double fraction = (lgArrLongs <= lgNomLongs) ? 
DQS_RESIZE_THRESHOLD : ThetaUtil.REBUILD_THRESHOLD; - return (int) (fraction * (1 << lgArrLongs)); - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/EmptyCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/EmptyCompactSketch.java deleted file mode 100644 index 28cf578f8..000000000 --- a/src/main/java/org/apache/datasketches/theta2/EmptyCompactSketch.java +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.SketchesArgumentException; - -/** - * Singleton empty CompactSketch. - * - * @author Lee Rhodes - */ -final class EmptyCompactSketch extends CompactSketch { - - //For backward compatibility, a candidate long must have Flags= compact, read-only, - // COMPACT-Family=3, SerVer=3, PreLongs=1, and be exactly 8 bytes long. The seedHash is ignored. 
- // NOTE: The empty and ordered flags may or may not be set - private static final long EMPTY_SKETCH_MASK = 0X00_00_EB_00_00_FF_FF_FFL; - private static final long EMPTY_SKETCH_TEST = 0X00_00_0A_00_00_03_03_01L; - //When returning a byte array the empty and ordered bits are also set - static final byte[] EMPTY_COMPACT_SKETCH_ARR = { 1, 3, 3, 0, 0, 0x1E, 0, 0 }; - private static final EmptyCompactSketch EMPTY_COMPACT_SKETCH = new EmptyCompactSketch(); - - private EmptyCompactSketch() {} - - static synchronized EmptyCompactSketch getInstance() { - return EMPTY_COMPACT_SKETCH; - } - - //This should be a heapify - static synchronized EmptyCompactSketch getHeapInstance(final MemorySegment srcSeg) { - final long pre0 = srcSeg.get(JAVA_LONG_UNALIGNED, 0); - if (testCandidatePre0(pre0)) { - return EMPTY_COMPACT_SKETCH; - } - final long maskedPre0 = pre0 & EMPTY_SKETCH_MASK; - throw new SketchesArgumentException("Input MemorySegment does not match required Preamble. " - + "MemorySegment Pre0: " + Long.toHexString(maskedPre0) - + ", required Pre0: " + Long.toHexString(EMPTY_SKETCH_TEST)); - } - - @Override - // This returns with ordered flag = true independent of dstOrdered. - // This is required for fast detection. - // The hashSeed is ignored and set == 0. 
- public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstWSeg) { - if (dstWSeg == null) { return EmptyCompactSketch.getInstance(); } - //dstWSeg.putByteArray(0, EMPTY_COMPACT_SKETCH_ARR, 0, 8); - MemorySegment.copy(EMPTY_COMPACT_SKETCH_ARR, 0, dstWSeg, JAVA_BYTE, 0, 8); - return new DirectCompactSketch(dstWSeg); - } - - //static - - static boolean testCandidatePre0(final long candidate) { - return (candidate & EMPTY_SKETCH_MASK) == EMPTY_SKETCH_TEST; - } - - @Override - public int getCurrentBytes() { - return 8; - } - - @Override - public double getEstimate() { return 0; } - - @Override - public int getRetainedEntries(final boolean valid) { - return 0; - } - - @Override - public long getThetaLong() { - return Long.MAX_VALUE; - } - - @Override - public boolean isEmpty() { - return true; - } - - @Override - public boolean isOrdered() { - return true; - } - - @Override - public HashIterator iterator() { - return new HeapCompactHashIterator(new long[0]); - } - - /** - * Returns 8 bytes representing a CompactSketch that the following flags set: - * ordered, compact, empty, readOnly. The SerVer is 3, the Family is COMPACT(3), - * and the PreLongs = 1. The seedHash is zero. - */ - @Override - public byte[] toByteArray() { - return EMPTY_COMPACT_SKETCH_ARR; - } - - @Override - long[] getCache() { - return new long[0]; - } - - @Override - int getCompactPreambleLongs() { - return 1; - } - - @Override - int getCurrentPreambleLongs() { - return 1; - } - - @Override - short getSeedHash() { - return 0; - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java b/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java deleted file mode 100644 index 6758c98e2..000000000 --- a/src/main/java/org/apache/datasketches/theta2/ForwardCompatibility.java +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount; -import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID; -import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs; -import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.SketchesArgumentException; - -/** - * Used to convert older serialization versions 1 and 2 to version 3. The Serialization - * Version is the version of the sketch binary image format and should not be confused with the - * version number of the Open Source DataSketches Library. - * - * @author Lee Rhodes - */ -final class ForwardCompatibility { - - private ForwardCompatibility() { } - - /** - * Convert a serialization version (SerVer) 1 sketch (~Feb 2014) to a SerVer 3 sketch. - * Note: SerVer 1 sketches always have (metadata) preamble-longs of 3 and are always stored - * in a compact ordered form, but with 3 different sketch types. All SerVer 1 sketches will - * be converted to a SerVer 3 sketches. There is no concept of p-sampling, no empty bit. 
- * - * @param srcSeg the image of a SerVer 1 sketch - * - * @param seedHash See Seed Hash. - * The seedHash that matches the seedHash of the original seed used to construct the sketch. - * Note: SerVer 1 sketches do not have the concept of the SeedHash, so the seedHash provided here - * MUST be derived from the actual seed that was used when the SerVer 1 sketches were built. - * @return a SerVer 3 {@link CompactSketch}. - */ - static final CompactSketch heapify1to3(final MemorySegment srcSeg, final short seedHash) { - final int segCap = (int) srcSeg.byteSize(); - final int preLongs = extractPreLongs(srcSeg); //always 3 for serVer 1 - if (preLongs != 3) { - throw new SketchesArgumentException("PreLongs must be 3 for SerVer 1: " + preLongs); - } - final int familyId = extractFamilyID(srcSeg); //1,2,3 - if ((familyId < 1) || (familyId > 3)) { - throw new SketchesArgumentException("Family ID (Sketch Type) must be 1 to 3: " + familyId); - } - final int curCount = extractCurCount(srcSeg); - final long thetaLong = extractThetaLong(srcSeg); - final boolean empty = (curCount == 0) && (thetaLong == Long.MAX_VALUE); - - if (empty || (segCap <= 24)) { //return empty - return EmptyCompactSketch.getInstance(); - } - - final int reqCap = (curCount + preLongs) << 3; - validateInputSize(reqCap, segCap); - - if ((thetaLong == Long.MAX_VALUE) && (curCount == 1)) { - final long hash = srcSeg.get(JAVA_LONG_UNALIGNED, preLongs << 3); - return new SingleItemSketch(hash, seedHash); - } - //theta < 1.0 and/or curCount > 1 - - final long[] compactOrderedCache = new long[curCount]; - MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preLongs << 3, compactOrderedCache, 0, curCount); - return new HeapCompactSketch(compactOrderedCache, false, seedHash, curCount, thetaLong, true); - } - - /** - * Convert a serialization version (SerVer) 2 sketch to a SerVer 3 HeapCompactOrderedSketch. 
- * Note: SerVer 2 sketches can have metadata-longs of 1,2 or 3 and are always stored - * in a compact ordered form (not as a hash table), but with 4 different sketch types. - * @param srcSeg the image of a SerVer 2 sketch - * @param seedHash See Seed Hash. - * The seed used for building the sketch image in srcMem - * @return a SerVer 3 HeapCompactOrderedSketch - */ - static final CompactSketch heapify2to3(final MemorySegment srcSeg, final short seedHash) { - final int segCap = (int) srcSeg.byteSize(); - final int preLongs = extractPreLongs(srcSeg); //1,2 or 3 - final int familyId = extractFamilyID(srcSeg); //1,2,3,4 - if ((familyId < 1) || (familyId > 4)) { - throw new SketchesArgumentException("Family (Sketch Type) must be 1 to 4: " + familyId); - } - int reqBytesIn = 8; - int curCount = 0; - long thetaLong = Long.MAX_VALUE; - if (preLongs == 1) { - reqBytesIn = 8; - validateInputSize(reqBytesIn, segCap); - return EmptyCompactSketch.getInstance(); - } - if (preLongs == 2) { //includes pre0 + count, no theta (== 1.0) - reqBytesIn = preLongs << 3; - validateInputSize(reqBytesIn, segCap); - curCount = extractCurCount(srcSeg); - if (curCount == 0) { - return EmptyCompactSketch.getInstance(); - } - if (curCount == 1) { - reqBytesIn = (preLongs + 1) << 3; - validateInputSize(reqBytesIn, segCap); - final long hash = srcSeg.get(JAVA_LONG_UNALIGNED, preLongs << 3); - return new SingleItemSketch(hash, seedHash); - } - //curCount > 1 - reqBytesIn = (curCount + preLongs) << 3; - validateInputSize(reqBytesIn, segCap); - final long[] compactOrderedCache = new long[curCount]; - MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preLongs << 3, compactOrderedCache, 0, curCount); - return new HeapCompactSketch(compactOrderedCache, false, seedHash, curCount, thetaLong,true); - } - if (preLongs == 3) { //pre0 + count + theta - reqBytesIn = (preLongs) << 3; // - validateInputSize(reqBytesIn, segCap); - curCount = extractCurCount(srcSeg); - thetaLong = extractThetaLong(srcSeg); - if 
((curCount == 0) && (thetaLong == Long.MAX_VALUE)) { - return EmptyCompactSketch.getInstance(); - } - if ((curCount == 1) && (thetaLong == Long.MAX_VALUE)) { - reqBytesIn = (preLongs + 1) << 3; - validateInputSize(reqBytesIn, segCap); - final long hash = srcSeg.get(JAVA_LONG_UNALIGNED, preLongs << 3); - return new SingleItemSketch(hash, seedHash); - } - //curCount > 1 and/or theta < 1.0 - reqBytesIn = (curCount + preLongs) << 3; - validateInputSize(reqBytesIn, segCap); - final long[] compactOrderedCache = new long[curCount]; - //srcSeg.getLongArray(preLongs << 3, compactOrderedCache, 0, curCount); - MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preLongs << 3, compactOrderedCache, 0, curCount); - return new HeapCompactSketch(compactOrderedCache, false, seedHash, curCount, thetaLong, true); - } - throw new SketchesArgumentException("PreLongs must be 1,2, or 3: " + preLongs); - } - - private static final void validateInputSize(final int reqBytesIn, final int segCap) { - if (reqBytesIn > segCap) { - throw new SketchesArgumentException( - "Input MemorySegment or byte[] size is too small: Required Bytes: " + reqBytesIn - + ", bytesIn: " + segCap); - } - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/HashIterator.java b/src/main/java/org/apache/datasketches/theta2/HashIterator.java deleted file mode 100644 index c8cf9aba9..000000000 --- a/src/main/java/org/apache/datasketches/theta2/HashIterator.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -/** - * This is used to iterate over the retained hash values of the Theta sketch. - * @author Lee Rhodes - */ -public interface HashIterator { - - /** - * Gets the hash value - * @return the hash value - */ - long get(); - - /** - * Returns true at the next hash value in sequence. - * If false, the iteration is done. - * @return true at the next hash value in sequence. - */ - boolean next(); -} diff --git a/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java deleted file mode 100644 index b534ee8e2..000000000 --- a/src/main/java/org/apache/datasketches/theta2/HeapAlphaSketch.java +++ /dev/null @@ -1,596 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import static java.lang.Math.max; -import static java.lang.Math.min; -import static java.lang.Math.sqrt; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; -import static org.apache.datasketches.common.Util.checkBounds; -import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount; -import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID; -import static org.apache.datasketches.theta2.PreambleUtil.extractLgArrLongs; -import static org.apache.datasketches.theta2.PreambleUtil.extractLgNomLongs; -import static org.apache.datasketches.theta2.PreambleUtil.extractLgResizeFactor; -import static org.apache.datasketches.theta2.PreambleUtil.extractP; -import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs; -import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong; -import static org.apache.datasketches.theta2.UpdateReturnState.InsertedCountIncremented; -import static org.apache.datasketches.theta2.UpdateReturnState.InsertedCountNotIncremented; -import static org.apache.datasketches.theta2.UpdateReturnState.RejectedDuplicate; -import static org.apache.datasketches.theta2.UpdateReturnState.RejectedOverTheta; -import static org.apache.datasketches.thetacommon2.HashOperations.STRIDE_MASK; - -import java.lang.foreign.MemorySegment; -import java.util.Objects; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon2.HashOperations; -import org.apache.datasketches.thetacommon2.ThetaUtil; - -/** - * This sketch uses the - * Theta Sketch Framework - * and the - * Alpha TCF algorithm - * with a single cache. 
- * - * @author Lee Rhodes - * @author Kevin Lang - */ -final class HeapAlphaSketch extends HeapUpdateSketch { - private static final int ALPHA_MIN_LG_NOM_LONGS = 9; //The smallest Log2 k allowed => 512. - private final double alpha_; // computed from lgNomLongs - private final long split1_; // computed from alpha and p - - private int lgArrLongs_; - private int hashTableThreshold_; //never serialized - private int curCount_ = 0; - private long thetaLong_; - private boolean empty_ = true; - - private long[] cache_; - private boolean dirty_ = false; - - private HeapAlphaSketch(final int lgNomLongs, final long seed, final float p, - final ResizeFactor rf, final double alpha, final long split1) { - super(lgNomLongs, seed, p, rf); - alpha_ = alpha; - split1_ = split1; - } - - /** - * Get a new sketch instance on the java heap. - * - * @param lgNomLongs See lgNomLongs - * @param seed See Update Hash Seed - * @param p See Sampling Probability, p - * @param rf See Resize Factor - * @return instance of this sketch - */ - static HeapAlphaSketch newHeapInstance(final int lgNomLongs, final long seed, final float p, - final ResizeFactor rf) { - - if (lgNomLongs < ALPHA_MIN_LG_NOM_LONGS) { - throw new SketchesArgumentException( - "This sketch requires a minimum nominal entries of " + (1 << ALPHA_MIN_LG_NOM_LONGS)); - } - - final double nomLongs = (1L << lgNomLongs); - final double alpha = nomLongs / (nomLongs + 1.0); - final long split1 = (long) (((p * (alpha + 1.0)) / 2.0) * LONG_MAX_VALUE_AS_DOUBLE); - - final HeapAlphaSketch has = new HeapAlphaSketch(lgNomLongs, seed, p, rf, alpha, split1); - - final int lgArrLongs = ThetaUtil.startingSubMultiple(lgNomLongs + 1, rf.lg(), ThetaUtil.MIN_LG_ARR_LONGS); - has.lgArrLongs_ = lgArrLongs; - has.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs); - has.curCount_ = 0; - has.thetaLong_ = (long)(p * LONG_MAX_VALUE_AS_DOUBLE); - has.empty_ = true; //other flags: bigEndian = readOnly = compact = ordered = false; - 
has.cache_ = new long[1 << lgArrLongs]; - return has; - } - - /** - * Heapify a sketch from a MemorySegment object containing sketch data. - * @param srcSeg The source MemorySegment object. - * It must have a size of at least 24 bytes. - * @param expectedSeed the seed used to validate the given MemorySegment image. - * See seed - * @return instance of this sketch - */ - static HeapAlphaSketch heapifyInstance(final MemorySegment srcSeg, final long expectedSeed) { - Objects.requireNonNull(srcSeg, "Source MemorySegment must not be null"); - checkBounds(0, 24, srcSeg.byteSize()); - final int preambleLongs = extractPreLongs(srcSeg); //byte 0 - final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3 - final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4 - - checkAlphaFamily(srcSeg, preambleLongs, lgNomLongs); - checkSegIntegrity(srcSeg, expectedSeed, preambleLongs, lgNomLongs, lgArrLongs); - - final float p = extractP(srcSeg); //bytes 12-15 - final int seglgRF = extractLgResizeFactor(srcSeg); //byte 0 - ResizeFactor segRF = ResizeFactor.getRF(seglgRF); - - final double nomLongs = (1L << lgNomLongs); - final double alpha = nomLongs / (nomLongs + 1.0); - final long split1 = (long) (((p * (alpha + 1.0)) / 2.0) * LONG_MAX_VALUE_AS_DOUBLE); - - if (isResizeFactorIncorrect(srcSeg, lgNomLongs, lgArrLongs)) { - segRF = ResizeFactor.X2; //X2 always works. 
- } - - final HeapAlphaSketch has = new HeapAlphaSketch(lgNomLongs, expectedSeed, p, segRF, alpha, split1); - has.lgArrLongs_ = lgArrLongs; - has.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs); - has.curCount_ = extractCurCount(srcSeg); - has.thetaLong_ = extractThetaLong(srcSeg); - has.empty_ = PreambleUtil.isEmptyFlag(srcSeg); - has.cache_ = new long[1 << lgArrLongs]; - MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preambleLongs << 3, has.cache_, 0, 1 << lgArrLongs); //read in as hash table - return has; - } - - //Sketch - - @Override - public Family getFamily() { - return Family.ALPHA; - } - - @Override - public HashIterator iterator() { - return new HeapHashIterator(cache_, thetaLong_); - } - - @Override - public double getEstimate() { - return (thetaLong_ > split1_) - ? Sketch.estimate(thetaLong_, curCount_) - : (1 << lgNomLongs_) * (LONG_MAX_VALUE_AS_DOUBLE / thetaLong_); - } - - @Override - public double getLowerBound(final int numStdDev) { - if ((numStdDev < 1) || (numStdDev > 3)) { - throw new SketchesArgumentException("numStdDev can only be the values 1, 2 or 3."); - } - double lb; - if (isEstimationMode()) { - final int validCount = getRetainedEntries(true); - if (validCount > 0) { - final double est = getEstimate(); - final double var = getVariance(1 << lgNomLongs_, getP(), alpha_, getTheta(), validCount); - lb = est - (numStdDev * sqrt(var)); - lb = max(lb, 0.0); - } - else { - lb = 0.0; - } - } - else { - lb = curCount_; - } - return lb; - } - - @Override - public int getRetainedEntries(final boolean valid) { - if (curCount_ > 0) { - if (valid && isDirty()) { - final int curCount = HashOperations.countPart(getCache(), getLgArrLongs(), getThetaLong()); - return curCount; - } - } - return curCount_; - } - - @Override - public long getThetaLong() { - return thetaLong_; - } - - @Override - public double getUpperBound(final int numStdDev) { - if ((numStdDev < 1) || (numStdDev > 3)) { - throw new SketchesArgumentException("numStdDev 
can only be the values 1, 2 or 3."); - } - if (isEstimationMode()) { - final double var = - getVariance(1 << lgNomLongs_, getP(), alpha_, getTheta(), getRetainedEntries(true)); - return getEstimate() + (numStdDev * sqrt(var)); - } - return curCount_; - } - - @Override - public boolean isEmpty() { - return empty_; - } - - /* - * Alpha Sketch Preamble Layout ( same as Theta UpdateSketch ) - *
              -   * Long || Start Byte Adr:
              -   * Adr:
              -   *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |        0           |
              -   *  0   ||    Seed Hash    | Flags  |  LgArr | LgNom  | FamID  | SerVer | lgRF | PreLongs=3  |
              -   *
              -   *      ||   15   |   14   |   13   |   12   |   11   |   10   |    9   |     8              |
              -   *  1   ||-----------------p-----------------|----------Retained Entries Count---------------|
              -   *
              -   *      ||   23   |   22   |   21    |  20   |   19   |   18   |   17   |    16              |
              -   *  2   ||---------------------------------Theta---------------------------------------------|
              -   * 
              - */ - - @Override - public byte[] toByteArray() { - return toByteArray(Family.ALPHA.getMinPreLongs(), (byte) Family.ALPHA.getID()); - } - - //UpdateSketch - - @Override - public UpdateSketch rebuild() { - if (isDirty()) { - rebuildDirty(); - } - return this; - } - - @Override - public final void reset() { - final int lgArrLongs = - ThetaUtil.startingSubMultiple(lgNomLongs_ + 1, getResizeFactor().lg(), ThetaUtil.MIN_LG_ARR_LONGS); - if (lgArrLongs == lgArrLongs_) { - final int arrLongs = cache_.length; - assert (1 << lgArrLongs_) == arrLongs; - java.util.Arrays.fill(cache_, 0L); - } - else { - cache_ = new long[1 << lgArrLongs]; - lgArrLongs_ = lgArrLongs; - } - hashTableThreshold_ = setHashTableThreshold(lgNomLongs_, lgArrLongs_); - empty_ = true; - curCount_ = 0; - thetaLong_ = (long)(getP() * LONG_MAX_VALUE_AS_DOUBLE); - dirty_ = false; - } - - //restricted methods - - @Override - int getCompactPreambleLongs() { - return CompactOperations.computeCompactPreLongs(empty_, curCount_, thetaLong_); - } - - @Override - int getCurrentPreambleLongs() { - return Family.ALPHA.getMinPreLongs(); - } - - @Override - long[] getCache() { - return cache_; - } - - @Override - boolean isDirty() { - return dirty_; - } - - @Override - boolean isOutOfSpace(final int numEntries) { - return numEntries > hashTableThreshold_; - } - - @Override - int getLgArrLongs() { - return lgArrLongs_; - } - - @Override - UpdateReturnState hashUpdate(final long hash) { - HashOperations.checkHashCorruption(hash); - empty_ = false; - - //The over-theta test - if (HashOperations.continueCondition(thetaLong_, hash)) { - return RejectedOverTheta; //signal that hash was rejected due to theta. 
- } - - //The duplicate/inserted tests - if (dirty_) { //may have dirty values, must be at tgt size - return enhancedHashInsert(cache_, hash); - } - - //NOT dirty, the other duplicate or inserted test - if (HashOperations.hashSearchOrInsert(cache_, lgArrLongs_, hash) >= 0) { - return UpdateReturnState.RejectedDuplicate; - } - //insertion occurred, must increment - curCount_++; - final int r = (thetaLong_ > split1_) ? 0 : 1; //are we in sketch mode? (i.e., seen k+1 inserts?) - if (r == 0) { //not yet sketch mode (has not seen k+1 inserts), but could be sampling - if (curCount_ > (1 << lgNomLongs_)) { // > k - //Reached the k+1 insert. Must be at tgt size or larger. - //Transition to Sketch Mode. Happens only once. - //Decrement theta, make dirty, don't bother check size, already not-empty. - thetaLong_ = (long) (thetaLong_ * alpha_); - dirty_ = true; //now may have dirty values - } - else { - //inserts (not entries!) <= k. It may not be at tgt size. - //Check size, don't decrement theta. cnt already ++, empty_ already false; - if (isOutOfSpace(curCount_)) { - resizeClean(); //not dirty, not at tgt size. - } - } - } - else { //r > 0: sketch mode and not dirty (e.g., after a rebuild). - //dec theta, make dirty, cnt already ++, must be at tgt size or larger. check for rebuild - assert (lgArrLongs_ > lgNomLongs_) : "lgArr: " + lgArrLongs_ + ", lgNom: " + lgNomLongs_; - thetaLong_ = (long) (thetaLong_ * alpha_); //decrement theta - dirty_ = true; //now may have dirty values - if (isOutOfSpace(curCount_)) { - rebuildDirty(); // at tgt size and maybe dirty - } - } - return UpdateReturnState.InsertedCountIncremented; - } - - /** - * Enhanced Knuth-style Open Addressing, Double Hash insert. - * The insertion process will overwrite an already existing, dirty (over-theta) value if one is - * found in the search. - * If an empty cell is found first, it will be inserted normally. - * - * @param hashTable the hash table to insert into - * @param hash must not be 0. 
If not a duplicate, it will be inserted into the hash array - * @return See Update Return State - */ - final UpdateReturnState enhancedHashInsert(final long[] hashTable, final long hash) { - final int arrayMask = (1 << lgArrLongs_) - 1; // arrayLongs -1 - // make odd and independent of curProbe: - final int stride = (2 * (int) ((hash >>> lgArrLongs_) & STRIDE_MASK)) + 1; - int curProbe = (int) (hash & arrayMask); - long curTableHash = hashTable[curProbe]; - final int loopIndex = curProbe; - - // This is the enhanced part - // Search for duplicate or zero, or opportunity to replace garbage. - while ((curTableHash != hash) && (curTableHash != 0)) { - // curHash is not a duplicate and not zero - - if (curTableHash >= thetaLong_) { // curTableHash is garbage, do enhanced insert - final int rememberPos = curProbe; // remember its position. - // Now we must make sure there are no duplicates in this search path, - // so we keep searching - curProbe = (curProbe + stride) & arrayMask; // move forward - curTableHash = hashTable[curProbe]; - while ((curTableHash != hash) && (curTableHash != 0)) { - curProbe = (curProbe + stride) & arrayMask; - curTableHash = hashTable[curProbe]; - } - // curTableHash is a duplicate or zero - if (curTableHash == hash) { - return RejectedDuplicate; // duplicate, just return - } - assert (curTableHash == 0); // must be zero - // Now that we know there are no duplicates we can - // go back and insert at first garbage value position - hashTable[rememberPos] = hash; - thetaLong_ = (long) (thetaLong_ * alpha_); //decrement theta - dirty_ = true; //the decremented theta could have produced a new dirty value - return InsertedCountNotIncremented; - } - - // curTableHash was not a duplicate, not zero, and NOT garbage, - // so we keep searching - assert (curTableHash < thetaLong_); - curProbe = (curProbe + stride) & arrayMask; - curTableHash = hashTable[curProbe]; - - // ensure no infinite loop - if (curProbe == loopIndex) { - throw new 
SketchesArgumentException("No empty slot in table!"); - } - // end of Enhanced insert - } // end while and search - - // curTableHash is a duplicate or zero and NOT garbage - if (curTableHash == hash) { - return RejectedDuplicate; // duplicate, just return - } - // must be zero, so insert and increment - assert (curTableHash == 0); - hashTable[curProbe] = hash; - thetaLong_ = (long) (thetaLong_ * alpha_); //decrement theta - dirty_ = true; //the decremented theta could have produced a new dirty value - if (++curCount_ > hashTableThreshold_) { - rebuildDirty(); //at tgt size and maybe dirty - } - return InsertedCountIncremented; - } - - //At tgt size or greater - //Checks for rare lockup condition - // Used by hashUpdate(), rebuild() - private final void rebuildDirty() { - final int curCountBefore = curCount_; - forceRebuildDirtyCache(); //changes curCount_ only - if (curCountBefore == curCount_) { - //clean but unsuccessful at reducing count, must take drastic measures, very rare. - forceResizeCleanCache(1); - } - } - - //curCount > hashTableThreshold - //Checks for rare lockup condition - // Used by hashUpdate() - private final void resizeClean() { - //must resize, but are we at tgt size? - final int lgTgtLongs = lgNomLongs_ + 1; - if (lgTgtLongs > lgArrLongs_) { - //not yet at tgt size - final ResizeFactor rf = getResizeFactor(); - final int lgDeltaLongs = lgTgtLongs - lgArrLongs_; //must be > 0 - final int lgResizeFactor = max(min(rf.lg(), lgDeltaLongs), 1); //rf_.lg() could be 0 - forceResizeCleanCache(lgResizeFactor); - } - else { - //at tgt size or larger, no dirty values, must take drastic measures, very rare. - forceResizeCleanCache(1); - } - } - - //Force resize. Changes lgArrLongs_ only. Theta doesn't change, count doesn't change. - // Used by rebuildDirty(), resizeClean() - private final void forceResizeCleanCache(final int lgResizeFactor) { - assert (!dirty_); // Should never be dirty before a resize. 
- lgArrLongs_ += lgResizeFactor; // new tgt size - final long[] tgtArr = new long[1 << lgArrLongs_]; - final int newCount = HashOperations.hashArrayInsert(cache_, tgtArr, lgArrLongs_, thetaLong_); - assert (curCount_ == newCount); - curCount_ = newCount; - cache_ = tgtArr; - hashTableThreshold_ = setHashTableThreshold(lgNomLongs_, lgArrLongs_); - } - - //Cache stays the same size. Must be dirty. Theta doesn't change, count will change. - // Used by rebuildDirtyAtTgtSize() - private final void forceRebuildDirtyCache() { - final long[] tgtArr = new long[1 << lgArrLongs_]; - curCount_ = HashOperations.hashArrayInsert(cache_, tgtArr, lgArrLongs_, thetaLong_); - cache_ = tgtArr; - dirty_ = false; - //hashTableThreshold stays the same - } - - // @formatter:off - /** - * Computes an estimate of the error variance based on Historic Inverse Probability (HIP) - * estimators. See Cohen: All-Distances Sketches, Revisited: HIP Estimators for Massive Graph - * Analysis, Nov 2014. - *
              -   * Table of sketch states and how Upper and Lower Bounds are computed
              -   *
              -   * Theta P    Count  Empty  EstMode Est   UB  LB   Comments
              -   * 1.0   1.0  0      T      F       0     0   0    Empty Sketch-mode only sketch
              -   * 1.0   1.0  N      F      F       N     N   N    Degenerate Sketch-mode only sketch
              -   * <1.0  1.0  -      F      T       est   HIP HIP  Normal Sketch-mode only sketch
              -   *  P    <1.0 0      T      F       0     0   0    Virgin sampling sketch
              -   *  P    <1.0 N      F      T       est   HIP HIP  Degenerate sampling sketch
              -   *  <P   <1.0 N      F      T       est   HIP HIP  Sampling sketch also in sketch-mode
              -   * 
              - * @param k alias for nominal entries. - * @param p See Sampling Probability, p. - * @param alpha the value of alpha for this sketch - * @param theta See theta. - * @param count the current valid count. - * @return the variance. - */ - // @formatter:on - private static final double getVariance(final double k, final double p, final double alpha, - final double theta, final int count) { - final double kPlus1 = k + 1.0; - final double y = 1.0 / p; - final double ySq = y * y; - final double ySqMinusY = ySq - y; - final int r = getR(theta, alpha, p); - final double result; - if (r == 0) { - result = count * ySqMinusY; - } - else if (r == 1) { - result = kPlus1 * ySqMinusY; //term1 - } - else { //r > 1 - final double b = 1.0 / alpha; - final double bSq = b * b; - final double x = p / theta; - final double xSq = x * x; - final double term1 = kPlus1 * ySqMinusY; - final double term2 = y / (1.0 - bSq); - final double term3 = (((y * bSq) - (y * xSq) - b - bSq) + x + (x * b)); - result = term1 + (term2 * term3); - } - final double term4 = (1 - theta) / (theta * theta); - return result + term4; - } - - /** - * Computes whether there have been 0, 1, or 2 or more actual insertions into the cache in a - * numerically safe way. - * @param theta See Theta. - * @param alpha internal computed value alpha. - * @param p See Sampling Probability, p. - * @return R. - */ - private static final int getR(final double theta, final double alpha, final double p) { - final double split1 = (p * (alpha + 1.0)) / 2.0; - if (theta > split1) { return 0; } - if (theta > (alpha * split1)) { return 1; } - return 2; - } - - /** - * Returns the cardinality limit given the current size of the hash table array. - * - * @param lgNomLongs See lgNomLongs. - * @param lgArrLongs See lgArrLongs. - * @return the hash table threshold - */ - private static final int setHashTableThreshold(final int lgNomLongs, final int lgArrLongs) { - final double fraction = (lgArrLongs <= lgNomLongs) ? 
ThetaUtil.RESIZE_THRESHOLD : ThetaUtil.REBUILD_THRESHOLD; - return (int) Math.floor(fraction * (1 << lgArrLongs)); - } - - static void checkAlphaFamily(final MemorySegment seg, final int preambleLongs, final int lgNomLongs) { - //Check Family - final int familyID = extractFamilyID(seg); //byte 2 - final Family family = Family.idToFamily(familyID); - if (family.equals(Family.ALPHA)) { - if (preambleLongs != Family.ALPHA.getMinPreLongs()) { - throw new SketchesArgumentException( - "Possible corruption: Invalid PreambleLongs value for ALPHA: " + preambleLongs); - } - } - else { - throw new SketchesArgumentException( - "Possible corruption: Invalid Family: " + family.toString()); - } - - //Check lgNomLongs - if (lgNomLongs < ALPHA_MIN_LG_NOM_LONGS) { - throw new SketchesArgumentException( - "Possible corruption: This sketch requires a minimum nominal entries of " - + (1 << ALPHA_MIN_LG_NOM_LONGS)); - } - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/HeapCompactHashIterator.java b/src/main/java/org/apache/datasketches/theta2/HeapCompactHashIterator.java deleted file mode 100644 index bd06f6ecd..000000000 --- a/src/main/java/org/apache/datasketches/theta2/HeapCompactHashIterator.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -final class HeapCompactHashIterator implements HashIterator { - private long[] cache; - private int index; - - HeapCompactHashIterator(final long[] cache) { - this.cache = cache; - index = -1; - } - - @Override - public long get() { - return cache[index]; - } - - @Override - public boolean next() { - return ++index < cache.length; - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java deleted file mode 100644 index d7040841c..000000000 --- a/src/main/java/org/apache/datasketches/theta2/HeapCompactSketch.java +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import static org.apache.datasketches.theta2.CompactOperations.checkIllegalCurCountAndEmpty; -import static org.apache.datasketches.theta2.CompactOperations.componentsToCompact; -import static org.apache.datasketches.theta2.CompactOperations.computeCompactPreLongs; -import static org.apache.datasketches.theta2.CompactOperations.correctThetaOnCompact; -import static org.apache.datasketches.theta2.CompactOperations.isSingleItem; -import static org.apache.datasketches.theta2.CompactOperations.loadCompactMemorySegment; -import static org.apache.datasketches.theta2.PreambleUtil.COMPACT_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.READ_ONLY_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.SINGLEITEM_FLAG_MASK; - -import java.lang.foreign.MemorySegment; - -/** - * Parent class of the Heap Compact Sketches. - * - * @author Lee Rhodes - */ -final class HeapCompactSketch extends CompactSketch { - private final long thetaLong_; //computed - private final int curCount_; - private final int preLongs_; //computed - private final short seedHash_; - private final boolean empty_; - private final boolean ordered_; - private final boolean singleItem_; - private final long[] cache_; - - /** - * Constructs this sketch from correct, valid components. - * @param cache in compact form - * @param empty The correct Empty. - * @param seedHash The correct - * Seed Hash. - * @param curCount correct value - * @param thetaLong The correct - * thetaLong. 
- */ - HeapCompactSketch(final long[] cache, final boolean empty, final short seedHash, - final int curCount, final long thetaLong, final boolean ordered) { - seedHash_ = seedHash; - curCount_ = curCount; - empty_ = empty; - ordered_ = ordered; - cache_ = cache; - //computed - thetaLong_ = correctThetaOnCompact(empty, curCount, thetaLong); - preLongs_ = computeCompactPreLongs(empty, curCount, thetaLong); //considers singleItem - singleItem_ = isSingleItem(empty, curCount, thetaLong); - checkIllegalCurCountAndEmpty(empty, curCount); - } - - //Sketch - - @Override - public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) { - if (dstSeg == null && (dstOrdered == false || this.ordered_ == dstOrdered)) { return this; } - return componentsToCompact(getThetaLong(), getRetainedEntries(true), getSeedHash(), isEmpty(), - true, ordered_, dstOrdered, dstSeg, getCache().clone()); - } - - @Override - public int getCurrentBytes() { - return (preLongs_ + curCount_) << 3; - } - - @Override - public int getRetainedEntries(final boolean valid) { - return curCount_; - } - - @Override - public long getThetaLong() { - return thetaLong_; - } - - @Override - public boolean isEmpty() { - return empty_; - } - - @Override - public boolean isOrdered() { - return ordered_; - } - - @Override - public HashIterator iterator() { - return new HeapCompactHashIterator(cache_); - } - - //restricted methods - - @Override - long[] getCache() { - return cache_; - } - - @Override - int getCompactPreambleLongs() { - return preLongs_; - } - - @Override - int getCurrentPreambleLongs() { //already compact; ignored - return preLongs_; - } - - @Override - short getSeedHash() { - return seedHash_; - } - - //use of a MemorySegment is convenient. The byteArray and MemorySegment are loaded simultaneously. 
- @Override - public byte[] toByteArray() { - final int bytes = getCurrentBytes(); - final byte[] byteArray = new byte[bytes]; - final MemorySegment dstSeg = MemorySegment.ofArray(byteArray); - final int emptyBit = isEmpty() ? EMPTY_FLAG_MASK : 0; - final int orderedBit = ordered_ ? ORDERED_FLAG_MASK : 0; - final int singleItemBit = singleItem_ ? SINGLEITEM_FLAG_MASK : 0; - final byte flags = (byte) (emptyBit | READ_ONLY_FLAG_MASK | COMPACT_FLAG_MASK - | orderedBit | singleItemBit); - final int preLongs = getCompactPreambleLongs(); - loadCompactMemorySegment(getCache(), getSeedHash(), getRetainedEntries(true), getThetaLong(), - dstSeg, flags, preLongs); - return byteArray; - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/HeapHashIterator.java b/src/main/java/org/apache/datasketches/theta2/HeapHashIterator.java deleted file mode 100644 index 29ae42a0e..000000000 --- a/src/main/java/org/apache/datasketches/theta2/HeapHashIterator.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -/** - * @author Lee Rhodes - */ -final class HeapHashIterator implements HashIterator { - private long[] cache; - private long thetaLong; - private int index; - private long hash; - - HeapHashIterator(final long[] cache, final long thetaLong) { - this.cache = cache; - this.thetaLong = thetaLong; - index = -1; - hash = 0; - } - - @Override - public long get() { - return hash; - } - - @Override - public boolean next() { - while (++index < cache.length) { - hash = cache[index]; - if ((hash != 0) && (hash < thetaLong)) { - return true; - } - } - return false; - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java deleted file mode 100644 index 1da4521b0..000000000 --- a/src/main/java/org/apache/datasketches/theta2/HeapQuickSelectSketch.java +++ /dev/null @@ -1,321 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import static java.lang.Math.max; -import static java.lang.Math.min; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static org.apache.datasketches.common.QuickSelect.selectExcludingZeros; -import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; -import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount; -import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID; -import static org.apache.datasketches.theta2.PreambleUtil.extractLgArrLongs; -import static org.apache.datasketches.theta2.PreambleUtil.extractLgNomLongs; -import static org.apache.datasketches.theta2.PreambleUtil.extractLgResizeFactor; -import static org.apache.datasketches.theta2.PreambleUtil.extractP; -import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs; -import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong; -import static org.apache.datasketches.theta2.UpdateReturnState.InsertedCountIncremented; -import static org.apache.datasketches.theta2.UpdateReturnState.InsertedCountIncrementedRebuilt; -import static org.apache.datasketches.theta2.UpdateReturnState.InsertedCountIncrementedResized; -import static org.apache.datasketches.theta2.UpdateReturnState.RejectedDuplicate; -import static org.apache.datasketches.theta2.UpdateReturnState.RejectedOverTheta; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.thetacommon2.HashOperations; -import org.apache.datasketches.thetacommon2.ThetaUtil; - -/** - * @author Lee Rhodes - * @author Kevin Lang - */ -class HeapQuickSelectSketch extends HeapUpdateSketch { - private final Family MY_FAMILY; - - private final int preambleLongs_; - private int lgArrLongs_; - private int hashTableThreshold_; //never serialized - int curCount_; - long thetaLong_; - boolean empty_; - - private 
long[] cache_; - - private HeapQuickSelectSketch(final int lgNomLongs, final long seed, final float p, - final ResizeFactor rf, final int preambleLongs, final Family family) { - super(lgNomLongs, seed, p, rf); - preambleLongs_ = preambleLongs; - MY_FAMILY = family; - } - - /** - * Construct a new sketch instance on the java heap. - * - * @param lgNomLongs See lgNomLongs. - * @param seed See seed - * @param p See Sampling Probability, p - * @param rf See Resize Factor - * @param unionGadget true if this sketch is implementing the Union gadget function. - * Otherwise, it is behaving as a normal QuickSelectSketch. - */ - HeapQuickSelectSketch(final int lgNomLongs, final long seed, final float p, - final ResizeFactor rf, final boolean unionGadget) { - super(lgNomLongs, seed, p, rf); - - //Choose family, preambleLongs - if (unionGadget) { - preambleLongs_ = Family.UNION.getMinPreLongs(); - MY_FAMILY = Family.UNION; - } - else { - preambleLongs_ = Family.QUICKSELECT.getMinPreLongs(); - MY_FAMILY = Family.QUICKSELECT; - } - - lgArrLongs_ = ThetaUtil.startingSubMultiple(lgNomLongs + 1, rf.lg(), ThetaUtil.MIN_LG_ARR_LONGS); - hashTableThreshold_ = getHashTableThreshold(lgNomLongs, lgArrLongs_); - curCount_ = 0; - thetaLong_ = (long)(p * LONG_MAX_VALUE_AS_DOUBLE); - empty_ = true; //other flags: bigEndian = readOnly = compact = ordered = false; - cache_ = new long[1 << lgArrLongs_]; - } - - /** - * Heapify a sketch from a MemorySegment UpdateSketch or Union object - * containing sketch data. - * @param srcSeg The source MemorySegment object. 
- * @param seed See seed - * @return instance of this sketch - */ - static HeapQuickSelectSketch heapifyInstance(final MemorySegment srcSeg, final long seed) { - final int preambleLongs = extractPreLongs(srcSeg); //byte 0 - final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3 - final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4 - - checkUnionQuickSelectFamily(srcSeg, preambleLongs, lgNomLongs); - checkSegIntegrity(srcSeg, seed, preambleLongs, lgNomLongs, lgArrLongs); - - final float p = extractP(srcSeg); //bytes 12-15 - final int seglgRF = extractLgResizeFactor(srcSeg); //byte 0 - ResizeFactor segRF = ResizeFactor.getRF(seglgRF); - final int familyID = extractFamilyID(srcSeg); - final Family family = Family.idToFamily(familyID); - - if (isResizeFactorIncorrect(srcSeg, lgNomLongs, lgArrLongs)) { - segRF = ResizeFactor.X2; //X2 always works. - } - - final HeapQuickSelectSketch hqss = new HeapQuickSelectSketch(lgNomLongs, seed, p, segRF, - preambleLongs, family); - hqss.lgArrLongs_ = lgArrLongs; - hqss.hashTableThreshold_ = getHashTableThreshold(lgNomLongs, lgArrLongs); - hqss.curCount_ = extractCurCount(srcSeg); - hqss.thetaLong_ = extractThetaLong(srcSeg); - hqss.empty_ = PreambleUtil.isEmptyFlag(srcSeg); - hqss.cache_ = new long[1 << lgArrLongs]; - MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preambleLongs << 3, hqss.cache_, 0, 1 << lgArrLongs); //read in as hash table - return hqss; - } - - //Sketch - - @Override - public double getEstimate() { - return Sketch.estimate(thetaLong_, curCount_); - } - - @Override - public Family getFamily() { - return MY_FAMILY; - } - - @Override - public int getRetainedEntries(final boolean valid) { - return curCount_; - } - - @Override - public long getThetaLong() { - return empty_ ? 
Long.MAX_VALUE : thetaLong_; - } - - @Override - public boolean isEmpty() { - return empty_; - } - - @Override - public HashIterator iterator() { - return new HeapHashIterator(cache_, thetaLong_); - } - - @Override - public byte[] toByteArray() { - return toByteArray(preambleLongs_, (byte) MY_FAMILY.getID()); - } - - //UpdateSketch - - @Override - public UpdateSketch rebuild() { - if (getRetainedEntries(true) > (1 << getLgNomLongs())) { - quickSelectAndRebuild(); - } - return this; - } - - @Override - public void reset() { - final ResizeFactor rf = getResizeFactor(); - final int lgArrLongsSM = ThetaUtil.startingSubMultiple(lgNomLongs_ + 1, rf.lg(), ThetaUtil.MIN_LG_ARR_LONGS); - if (lgArrLongsSM == lgArrLongs_) { - final int arrLongs = cache_.length; - assert (1 << lgArrLongs_) == arrLongs; - java.util.Arrays.fill(cache_, 0L); - } - else { - cache_ = new long[1 << lgArrLongsSM]; - lgArrLongs_ = lgArrLongsSM; - } - hashTableThreshold_ = getHashTableThreshold(lgNomLongs_, lgArrLongs_); - empty_ = true; - curCount_ = 0; - thetaLong_ = (long)(getP() * LONG_MAX_VALUE_AS_DOUBLE); - } - - //restricted methods - - @Override - long[] getCache() { - return cache_; - } - - @Override - int getCompactPreambleLongs() { - return CompactOperations.computeCompactPreLongs(empty_, curCount_, thetaLong_); - } - - @Override - int getCurrentPreambleLongs() { - return preambleLongs_; - } - - //only used by ConcurrentHeapThetaBuffer & Test - int getHashTableThreshold() { - return hashTableThreshold_; - } - - @Override - int getLgArrLongs() { - return lgArrLongs_; - } - - @Override - UpdateReturnState hashUpdate(final long hash) { - HashOperations.checkHashCorruption(hash); - empty_ = false; - - //The over-theta test - if (HashOperations.continueCondition(thetaLong_, hash)) { - return RejectedOverTheta; //signal that hash was rejected due to theta. 
- } - - //The duplicate test - if (HashOperations.hashSearchOrInsert(cache_, lgArrLongs_, hash) >= 0) { - return RejectedDuplicate; //Duplicate, not inserted - } - //insertion occurred, must increment curCount - curCount_++; - - if (isOutOfSpace(curCount_)) { //we need to do something, we are out of space - //must rebuild or resize - if (lgArrLongs_ <= lgNomLongs_) { //resize - resizeCache(); - return InsertedCountIncrementedResized; - } - //Already at tgt size, must rebuild - assert (lgArrLongs_ == (lgNomLongs_ + 1)) : "lgArr: " + lgArrLongs_ + ", lgNom: " + lgNomLongs_; - quickSelectAndRebuild(); //Changes thetaLong_, curCount_, reassigns cache - return InsertedCountIncrementedRebuilt; - } - return InsertedCountIncremented; - } - - @Override - boolean isDirty() { - return false; - } - - @Override - boolean isOutOfSpace(final int numEntries) { - return numEntries > hashTableThreshold_; - } - - //Must resize. Changes lgArrLongs_, cache_, hashTableThreshold; - // theta and count don't change. - // Used by hashUpdate() - private final void resizeCache() { - final ResizeFactor rf = getResizeFactor(); - final int lgMaxArrLongs = lgNomLongs_ + 1; - final int lgDeltaLongs = lgMaxArrLongs - lgArrLongs_; - final int lgResizeFactor = max(min(rf.lg(), lgDeltaLongs), 1); //rf_.lg() could be 0 - lgArrLongs_ += lgResizeFactor; // new arr size - - final long[] tgtArr = new long[1 << lgArrLongs_]; - final int newCount = HashOperations.hashArrayInsert(cache_, tgtArr, lgArrLongs_, thetaLong_); - - assert newCount == curCount_; //Assumes no dirty values. - curCount_ = newCount; - - cache_ = tgtArr; - hashTableThreshold_ = getHashTableThreshold(lgNomLongs_, lgArrLongs_); - } - - //array stays the same size. 
Changes theta and thus count - private final void quickSelectAndRebuild() { - final int arrLongs = 1 << lgArrLongs_; // generally 2 * k, - - final int pivot = (1 << lgNomLongs_) + 1; // pivot for QS = k + 1 - - thetaLong_ = selectExcludingZeros(cache_, curCount_, pivot); //messes up the cache_ - - // now we rebuild to clean up dirty data, update count, reconfigure as a hash table - final long[] tgtArr = new long[arrLongs]; - curCount_ = HashOperations.hashArrayInsert(cache_, tgtArr, lgArrLongs_, thetaLong_); - cache_ = tgtArr; - //hashTableThreshold stays the same - } - - /** - * Returns the cardinality limit given the current size of the hash table array. - * - * @param lgNomLongs See lgNomLongs. - * @param lgArrLongs See lgArrLongs. - * @return the hash table threshold - */ - private static final int getHashTableThreshold(final int lgNomLongs, final int lgArrLongs) { - final double fraction = (lgArrLongs <= lgNomLongs) ? ThetaUtil.RESIZE_THRESHOLD : ThetaUtil.REBUILD_THRESHOLD; - return (int) (fraction * (1 << lgArrLongs)); - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/HeapUpdateSketch.java b/src/main/java/org/apache/datasketches/theta2/HeapUpdateSketch.java deleted file mode 100644 index 5e2840ac6..000000000 --- a/src/main/java/org/apache/datasketches/theta2/HeapUpdateSketch.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static org.apache.datasketches.theta2.CompactOperations.checkIllegalCurCountAndEmpty; -import static org.apache.datasketches.theta2.CompactOperations.correctThetaOnCompact; -import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.SER_VER; -import static org.apache.datasketches.theta2.PreambleUtil.insertCurCount; -import static org.apache.datasketches.theta2.PreambleUtil.insertFamilyID; -import static org.apache.datasketches.theta2.PreambleUtil.insertFlags; -import static org.apache.datasketches.theta2.PreambleUtil.insertLgArrLongs; -import static org.apache.datasketches.theta2.PreambleUtil.insertLgNomLongs; -import static org.apache.datasketches.theta2.PreambleUtil.insertLgResizeFactor; -import static org.apache.datasketches.theta2.PreambleUtil.insertP; -import static org.apache.datasketches.theta2.PreambleUtil.insertPreLongs; -import static org.apache.datasketches.theta2.PreambleUtil.insertSeedHash; -import static org.apache.datasketches.theta2.PreambleUtil.insertSerVer; -import static org.apache.datasketches.theta2.PreambleUtil.insertThetaLong; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.ThetaUtil; - -/** - * The parent class for Heap Updatable Theta Sketches. 
- * - * @author Lee Rhodes - */ -abstract class HeapUpdateSketch extends UpdateSketch { - final int lgNomLongs_; - private final long seed_; - private final float p_; - private final ResizeFactor rf_; - - HeapUpdateSketch(final int lgNomLongs, final long seed, final float p, final ResizeFactor rf) { - lgNomLongs_ = Math.max(lgNomLongs, ThetaUtil.MIN_LG_NOM_LONGS); - seed_ = seed; - p_ = p; - rf_ = rf; - } - - //Sketch - - @Override - public int getCurrentBytes() { - final int preLongs = getCurrentPreambleLongs(); - final int dataLongs = getCurrentDataLongs(); - return (preLongs + dataLongs) << 3; - } - - //UpdateSketch - - @Override - public final int getLgNomLongs() { - return lgNomLongs_; - } - - @Override - float getP() { - return p_; - } - - @Override - public ResizeFactor getResizeFactor() { - return rf_; - } - - @Override - long getSeed() { - return seed_; - } - - //restricted methods - - @Override - short getSeedHash() { - return Util.computeSeedHash(getSeed()); - } - - //Used by HeapAlphaSketch and HeapQuickSelectSketch / Theta UpdateSketch - byte[] toByteArray(final int preLongs, final byte familyID) { - if (isDirty()) { rebuild(); } - checkIllegalCurCountAndEmpty(isEmpty(), getRetainedEntries(true)); - final int preBytes = (preLongs << 3) & 0X3F; //24 bytes - final int dataBytes = getCurrentDataLongs() << 3; - final byte[] byteArrOut = new byte[preBytes + dataBytes]; - - final MemorySegment segOut = MemorySegment.ofArray(byteArrOut); - - //preamble first 8 bytes. Note: only compact can be reduced to 8 bytes. 
- final int lgRf = getResizeFactor().lg() & 0x3; - insertPreLongs(segOut, preLongs); //byte 0 low 6 bits - insertLgResizeFactor(segOut, lgRf); //byte 0 high 2 bits - insertSerVer(segOut, SER_VER); //byte 1 - insertFamilyID(segOut, familyID); //byte 2 - insertLgNomLongs(segOut, getLgNomLongs()); //byte 3 - insertLgArrLongs(segOut, getLgArrLongs()); //byte 4 - insertSeedHash(segOut, getSeedHash()); //bytes 6 & 7 - - insertCurCount(segOut, this.getRetainedEntries(true)); - insertP(segOut, getP()); - final long thetaLong = - correctThetaOnCompact(isEmpty(), getRetainedEntries(true), getThetaLong()); - insertThetaLong(segOut, thetaLong); - - //Flags: BigEnd=0, ReadOnly=0, Empty=X, compact=0, ordered=0 - final byte flags = isEmpty() ? (byte) EMPTY_FLAG_MASK : 0; - insertFlags(segOut, flags); - - //Data - final int arrLongs = 1 << getLgArrLongs(); - final long[] cache = getCache(); - //segOut.putLongArray(preBytes, cache, 0, arrLongs); //load byteArrOut - - MemorySegment.copy(cache, 0, segOut, JAVA_LONG_UNALIGNED, preBytes, arrLongs); - return byteArrOut; - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/Intersection.java b/src/main/java/org/apache/datasketches/theta2/Intersection.java deleted file mode 100644 index 91f0e470b..000000000 --- a/src/main/java/org/apache/datasketches/theta2/Intersection.java +++ /dev/null @@ -1,247 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static org.apache.datasketches.common.Util.floorPowerOf2; -import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.SER_VER; -import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount; -import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID; -import static org.apache.datasketches.theta2.PreambleUtil.extractFlags; -import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs; -import static org.apache.datasketches.theta2.PreambleUtil.extractSerVer; - -import java.lang.foreign.MemorySegment; -import java.util.Arrays; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.ThetaUtil; - -/** - * The API for intersection operations - * - * @author Lee Rhodes - */ -public abstract class Intersection extends SetOperation { - - @Override - public Family getFamily() { - return Family.INTERSECTION; - } - - /** - * Gets the result of this operation as an ordered CompactSketch on the Java heap. - * This does not disturb the underlying data structure of this intersection. - * The {@link #intersect(Sketch)} method must have been called at least once, otherwise an - * exception will be thrown. 
This is because a virgin Intersection object represents the - * Universal Set, which has an infinite number of values. - * @return the result of this operation as an ordered CompactSketch on the Java heap - */ - public CompactSketch getResult() { - return getResult(true, null); - } - - /** - * Gets the result of this operation as a CompactSketch in the given dstSeg. - * This does not disturb the underlying data structure of this intersection. - * The {@link #intersect(Sketch)} method must have been called at least once, otherwise an - * exception will be thrown. This is because a virgin Intersection object represents the - * Universal Set, which has an infinite number of values. - * - *

              Note that presenting an intersection with an empty sketch sets the internal - * state of the intersection to empty = true, and current count = 0. This is consistent with - * the mathematical definition of the intersection of any set with the empty set is - * always empty.

              - * - *

              Presenting an intersection with a null argument will throw an exception.

              - * - * @param dstOrdered - * See Destination Ordered - * - * @param dstSeg the destination MemorySegment. - * - * @return the result of this operation as a CompactSketch stored in the given dstSeg, - * which can be either on or off-heap.. - */ - public abstract CompactSketch getResult(boolean dstOrdered, MemorySegment dstSeg); - - /** - * Returns true if there is a valid intersection result available - * @return true if there is a valid intersection result available - */ - public abstract boolean hasResult(); - - /** - * Resets this Intersection for stateful operations only. - * The seed remains intact, otherwise reverts to - * the Universal Set: theta = 1.0, no retained data and empty = false. - */ - public abstract void reset(); - - /** - * Serialize this intersection to a byte array form. - * @return byte array of this intersection - */ - public abstract byte[] toByteArray(); - - /** - * Intersect the given sketch with the internal state. - * This method can be repeatedly called. - * If the given sketch is null the internal state becomes the empty sketch. - * Theta will become the minimum of thetas seen so far. - * @param sketchIn the given sketch - */ - public abstract void intersect(Sketch sketchIn); - - /** - * Perform intersect set operation on the two given sketch arguments and return the result as an - * ordered CompactSketch on the heap. - * @param a The first sketch argument - * @param b The second sketch argument - * @return an ordered CompactSketch on the heap - */ - public CompactSketch intersect(final Sketch a, final Sketch b) { - return intersect(a, b, true, null); - } - - /** - * Perform intersect set operation on the two given sketches and return the result as a - * CompactSketch. - * @param a The first sketch argument - * @param b The second sketch argument - * @param dstOrdered - * See Destination Ordered. - * @param dstSeg the destination MemorySegment. - * @return the result as a CompactSketch. 
- */ - public abstract CompactSketch intersect(Sketch a, Sketch b, boolean dstOrdered, - MemorySegment dstSeg); - - /** - * Factory: Wrap an Intersection target around the given source MemorySegment containing intersection data. - * This method assumes the Default Update Seed. - * If the given source MemorySegment is read-only, the returned object will also be read-only. - * @param srcSeg The source MemorySegment image. - * @return an Intersection that wraps a source MemorySegment that contains an Intersection image - */ - public static Intersection wrap(final MemorySegment srcSeg) { - return wrap(srcSeg, Util.DEFAULT_UPDATE_SEED); - } - - /** - * Factory: Wrap an Intersection target around the given source MemorySegment containing intersection data. - * If the given source MemorySegment is read-only, the returned object will also be read-only. - * @param srcSeg The source MemorySegment image. - * @param expectedSeed See seed - * @return an Intersection that wraps a source MemorySegment that contains an Intersection image - */ - public static Intersection wrap(final MemorySegment srcSeg, final long expectedSeed) { - final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE); - if (serVer != 3) { - throw new SketchesArgumentException("SerVer must be 3: " + serVer); - } - return IntersectionImpl.wrapInstance(srcSeg, expectedSeed, srcSeg.isReadOnly() ); - } - - // Restricted - - /** - * Returns the maximum lgArrLongs given the capacity of the MemorySegment. 
- * @param dstSeg the given MemorySegment - * @return the maximum lgArrLongs given the capacity of the MemorySegment - */ - protected static int getMaxLgArrLongs(final MemorySegment dstSeg) { - final int preBytes = CONST_PREAMBLE_LONGS << 3; - final long cap = dstSeg.byteSize(); - return Integer.numberOfTrailingZeros(floorPowerOf2((int)(cap - preBytes)) >>> 3); - } - - protected static void checkMinSizeMemorySegment(final MemorySegment seg) { - final int minBytes = (CONST_PREAMBLE_LONGS << 3) + (8 << ThetaUtil.MIN_LG_ARR_LONGS);//280 - final long cap = seg.byteSize(); - if (cap < minBytes) { - throw new SketchesArgumentException( - "MemorySegment must be at least " + minBytes + " bytes. Actual capacity: " + cap); - } - } - - /** - * Compact first 2^lgArrLongs of given array - * @param srcCache anything - * @param lgArrLongs The correct - * lgArrLongs. - * @param curCount must be correct - * @param thetaLong The correct - * thetaLong. - * @param dstOrdered true if output array must be sorted - * @return the compacted array - */ //Only used in IntersectionImpl & Test - static final long[] compactCachePart(final long[] srcCache, final int lgArrLongs, - final int curCount, final long thetaLong, final boolean dstOrdered) { - if (curCount == 0) { - return new long[0]; - } - final long[] cacheOut = new long[curCount]; - final int len = 1 << lgArrLongs; - int j = 0; - for (int i = 0; i < len; i++) { - final long v = srcCache[i]; - if (v <= 0L || v >= thetaLong ) { continue; } - cacheOut[j++] = v; - } - assert curCount == j; - if (dstOrdered) { - Arrays.sort(cacheOut); - } - return cacheOut; - } - - protected static void segChecks(final MemorySegment srcSeg) { - //Get Preamble - //Note: Intersection does not use lgNomLongs (or k), per se. 
- //seedHash loaded and checked in private constructor - final int preLongs = extractPreLongs(srcSeg); - final int serVer = extractSerVer(srcSeg); - final int famID = extractFamilyID(srcSeg); - final boolean empty = (extractFlags(srcSeg) & EMPTY_FLAG_MASK) > 0; - final int curCount = extractCurCount(srcSeg); - //Checks - if (preLongs != CONST_PREAMBLE_LONGS) { - throw new SketchesArgumentException( - "MemorySegment PreambleLongs must equal " + CONST_PREAMBLE_LONGS + ": " + preLongs); - } - if (serVer != SER_VER) { - throw new SketchesArgumentException("Serialization Version must equal " + SER_VER); - } - Family.INTERSECTION.checkFamilyID(famID); - if (empty) { - if (curCount != 0) { - throw new SketchesArgumentException( - "srcSeg empty state inconsistent with curCount: " + empty + "," + curCount); - } - //empty = true AND curCount_ = 0: OK - } //else empty = false, curCount could be anything - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java b/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java deleted file mode 100644 index be1c94707..000000000 --- a/src/main/java/org/apache/datasketches/theta2/IntersectionImpl.java +++ /dev/null @@ -1,569 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import static java.lang.Math.min; -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; -import static org.apache.datasketches.common.Util.clearBits; -import static org.apache.datasketches.common.Util.setBits; -import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.FLAGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.LG_ARR_LONGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.LG_NOM_LONGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.P_FLOAT; -import static org.apache.datasketches.theta2.PreambleUtil.RETAINED_ENTRIES_INT; -import static org.apache.datasketches.theta2.PreambleUtil.SEED_HASH_SHORT; -import static org.apache.datasketches.theta2.PreambleUtil.SER_VER; -import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.THETA_LONG; -import static org.apache.datasketches.theta2.PreambleUtil.clearEmpty; -import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount; -import static org.apache.datasketches.theta2.PreambleUtil.extractFlags; -import static org.apache.datasketches.theta2.PreambleUtil.extractLgArrLongs; -import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong; -import static org.apache.datasketches.theta2.PreambleUtil.insertCurCount; -import static 
org.apache.datasketches.theta2.PreambleUtil.insertFamilyID; -import static org.apache.datasketches.theta2.PreambleUtil.insertLgArrLongs; -import static org.apache.datasketches.theta2.PreambleUtil.insertP; -import static org.apache.datasketches.theta2.PreambleUtil.insertPreLongs; -import static org.apache.datasketches.theta2.PreambleUtil.insertSerVer; -import static org.apache.datasketches.theta2.PreambleUtil.insertThetaLong; -import static org.apache.datasketches.theta2.PreambleUtil.setEmpty; -import static org.apache.datasketches.thetacommon2.HashOperations.continueCondition; -import static org.apache.datasketches.thetacommon2.HashOperations.hashInsertOnly; -import static org.apache.datasketches.thetacommon2.HashOperations.hashInsertOnlyMemorySegment; -import static org.apache.datasketches.thetacommon2.HashOperations.hashSearch; -import static org.apache.datasketches.thetacommon2.HashOperations.minLgHashTableSize; - -import java.lang.foreign.MemorySegment; -import java.util.Arrays; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.SketchesReadOnlyException; -import org.apache.datasketches.common.SketchesStateException; -import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.ThetaUtil; - -/** - * Intersection operation for Theta Sketches. - * - *

              This implementation uses data either on-heap or off-heap in a given MemorySegment - * that is owned and managed by the caller. - * The off-heap MemorySegment, which if managed properly, will greatly reduce the need for - * the JVM to perform garbage collection.

              - * - * @author Lee Rhodes - * @author Kevin Lang - */ -final class IntersectionImpl extends Intersection { - protected final short seedHash_; - protected final boolean readOnly_; //True if this sketch is to be treated as read only - protected final MemorySegment wseg_; - protected final int maxLgArrLongs_; //only used with MemorySegment, not serialized - - //Note: Intersection does not use lgNomLongs or k, per se. - protected int lgArrLongs_; //current size of hash table - protected int curCount_; //curCount of HT, if < 0 means Universal Set (US) is true - protected long thetaLong_; - protected boolean empty_; //A virgin intersection represents the Universal Set, so empty is FALSE! - protected long[] hashTable_; //retained entries of the intersection, on-heap only. - - /** - * Constructor: Sets the class finals and computes, sets and checks the seedHash. - * @param wseg Can be either a Source(e.g. wrap) or Destination (new offHeap) MemorySegment. - * @param seed Used to validate incoming sketch arguments. - * @param dstSegFlag The given MemorySegment is a Destination (new offHeap) MemorySegment. - * @param readOnly True if MemorySegment is to be treated as read only. - */ - protected IntersectionImpl(final MemorySegment wseg, final long seed, final boolean dstSegFlag, - final boolean readOnly) { - readOnly_ = readOnly; - if (wseg != null) { - wseg_ = wseg; - if (dstSegFlag) { //DstSeg: compute & store seedHash, no seedHash checking - checkMinSizeMemorySegment(wseg); - maxLgArrLongs_ = !readOnly ? 
getMaxLgArrLongs(wseg) : 0; //Only Off Heap - seedHash_ = Util.computeSeedHash(seed); - wseg_.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, seedHash_); - } else { //SrcSeg:gets and stores the seedHash, checks seg_seedHash against the seed - seedHash_ = wseg_.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT); - Util.checkSeedHashes(seedHash_, Util.computeSeedHash(seed)); //check for seed hash conflict - maxLgArrLongs_ = 0; - } - } else { //compute & store seedHash - wseg_ = null; - maxLgArrLongs_ = 0; - seedHash_ = Util.computeSeedHash(seed); - } - } - - /** - * Factory: Construct a new Intersection target on the java heap. - * Called by SetOperationBuilder, test. - * - * @param seed See Seed - * @return a new IntersectionImpl on the Java heap - */ - static IntersectionImpl initNewHeapInstance(final long seed) { - final boolean dstSegFlag = false; - final boolean readOnly = false; - final IntersectionImpl impl = new IntersectionImpl(null, seed, dstSegFlag, readOnly); - impl.hardReset(); - return impl; - } - - /** - * Factory: Construct a new Intersection target direct to the given destination MemorySegment. - * Called by SetOperationBuilder, test. 
- * - * @param seed See Seed - * @param dstSeg destination MemorySegment - * @return a new IntersectionImpl that may be off-heap - */ - static IntersectionImpl initNewDirectInstance(final long seed, final MemorySegment dstSeg) { - //Load Preamble - //Pre0 - dstSeg.asSlice(0, CONST_PREAMBLE_LONGS << 3).fill((byte)0); - insertPreLongs(dstSeg, CONST_PREAMBLE_LONGS); //RF not used = 0 - insertSerVer(dstSeg, SER_VER); - insertFamilyID(dstSeg, Family.INTERSECTION.getID()); - //lgNomLongs not used by Intersection - //lgArrLongs set by hardReset - //flags are already 0: bigEndian = readOnly = compact = ordered = empty = false; - //seedHash loaded and checked in IntersectionImpl constructor - //Pre1 - //CurCount set by hardReset - insertP(dstSeg, (float) 1.0); //not used by intersection - //Pre2 - //thetaLong set by hardReset - - //Initialize - final boolean dstSegFlag = true; - final boolean readOnly = false; - final IntersectionImpl impl = new IntersectionImpl(dstSeg, seed, dstSegFlag, readOnly); - impl.hardReset(); - return impl; - } - - /** - * Factory: Heapify an intersection target from a MemorySegment image containing data. - * @param srcSeg The source MemorySegment object. 
- * @param seed See seed - * @return a IntersectionImpl instance on the Java heap - */ - static IntersectionImpl heapifyInstance(final MemorySegment srcSeg, final long seed) { - final boolean dstSegFlag = false; - final boolean readOnly = false; - final IntersectionImpl impl = new IntersectionImpl(null, seed, dstSegFlag, readOnly); - segChecks(srcSeg); - - //Initialize - impl.lgArrLongs_ = extractLgArrLongs(srcSeg); - impl.curCount_ = extractCurCount(srcSeg); - impl.thetaLong_ = extractThetaLong(srcSeg); - impl.empty_ = (extractFlags(srcSeg) & EMPTY_FLAG_MASK) > 0; - if (!impl.empty_) { - if (impl.curCount_ > 0) { - impl.hashTable_ = new long[1 << impl.lgArrLongs_]; - MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, impl.hashTable_, 0, 1 << impl.lgArrLongs_); - } - } - return impl; - } - - /** - * Factory: Wrap an Intersection target around the given source MemorySegment containing intersection data. - * If the given source MemorySegment is read-only, the returned object will also be read-only. - * @param srcSeg The source MemorySegment image. 
- * @param seed See seed - * @param readOnly True if MemorySegment is to be treated as read only - * @return a IntersectionImpl that wraps a source MemorySegment that contains an Intersection image - */ - static IntersectionImpl wrapInstance( - final MemorySegment srcSeg, - final long seed, - final boolean readOnly) { - final boolean dstSegFlag = false; - final IntersectionImpl impl = new IntersectionImpl(srcSeg, seed, dstSegFlag, readOnly); - segChecks(srcSeg); - impl.lgArrLongs_ = extractLgArrLongs(srcSeg); - impl.curCount_ = extractCurCount(srcSeg); - impl.thetaLong_ = extractThetaLong(srcSeg); - impl.empty_ = (extractFlags(srcSeg) & EMPTY_FLAG_MASK) > 0; - return impl; - } - - @Override - public CompactSketch intersect(final Sketch a, final Sketch b, final boolean dstOrdered, final MemorySegment dstSeg) { - if (wseg_ != null && readOnly_) { throw new SketchesReadOnlyException(); } - hardReset(); - intersect(a); - intersect(b); - final CompactSketch csk = getResult(dstOrdered, dstSeg); - hardReset(); - return csk; - } - - @Override - public void intersect(final Sketch sketchIn) { - if (sketchIn == null) { - throw new SketchesArgumentException("Intersection argument must not be null."); - } - if (wseg_ != null && readOnly_) { throw new SketchesReadOnlyException(); } - if (empty_ || sketchIn.isEmpty()) { //empty rule - //Because of the def of null above and the Empty Rule (which is OR), empty_ must be true. - //Whatever the current internal state, we make our local empty. - resetToEmpty(); - return; - } - Util.checkSeedHashes(seedHash_, sketchIn.getSeedHash()); - //Set minTheta - thetaLong_ = min(thetaLong_, sketchIn.getThetaLong()); //Theta rule - empty_ = false; - if (wseg_ != null) { - insertThetaLong(wseg_, thetaLong_); - clearEmpty(wseg_); //false - } - - // The truth table for the following state machine. MinTheta is set above. 
- // Incoming sketch is not null and not empty, but could have 0 count and Theta < 1.0 - // Case curCount sketchInEntries | Actions - // 1 <0 0 | First intersect, set curCount = 0; HT = null; minTh; exit - // 2 0 0 | set curCount = 0; HT = null; minTh; exit - // 3 >0 0 | set curCount = 0; HT = null; minTh; exit - // 4 | Not used - // 5 <0 >0 | First intersect, clone SketchIn; exit - // 6 0 >0 | set curCount = 0; HT = null; minTh; exit - // 7 >0 >0 | Perform full intersect - final int sketchInEntries = sketchIn.getRetainedEntries(true); - - //states 1,2,3,6 - if (curCount_ == 0 || sketchInEntries == 0) { - curCount_ = 0; - if (wseg_ != null) { insertCurCount(wseg_, 0); } - hashTable_ = null; //No need for a HT. Don't bother clearing seg if valid - } //end of states 1,2,3,6 - - // state 5 - else if (curCount_ < 0 && sketchInEntries > 0) { - curCount_ = sketchIn.getRetainedEntries(true); - final int requiredLgArrLongs = minLgHashTableSize(curCount_, ThetaUtil.REBUILD_THRESHOLD); - final int priorLgArrLongs = lgArrLongs_; //prior only used in error message - lgArrLongs_ = requiredLgArrLongs; - - if (wseg_ != null) { //Off heap, check if current dstSeg is large enough - insertCurCount(wseg_, curCount_); - insertLgArrLongs(wseg_, lgArrLongs_); - if (requiredLgArrLongs <= maxLgArrLongs_) { - wseg_.asSlice(CONST_PREAMBLE_LONGS << 3, 8 << lgArrLongs_).fill((byte)0); - } - else { //not enough space in dstSeg - final int requiredBytes = (8 << requiredLgArrLongs) + 24; - final int givenBytes = (8 << priorLgArrLongs) + 24; - throw new SketchesArgumentException( - "Insufficient internal MemorySegment space: " + requiredBytes + " > " + givenBytes); - } - } - else { //On the heap, allocate a HT - hashTable_ = new long[1 << lgArrLongs_]; - } - moveDataToTgt(sketchIn); - } //end of state 5 - - //state 7 - else if (curCount_ > 0 && sketchInEntries > 0) { - //Sets resulting hashTable, curCount and adjusts lgArrLongs - performIntersect(sketchIn); - } //end of state 7 - - else { - 
assert false : "Should not happen"; - } - } - - @Override - MemorySegment getMemorySegment() { return wseg_; } - - @Override - public CompactSketch getResult(final boolean dstOrdered, final MemorySegment dstSeg) { - if (curCount_ < 0) { - throw new SketchesStateException( - "Calling getResult() with no intervening intersections would represent the infinite set, " - + "which is not a legal result."); - } - long[] compactCache; - final boolean srcOrdered, srcCompact; - if (curCount_ == 0) { - compactCache = new long[0]; - srcCompact = true; - srcOrdered = false; //hashTable, even though empty - return CompactOperations.componentsToCompact( - thetaLong_, curCount_, seedHash_, empty_, srcCompact, srcOrdered, dstOrdered, - dstSeg, compactCache); - } - //else curCount > 0 - final long[] hashTable; - if (wseg_ != null) { - final int htLen = 1 << lgArrLongs_; - hashTable = new long[htLen]; - MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, hashTable, 0, htLen); - } else { - hashTable = hashTable_; - } - compactCache = compactCachePart(hashTable, lgArrLongs_, curCount_, thetaLong_, dstOrdered); - srcCompact = true; - srcOrdered = dstOrdered; - return CompactOperations.componentsToCompact( - thetaLong_, curCount_, seedHash_, empty_, srcCompact, srcOrdered, dstOrdered, - dstSeg, compactCache); - } - - @Override - public boolean hasMemorySegment() { - return wseg_ != null && wseg_.scope().isAlive(); - } - - @Override - public boolean hasResult() { - return hasMemorySegment() ? 
wseg_.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT) >= 0 : curCount_ >= 0; - } - - @Override - public boolean isDirect() { - return hasMemorySegment() && wseg_.isNative(); - } - - @Override - public boolean isSameResource(final MemorySegment that) { - return hasMemorySegment() && Util.isSameResource(wseg_, that); - } - - @Override - public void reset() { - hardReset(); - } - - @Override - public byte[] toByteArray() { - final int preBytes = CONST_PREAMBLE_LONGS << 3; - final int dataBytes = curCount_ > 0 ? 8 << lgArrLongs_ : 0; - final byte[] byteArrOut = new byte[preBytes + dataBytes]; - if (wseg_ != null) { - MemorySegment.copy(wseg_, JAVA_BYTE, 0, byteArrOut, 0, preBytes + dataBytes); - } - else { - final MemorySegment segOut = MemorySegment.ofArray(byteArrOut); - - //preamble - segOut.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) CONST_PREAMBLE_LONGS); //RF not used = 0 - segOut.set(JAVA_BYTE, SER_VER_BYTE, (byte) SER_VER); - segOut.set(JAVA_BYTE, FAMILY_BYTE, (byte) Family.INTERSECTION.getID()); - segOut.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte) 0); //not used - segOut.set(JAVA_BYTE, LG_ARR_LONGS_BYTE, (byte) lgArrLongs_); - if (empty_) { setBits(segOut, FLAGS_BYTE, (byte) EMPTY_FLAG_MASK); } - else { clearBits(segOut, FLAGS_BYTE, (byte) EMPTY_FLAG_MASK); } - segOut.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, seedHash_); - segOut.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, curCount_); - segOut.set(JAVA_FLOAT_UNALIGNED, P_FLOAT, (float) 1.0); - segOut.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); - - //data - if (curCount_ > 0) { - MemorySegment.copy(hashTable_, 0, segOut, JAVA_LONG_UNALIGNED, preBytes, 1 << lgArrLongs_); - } - } - return byteArrOut; - } - - //restricted - - /** - * Gets the number of retained entries from this operation. If negative, it is interpreted - * as the infinite Universal Set. 
- */ - @Override - int getRetainedEntries() { - return curCount_; - } - - @Override - boolean isEmpty() { - return empty_; - } - - @Override - long[] getCache() { - if (wseg_ == null) { - return hashTable_ != null ? hashTable_ : new long[0]; - } - //offHeap - final int arrLongs = 1 << lgArrLongs_; - final long[] outArr = new long[arrLongs]; - MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, outArr, 0, arrLongs); - return outArr; - } - - @Override - short getSeedHash() { - return seedHash_; - } - - @Override - long getThetaLong() { - return thetaLong_; - } - - private void performIntersect(final Sketch sketchIn) { - // curCount and input data are nonzero, match against HT - assert curCount_ > 0 && !empty_; - final long[] hashTable; - if (wseg_ != null) { - final int htLen = 1 << lgArrLongs_; - hashTable = new long[htLen]; - MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, hashTable, 0, htLen); - } else { - hashTable = hashTable_; - } - //allocate space for matching - final long[] matchSet = new long[ min(curCount_, sketchIn.getRetainedEntries(true)) ]; - - int matchSetCount = 0; - final boolean isOrdered = sketchIn.isOrdered(); - final HashIterator it = sketchIn.iterator(); - while (it.next()) { - final long hashIn = it.get(); - if (hashIn < thetaLong_) { - final int foundIdx = hashSearch(hashTable, lgArrLongs_, hashIn); - if (foundIdx != -1) { - matchSet[matchSetCount++] = hashIn; - } - } else { - if (isOrdered) { break; } // early stop - } - } - //reduce effective array size to minimum - curCount_ = matchSetCount; - lgArrLongs_ = minLgHashTableSize(matchSetCount, ThetaUtil.REBUILD_THRESHOLD); - if (wseg_ != null) { - insertCurCount(wseg_, matchSetCount); - insertLgArrLongs(wseg_, lgArrLongs_); - wseg_.asSlice(CONST_PREAMBLE_LONGS << 3, 8 << lgArrLongs_).fill((byte)0); //clear for rebuild - } else { - Arrays.fill(hashTable_, 0, 1 << lgArrLongs_, 0L); //clear for rebuild - } - - if (curCount_ > 0) { - 
moveDataToTgt(matchSet, matchSetCount); //move matchSet to target - } else { - if (thetaLong_ == Long.MAX_VALUE) { - empty_ = true; - } - } - } - - private void moveDataToTgt(final long[] arr, final int count) { - final int arrLongsIn = arr.length; - int tmpCnt = 0; - if (wseg_ != null) { //Off Heap puts directly into mem - final int preBytes = CONST_PREAMBLE_LONGS << 3; - final int lgArrLongs = lgArrLongs_; - final long thetaLong = thetaLong_; - for (int i = 0; i < arrLongsIn; i++ ) { - final long hashIn = arr[i]; - if (continueCondition(thetaLong, hashIn)) { continue; } - hashInsertOnlyMemorySegment(wseg_, lgArrLongs, hashIn, preBytes); - tmpCnt++; - } - } else { //On Heap. Assumes HT exists and is large enough - for (int i = 0; i < arrLongsIn; i++ ) { - final long hashIn = arr[i]; - if (continueCondition(thetaLong_, hashIn)) { continue; } - hashInsertOnly(hashTable_, lgArrLongs_, hashIn); - tmpCnt++; - } - } - assert tmpCnt == count : "Intersection Count Check: got: " + tmpCnt + ", expected: " + count; - } - - private void moveDataToTgt(final Sketch sketch) { - final int count = sketch.getRetainedEntries(); - int tmpCnt = 0; - if (wseg_ != null) { //Off Heap puts directly into mem - final int preBytes = CONST_PREAMBLE_LONGS << 3; - final int lgArrLongs = lgArrLongs_; - final long thetaLong = thetaLong_; - final HashIterator it = sketch.iterator(); - while (it.next()) { - final long hash = it.get(); - if (continueCondition(thetaLong, hash)) { continue; } - hashInsertOnlyMemorySegment(wseg_, lgArrLongs, hash, preBytes); - tmpCnt++; - } - } else { //On Heap. 
Assumes HT exists and is large enough - final HashIterator it = sketch.iterator(); - while (it.next()) { - final long hash = it.get(); - if (continueCondition(thetaLong_, hash)) { continue; } - hashInsertOnly(hashTable_, lgArrLongs_, hash); - tmpCnt++; - } - } - assert tmpCnt == count : "Intersection Count Check: got: " + tmpCnt + ", expected: " + count; - } - - private void hardReset() { - resetCommon(); - if (wseg_ != null) { - insertCurCount(wseg_, -1); //Universal Set - clearEmpty(wseg_); //false - } - curCount_ = -1; //Universal Set - empty_ = false; - } - - private void resetToEmpty() { - resetCommon(); - if (wseg_ != null) { - insertCurCount(wseg_, 0); - setEmpty(wseg_); //true - } - curCount_ = 0; - empty_ = true; - } - - private void resetCommon() { - if (wseg_ != null) { - if (readOnly_) { throw new SketchesReadOnlyException(); } - wseg_.asSlice(CONST_PREAMBLE_LONGS << 3, 8 << ThetaUtil.MIN_LG_ARR_LONGS).fill((byte)0); - insertLgArrLongs(wseg_, ThetaUtil.MIN_LG_ARR_LONGS); - insertThetaLong(wseg_, Long.MAX_VALUE); - } - lgArrLongs_ = ThetaUtil.MIN_LG_ARR_LONGS; - thetaLong_ = Long.MAX_VALUE; - hashTable_ = null; - } -} diff --git a/src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java b/src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java deleted file mode 100644 index 2ecdf18a0..000000000 --- a/src/main/java/org/apache/datasketches/theta2/JaccardSimilarity.java +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import static java.lang.Math.max; -import static java.lang.Math.min; -import static org.apache.datasketches.common.Util.ceilingPowerOf2; -import static org.apache.datasketches.thetacommon2.BoundsOnRatiosInThetaSketchedSets.getEstimateOfBoverA; -import static org.apache.datasketches.thetacommon2.BoundsOnRatiosInThetaSketchedSets.getLowerBoundForBoverA; -import static org.apache.datasketches.thetacommon2.BoundsOnRatiosInThetaSketchedSets.getUpperBoundForBoverA; - -import org.apache.datasketches.thetacommon2.ThetaUtil; - -/** - * Jaccard similarity of two Theta Sketches. - * - * @author Lee Rhodes - */ -public final class JaccardSimilarity { - private static final double[] ZEROS = {0.0, 0.0, 0.0}; // LB, Estimate, UB - private static final double[] ONES = {1.0, 1.0, 1.0}; - - private JaccardSimilarity() { } - - /** - * Computes the Jaccard similarity index with upper and lower bounds. The Jaccard similarity index - * J(A,B) = (A ^ B)/(A U B) is used to measure how similar the two sketches are to each - * other. If J = 1.0, the sketches are considered equal. If J = 0, the two sketches are - * distinct from each other. A Jaccard of .95 means the overlap between the two - * populations is 95% of the union of the two populations. - * - *

              Note: For very large pairs of sketches, where the configured nominal entries of the sketches - * are 2^25 or 2^26, this method may produce unpredictable results. - * - * @param sketchA given sketch A - * @param sketchB given sketch B - * @return a double array {LowerBound, Estimate, UpperBound} of the Jaccard index. - * The Upper and Lower bounds are for a confidence interval of 95.4% or +/- 2 standard deviations. - */ - public static double[] jaccard(final Sketch sketchA, final Sketch sketchB) { - //Corner case checks - if (sketchA == null || sketchB == null) { return ZEROS.clone(); } - if (sketchA == sketchB) { return ONES.clone(); } - if (sketchA.isEmpty() && sketchB.isEmpty()) { return ONES.clone(); } - if (sketchA.isEmpty() || sketchB.isEmpty()) { return ZEROS.clone(); } - - final int countA = sketchA.getRetainedEntries(true); - final int countB = sketchB.getRetainedEntries(true); - - //Create the Union - final int minK = 1 << ThetaUtil.MIN_LG_NOM_LONGS; - final int maxK = 1 << ThetaUtil.MAX_LG_NOM_LONGS; - final int newK = max(min(ceilingPowerOf2(countA + countB), maxK), minK); - final Union union = - SetOperation.builder().setNominalEntries(newK).buildUnion(); - union.union(sketchA); - union.union(sketchB); - final Sketch unionAB = union.getResult(false, null); - final long thetaLongUAB = unionAB.getThetaLong(); - final long thetaLongA = sketchA.getThetaLong(); - final long thetaLongB = sketchB.getThetaLong(); - final int countUAB = unionAB.getRetainedEntries(true); - - //Check for identical data - if (countUAB == countA && countUAB == countB - && thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) { - return ONES.clone(); - } - - //Create the Intersection - final Intersection inter = SetOperation.builder().buildIntersection(); - inter.intersect(sketchA); - inter.intersect(sketchB); - inter.intersect(unionAB); //ensures that intersection is a subset of the union - final Sketch interABU = inter.getResult(false, null); - - final double lb = 
getLowerBoundForBoverA(unionAB, interABU); - final double est = getEstimateOfBoverA(unionAB, interABU); - final double ub = getUpperBoundForBoverA(unionAB, interABU); - return new double[] {lb, est, ub}; - } - - /** - * Returns true if the two given sketches have exactly the same hash values and the same - * theta values. Thus, they are equivalent. - * @param sketchA the given sketch A - * @param sketchB the given sketch B - * @return true if the two given sketches have exactly the same hash values and the same - * theta values. - */ - public static boolean exactlyEqual(final Sketch sketchA, final Sketch sketchB) { - //Corner case checks - if (sketchA == null || sketchB == null) { return false; } - if (sketchA == sketchB) { return true; } - if (sketchA.isEmpty() && sketchB.isEmpty()) { return true; } - if (sketchA.isEmpty() || sketchB.isEmpty()) { return false; } - - final int countA = sketchA.getRetainedEntries(true); - final int countB = sketchB.getRetainedEntries(true); - - //Create the Union - final Union union = - SetOperation.builder().setNominalEntries(ceilingPowerOf2(countA + countB)).buildUnion(); - union.union(sketchA); - union.union(sketchB); - final Sketch unionAB = union.getResult(); - final long thetaLongUAB = unionAB.getThetaLong(); - final long thetaLongA = sketchA.getThetaLong(); - final long thetaLongB = sketchB.getThetaLong(); - final int countUAB = unionAB.getRetainedEntries(true); - - //Check for identical counts and thetas - if (countUAB == countA && countUAB == countB - && thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) { - return true; - } - return false; - } - - /** - * Tests similarity of a measured Sketch against an expected Sketch. - * Computes the lower bound of the Jaccard index JLB of the measured and - * expected sketches. - * if JLB ≥ threshold, then the sketches are considered to be - * similar with a confidence of 97.7%. 
- * - * @param measured the sketch to be tested - * @param expected the reference sketch that is considered to be correct. - * @param threshold a real value between zero and one. - * @return if true, the similarity of the two sketches is greater than the given threshold - * with at least 97.7% confidence. - */ - public static boolean similarityTest(final Sketch measured, final Sketch expected, - final double threshold) { - //index 0: the lower bound - //index 1: the mean estimate - //index 2: the upper bound - final double jRatioLB = jaccard(measured, expected)[0]; //choosing the lower bound - return jRatioLB >= threshold; - } - - /** - * Tests dissimilarity of a measured Sketch against an expected Sketch. - * Computes the upper bound of the Jaccard index JUB of the measured and - * expected sketches. - * if JUB ≤ threshold, then the sketches are considered to be - * dissimilar with a confidence of 97.7%. - * - * @param measured the sketch to be tested - * @param expected the reference sketch that is considered to be correct. - * @param threshold a real value between zero and one. - * @return if true, the dissimilarity of the two sketches is greater than the given threshold - * with at least 97.7% confidence. - */ - public static boolean dissimilarityTest(final Sketch measured, final Sketch expected, - final double threshold) { - //index 0: the lower bound - //index 1: the mean estimate - //index 2: the upper bound - final double jRatioUB = jaccard(measured, expected)[2]; //choosing the upper bound - return jRatioUB <= threshold; - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java b/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java deleted file mode 100644 index ab5e588bc..000000000 --- a/src/main/java/org/apache/datasketches/theta2/PreambleUtil.java +++ /dev/null @@ -1,532 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; -import static org.apache.datasketches.common.Util.LS; -import static org.apache.datasketches.common.Util.zeroPad; - -import java.lang.foreign.MemorySegment; -import java.nio.ByteOrder; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; - -//@formatter:off - -/** - * This class defines the preamble data structure and provides basic utilities for some of the key - * fields. - * - *

              The intent of the design of this class was to isolate the detailed knowledge of the bit and - * byte layout of the serialized form of the sketches derived from the Sketch class into one place. - * This allows the possibility of the introduction of different serialization - * schemes with minimal impact on the rest of the library.

              - * - *

              - * MAP: Low significance bytes of this long data structure are on the right. However, the - * multi-byte integers (int and long) are stored in native byte order. The - * byte values are treated as unsigned.

              - * - *

              An empty CompactSketch only requires 8 bytes. - * Flags: notSI, Ordered*, Compact, Empty*, ReadOnly, LE. - * (*) Earlier versions did not set these.

              - * - *
              - * Long || Start Byte Adr:
              - * Adr:
              - *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
              - *  0   ||    Seed Hash    | Flags  |        |        | FamID  | SerVer |     PreLongs = 1   |
              - * 
              - * - *

              A SingleItemSketch (extends CompactSketch) requires an 8 byte preamble plus a single - * hash item of 8 bytes. Flags: SingleItem*, Ordered, Compact, notEmpty, ReadOnly, LE. - * (*) Earlier versions did not set these.

              - * - *
              - * Long || Start Byte Adr:
              - * Adr:
              - *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
              - *  0   ||    Seed Hash    | Flags  |        |        | FamID  | SerVer |     PreLongs = 1   |
              - *
              - *      ||   15   |   14   |   13   |   12   |   11   |   10   |    9   |     8              |
              - *  1   ||---------------------------Single long hash----------------------------------------|
              - * 
              - * - *

              An exact (non-estimating) CompactSketch requires 16 bytes of preamble plus a compact array of - * longs.

              - * - *
              - * Long || Start Byte Adr:
              - * Adr:
              - *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
              - *  0   ||    Seed Hash    | Flags  |        |        | FamID  | SerVer |     PreLongs = 2   |
              - *
              - *      ||   15   |   14   |   13   |   12   |   11   |   10   |    9   |     8              |
              - *  1   ||-----------------p-----------------|----------Retained Entries Count---------------|
              - *
              - *      ||   23   |   22   |   21    |  20   |   19   |   18   |   17   |    16              |
              - *  2   ||----------------------Start of Compact Long Array----------------------------------|
              - * 
              - * - *

              An estimating CompactSketch requires 24 bytes of preamble plus a compact array of longs.

              - * - *
              - * Long || Start Byte Adr:
              - * Adr:
              - *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
              - *  0   ||    Seed Hash    | Flags  |        |        | FamID  | SerVer |     PreLongs = 3   |
              - *
              - *      ||   15   |   14   |   13   |   12   |   11   |   10   |    9   |     8              |
              - *  1   ||-----------------p-----------------|----------Retained Entries Count---------------|
              - *
              - *      ||   23   |   22   |   21    |  20   |   19   |   18   |   17   |    16              |
              - *  2   ||------------------------------THETA_LONG-------------------------------------------|
              - *
              - *      ||   31   |   30   |   29   |   28   |   27   |   26   |   25   |    24              |
              - *  3   ||----------------------Start of Compact Long Array----------------------------------|
              - *  
              - * - *

              The UpdateSketch and AlphaSketch require 24 bytes of preamble followed by a non-compact - * array of longs representing a hash table.

              - * - *

              The following table applies to both the Theta UpdateSketch and the Alpha Sketch

              - *
              - * Long || Start Byte Adr:
              - * Adr:
              - *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
              - *  0   ||    Seed Hash    | Flags  |  LgArr |  lgNom | FamID  | SerVer | RF, PreLongs = 3   |
              - *
              - *      ||   15   |   14   |   13   |   12   |   11   |   10   |    9   |     8              |
              - *  1   ||-----------------p-----------------|----------Retained Entries Count---------------|
              - *
              - *      ||   23   |   22   |   21    |  20   |   19   |   18   |   17   |    16              |
              - *  2   ||------------------------------THETA_LONG-------------------------------------------|
              - *
              - *      ||   31   |   30   |   29   |   28   |   27   |   26   |   25   |    24              |
              - *  3   ||----------------------Start of Hash Table of longs---------------------------------|
              - *  
              - * - *

              Union objects require 32 bytes of preamble plus a non-compact array of longs representing a - * hash table.

              - * - *
              - * Long || Start Byte Adr:
              - * Adr:
              - *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
              - *  0   ||    Seed Hash    | Flags  |  LgArr |  lgNom | FamID  | SerVer | RF, PreLongs = 4   |
              - *
              - *      ||   15   |   14   |   13   |   12   |   11   |   10   |    9   |     8              |
              - *  1   ||-----------------p-----------------|----------Retained Entries Count---------------|
              - *
              - *      ||   23   |   22   |   21    |  20   |   19   |   18   |   17   |    16              |
              - *  2   ||------------------------------THETA_LONG-------------------------------------------|
              - *
              - *      ||   31   |   30   |   29   |   28   |   27   |   26   |   25   |    24              |
              - *  3   ||---------------------------UNION THETA LONG----------------------------------------|
              - *
              - *      ||   39   |   38   |   37   |   36   |   35   |   34   |   33   |    32              |
              - *  4   ||----------------------Start of Hash Table of longs---------------------------------|
              - *
              - *  
              - * - * @author Lee Rhodes - */ -final class PreambleUtil { - - private PreambleUtil() {} - - // ###### DO NOT MESS WITH THIS FROM HERE ... - // Preamble byte Addresses - static final int PREAMBLE_LONGS_BYTE = 0; //lower 6 bits in byte. - static final int LG_RESIZE_FACTOR_BIT = 6; //upper 2 bits in byte. Not used by compact, direct - static final int SER_VER_BYTE = 1; - static final int FAMILY_BYTE = 2; //SerVer1,2 was SKETCH_TYPE_BYTE - static final int LG_NOM_LONGS_BYTE = 3; //not used by compact - static final int LG_ARR_LONGS_BYTE = 4; //not used by compact - static final int FLAGS_BYTE = 5; - static final int SEED_HASH_SHORT = 6; //byte 6,7 - static final int RETAINED_ENTRIES_INT = 8; //8 byte aligned - static final int P_FLOAT = 12; //4 byte aligned, not used by compact - static final int THETA_LONG = 16; //8-byte aligned - static final int UNION_THETA_LONG = 24; //8-byte aligned, only used by Union - - // flag bit masks - static final int BIG_ENDIAN_FLAG_MASK = 1; //SerVer 1, 2, 3 - static final int READ_ONLY_FLAG_MASK = 2; //Set but not read. Reserved. SerVer 1, 2, 3 - static final int EMPTY_FLAG_MASK = 4; //SerVer 2, 3 - static final int COMPACT_FLAG_MASK = 8; //SerVer 2 was NO_REBUILD_FLAG_MASK, 3 - static final int ORDERED_FLAG_MASK = 16;//SerVer 2 was UNORDERED_FLAG_MASK, 3 - static final int SINGLEITEM_FLAG_MASK = 32;//SerVer 3 - //The last 2 bits of the flags byte are reserved and assumed to be zero, for now. 
- - //Backward compatibility: SerVer1 preamble always 3 longs, SerVer2 preamble: 1, 2, 3 longs - // SKETCH_TYPE_BYTE 2 //SerVer1, SerVer2 - // V1, V2 types: Alpha = 1, QuickSelect = 2, SetSketch = 3; V3 only: Buffered QS = 4 - static final int LG_RESIZE_RATIO_BYTE_V1 = 5; //used by SerVer 1 - static final int FLAGS_BYTE_V1 = 6; //used by SerVer 1 - - //Other constants - static final int SER_VER = 3; - - // serial version 4 compressed ordered sketch, not empty, not single item - static final int ENTRY_BITS_BYTE_V4 = 3; // number of bits packed in deltas between hashes - static final int NUM_ENTRIES_BYTES_BYTE_V4 = 4; // number of bytes used for the number of entries - static final int THETA_LONG_V4 = 8; //8-byte aligned - - static final boolean NATIVE_ORDER_IS_BIG_ENDIAN = - (ByteOrder.nativeOrder() == ByteOrder.BIG_ENDIAN); - - /** - * Computes the number of bytes required for an updatable sketch using a hash-table cache. - * This does not apply for compact sketches. - * @param lgArrLongs log2(current hash-table size) - * @param preambleLongs current preamble size - * @return the size in bytes - */ - static final int getSegBytes(final int lgArrLongs, final int preambleLongs) { - return (8 << lgArrLongs) + (preambleLongs << 3); - } - - // STRINGS - - /** - * Returns a human readable string summary of the preamble state of the given byte array. - * Used primarily in testing. - * - * @param byteArr the given byte array. - * @return the summary preamble string. - */ - static String preambleToString(final byte[] byteArr) { - final MemorySegment seg = MemorySegment.ofArray(byteArr); - return preambleToString(seg); - } - - /** - * Returns a human readable string summary of the preamble state of the given MemorySegment. - * Note: other than making sure that the given MemorySegment size is large - * enough for just the preamble, this does not do much value checking of the contents of the - * preamble as this is primarily a tool for debugging the preamble visually. 
- * - * @param seg the given MemorySegment. - * @return the summary preamble string. - */ - static String preambleToString(final MemorySegment seg) { - final int preLongs = getAndCheckPreLongs(seg); - final int rfId = extractLgResizeFactor(seg); - final ResizeFactor rf = ResizeFactor.getRF(rfId); - final int serVer = extractSerVer(seg); - final int familyId = extractFamilyID(seg); - final Family family = Family.idToFamily(familyId); - final int lgNomLongs = extractLgNomLongs(seg); - final int lgArrLongs = extractLgArrLongs(seg); - - //Flags - final int flags = extractFlags(seg); - final String flagsStr = (flags) + ", 0x" + (Integer.toHexString(flags)) + ", " - + zeroPad(Integer.toBinaryString(flags), 8); - final String nativeOrder = ByteOrder.nativeOrder().toString(); - final boolean bigEndian = (flags & BIG_ENDIAN_FLAG_MASK) > 0; - final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) > 0; - final boolean empty = (flags & EMPTY_FLAG_MASK) > 0; - final boolean compact = (flags & COMPACT_FLAG_MASK) > 0; - final boolean ordered = (flags & ORDERED_FLAG_MASK) > 0; - final boolean singleItem = (flags & SINGLEITEM_FLAG_MASK) > 0; //!empty && (preLongs == 1); - - final int seedHash = extractSeedHash(seg); - - //assumes preLongs == 1; empty or singleItem - int curCount = singleItem ? 
1 : 0; - float p = (float) 1.0; //preLongs 1 or 2 - long thetaLong = Long.MAX_VALUE; //preLongs 1 or 2 - long thetaULong = thetaLong; //preLongs 1, 2 or 3 - - if (preLongs == 2) { //exact (non-estimating) CompactSketch - curCount = extractCurCount(seg); - p = extractP(seg); - } - else if (preLongs == 3) { //Update Sketch - curCount = extractCurCount(seg); - p = extractP(seg); - thetaLong = extractThetaLong(seg); - thetaULong = thetaLong; - } - else if (preLongs == 4) { //Union - curCount = extractCurCount(seg); - p = extractP(seg); - thetaLong = extractThetaLong(seg); - thetaULong = extractUnionThetaLong(seg); - } - //else the same as an empty sketch or singleItem - - final double thetaDbl = thetaLong / Util.LONG_MAX_VALUE_AS_DOUBLE; - final String thetaHex = zeroPad(Long.toHexString(thetaLong), 16); - final double thetaUDbl = thetaULong / Util.LONG_MAX_VALUE_AS_DOUBLE; - final String thetaUHex = zeroPad(Long.toHexString(thetaULong), 16); - - final StringBuilder sb = new StringBuilder(); - sb.append(LS); - sb.append("### SKETCH PREAMBLE SUMMARY:").append(LS); - sb.append("Native Byte Order : ").append(nativeOrder).append(LS); - sb.append("Byte 0: Preamble Longs : ").append(preLongs).append(LS); - sb.append("Byte 0: ResizeFactor : ").append(rfId + ", " + rf.toString()).append(LS); - sb.append("Byte 1: Serialization Version: ").append(serVer).append(LS); - sb.append("Byte 2: Family : ").append(familyId + ", " + family.toString()).append(LS); - sb.append("Byte 3: LgNomLongs : ").append(lgNomLongs).append(LS); - sb.append("Byte 4: LgArrLongs : ").append(lgArrLongs).append(LS); - sb.append("Byte 5: Flags Field : ").append(flagsStr).append(LS); - sb.append(" Bit Flag Name : State:").append(LS); - sb.append(" 0 BIG_ENDIAN_STORAGE : ").append(bigEndian).append(LS); - sb.append(" 1 READ_ONLY : ").append(readOnly).append(LS); - sb.append(" 2 EMPTY : ").append(empty).append(LS); - sb.append(" 3 COMPACT : ").append(compact).append(LS); - sb.append(" 4 ORDERED : 
").append(ordered).append(LS); - sb.append(" 5 SINGLE_ITEM : ").append(singleItem).append(LS); - sb.append("Bytes 6-7 : Seed Hash Hex : ").append(Integer.toHexString(seedHash)).append(LS); - if (preLongs == 1) { - sb.append(" --ABSENT FIELDS, ASSUMED:").append(LS); - sb.append("Bytes 8-11 : CurrentCount : ").append(curCount).append(LS); - sb.append("Bytes 12-15: P : ").append(p).append(LS); - sb.append("Bytes 16-23: Theta (double) : ").append(thetaDbl).append(LS); - sb.append(" Theta (long) : ").append(thetaLong).append(LS); - sb.append(" Theta (long,hex) : ").append(thetaHex).append(LS); - } - else if (preLongs == 2) { - sb.append("Bytes 8-11 : CurrentCount : ").append(curCount).append(LS); - sb.append("Bytes 12-15: P : ").append(p).append(LS); - sb.append(" --ABSENT, ASSUMED:").append(LS); - sb.append("Bytes 16-23: Theta (double) : ").append(thetaDbl).append(LS); - sb.append(" Theta (long) : ").append(thetaLong).append(LS); - sb.append(" Theta (long,hex) : ").append(thetaHex).append(LS); - } - else if (preLongs == 3) { - sb.append("Bytes 8-11 : CurrentCount : ").append(curCount).append(LS); - sb.append("Bytes 12-15: P : ").append(p).append(LS); - sb.append("Bytes 16-23: Theta (double) : ").append(thetaDbl).append(LS); - sb.append(" Theta (long) : ").append(thetaLong).append(LS); - sb.append(" Theta (long,hex) : ").append(thetaHex).append(LS); - } - else { //preLongs == 4 - sb.append("Bytes 8-11 : CurrentCount : ").append(curCount).append(LS); - sb.append("Bytes 12-15: P : ").append(p).append(LS); - sb.append("Bytes 16-23: Theta (double) : ").append(thetaDbl).append(LS); - sb.append(" Theta (long) : ").append(thetaLong).append(LS); - sb.append(" Theta (long,hex) : ").append(thetaHex).append(LS); - sb.append("Bytes 25-31: ThetaU (double) : ").append(thetaUDbl).append(LS); - sb.append(" ThetaU (long) : ").append(thetaULong).append(LS); - sb.append(" ThetaU (long,hex): ").append(thetaUHex).append(LS); - } - sb.append( "Preamble Bytes : ").append(preLongs * 
8).append(LS); - sb.append( "Data Bytes : ").append(curCount * 8).append(LS); - sb.append( "TOTAL Sketch Bytes : ").append((preLongs + curCount) * 8).append(LS); - sb.append( "TOTAL Capacity Bytes : ").append(seg.byteSize()).append(LS); - sb.append("### END SKETCH PREAMBLE SUMMARY").append(LS); - return sb.toString(); - } - - //@formatter:on - - static int extractPreLongs(final MemorySegment seg) { - return seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; - } - - static int extractLgResizeFactor(final MemorySegment seg) { - return (seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) >>> LG_RESIZE_FACTOR_BIT) & 0X3; - } - - static int extractLgResizeRatioV1(final MemorySegment seg) { - return seg.get(JAVA_BYTE, LG_RESIZE_RATIO_BYTE_V1) & 0X3; - } - - static int extractSerVer(final MemorySegment seg) { - return seg.get(JAVA_BYTE, SER_VER_BYTE) & 0XFF; - } - - static int extractFamilyID(final MemorySegment seg) { - return seg.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF; - } - - static int extractLgNomLongs(final MemorySegment seg) { - return seg.get(JAVA_BYTE, LG_NOM_LONGS_BYTE) & 0XFF; - } - - static int extractLgArrLongs(final MemorySegment seg) { - return seg.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF; - } - - static int extractFlags(final MemorySegment seg) { - return seg.get(JAVA_BYTE, FLAGS_BYTE) & 0XFF; - } - - static int extractFlagsV1(final MemorySegment seg) { - return seg.get(JAVA_BYTE, FLAGS_BYTE_V1) & 0XFF; - } - - static int extractSeedHash(final MemorySegment seg) { - return seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT) & 0XFFFF; - } - - static int extractCurCount(final MemorySegment seg) { - return seg.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT); - } - - static float extractP(final MemorySegment seg) { - return seg.get(JAVA_FLOAT_UNALIGNED, P_FLOAT); - } - - static long extractThetaLong(final MemorySegment seg) { - return seg.get(JAVA_LONG_UNALIGNED, THETA_LONG); - } - - static long extractUnionThetaLong(final MemorySegment seg) { - return seg.get(JAVA_LONG_UNALIGNED, 
UNION_THETA_LONG); - } - - static int extractEntryBitsV4(final MemorySegment seg) { - return seg.get(JAVA_BYTE, ENTRY_BITS_BYTE_V4) & 0XFF; - } - - static int extractNumEntriesBytesV4(final MemorySegment seg) { - return seg.get(JAVA_BYTE, NUM_ENTRIES_BYTES_BYTE_V4) & 0XFF; - } - - static long extractThetaLongV4(final MemorySegment seg) { - return seg.get(JAVA_LONG_UNALIGNED, THETA_LONG_V4); - } - - /** - * Sets PreLongs in the low 6 bits and sets LgRF in the upper 2 bits = 0. - * @param seg the target MemorySegment - * @param preLongs the given number of preamble longs - */ - static void insertPreLongs(final MemorySegment seg, final int preLongs) { - seg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) (preLongs & 0X3F)); - } - - /** - * Sets the top 2 lgRF bits and does not affect the lower 6 bits (PreLongs). - * To work properly, this should be called after insertPreLongs(). - * @param seg the target MemorySegment - * @param rf the given lgRF bits - */ - static void insertLgResizeFactor(final MemorySegment seg, final int rf) { - final int curByte = seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0xFF; - final int shift = LG_RESIZE_FACTOR_BIT; // shift in bits - final int mask = 3; - final byte newByte = (byte) (((rf & mask) << shift) | (~(mask << shift) & curByte)); - seg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, newByte); - } - - static void insertSerVer(final MemorySegment seg, final int serVer) { - seg.set(JAVA_BYTE, SER_VER_BYTE, (byte) serVer); - } - - static void insertFamilyID(final MemorySegment seg, final int famId) { - seg.set(JAVA_BYTE, FAMILY_BYTE, (byte) famId); - } - - static void insertLgNomLongs(final MemorySegment seg, final int lgNomLongs) { - seg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte) lgNomLongs); - } - - static void insertLgArrLongs(final MemorySegment seg, final int lgArrLongs) { - seg.set(JAVA_BYTE, LG_ARR_LONGS_BYTE, (byte) lgArrLongs); - } - - static void insertFlags(final MemorySegment seg, final int flags) { - seg.set(JAVA_BYTE, FLAGS_BYTE, (byte) 
flags); - } - - static void insertSeedHash(final MemorySegment seg, final int seedHash) { - seg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, (short) seedHash); - } - - static void insertCurCount(final MemorySegment seg, final int curCount) { - seg.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, curCount); - } - - static void insertP(final MemorySegment seg, final float p) { - seg.set(JAVA_FLOAT_UNALIGNED, P_FLOAT, p); - } - - static void insertThetaLong(final MemorySegment seg, final long thetaLong) { - seg.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong); - } - - static void insertUnionThetaLong(final MemorySegment seg, final long unionThetaLong) { - seg.set(JAVA_LONG_UNALIGNED, UNION_THETA_LONG, unionThetaLong); - } - - static void setEmpty(final MemorySegment seg) { - int flags = seg.get(JAVA_BYTE, FLAGS_BYTE) & 0XFF; - flags |= EMPTY_FLAG_MASK; - seg.set(JAVA_BYTE, FLAGS_BYTE, (byte) flags); - } - - static void clearEmpty(final MemorySegment seg) { - int flags = seg.get(JAVA_BYTE, FLAGS_BYTE) & 0XFF; - flags &= ~EMPTY_FLAG_MASK; - seg.set(JAVA_BYTE, FLAGS_BYTE, (byte) flags); - } - - static boolean isEmptyFlag(final MemorySegment seg) { - return ((extractFlags(seg) & EMPTY_FLAG_MASK) > 0); - } - - /** - * Checks MemorySegment for capacity to hold the preamble and returns the extracted preLongs. - * @param seg the given MemorySegment - * @return the extracted prelongs value. 
- */ - static int getAndCheckPreLongs(final MemorySegment seg) { - final long cap = seg.byteSize(); - if (cap < 8) { - throwNotBigEnough(cap, 8); - } - final int preLongs = extractPreLongs(seg); - final int required = Math.max(preLongs << 3, 8); - if (cap < required) { - throwNotBigEnough(cap, required); - } - return preLongs; - } - - static final short checkSegmentSeedHash(final MemorySegment seg, final long seed) { - final short seedHashSeg = (short) extractSeedHash(seg); - Util.checkSeedHashes(seedHashSeg, Util.computeSeedHash(seed)); //throws if bad seedHash - return seedHashSeg; - } - - private static void throwNotBigEnough(final long cap, final int required) { - throw new SketchesArgumentException( - "Possible Corruption: Size of byte array or MemorySegment not large enough: Size: " + cap - + ", Required: " + required); - } - - static int wholeBytesToHoldBits(final int bits) { - return (bits >>> 3) + ((bits & 7) > 0 ? 1 : 0); - } -} diff --git a/src/main/java/org/apache/datasketches/theta2/Rebuilder.java b/src/main/java/org/apache/datasketches/theta2/Rebuilder.java deleted file mode 100644 index aced2c645..000000000 --- a/src/main/java/org/apache/datasketches/theta2/Rebuilder.java +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static org.apache.datasketches.common.QuickSelect.selectExcludingZeros; -import static org.apache.datasketches.theta2.PreambleUtil.LG_ARR_LONGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.extractCurCount; -import static org.apache.datasketches.theta2.PreambleUtil.extractLgArrLongs; -import static org.apache.datasketches.theta2.PreambleUtil.extractThetaLong; -import static org.apache.datasketches.theta2.PreambleUtil.insertCurCount; -import static org.apache.datasketches.theta2.PreambleUtil.insertLgArrLongs; -import static org.apache.datasketches.theta2.PreambleUtil.insertThetaLong; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.HashOperations; - -/** - * This class performs resize, rebuild and move operations where the input and output are Theta sketch images in MemorySegments. - * - *

              NOTE: These operations copy data from the input MemorySegment into local arrays, perform the required operations on the - * arrays, and then copies the result to the destination MemorySegment. Attempting to perform these operations directly on the - * MemorySegments would be slower due to MemorySegment internal checks. Meanwhile, he bulk copies performed by the MemorySegments are - * vectorized at the machine level and are quite fast. Measurements reveal that this is a good tradeoff.

              - * - * @author Lee Rhodes - */ -final class Rebuilder { - - private Rebuilder() {} - - /** - * Rebuild the hashTable in the given MemorySegment at its current size. Changes theta and thus count. - * This assumes a MemorySegment preamble of standard form with correct values of curCount and thetaLong. - * ThetaLong and curCount will change. - * Afterwards, caller must update local class members curCount and thetaLong from MemorySegment. - * - * @param seg the given MemorySegment - * @param preambleLongs size of preamble in longs - * @param lgNomLongs the log_base2 of k, the configuration parameter of the sketch - */ - static final void quickSelectAndRebuild(final MemorySegment seg, final int preambleLongs, final int lgNomLongs) { - - //Copy data from input segment into local buffer array for QS algorithm - final int lgArrLongs = extractLgArrLongs(seg); - final int arrLongs = 1 << lgArrLongs; - final long[] tmpArr = new long[arrLongs]; - final int preBytes = preambleLongs << 3; - MemorySegment.copy(seg, JAVA_LONG_UNALIGNED, preBytes, tmpArr, 0, arrLongs); - - //Do the QuickSelect on a tmp arr to create new thetaLong - final int pivot = (1 << lgNomLongs) + 1; // (K+1) pivot for QS - final long newThetaLong = selectExcludingZeros(tmpArr, extractCurCount(seg), pivot); - insertThetaLong(seg, newThetaLong); //UPDATE thetaLong - - //Rebuild to clean up dirty data, update count - final long[] tgtArr = new long[arrLongs]; - final int newCurCount = - HashOperations.hashArrayInsert(tmpArr, tgtArr, lgArrLongs, newThetaLong); - insertCurCount(seg, newCurCount); //UPDATE curCount - - //put the rebuilt array back into MemorySegment - MemorySegment.copy(tgtArr, 0, seg, JAVA_LONG_UNALIGNED, preBytes, arrLongs); - } - - /** - * Moves me (the entire updatable sketch) to a new larger MemorySegment location and rebuilds the hash table. - * This assumes a MemorySegment preamble of standard form with the correct value of thetaLong. 
- * Afterwards, the caller must update the local MemorySegment reference, lgArrLongs - * and hashTableThreshold from the destination MemorySegment and free the source MemorySegment. - * - * @param srcSeg the source MemorySegment - * @param preambleLongs size of preamble in longs - * @param srcLgArrLongs size (log_base2) of source hash table - * @param dstSeg the destination MemorySegment, which may be garbage - * @param dstLgArrLongs the destination hash table target size - * @param thetaLong theta as a long - */ - static final void moveAndResize(final MemorySegment srcSeg, final int preambleLongs, - final int srcLgArrLongs, final MemorySegment dstSeg, final int dstLgArrLongs, final long thetaLong) { - - //Move Preamble to destination MemorySegment - final int preBytes = preambleLongs << 3; - MemorySegment.copy(srcSeg, 0, dstSeg, 0, preBytes); - - //Bulk copy source Hash Table to local buffer array - final int srcHTLen = 1 << srcLgArrLongs; - final long[] srcHTArr = new long[srcHTLen]; - MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preBytes, srcHTArr, 0, srcHTLen); - - //Create destination buffer - final int dstHTLen = 1 << dstLgArrLongs; - final long[] dstHTArr = new long[dstHTLen]; - - //Rebuild hash table in destination buffer - HashOperations.hashArrayInsert(srcHTArr, dstHTArr, dstLgArrLongs, thetaLong); - - //Bulk copy to destination MemorySegment - MemorySegment.copy(dstHTArr, 0, dstSeg, JAVA_LONG_UNALIGNED, preBytes, dstHTLen); - dstSeg.set(JAVA_BYTE, LG_ARR_LONGS_BYTE, (byte)dstLgArrLongs); //update lgArrLongs in dstSeg - } - - /** - * Resizes existing hash array into a larger one within a single MemorySegment, assuming enough space. - * This assumes a preamble of standard form with the correct value of thetaLong. - * The lgArrLongs will change. - * Afterwards, the caller must update the caller's local copies of lgArrLongs and hashTableThreshold - * from the returned MemorySegment. 
- * - * @param seg the source and destination MemorySegment - * @param preambleLongs the size of the preamble in longs - * @param srcLgArrLongs the size of the source hash table - * @param tgtLgArrLongs the LgArrLongs value for the new hash table - */ - static final void resize(final MemorySegment seg, final int preambleLongs, - final int srcLgArrLongs, final int tgtLgArrLongs) { - - //Preamble stays in place - final int preBytes = preambleLongs << 3; - - //Bulk copy source to on-heap buffer - final int srcHTLen = 1 << srcLgArrLongs; //current value - final long[] srcHTArr = new long[srcHTLen]; //on-heap src buffer - //seg.getLongArray(preBytes, srcHTArr, 0, srcHTLen); - MemorySegment.copy(seg, JAVA_LONG_UNALIGNED, preBytes, srcHTArr, 0, srcHTLen); - - //Create destination on-heap buffer - final int dstHTLen = 1 << tgtLgArrLongs; - final long[] dstHTArr = new long[dstHTLen]; //on-heap dst buffer - - //Rebuild hash table in destination buffer - HashOperations.hashArrayInsert(srcHTArr, dstHTArr, tgtLgArrLongs, extractThetaLong(seg)); - - //Bulk copy to destination segment - MemorySegment.copy(dstHTArr, 0, seg, JAVA_LONG_UNALIGNED, preBytes, dstHTLen); - insertLgArrLongs(seg, tgtLgArrLongs); //update in mem - } - - /** - * Returns the actual log2 Resize Factor that can be used to grow the hash table. 
This will be - * an integer value between zero and the given lgRF, inclusive; - * @param capBytes the current MemorySegment capacity in bytes - * @param lgArrLongs the current lg hash table size in longs - * @param preLongs the current preamble size in longs - * @param lgRF the configured lg Resize Factor - * @return the actual log2 Resize Factor that can be used to grow the hash table - */ - static final int actLgResizeFactor(final long capBytes, final int lgArrLongs, final int preLongs, - final int lgRF) { - final int maxHTLongs = Util.floorPowerOf2(((int)(capBytes >>> 3) - preLongs)); - final int lgFactor = Math.max(Integer.numberOfTrailingZeros(maxHTLongs) - lgArrLongs, 0); - return (lgFactor >= lgRF) ? lgRF : lgFactor; - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/SetOperation.java b/src/main/java/org/apache/datasketches/theta2/SetOperation.java deleted file mode 100644 index e8ed24fd1..000000000 --- a/src/main/java/org/apache/datasketches/theta2/SetOperation.java +++ /dev/null @@ -1,246 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static org.apache.datasketches.common.Family.idToFamily; -import static org.apache.datasketches.common.Util.ceilingPowerOf2; -import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.MemorySegmentStatus; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; - -/** - * The parent API for all Set Operations - * - * @author Lee Rhodes - */ -public abstract class SetOperation implements MemorySegmentStatus { - static final int CONST_PREAMBLE_LONGS = 3; - - /** - * Constructor - */ - SetOperation() {} - - /** - * Makes a new builder - * - * @return a new builder - */ - public static final SetOperationBuilder builder() { - return new SetOperationBuilder(); - } - - /** - * Heapify takes the SetOperations image in MemorySegment and instantiates an on-heap - * SetOperation using the - * Default Update Seed. - * The resulting SetOperation will not retain any link to the source MemorySegment. - * - *

              Note: Only certain set operators during stateful operations can be serialized and thus - * heapified.

              - * - * @param srcSeg an image of a SetOperation where the image seed hash matches the default seed hash. - * @return a Heap-based SetOperation from the given MemorySegment - */ - public static SetOperation heapify(final MemorySegment srcSeg) { - return heapify(srcSeg, Util.DEFAULT_UPDATE_SEED); - } - - /** - * Heapify takes the SetOperation image in MemorySegment and instantiates an on-heap - * SetOperation using the given expectedSeed. - * The resulting SetOperation will not retain any link to the source MemorySegment. - * - *

              Note: Only certain set operators during stateful operations can be serialized and thus - * heapified.

              - * - * @param srcSeg an image of a SetOperation where the hash of the given expectedSeed matches the image seed hash. - * @param expectedSeed the seed used to validate the given MemorySegment image. - * See Update Hash Seed. - * @return a Heap-based SetOperation from the given MemorySegment - */ - public static SetOperation heapify(final MemorySegment srcSeg, final long expectedSeed) { - final byte famID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE); - final Family family = idToFamily(famID); - switch (family) { - case UNION : { - return UnionImpl.heapifyInstance(srcSeg, expectedSeed); - } - case INTERSECTION : { - return IntersectionImpl.heapifyInstance(srcSeg, expectedSeed); - } - default: { - throw new SketchesArgumentException("SetOperation cannot heapify family: " - + family.toString()); - } - } - } - - /** - * Wrap takes the SetOperation image in MemorySegment and refers to it directly. - * There is no data copying onto the java heap. - * This method assumes the Default Update Seed. - * If the given source MemorySegment is read-only, the returned object will also be read-only. - * - *

              Note: Only certain set operators during stateful operations can be serialized and thus wrapped.

              - * - * @param srcSeg an image of a SetOperation where the image seed hash matches the default seed hash. - * @return a SetOperation backed by the given MemorySegment - */ - public static SetOperation wrap(final MemorySegment srcSeg) { - return wrap(srcSeg, Util.DEFAULT_UPDATE_SEED); - } - - /** - * Wrap takes the SetOperation image in MemorySegment and refers to it directly. - * There is no data copying onto the java heap. - * If the given source MemorySegment is read-only, the returned object will also be read-only. - * - *

              Note: Only certain set operators during stateful operations can be serialized and thus wrapped.

              - * - * @param srcSeg an image of a SetOperation where the hash of the given expectedSeed matches the image seed hash. - * @param expectedSeed the seed used to validate the given MemorySegment image. - * See Update Hash Seed. - * @return a SetOperation backed by the given MemorySegment - */ - public static SetOperation wrap(final MemorySegment srcSeg, final long expectedSeed) { - final byte famID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE); - final Family family = idToFamily(famID); - final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE); - if (serVer != 3) { - throw new SketchesArgumentException("SerVer must be 3: " + serVer); - } - switch (family) { - case UNION : { - return UnionImpl.wrapInstance(srcSeg, expectedSeed); - } - case INTERSECTION : { - return IntersectionImpl.wrapInstance(srcSeg, expectedSeed, srcSeg.isReadOnly() ); - } - default: - throw new SketchesArgumentException("SetOperation cannot wrap family: " + family.toString()); - } - } - - /** - * Returns the maximum required storage bytes given a nomEntries parameter for Union operations - * @param nomEntries Nominal Entries - * This will become the ceiling power of 2 if it is not. - * @return the maximum required storage bytes given a nomEntries parameter - */ - public static int getMaxUnionBytes(final int nomEntries) { - final int nomEnt = ceilingPowerOf2(nomEntries); - return (nomEnt << 4) + (Family.UNION.getMaxPreLongs() << 3); - } - - /** - * Returns the maximum required storage bytes given a nomEntries parameter for Intersection - * operations - * @param nomEntries Nominal Entries - * This will become the ceiling power of 2 if it is not. 
- * @return the maximum required storage bytes given a nomEntries parameter - */ - public static int getMaxIntersectionBytes(final int nomEntries) { - final int nomEnt = ceilingPowerOf2(nomEntries); - final int bytes = (nomEnt << 4) + (Family.INTERSECTION.getMaxPreLongs() << 3); - return bytes; - } - - /** - * Returns the maximum number of bytes for the returned CompactSketch, given the - * value of nomEntries of the first sketch A of AnotB. - * @param nomEntries this value must be a power of 2. - * @return the maximum number of bytes. - */ - public static int getMaxAnotBResultBytes(final int nomEntries) { - final int ceil = ceilingPowerOf2(nomEntries); - return 24 + (15 * ceil); - } - - /** - * Gets the Family of this SetOperation - * @return the Family of this SetOperation - */ - public abstract Family getFamily(); - - //restricted - - /** - * Gets the hash array in compact form. - * This is only useful during stateful operations. - * This should never be made public. - * @return the hash array - */ - abstract long[] getCache(); - - /** - * Returns the backing MemorySegment object if it exists, otherwise null. - * @return the backing MemorySegment object if it exists, otherwise null. - */ - MemorySegment getMemorySegment() { return null; } - - /** - * Gets the current count of retained entries. - * This is only useful during stateful operations. - * Intentionally not made public because behavior will be confusing to end user. - * - * @return Gets the current count of retained entries. - */ - abstract int getRetainedEntries(); - - /** - * Returns the seedHash established during class construction. - * @return the seedHash. - */ - abstract short getSeedHash(); - - /** - * Gets the current value of ThetaLong. - * Only useful during stateful operations. - * Intentionally not made public because behavior will be confusing to end user. - * @return the current value of ThetaLong. 
- */ - abstract long getThetaLong(); - - @Override - public abstract boolean hasMemorySegment(); - - @Override - public abstract boolean isDirect(); - - /** - * Returns true if this set operator is empty. - * Only useful during stateful operations. - * Intentionally not made public because behavior will be confusing to end user. - * @return true if this set operator is empty. - */ - abstract boolean isEmpty(); - - @Override - public abstract boolean isSameResource(final MemorySegment seg); - -} diff --git a/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java b/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java deleted file mode 100644 index f56163951..000000000 --- a/src/main/java/org/apache/datasketches/theta2/SetOperationBuilder.java +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import static org.apache.datasketches.common.Util.LS; -import static org.apache.datasketches.common.Util.TAB; -import static org.apache.datasketches.common.Util.ceilingPowerOf2; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.ThetaUtil; - -/** - * For building a new SetOperation. - * - * @author Lee Rhodes - */ -public final class SetOperationBuilder { - private int bLgNomLongs; - private long bSeed; - private ResizeFactor bRF; - private float bP; - - /** - * Constructor for building a new SetOperation. The default configuration is - *
                - *
              • Max Nominal Entries (max K): - * {@value org.apache.datasketches.thetacommon2.ThetaUtil#DEFAULT_NOMINAL_ENTRIES}
              • - *
              • Seed: {@value org.apache.datasketches.common.Util#DEFAULT_UPDATE_SEED}
              • - *
              • {@link ResizeFactor#X8}
              • - *
              • Input Sampling Probability: 1.0
              • - *
              • MemorySegment: null
              • - *
              - */ - public SetOperationBuilder() { - bLgNomLongs = Integer.numberOfTrailingZeros(ThetaUtil.DEFAULT_NOMINAL_ENTRIES); - bSeed = Util.DEFAULT_UPDATE_SEED; - bP = (float) 1.0; - bRF = ResizeFactor.X8; - } - - /** - * Sets the Maximum Nominal Entries (max K) for this set operation. The effective value of K of the result of a - * Set Operation can be less than max K, but never greater. - * The minimum value is 16 and the maximum value is 67,108,864, which is 2^26. - * @param nomEntries Nominal Entries - * This will become the ceiling power of 2 if it is not a power of 2. - * @return this SetOperationBuilder - */ - public SetOperationBuilder setNominalEntries(final int nomEntries) { - bLgNomLongs = Integer.numberOfTrailingZeros(ceilingPowerOf2(nomEntries)); - if ((bLgNomLongs > ThetaUtil.MAX_LG_NOM_LONGS) || (bLgNomLongs < ThetaUtil.MIN_LG_NOM_LONGS)) { - throw new SketchesArgumentException("Nominal Entries must be >= 16 and <= 67108864: " - + nomEntries); - } - return this; - } - - /** - * Alternative method of setting the Nominal Entries for this set operation from the log_base2 value. - * The minimum value is 4 and the maximum value is 26. - * Be aware that set operations as large as this maximum value may not have been - * thoroughly characterized for performance. - * - * @param lgNomEntries the log_base2 Nominal Entries. - * @return this SetOperationBuilder - */ - public SetOperationBuilder setLogNominalEntries(final int lgNomEntries) { - bLgNomLongs = ThetaUtil.checkNomLongs(1 << lgNomEntries); - return this; - } - - /** - * Returns Log-base 2 Nominal Entries - * @return Log-base 2 Nominal Entries - */ - public int getLgNominalEntries() { - return bLgNomLongs; - } - - /** - * Sets the long seed value that is require by the hashing function. 
- * @param seed See seed - * @return this SetOperationBuilder - */ - public SetOperationBuilder setSeed(final long seed) { - bSeed = seed; - return this; - } - - /** - * Returns the seed - * @return the seed - */ - public long getSeed() { - return bSeed; - } - - /** - * Sets the upfront uniform sampling probability, p. Although this functionality is - * implemented for Unions only, it rarely makes sense to use it. The proper use of upfront - * sampling is when building the sketches. - * @param p See Sampling Probability, p - * @return this SetOperationBuilder - */ - public SetOperationBuilder setP(final float p) { - if ((p <= 0.0) || (p > 1.0)) { - throw new SketchesArgumentException("p must be > 0 and <= 1.0: " + p); - } - bP = p; - return this; - } - - /** - * Returns the pre-sampling probability p - * @return the pre-sampling probability p - */ - public float getP() { - return bP; - } - - /** - * Sets the cache Resize Factor - * @param rf See Resize Factor - * @return this SetOperationBuilder - */ - public SetOperationBuilder setResizeFactor(final ResizeFactor rf) { - bRF = rf; - return this; - } - - /** - * Returns the Resize Factor - * @return the Resize Factor - */ - public ResizeFactor getResizeFactor() { - return bRF; - } - - /** - * Returns a SetOperation with the current configuration of this Builder and the given Family. - * @param family the chosen SetOperation family - * @return a SetOperation - */ - public SetOperation build(final Family family) { - return build(family, null); - } - - /** - * Returns a SetOperation with the current configuration of this Builder, the given Family - * and the given destination MemorySegment. Note that the destination MemorySegment cannot be used with AnotB. - * @param family the chosen SetOperation family - * @param dstSeg The destination MemorySegment. 
- * @return a SetOperation - */ - public SetOperation build(final Family family, final MemorySegment dstSeg) { - SetOperation setOp = null; - switch (family) { - case UNION: { - if (dstSeg == null) { - setOp = UnionImpl.initNewHeapInstance(bLgNomLongs, bSeed, bP, bRF); - } - else { - setOp = UnionImpl.initNewDirectInstance(bLgNomLongs, bSeed, bP, bRF, dstSeg); - } - break; - } - case INTERSECTION: { - if (dstSeg == null) { - setOp = IntersectionImpl.initNewHeapInstance(bSeed); - } - else { - setOp = IntersectionImpl.initNewDirectInstance(bSeed, dstSeg); - } - break; - } - case A_NOT_B: { - if (dstSeg == null) { - setOp = new AnotBimpl(bSeed); - } - else { - throw new SketchesArgumentException( - "AnotB can not be persisted."); - } - break; - } - default: - throw new SketchesArgumentException( - "Given Family cannot be built as a SetOperation: " + family.toString()); - } - return setOp; - } - - /** - * Convenience method, returns a configured SetOperation Union with - * Default Nominal Entries - * @return a Union object - */ - public Union buildUnion() { - return (Union) build(Family.UNION); - } - - /** - * Convenience method, returns a configured SetOperation Union with - * Default Nominal Entries - * and the given destination MemorySegment. - * @param dstSeg The destination MemorySegment. - * @return a Union object - */ - public Union buildUnion(final MemorySegment dstSeg) { - return (Union) build(Family.UNION, dstSeg); - } - - /** - * Convenience method, returns a configured SetOperation Intersection with - * Default Nominal Entries - * @return an Intersection object - */ - public Intersection buildIntersection() { - return (Intersection) build(Family.INTERSECTION); - } - - /** - * Convenience method, returns a configured SetOperation Intersection with - * Default Nominal Entries - * and the given destination MemorySegment. - * @param dstSeg The destination MemorySegment. 
- * @return an Intersection object - */ - public Intersection buildIntersection(final MemorySegment dstSeg) { - return (Intersection) build(Family.INTERSECTION, dstSeg); - } - - /** - * Convenience method, returns a configured SetOperation ANotB with - * Default Update Seed - * @return an ANotB object - */ - public AnotB buildANotB() { - return (AnotB) build(Family.A_NOT_B); - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder(); - sb.append("SetOperationBuilder configuration:").append(LS); - sb.append("LgK:").append(TAB).append(bLgNomLongs).append(LS); - sb.append("K:").append(TAB).append(1 << bLgNomLongs).append(LS); - sb.append("Seed:").append(TAB).append(bSeed).append(LS); - sb.append("p:").append(TAB).append(bP).append(LS); - sb.append("ResizeFactor:").append(TAB).append(bRF).append(LS); - return sb.toString(); - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java b/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java deleted file mode 100644 index 7ec932b1f..000000000 --- a/src/main/java/org/apache/datasketches/theta2/SingleItemSketch.java +++ /dev/null @@ -1,408 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.apache.datasketches.common.ByteArrayUtil.putLongLE; -import static org.apache.datasketches.hash.MurmurHash3.hash; -import static org.apache.datasketches.theta2.PreambleUtil.SINGLEITEM_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID; -import static org.apache.datasketches.theta2.PreambleUtil.extractFlags; -import static org.apache.datasketches.theta2.PreambleUtil.extractPreLongs; -import static org.apache.datasketches.theta2.PreambleUtil.extractSeedHash; -import static org.apache.datasketches.theta2.PreambleUtil.extractSerVer; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; - -/** - * A CompactSketch that holds only one item hash. - * - * @author Lee Rhodes - */ -final class SingleItemSketch extends CompactSketch { - private static final long DEFAULT_SEED_HASH = Util.computeSeedHash(Util.DEFAULT_UPDATE_SEED) & 0xFFFFL; - - // For backward compatibility, a candidate pre0_ long must have: - // Flags (byte 5): Ordered, Compact, NOT Empty, Read Only, LittleEndian = 11010 = 0x1A. - // Flags mask will be 0x1F. - // SingleItem flag may not be set due to a historical bug, so we can't depend on it for now. - // However, if the above flags are correct, preLongs == 1, SerVer >= 3, FamilyID == 3, - // and the hash seed matches, it is virtually guaranteed that we have a SingleItem Sketch. - - private static final long PRE0_LO6_SI = 0X00_00_3A_00_00_03_03_01L; //with SI flag - private long pre0_ = 0; - private long hash_ = 0; - - //Internal Constructor. 
All checking & hashing has been done, assumes default seed - private SingleItemSketch(final long hash) { - pre0_ = (DEFAULT_SEED_HASH << 48) | PRE0_LO6_SI; - hash_ = hash; - } - - //All checking & hashing has been done, given the relevant seed - SingleItemSketch(final long hash, final long seed) { - final long seedHash = Util.computeSeedHash(seed) & 0xFFFFL; - pre0_ = (seedHash << 48) | PRE0_LO6_SI; - hash_ = hash; - } - - //All checking & hashing has been done, given the relevant seedHash - SingleItemSketch(final long hash, final short seedHash) { - final long seedH = seedHash & 0xFFFFL; - pre0_ = (seedH << 48) | PRE0_LO6_SI; - hash_ = hash; - } - - /** - * Creates a SingleItemSketch on the heap given a SingleItemSketch MemorySegment image and a seedHash. - * Checks the seed hash of the given MemorySegment against the given seedHash. - * @param srcSeg the MemorySegment to be heapified. - * @param expectedSeedHash the given seedHash to be checked against the srcSeg seedHash - * @return a SingleItemSketch - */ //does not override Sketch - static SingleItemSketch heapify(final MemorySegment srcSeg, final short expectedSeedHash) { - Util.checkSeedHashes((short) extractSeedHash(srcSeg), expectedSeedHash); - final boolean singleItem = otherCheckForSingleItem(srcSeg); - if (singleItem) { return new SingleItemSketch(srcSeg.get(JAVA_LONG_UNALIGNED, 8), expectedSeedHash); } - throw new SketchesArgumentException("Input MemorySegment is not a SingleItemSketch."); - } - - @Override - public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) { - if (dstSeg == null) { return this; } - else { - dstSeg.set(JAVA_LONG_UNALIGNED, 0, pre0_); - dstSeg.set(JAVA_LONG_UNALIGNED, 8, hash_); - return new DirectCompactSketch(dstSeg); - } - } - - //Create methods using the default seed - - /** - * Create this sketch with a long. - * - * @param datum The given long datum. 
- * @return a SingleItemSketch - */ - static SingleItemSketch create(final long datum) { - final long[] data = { datum }; - return new SingleItemSketch(hash(data, Util.DEFAULT_UPDATE_SEED)[0] >>> 1); - } - - /** - * Create this sketch with the given double (or float) datum. - * The double will be converted to a long using Double.doubleToLongBits(datum), - * which normalizes all NaN values to a single NaN representation. - * Plus and minus zero will be normalized to plus zero. - * The special floating-point values NaN and +/- Infinity are treated as distinct. - * - * @param datum The given double datum. - * @return a SingleItemSketch - */ - static SingleItemSketch create(final double datum) { - final double d = (datum == 0.0) ? 0.0 : datum; // canonicalize -0.0, 0.0 - final long[] data = { Double.doubleToLongBits(d) };// canonicalize all NaN forms - return new SingleItemSketch(hash(data, Util.DEFAULT_UPDATE_SEED)[0] >>> 1); - } - - /** - * Create this sketch with the given String. - * The string is converted to a byte array using UTF8 encoding. - * If the string is null or empty no create attempt is made and the method returns null. - * - *

              Note: this will not produce the same hash values as the {@link #create(char[])} - * method and will generally be a little slower depending on the complexity of the UTF8 encoding. - *

              - * - * @param datum The given String. - * @return a SingleItemSketch or null - */ - static SingleItemSketch create(final String datum) { - if ((datum == null) || datum.isEmpty()) { return null; } - final byte[] data = datum.getBytes(UTF_8); - return new SingleItemSketch(hash(data, Util.DEFAULT_UPDATE_SEED)[0] >>> 1); - } - - /** - * Create this sketch with the given byte array. - * If the byte array is null or empty no create attempt is made and the method returns null. - * - * @param data The given byte array. - * @return a SingleItemSketch or null - */ - static SingleItemSketch create(final byte[] data) { - if ((data == null) || (data.length == 0)) { return null; } - return new SingleItemSketch(hash(data, Util.DEFAULT_UPDATE_SEED)[0] >>> 1); - } - - /** - * Create this sketch with the given char array. - * If the char array is null or empty no create attempt is made and the method returns null. - * - *

              Note: this will not produce the same output hash values as the {@link #create(String)} - * method but will be a little faster as it avoids the complexity of the UTF8 encoding.

              - * - * @param data The given char array. - * @return a SingleItemSketch or null - */ - static SingleItemSketch create(final char[] data) { - if ((data == null) || (data.length == 0)) { return null; } - return new SingleItemSketch(hash(data, Util.DEFAULT_UPDATE_SEED)[0] >>> 1); - } - - /** - * Create this sketch with the given integer array. - * If the integer array is null or empty no create attempt is made and the method returns null. - * - * @param data The given int array. - * @return a SingleItemSketch or null - */ - static SingleItemSketch create(final int[] data) { - if ((data == null) || (data.length == 0)) { return null; } - return new SingleItemSketch(hash(data, Util.DEFAULT_UPDATE_SEED)[0] >>> 1); - } - - /** - * Create this sketch with the given long array. - * If the long array is null or empty no create attempt is made and the method returns null. - * - * @param data The given long array. - * @return a SingleItemSketch or null - */ - static SingleItemSketch create(final long[] data) { - if ((data == null) || (data.length == 0)) { return null; } - return new SingleItemSketch(hash(data, Util.DEFAULT_UPDATE_SEED)[0] >>> 1); - } - - //Updates with a user specified seed - - /** - * Create this sketch with a long and a seed. - * - * @param datum The given long datum. - * @param seed used to hash the given value. - * @return a SingleItemSketch - */ - static SingleItemSketch create(final long datum, final long seed) { - final long[] data = { datum }; - return new SingleItemSketch(hash(data, seed)[0] >>> 1); - } - - /** - * Create this sketch with the given double (or float) datum and a seed. - * The double will be converted to a long using Double.doubleToLongBits(datum), - * which normalizes all NaN values to a single NaN representation. - * Plus and minus zero will be normalized to plus zero. - * The special floating-point values NaN and +/- Infinity are treated as distinct. - * - * @param datum The given double datum. 
- * @param seed used to hash the given value. - * @return a SingleItemSketch - */ - static SingleItemSketch create(final double datum, final long seed) { - final double d = (datum == 0.0) ? 0.0 : datum; // canonicalize -0.0, 0.0 - final long[] data = { Double.doubleToLongBits(d) };// canonicalize all NaN forms - return new SingleItemSketch(hash(data, seed)[0] >>> 1, seed); - } - - /** - * Create this sketch with the given String and a seed. - * The string is converted to a byte array using UTF8 encoding. - * If the string is null or empty no create attempt is made and the method returns null. - * - *

              Note: this will not produce the same output hash values as the {@link #create(char[])} - * method and will generally be a little slower depending on the complexity of the UTF8 encoding. - *

              - * - * @param datum The given String. - * @param seed used to hash the given value. - * @return a SingleItemSketch or null - */ - static SingleItemSketch create(final String datum, final long seed) { - if ((datum == null) || datum.isEmpty()) { return null; } - final byte[] data = datum.getBytes(UTF_8); - return new SingleItemSketch(hash(data, seed)[0] >>> 1, seed); - } - - /** - * Create this sketch with the given byte array and a seed. - * If the byte array is null or empty no create attempt is made and the method returns null. - * - * @param data The given byte array. - * @param seed used to hash the given value. - * @return a SingleItemSketch or null - */ - static SingleItemSketch create(final byte[] data, final long seed) { - if ((data == null) || (data.length == 0)) { return null; } - return new SingleItemSketch(hash(data, seed)[0] >>> 1, seed); - } - - /** - * Create this sketch with the given char array and a seed. - * If the char array is null or empty no create attempt is made and the method returns null. - * - *

              Note: this will not produce the same output hash values as the {@link #create(String)} - * method but will be a little faster as it avoids the complexity of the UTF8 encoding.

              - * - * @param data The given char array. - * @param seed used to hash the given value. - * @return a SingleItemSketch or null - */ - static SingleItemSketch create(final char[] data, final long seed) { - if ((data == null) || (data.length == 0)) { return null; } - return new SingleItemSketch(hash(data, seed)[0] >>> 1, seed); - } - - /** - * Create this sketch with the given integer array and a seed. - * If the integer array is null or empty no create attempt is made and the method returns null. - * - * @param data The given int array. - * @param seed used to hash the given value. - * @return a SingleItemSketch or null - */ - static SingleItemSketch create(final int[] data, final long seed) { - if ((data == null) || (data.length == 0)) { return null; } - return new SingleItemSketch(hash(data, seed)[0] >>> 1, seed); - } - - /** - * Create this sketch with the given long array (as an item) and a seed. - * If the long array is null or empty no create attempt is made and the method returns null. - * - * @param data The given long array. - * @param seed used to hash the given value. - * @return a SingleItemSketch or null - */ - static SingleItemSketch create(final long[] data, final long seed) { - if ((data == null) || (data.length == 0)) { return null; } - return new SingleItemSketch(hash(data, seed)[0] >>> 1, seed); - } - - //Sketch - - @Override //much faster - public int getCountLessThanThetaLong(final long thetaLong) { - return (hash_ < thetaLong) ? 
1 : 0; - } - - @Override - public int getCurrentBytes() { - return 16; - } - - @Override - public double getEstimate() { - return 1.0; - } - - @Override - public HashIterator iterator() { - return new HeapCompactHashIterator(new long[] { hash_ }); - } - - @Override - public double getLowerBound(final int numStdDev) { - return 1.0; - } - - @Override - public int getRetainedEntries(final boolean valid) { - return 1; - } - - @Override - public long getThetaLong() { - return Long.MAX_VALUE; - } - - @Override - public double getUpperBound(final int numStdDev) { - return 1.0; - } - - @Override - public boolean isEmpty() { - return false; - } - - @Override - public boolean isOrdered() { - return true; - } - - @Override - public byte[] toByteArray() { - final byte[] out = new byte[16]; - putLongLE(out, 0, pre0_); - putLongLE(out, 8, hash_); - return out; - } - - //restricted methods - - @Override - long[] getCache() { - return new long[] { hash_ }; - } - - @Override - int getCompactPreambleLongs() { - return 1; - } - - @Override - int getCurrentPreambleLongs() { - return 1; - } - - @Override - short getSeedHash() { - return (short) (pre0_ >>> 48); - } - - static final boolean otherCheckForSingleItem(final MemorySegment seg) { - return otherCheckForSingleItem(extractPreLongs(seg), extractSerVer(seg), - extractFamilyID(seg), extractFlags(seg) ); - } - - static final boolean otherCheckForSingleItem(final int preLongs, final int serVer, - final int famId, final int flags) { - // Flags byte: SI=X, Ordered=T, Compact=T, Empty=F, ReadOnly=T, BigEndian=F = X11010 = 0x1A. - // Flags mask will be 0x1F. - // SingleItem flag may not be set due to a historical bug, so we can't depend on it for now. - // However, if the above flags are correct, preLongs == 1, SerVer >= 3, FamilyID == 3, - // and the hash seed matches (not done here), it is virtually guaranteed that we have a - // SingleItem Sketch. 
- final boolean numPreLongs = preLongs == 1; - final boolean numSerVer = serVer >= 3; - final boolean numFamId = famId == Family.COMPACT.getID(); - final boolean numFlags = (flags & 0x1F) == 0x1A; //no SI, yet - final boolean singleFlag = (flags & SINGLEITEM_FLAG_MASK) > 0; - return (numPreLongs && numSerVer && numFamId && numFlags) || singleFlag; - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/Sketch.java b/src/main/java/org/apache/datasketches/theta2/Sketch.java deleted file mode 100644 index 7789a2a08..000000000 --- a/src/main/java/org/apache/datasketches/theta2/Sketch.java +++ /dev/null @@ -1,668 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static org.apache.datasketches.common.Family.idToFamily; -import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; -import static org.apache.datasketches.common.Util.LS; -import static org.apache.datasketches.common.Util.ceilingPowerOf2; -import static org.apache.datasketches.common.Util.zeroPad; -import static org.apache.datasketches.theta2.PreambleUtil.COMPACT_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE; -import static org.apache.datasketches.thetacommon2.HashOperations.count; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.MemorySegmentStatus; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.BinomialBoundsN; -import org.apache.datasketches.thetacommon2.ThetaUtil; - -/** - * The top-level class for all theta sketches. This class is never constructed directly. - * Use the UpdateSketch.builder() methods to create UpdateSketches. - * - * @author Lee Rhodes - */ -public abstract class Sketch implements MemorySegmentStatus { - - Sketch() {} - - //public static factory constructor-type methods - - /** - * Heapify takes the sketch image in MemorySegment and instantiates an on-heap Sketch. - * - *

              The resulting sketch will not retain any link to the source MemorySegment.

              - * - *

              For Update Sketches this method checks if the - * Default Update Seed

              - * was used to create the source MemorySegment image. - * - *

              For Compact Sketches this method assumes that the sketch image was created with the - * correct hash seed, so it is not checked.

              - * - * @param srcSeg an image of a Sketch. - * - * @return a Sketch on the heap. - */ - public static Sketch heapify(final MemorySegment srcSeg) { - final byte familyID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE); - final Family family = idToFamily(familyID); - if (family == Family.COMPACT) { - return CompactSketch.heapify(srcSeg); - } - return heapifyUpdateFromMemorySegment(srcSeg, Util.DEFAULT_UPDATE_SEED); - } - - /** - * Heapify takes the sketch image in MemorySegment and instantiates an on-heap Sketch. - * - *

              The resulting sketch will not retain any link to the source MemorySegment.

              - * - *

              For Update and Compact Sketches this method checks if the given expectedSeed was used to - * create the source MemorySegment image. However, SerialVersion 1 sketches cannot be checked.

              - * - * @param srcSeg an image of a Sketch that was created using the given expectedSeed. - * @param expectedSeed the seed used to validate the given MemorySegment image. - * See Update Hash Seed. - * Compact sketches store a 16-bit hash of the seed, but not the seed itself. - * @return a Sketch on the heap. - */ - public static Sketch heapify(final MemorySegment srcSeg, final long expectedSeed) { - final byte familyID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE); - final Family family = idToFamily(familyID); - if (family == Family.COMPACT) { - return CompactSketch.heapify(srcSeg, expectedSeed); - } - return heapifyUpdateFromMemorySegment(srcSeg, expectedSeed); - } - - /** - * Wrap takes the sketch image in the given MemorySegment and refers to it directly. - * There is no data copying onto the java heap. - * The wrap operation enables fast read-only merging and access to all the public read-only API. - * - *

              Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have - * been explicitly stored as direct sketches can be wrapped. - * Wrapping earlier serial version sketches will result in a on-heap CompactSketch - * where all data will be copied to the heap. These early versions were never designed to - * "wrap".

              - * - *

              Wrapping any subclass of this class that is empty or contains only a single item will - * result in on-heap equivalent forms of empty and single item sketch respectively. - * This is actually faster and consumes less overall space.

              - * - *

              For Update Sketches this method checks if the - * Default Update Seed

              - * was used to create the source MemorySegment image. - * - *

              For Compact Sketches this method assumes that the sketch image was created with the - * correct hash seed, so it is not checked.

              - * - * @param srcSeg an image of a Sketch. - * @return a Sketch backed by the given MemorySegment - */ - public static Sketch wrap(final MemorySegment srcSeg) { - final int preLongs = srcSeg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; - final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE) & 0XFF; - final int familyID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF; - final Family family = Family.idToFamily(familyID); - if (family == Family.QUICKSELECT) { - if (serVer == 3 && preLongs == 3) { - return DirectQuickSelectSketchR.readOnlyWrap(srcSeg, Util.DEFAULT_UPDATE_SEED); - } else { - throw new SketchesArgumentException( - "Corrupted: " + family + " family image: must have SerVer = 3 and preLongs = 3"); - } - } - if (family == Family.COMPACT) { - return CompactSketch.wrap(srcSeg); - } - throw new SketchesArgumentException( - "Cannot wrap family: " + family + " as a Sketch"); - } - - /** - * Wrap takes the sketch image in the given MemorySegment and refers to it directly. - * There is no data copying onto the java heap. - * The wrap operation enables fast read-only merging and access to all the public read-only API. - * - *

              Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have - * been explicitly stored as direct sketches can be wrapped. - * Wrapping earlier serial version sketches will result in a on-heap CompactSketch - * where all data will be copied to the heap. These early versions were never designed to - * "wrap".

              - * - *

              Wrapping any subclass of this class that is empty or contains only a single item will - * result in on-heap equivalent forms of empty and single item sketch respectively. - * This is actually faster and consumes less overall space.

              - * - *

              For Update and Compact Sketches this method checks if the given expectedSeed was used to - * create the source MemorySegment image. However, SerialVersion 1 sketches cannot be checked.

              - * - * @param srcSeg a MemorySegment with an image of a Sketch. - * @param expectedSeed the seed used to validate the given MemorySegment image. - * See Update Hash Seed. - * @return a UpdateSketch backed by the given MemorySegment except as above. - */ - public static Sketch wrap(final MemorySegment srcSeg, final long expectedSeed) { - final int preLongs = srcSeg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; - final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE) & 0XFF; - final int familyID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF; - final Family family = Family.idToFamily(familyID); - if (family == Family.QUICKSELECT) { - if (serVer == 3 && preLongs == 3) { - return DirectQuickSelectSketchR.readOnlyWrap(srcSeg, expectedSeed); - } else { - throw new SketchesArgumentException( - "Corrupted: " + family + " family image: must have SerVer = 3 and preLongs = 3"); - } - } - if (family == Family.COMPACT) { - return CompactSketch.wrap(srcSeg, expectedSeed); - } - throw new SketchesArgumentException( - "Cannot wrap family: " + family + " as a Sketch"); - } - - //Sketch interface - - /** - * Converts this sketch to a ordered CompactSketch. - * - *

              If this.isCompact() == true this method returns this, - * otherwise, this method is equivalent to - * {@link #compact(boolean, MemorySegment) compact(true, null)}. - * - *

              A CompactSketch is always immutable.

              - * - * @return this sketch as an ordered CompactSketch. - */ - public CompactSketch compact() { - return (this.isCompact()) ? (CompactSketch)this : compact(true, null); - } - - /** - * Convert this sketch to a CompactSketch. - * - *

              If this sketch is a type of UpdateSketch, the compacting process converts the hash table - * of the UpdateSketch to a simple list of the valid hash values. - * Any hash values of zero or equal-to or greater than theta will be discarded. - * The number of valid values remaining in the CompactSketch depends on a number of factors, - * but may be larger or smaller than Nominal Entries (or k). - * It will never exceed 2k. - * If it is critical to always limit the size to no more than k, - * then rebuild() should be called on the UpdateSketch prior to calling this method.

              - * - *

              A CompactSketch is always immutable.

              - * - *

              A new CompactSketch object is created:

              - *
              • if dstSeg!= null
              • - *
              • if dstSeg == null and this.hasMemorySegment() == true
              • - *
              • if dstSeg == null and this has more than 1 item and this.isOrdered() == false - * and dstOrdered == true.
              • - *
              - * - *

              Otherwise, this operation returns this.

              - * - * @param dstOrdered assumed true if this sketch is empty or has only one value - * See Destination Ordered - * - * @param dstSeg - * See Destination MemorySegment. - * - * @return this sketch as a CompactSketch. - */ - public abstract CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg); - - /** - * Returns the number of storage bytes required for this Sketch if its current state were - * compacted. It this sketch is already in the compact form this is equivalent to - * calling {@link #getCurrentBytes()}. - * @return number of compact bytes - */ - public abstract int getCompactBytes(); - - /** - * Gets the number of hash values less than the given theta expressed as a long. - * @param thetaLong the given theta as a long between zero and Long.MAX_VALUE. - * @return the number of hash values less than the given thetaLong. - */ - public int getCountLessThanThetaLong(final long thetaLong) { - return count(getCache(), thetaLong); - } - - /** - * Returns the number of storage bytes required for this sketch in its current state. - * - * @return the number of storage bytes required for this sketch - */ - public abstract int getCurrentBytes(); - - /** - * Gets the unique count estimate. - * @return the sketch's best estimate of the cardinality of the input stream. - */ - public abstract double getEstimate(); - - /** - * Returns the Family that this sketch belongs to - * @return the Family that this sketch belongs to - */ - public abstract Family getFamily(); - - /** - * Gets the approximate lower error bound given the specified number of Standard Deviations. - * This will return getEstimate() if isEmpty() is true. - * - * @param numStdDev - * See Number of Standard Deviations - * @return the lower bound. - */ - public double getLowerBound(final int numStdDev) { - return isEstimationMode() - ? 
lowerBound(getRetainedEntries(true), getThetaLong(), numStdDev, isEmpty()) - : getRetainedEntries(true); - } - - /** - * Returns the maximum number of storage bytes required for a CompactSketch with the given - * number of actual entries. - * @param numberOfEntries the actual number of retained entries stored in the sketch. - * @return the maximum number of storage bytes required for a CompactSketch with the given number - * of retained entries. - */ - public static int getMaxCompactSketchBytes(final int numberOfEntries) { - if (numberOfEntries == 0) { return 8; } - if (numberOfEntries == 1) { return 16; } - return (numberOfEntries << 3) + 24; - } - - /** - * Returns the maximum number of storage bytes required for a CompactSketch given the configured - * log_base2 of the number of nominal entries, which is a power of 2. - * @param lgNomEntries Nominal Entries - * @return the maximum number of storage bytes required for a CompactSketch with the given - * lgNomEntries. - */ - public static int getCompactSketchMaxBytes(final int lgNomEntries) { - return (int)((2 << lgNomEntries) * ThetaUtil.REBUILD_THRESHOLD - + Family.QUICKSELECT.getMaxPreLongs()) * Long.BYTES; - } - - /** - * Returns the maximum number of storage bytes required for an UpdateSketch with the given - * number of nominal entries (power of 2). - * @param nomEntries Nominal Entries - * This will become the ceiling power of 2 if it is not. - * @return the maximum number of storage bytes required for a UpdateSketch with the given - * nomEntries - */ - public static int getMaxUpdateSketchBytes(final int nomEntries) { - final int nomEnt = ceilingPowerOf2(nomEntries); - return (nomEnt << 4) + (Family.QUICKSELECT.getMaxPreLongs() << 3); - } - - /** - * Returns the number of valid entries that have been retained by the sketch. 
- * @return the number of valid retained entries - */ - public int getRetainedEntries() { - return getRetainedEntries(true); - } - - /** - * Returns the number of entries that have been retained by the sketch. - * @param valid if true, returns the number of valid entries, which are less than theta and used - * for estimation. - * Otherwise, return the number of all entries, valid or not, that are currently in the internal - * sketch cache. - * @return the number of retained entries - */ - public abstract int getRetainedEntries(boolean valid); - - /** - * Returns the serialization version from the given MemorySegment - * @param seg the sketch MemorySegment - * @return the serialization version from the MemorySegment - */ - public static int getSerializationVersion(final MemorySegment seg) { - return seg.get(JAVA_BYTE, SER_VER_BYTE); - } - - /** - * Gets the value of theta as a double with a value between zero and one - * @return the value of theta as a double - */ - public double getTheta() { - return getThetaLong() / LONG_MAX_VALUE_AS_DOUBLE; - } - - /** - * Gets the value of theta as a long - * @return the value of theta as a long - */ - public abstract long getThetaLong(); - - /** - * Gets the approximate upper error bound given the specified number of Standard Deviations. - * This will return getEstimate() if isEmpty() is true. - * - * @param numStdDev - * See Number of Standard Deviations - * @return the upper bound. - */ - public double getUpperBound(final int numStdDev) { - return isEstimationMode() - ? upperBound(getRetainedEntries(true), getThetaLong(), numStdDev, isEmpty()) - : getRetainedEntries(true); - } - - /** - * Returns true if this sketch is in compact form. - * @return true if this sketch is in compact form. - */ - public abstract boolean isCompact(); - - /** - * See Empty - * @return true if empty. - */ - public abstract boolean isEmpty(); - - /** - * Returns true if the sketch is Estimation Mode (as opposed to Exact Mode). 
- * This is true if theta < 1.0 AND isEmpty() is false. - * @return true if the sketch is in estimation mode. - */ - public boolean isEstimationMode() { - return estMode(getThetaLong(), isEmpty()); - } - - /** - * Returns true if internal cache is ordered - * @return true if internal cache is ordered - */ - public abstract boolean isOrdered(); - - /** - * Returns a HashIterator that can be used to iterate over the retained hash values of the - * Theta sketch. - * @return a HashIterator that can be used to iterate over the retained hash values of the - * Theta sketch. - */ - public abstract HashIterator iterator(); - - /** - * Serialize this sketch to a byte array form. - * @return byte array of this sketch - */ - public abstract byte[] toByteArray(); - - /** - * Returns a human readable summary of the sketch. This method is equivalent to the parameterized - * call:
              - * Sketch.toString(sketch, true, false, 8, true); - * @return summary - */ - @Override - public String toString() { - return toString(true, false, 8, true); - } - - /** - * Gets a human readable listing of contents and summary of the given sketch. - * This can be a very long string. If this sketch is in a "dirty" state there - * may be values in the dataDetail view that are ≥ theta. - * - * @param sketchSummary If true the sketch summary will be output at the end. - * @param dataDetail If true, includes all valid hash values in the sketch. - * @param width The number of columns of hash values. Default is 8. - * @param hexMode If true, hashes will be output in hex. - * @return The result string, which can be very long. - */ - public String toString(final boolean sketchSummary, final boolean dataDetail, final int width, - final boolean hexMode) { - final StringBuilder sb = new StringBuilder(); - - int nomLongs = 0; - int arrLongs = 0; - float p = 0; - int rf = 0; - final boolean updateSketch = this instanceof UpdateSketch; - - final long thetaLong = getThetaLong(); - final int curCount = this.getRetainedEntries(true); - - if (updateSketch) { - final UpdateSketch uis = (UpdateSketch)this; - nomLongs = 1 << uis.getLgNomLongs(); - arrLongs = 1 << uis.getLgArrLongs(); - p = uis.getP(); - rf = uis.getResizeFactor().getValue(); - } - - if (dataDetail) { - final int w = width > 0 ? 
width : 8; // default is 8 wide - if (curCount > 0) { - sb.append("### SKETCH DATA DETAIL"); - final HashIterator it = iterator(); - int j = 0; - while (it.next()) { - final long h = it.get(); - if (j % w == 0) { - sb.append(LS).append(String.format(" %6d", j + 1)); - } - if (hexMode) { - sb.append(" " + zeroPad(Long.toHexString(h), 16) + ","); - } - else { - sb.append(String.format(" %20d,", h)); - } - j++ ; - } - sb.append(LS).append("### END DATA DETAIL").append(LS + LS); - } - } - - if (sketchSummary) { - final double thetaDbl = thetaLong / LONG_MAX_VALUE_AS_DOUBLE; - final String thetaHex = zeroPad(Long.toHexString(thetaLong), 16); - final String thisSimpleName = this.getClass().getSimpleName(); - final int seedHash = Short.toUnsignedInt(getSeedHash()); - - sb.append(LS); - sb.append("### ").append(thisSimpleName).append(" SUMMARY: ").append(LS); - if (updateSketch) { - sb.append(" Nominal Entries (k) : ").append(nomLongs).append(LS); - } - sb.append(" Estimate : ").append(getEstimate()).append(LS); - sb.append(" Upper Bound, 95% conf : ").append(getUpperBound(2)).append(LS); - sb.append(" Lower Bound, 95% conf : ").append(getLowerBound(2)).append(LS); - if (updateSketch) { - sb.append(" p : ").append(p).append(LS); - } - sb.append(" Theta (double) : ").append(thetaDbl).append(LS); - sb.append(" Theta (long) : ").append(thetaLong).append(LS); - sb.append(" Theta (long) hex : ").append(thetaHex).append(LS); - sb.append(" EstMode? : ").append(isEstimationMode()).append(LS); - sb.append(" Empty? : ").append(isEmpty()).append(LS); - sb.append(" Ordered? 
: ").append(isOrdered()).append(LS); - if (updateSketch) { - sb.append(" Resize Factor : ").append(rf).append(LS); - sb.append(" Array Size Entries : ").append(arrLongs).append(LS); - } - sb.append(" Retained Entries : ").append(curCount).append(LS); - sb.append(" Seed Hash : ").append(Integer.toHexString(seedHash)) - .append(" | ").append(seedHash).append(LS); - sb.append("### END SKETCH SUMMARY").append(LS); - - } - return sb.toString(); - } - - /** - * Returns a human readable string of the preamble of a byte array image of a Theta Sketch. - * @param byteArr the given byte array - * @return a human readable string of the preamble of a byte array image of a Theta Sketch. - */ - public static String toString(final byte[] byteArr) { - return PreambleUtil.preambleToString(byteArr); - } - - /** - * Returns a human readable string of the preamble of a MemorySegment image of a Theta Sketch. - * @param seg the given MemorySegment object - * @return a human readable string of the preamble of a MemorySegment image of a Theta Sketch. - */ - public static String toString(final MemorySegment seg) { - return PreambleUtil.preambleToString(seg); - } - - //Restricted methods - - /** - * Gets the internal cache array. For on-heap sketches this will return a reference to the actual - * cache array. For MemorySegment-based sketches this returns a copy. - * @return the internal cache array. - */ - abstract long[] getCache(); - - /** - * Gets preamble longs if stored in compact form. If this sketch is already in compact form, - * this is identical to the call {@link #getCurrentPreambleLongs()}. - * @return preamble longs if stored in compact form. - */ - abstract int getCompactPreambleLongs(); - - /** - * Gets the number of data longs if stored in current state. - * @return the number of data longs if stored in current state. - */ - abstract int getCurrentDataLongs(); - - /** - * Returns preamble longs if stored in current state. - * @return number of preamble longs if stored. 
- */ - abstract int getCurrentPreambleLongs(); - - /** - * Returns the backing MemorySegment object if it exists, otherwise null. - * This is overridden where relevant. - * @return the backing MemorySegment object if it exists, otherwise null. - */ - MemorySegment getMemorySegment() { return null; } - - /** - * Gets the 16-bit seed hash - * @return the seed hash - */ - abstract short getSeedHash(); - - /** - * Returns true if given Family id is one of the theta sketches - * @param id the given Family id - * @return true if given Family id is one of the theta sketches - */ - static final boolean isValidSketchID(final int id) { - return id == Family.ALPHA.getID() - || id == Family.QUICKSELECT.getID() - || id == Family.COMPACT.getID(); - } - - /** - * Checks Ordered and Compact flags for integrity between sketch and a MemorySegment - * @param sketch the given sketch - */ - static final void checkSketchAndMemorySegmentFlags(final Sketch sketch) { - final MemorySegment seg = sketch.getMemorySegment(); - if (seg == null) { return; } - final int flags = PreambleUtil.extractFlags(seg); - if ((flags & COMPACT_FLAG_MASK) > 0 ^ sketch.isCompact()) { - throw new SketchesArgumentException("Possible corruption: " - + "MemorySegment Compact Flag inconsistent with Sketch"); - } - if ((flags & ORDERED_FLAG_MASK) > 0 ^ sketch.isOrdered()) { - throw new SketchesArgumentException("Possible corruption: " - + "MemorySegment Ordered Flag inconsistent with Sketch"); - } - } - - static final double estimate(final long thetaLong, final int curCount) { - return curCount * (LONG_MAX_VALUE_AS_DOUBLE / thetaLong); - } - - static final double lowerBound(final int curCount, final long thetaLong, final int numStdDev, - final boolean empty) { - final double theta = thetaLong / LONG_MAX_VALUE_AS_DOUBLE; - return BinomialBoundsN.getLowerBound(curCount, theta, numStdDev, empty); - } - - static final double upperBound(final int curCount, final long thetaLong, final int numStdDev, - final boolean empty) 
{ - final double theta = thetaLong / LONG_MAX_VALUE_AS_DOUBLE; - return BinomialBoundsN.getUpperBound(curCount, theta, numStdDev, empty); - } - - private static final boolean estMode(final long thetaLong, final boolean empty) { - return thetaLong < Long.MAX_VALUE && !empty; - } - - /** - * Instantiates a Heap Update Sketch from MemorySegment. Only SerVer3. SerVer 1 & 2 already handled. - * @param srcSeg the source MemorySegment - * @param expectedSeed the seed used to validate the given MemorySegment image. - * See Update Hash Seed. - * @return a Sketch - */ - private static final Sketch heapifyUpdateFromMemorySegment(final MemorySegment srcSeg, final long expectedSeed) { - final long cap = srcSeg.byteSize(); - if (cap < 8) { - throw new SketchesArgumentException( - "Corrupted: valid sketch must be at least 8 bytes."); - } - final byte familyID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE); - final Family family = idToFamily(familyID); - - if (family == Family.ALPHA) { - final int flags = PreambleUtil.extractFlags(srcSeg); - final boolean compactFlag = (flags & COMPACT_FLAG_MASK) != 0; - if (compactFlag) { - throw new SketchesArgumentException( - "Corrupted: ALPHA family image: cannot be compact"); - } - return HeapAlphaSketch.heapifyInstance(srcSeg, expectedSeed); - } - if (family == Family.QUICKSELECT) { - return HeapQuickSelectSketch.heapifyInstance(srcSeg, expectedSeed); - } - throw new SketchesArgumentException( - "Sketch cannot heapify family: " + family + " as a Sketch"); - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/Sketches.java b/src/main/java/org/apache/datasketches/theta2/Sketches.java deleted file mode 100644 index 0a3904511..000000000 --- a/src/main/java/org/apache/datasketches/theta2/Sketches.java +++ /dev/null @@ -1,406 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.FLAGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.RETAINED_ENTRIES_INT; -import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.THETA_LONG; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; - -/** - * This class brings together the common sketch and set operation creation methods and - * the public static methods into one place. 
- * - * @author Lee Rhodes - */ -public final class Sketches { - - private Sketches() {} - - /** - * Gets the unique count estimate from a valid MemorySegment image of a Sketch - * @param srcSeg the source MemorySegment - * @return the sketch's best estimate of the cardinality of the input stream. - */ - public static double getEstimate(final MemorySegment srcSeg) { - checkIfValidThetaSketch(srcSeg); - return Sketch.estimate(getThetaLong(srcSeg), getRetainedEntries(srcSeg)); - } - - /** - * Gets the approximate lower error bound from a valid MemorySegment image of a Sketch - * given the specified number of Standard Deviations. - * This will return getEstimate() if isEmpty() is true. - * - * @param numStdDev - * See Number of Standard Deviations - * @param srcSeg the source MemorySegment - * @return the lower bound. - */ - public static double getLowerBound(final int numStdDev, final MemorySegment srcSeg) { - return Sketch.lowerBound(getRetainedEntries(srcSeg), getThetaLong(srcSeg), numStdDev, getEmpty(srcSeg)); - } - - /** - * Convenience method, ref: {@link SetOperation#getMaxAnotBResultBytes(int)}. - * Returns the maximum number of bytes for the returned CompactSketch, given the maximum - * value of nomEntries of the first sketch A of AnotB. - * @param maxNomEntries the given value - * @return the maximum number of bytes. - */ - public static int getMaxAnotBResultBytes(final int maxNomEntries) { - return SetOperation.getMaxAnotBResultBytes(maxNomEntries); - } - - /** - * Returns the maximum number of storage bytes required for a CompactSketch with the given - * number of actual entries. - * @param numberOfEntries the actual number of retained entries stored in the sketch. - * @return the maximum number of storage bytes required for a CompactSketch with the given number - * of retained entries. 
- */ - public static int getMaxCompactSketchBytes(final int numberOfEntries) { - return Sketch.getMaxCompactSketchBytes(numberOfEntries); - } - - /** - * Returns the maximum number of storage bytes required for a CompactSketch given the configured - * log_base2 of the number of nominal entries, which is a power of 2. - * @param lgNomEntries Nominal Entries - * @return the maximum number of storage bytes required for a CompactSketch with the given - * lgNomEntries. - * @see Sketch#getCompactSketchMaxBytes(int) - */ - public static int getCompactSketchMaxBytes(final int lgNomEntries) { - return Sketch.getCompactSketchMaxBytes(lgNomEntries); - } - - /** - * Convenience method, ref: {@link SetOperation#getMaxIntersectionBytes(int)} - * @param nomEntries Ref: {@link SetOperation#getMaxIntersectionBytes(int)}, {@code nomEntries} - * @return Ref: {@link SetOperation#getMaxIntersectionBytes(int)} - */ - public static int getMaxIntersectionBytes(final int nomEntries) { - return SetOperation.getMaxIntersectionBytes(nomEntries); - } - - /** - * Convenience method, ref: {@link SetOperation#getMaxUnionBytes(int)} - * @param nomEntries Ref: {@link SetOperation#getMaxUnionBytes(int)}, {@code nomEntries} - * @return Ref: {@link SetOperation#getMaxUnionBytes(int)} - */ - public static int getMaxUnionBytes(final int nomEntries) { - return SetOperation.getMaxUnionBytes(nomEntries); - } - - /** - * Convenience method, ref: {@link Sketch#getMaxUpdateSketchBytes(int)} - * @param nomEntries Ref: {@link Sketch#getMaxUpdateSketchBytes(int)}, {@code nomEntries} - * @return Ref: {@link Sketch#getMaxUpdateSketchBytes(int)} - */ - public static int getMaxUpdateSketchBytes(final int nomEntries) { - return Sketch.getMaxUpdateSketchBytes(nomEntries); - } - - /** - * Convenience method, ref: {@link Sketch#getSerializationVersion(MemorySegment)} - * @param srcSeg Ref: {@link Sketch#getSerializationVersion(MemorySegment)}, {@code srcSeg} - * @return Ref: {@link 
Sketch#getSerializationVersion(MemorySegment)} - */ - public static int getSerializationVersion(final MemorySegment srcSeg) { - return Sketch.getSerializationVersion(srcSeg); - } - - /** - * Gets the approximate upper error bound from a valid MemorySegment image of a Sketch - * given the specified number of Standard Deviations. - * This will return getEstimate() if isEmpty() is true. - * - * @param numStdDev - * See Number of Standard Deviations - * @param srcSeg the source MemorySegment - * @return the upper bound. - */ - public static double getUpperBound(final int numStdDev, final MemorySegment srcSeg) { - return Sketch.upperBound(getRetainedEntries(srcSeg), getThetaLong(srcSeg), numStdDev, getEmpty(srcSeg)); - } - - //Heapify Operations - - /** - * Convenience method, ref: {@link CompactSketch#heapify(MemorySegment) CompactSketch.heapify(MemorySegment)} - * @param srcSeg Ref: {@link CompactSketch#heapify(MemorySegment) CompactSketch.heapify(MemorySegment)}, {@code srcSeg} - * @return {@link CompactSketch CompactSketch} - */ - public static CompactSketch heapifyCompactSketch(final MemorySegment srcSeg) { - return CompactSketch.heapify(srcSeg); - } - - /** - * Convenience method, ref: {@link CompactSketch#heapify(MemorySegment, long) CompactSketch.heapify(MemorySegment, long)} - * @param srcSeg Ref: {@link CompactSketch#heapify(MemorySegment, long) CompactSketch.heapify(MemorySegment, long)}, {@code srcSeg} - * @param expectedSeed Ref: {@link CompactSketch#heapify(MemorySegment, long) CompactSketch.heapify(MemorySegment, long)}, - * {@code expectedSeed} - * @return {@link CompactSketch CompactSketch} - */ - public static CompactSketch heapifyCompactSketch(final MemorySegment srcSeg, final long expectedSeed) { - return CompactSketch.heapify(srcSeg, expectedSeed); - } - - /** - * Convenience method, ref: {@link CompactSketch#wrap(MemorySegment) CompactSketch.wrap(MemorySegment)} - * @param srcSeg Ref: {@link CompactSketch#wrap(MemorySegment) 
CompactSketch.wrap(MemorySegment)}, {@code srcSeg} - * @return {@link CompactSketch CompactSketch} - */ - public static CompactSketch wrapCompactSketch(final MemorySegment srcSeg) { - return CompactSketch.wrap(srcSeg); - } - - /** - * Convenience method, ref: {@link CompactSketch#wrap(MemorySegment, long) CompactSketch.wrap(MemorySegment, long)} - * @param srcSeg Ref: {@link CompactSketch#wrap(MemorySegment, long) CompactSketch.wrap(MemorySegment, long)}, {@code srcSeg} - * @param expectedSeed Ref: {@link CompactSketch#wrap(MemorySegment, long) CompactSketch.wrap(MemorySegment, long)}, - * {@code expectedSeed} - * @return {@link CompactSketch CompactSketch} - */ - public static CompactSketch wrapCompactSketch(final MemorySegment srcSeg, final long expectedSeed) { - return CompactSketch.wrap(srcSeg, expectedSeed); - } - - /** - * Convenience method, ref: {@link SetOperation#heapify(MemorySegment) SetOperation.heapify(MemorySegment)} - * @param srcSeg Ref: {@link SetOperation#heapify(MemorySegment) SetOperation.heapify(MemorySegment)}, {@code srcSeg} - * @return {@link SetOperation SetOperation} - */ - public static SetOperation heapifySetOperation(final MemorySegment srcSeg) { - return SetOperation.heapify(srcSeg); - } - - /** - * Convenience method, ref: {@link SetOperation#heapify(MemorySegment, long) SetOperation.heapify(MemorySegment, long)} - * @param srcSeg Ref: {@link SetOperation#heapify(MemorySegment, long) SetOperation.heapify(MemorySegment, long)}, - * {@code srcSeg} - * @param expectedSeed the seed used to validate the given MemorySegment image. 
- * Ref: {@link SetOperation#heapify(MemorySegment, long) SetOperation.heapify(MemorySegment, long)}, - * {@code expectedSeed} - * @return {@link SetOperation SetOperation} - */ - public static SetOperation heapifySetOperation(final MemorySegment srcSeg, final long expectedSeed) { - return SetOperation.heapify(srcSeg, expectedSeed); - } - - /** - * Convenience method, ref: {@link Sketch#heapify(MemorySegment) Sketch.heapify(MemorySegment)} - * @param srcSeg Ref: {@link Sketch#heapify(MemorySegment) Sketch.heapify(MemorySegment)}, {@code srcSeg} - * @return {@link Sketch Sketch} - */ - public static Sketch heapifySketch(final MemorySegment srcSeg) { - return Sketch.heapify(srcSeg); - } - - /** - * Convenience method, ref: {@link Sketch#heapify(MemorySegment, long) Sketch.heapify(MemorySegment, long)} - * @param srcSeg Ref: {@link Sketch#heapify(MemorySegment, long) Sketch.heapify(MemorySegment, long)}, {@code srcSeg} - * @param expectedSeed the seed used to validate the given MemorySegment image. 
- * Ref: {@link Sketch#heapify(MemorySegment, long) Sketch.heapify(MemorySegment, long)}, {@code expectedSeed} - * @return {@link Sketch Sketch} - */ - public static Sketch heapifySketch(final MemorySegment srcSeg, final long expectedSeed) { - return Sketch.heapify(srcSeg, expectedSeed); - } - - /** - * Convenience method, ref: {@link UpdateSketch#heapify(MemorySegment) UpdateSketch.heapify(MemorySegment)} - * @param srcSeg Ref: {@link UpdateSketch#heapify(MemorySegment) UpdateSketch.heapify(MemorySegment)}, {@code srcSeg} - * @return {@link UpdateSketch UpdateSketch} - */ - public static UpdateSketch heapifyUpdateSketch(final MemorySegment srcSeg) { - return UpdateSketch.heapify(srcSeg); - } - - /** - * Convenience method, ref: {@link UpdateSketch#heapify(MemorySegment, long) UpdateSketch.heapify(MemorySegment, long)} - * @param srcSeg Ref: {@link UpdateSketch#heapify(MemorySegment, long) UpdateSketch.heapify(MemorySegment, long)}, - * {@code srcSeg} - * @param expectedSeed the seed used to validate the given MemorySegment image. 
- * Ref: {@link UpdateSketch#heapify(MemorySegment, long) UpdateSketch.heapify(MemorySegment, long)}, - * {@code expectedSeed} - * @return {@link UpdateSketch UpdateSketch} - */ - public static UpdateSketch heapifyUpdateSketch(final MemorySegment srcSeg, final long expectedSeed) { - return UpdateSketch.heapify(srcSeg, expectedSeed); - } - - //Builders - - /** - * Ref: {@link SetOperationBuilder SetOperationBuilder} - * @return {@link SetOperationBuilder SetOperationBuilder} - */ - public static SetOperationBuilder setOperationBuilder() { - return new SetOperationBuilder(); - } - - /** - * Ref: {@link UpdateSketchBuilder UpdateSketchBuilder} - * @return {@link UpdateSketchBuilder UpdateSketchBuilder} - */ - public static UpdateSketchBuilder updateSketchBuilder() { - return new UpdateSketchBuilder(); - } - - //Wrap operations - - /** - * Convenience method, ref: {@link SetOperation#wrap(MemorySegment)} - * @param srcSeg Ref: {@link SetOperation#wrap(MemorySegment)}, {@code srcSeg} - * @return a Intersection backed by the given MemorySegment - */ - public static Intersection wrapIntersection(final MemorySegment srcSeg) { - return (Intersection) SetOperation.wrap(srcSeg); - } - - /** - * Convenience method, ref: {@link SetOperation#wrap(MemorySegment) SetOperation.wrap(MemorySegment)} - * @param srcSeg Ref: {@link SetOperation#wrap(MemorySegment) SetOperation.wrap(MemorySegment)}, {@code srcSeg} - * @return {@link SetOperation SetOperation} - */ - public static SetOperation wrapSetOperation(final MemorySegment srcSeg) { - return wrapSetOperation(srcSeg, Util.DEFAULT_UPDATE_SEED); - } - - /** - * Convenience method, ref: {@link SetOperation#wrap(MemorySegment, long) SetOperation.wrap(MemorySegment, long)} - * @param srcSeg Ref: {@link SetOperation#wrap(MemorySegment, long) SetOperation.wrap(MemorySegment, long)}, {@code srcSeg} - * @param expectedSeed the seed used to validate the given MemorySegment image. 
- * Ref: {@link SetOperation#wrap(MemorySegment, long) SetOperation.wrap(MemorySegment, long)}, {@code expectedSeed} - * @return {@link SetOperation SetOperation} - */ - public static SetOperation wrapSetOperation(final MemorySegment srcSeg, final long expectedSeed) { - return SetOperation.wrap(srcSeg, expectedSeed); - } - - /** - * Convenience method, ref: {@link Sketch#wrap(MemorySegment) Sketch.wrap(MemorySegment)} - * @param srcSeg Ref: {@link Sketch#wrap(MemorySegment) Sketch.wrap(MemorySegment)}, {@code srcSeg} - * @return {@link Sketch Sketch} - */ - public static Sketch wrapSketch(final MemorySegment srcSeg) { - return Sketch.wrap(srcSeg); - } - - /** - * Convenience method, ref: {@link Sketch#wrap(MemorySegment, long) Sketch.wrap(MemorySegment, long)} - * @param srcSeg Ref: {@link Sketch#wrap(MemorySegment, long) Sketch.wrap(MemorySegment, long)}, {@code srcSeg} - * @param expectedSeed the expectedSeed used to validate the given MemorySegment image. - * Ref: {@link Sketch#wrap(MemorySegment, long) Sketch.wrap(MemorySegment, long)}, {@code expectedSeed} - * @return {@link Sketch Sketch} - */ - public static Sketch wrapSketch(final MemorySegment srcSeg, final long expectedSeed) { - return Sketch.wrap(srcSeg, expectedSeed); - } - - /** - * Convenience method, ref: {@link SetOperation#wrap(MemorySegment)} and casts the result to a Union - * @param srcSeg Ref: {@link SetOperation#wrap(MemorySegment)}, {@code srcSeg} - * @return a Union backed by the given MemorySegment. 
- */ - public static Union wrapUnion(final MemorySegment srcSeg) { - return (Union) SetOperation.wrap(srcSeg); - } - - /** - * Convenience method, ref: {@link UpdateSketch#wrap(MemorySegment) UpdateSketch.wrap(MemorySegment)} - * @param srcSeg Ref: {@link UpdateSketch#wrap(MemorySegment) UpdateSketch.wrap(MemorySegment)}, {@code srcSeg} - * @return {@link UpdateSketch UpdateSketch} - */ - public static UpdateSketch wrapUpdateSketch(final MemorySegment srcSeg) { - return wrapUpdateSketch(srcSeg, Util.DEFAULT_UPDATE_SEED); - } - - /** - * Convenience method, ref: {@link UpdateSketch#wrap(MemorySegment, long) UpdateSketch.wrap(MemorySegment, long)} - * @param srcSeg Ref: {@link UpdateSketch#wrap(MemorySegment, long) UpdateSketch.wrap(MemorySegment, long)}, {@code srcSeg} - * @param expectedSeed the seed used to validate the given MemorySegment image. - * Ref: {@link UpdateSketch#wrap(MemorySegment, long) UpdateSketch.wrap(MemorySegment, long)}, {@code expectedSeed} - * @return {@link UpdateSketch UpdateSketch} - */ - public static UpdateSketch wrapUpdateSketch(final MemorySegment srcSeg, final long expectedSeed) { - return UpdateSketch.wrap(srcSeg, expectedSeed); - } - - //Restricted static methods - - static void checkIfValidThetaSketch(final MemorySegment srcSeg) { - final int fam = srcSeg.get(JAVA_BYTE, FAMILY_BYTE); - if (!Sketch.isValidSketchID(fam)) { - throw new SketchesArgumentException("Source MemorySegment not a valid Sketch. 
Family: " - + Family.idToFamily(fam).toString()); - } - } - - static boolean getEmpty(final MemorySegment srcSeg) { - final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE); - if (serVer == 1) { - return ((getThetaLong(srcSeg) == Long.MAX_VALUE) && (getRetainedEntries(srcSeg) == 0)); - } - return (srcSeg.get(JAVA_BYTE, FLAGS_BYTE) & EMPTY_FLAG_MASK) != 0; //for SerVer 2 & 3 - } - - static int getPreambleLongs(final MemorySegment srcSeg) { - return srcSeg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; //for SerVer 1,2,3 - } - - static int getRetainedEntries(final MemorySegment srcSeg) { - final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE); - if (serVer == 1) { - final int entries = srcSeg.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT); - if ((getThetaLong(srcSeg) == Long.MAX_VALUE) && (entries == 0)) { - return 0; - } - return entries; - } - //SerVer 2 or 3 - final int preLongs = getPreambleLongs(srcSeg); - final boolean empty = (srcSeg.get(JAVA_BYTE, FLAGS_BYTE) & EMPTY_FLAG_MASK) != 0; //for SerVer 2 & 3 - if (preLongs == 1) { - return empty ? 0 : 1; - } - //preLongs > 1 - return srcSeg.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT); //for SerVer 1,2,3 - } - - static long getThetaLong(final MemorySegment srcSeg) { - final int preLongs = getPreambleLongs(srcSeg); - return (preLongs < 3) ? Long.MAX_VALUE : srcSeg.get(JAVA_LONG_UNALIGNED, THETA_LONG); //for SerVer 1,2,3 - } -} diff --git a/src/main/java/org/apache/datasketches/theta2/Union.java b/src/main/java/org/apache/datasketches/theta2/Union.java deleted file mode 100644 index 5c0261782..000000000 --- a/src/main/java/org/apache/datasketches/theta2/Union.java +++ /dev/null @@ -1,292 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE; - -import java.lang.foreign.MemorySegment; -import java.nio.ByteBuffer; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; - -/** - * Compute the union of two or more theta sketches. - * A new instance represents an empty set. - * - * @author Lee Rhodes - */ -public abstract class Union extends SetOperation { - - /** - * Wrap a Union object around a Union MemorySegment object containing data. - * This method assumes the Default Update Seed. - * This does NO validity checking of the given MemorySegment. - * If the given source MemorySegment is read-only, the returned Union object will also be read-only. - * @param srcSeg The source MemorySegment object. - * @return this class - */ - public static Union fastWrap(final MemorySegment srcSeg) { - return fastWrap(srcSeg, Util.DEFAULT_UPDATE_SEED); - } - - /** - * Wrap a Union object around a Union MemorySegment object containing data. - * This does NO validity checking of the given MemorySegment. - * If the given source MemorySegment is read-only, the returned Union object will also be read-only. - * @param srcSeg The source MemorySegment object. 
- * @param expectedSeed the seed used to validate the given MemorySegment image. - * See seed - * @return this class - */ - public static Union fastWrap(final MemorySegment srcSeg, final long expectedSeed) { - final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE); - if (serVer != 3) { - throw new SketchesArgumentException("SerVer must be 3: " + serVer); - } - return UnionImpl.fastWrapInstance(srcSeg, expectedSeed); - } - - /** - * Wrap a Union object around a Union MemorySegment object containing data. - * This method assumes the Default Update Seed. - * If the given source MemorySegment is read-only, the returned Union object will also be read-only. - * @param srcSeg The source MemorySegment object. - * @return this class - */ - public static Union wrap(final MemorySegment srcSeg) { - return wrap(srcSeg, Util.DEFAULT_UPDATE_SEED); - } - - /** - * Wrap a Union object around a Union MemorySegment object containing data. - * If the given source MemorySegment is read-only, the returned Union object will also be read-only. - * @param srcSeg The source MemorySegment object. - * @param expectedSeed the seed used to validate the given MemorySegment image. - * See seed - * @return this class - */ - public static Union wrap(final MemorySegment srcSeg, final long expectedSeed) { - final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE); - if (serVer != 3) { - throw new SketchesArgumentException("SerVer must be 3: " + serVer); - } - return UnionImpl.wrapInstance(srcSeg, expectedSeed); - } - - /** - * Returns the number of storage bytes required for this union in its current state. - * - * @return the number of storage bytes required for this union in its current state. - */ - public abstract int getCurrentBytes(); - - @Override - public Family getFamily() { - return Family.UNION; - } - - /** - * Returns the maximum required storage bytes for this union. - * @return the maximum required storage bytes for this union. 
- */ - public abstract int getMaxUnionBytes(); - - /** - * Gets the result of this operation as an ordered CompactSketch on the Java heap. - * This does not disturb the underlying data structure of the union. - * Therefore, it is OK to continue updating the union after this operation. - * @return the result of this operation as an ordered CompactSketch on the Java heap - */ - public abstract CompactSketch getResult(); - - /** - * Gets the result of this operation as a CompactSketch of the chosen form. - * This does not disturb the underlying data structure of the union. - * Therefore, it is OK to continue updating the union after this operation. - * - * @param dstOrdered - * See Destination Ordered - * - * @param dstSeg destination MemorySegment - * - * @return the result of this operation as a CompactSketch of the chosen form - */ - public abstract CompactSketch getResult(boolean dstOrdered, MemorySegment dstSeg); - - /** - * Resets this Union. The seed remains intact, everything else reverts back to its virgin state. - */ - public abstract void reset(); - - /** - * Returns a byte array image of this Union object - * @return a byte array image of this Union object - */ - public abstract byte[] toByteArray(); - - /** - * This implements a stateless, pair-wise union operation. The returned sketch will be cut back to - * the smaller of the two k values if required. - * - *

              Nulls and empty sketches are ignored.

              - * - * @param sketchA The first argument - * @param sketchB The second argument - * @return the result ordered CompactSketch on the heap. - */ - public CompactSketch union(final Sketch sketchA, final Sketch sketchB) { - return union(sketchA, sketchB, true, null); - } - - /** - * This implements a stateless, pair-wise union operation. The returned sketch will be cut back to - * k if required, similar to the regular Union operation. - * - *

              Nulls and empty sketches are ignored.

              - * - * @param sketchA The first argument - * @param sketchB The second argument - * @param dstOrdered If true, the returned CompactSketch will be ordered. - * @param dstSeg If not null, the returned CompactSketch will be placed in this MemorySegment. - * @return the result CompactSketch. - */ - public abstract CompactSketch union(Sketch sketchA, Sketch sketchB, boolean dstOrdered, - MemorySegment dstSeg); - - /** - * Perform a Union operation with this union and the given on-heap sketch of the Theta Family. - * This method is not valid for the older SetSketch, which was prior to Open Source (August, 2015). - * - *

              This method can be repeatedly called. - * - *

              Nulls and empty sketches are ignored.

              - * - * @param sketchIn The incoming sketch. - */ - public abstract void union(Sketch sketchIn); - - /** - * Perform a Union operation with this union and the given MemorySegment image of any sketch of the - * Theta Family. The input image may be from earlier versions of the Theta Compact Sketch, - * called the SetSketch (circa 2014), which was prior to Open Source and are compact and ordered. - * - *

              This method can be repeatedly called. - * - *

              Nulls and empty sketches are ignored.

              - * - * @param seg MemorySegment image of sketch to be merged - */ - public abstract void union(MemorySegment seg); - - /** - * Update this union with the given long data item. - * - * @param datum The given long datum. - */ - public abstract void update(long datum); - - /** - * Update this union with the given double (or float) data item. - * The double will be converted to a long using Double.doubleToLongBits(datum), - * which normalizes all NaN values to a single NaN representation. - * Plus and minus zero will be normalized to plus zero. - * Each of the special floating-point values NaN and +/- Infinity are treated as distinct. - * - * @param datum The given double datum. - */ - public abstract void update(double datum); - - /** - * Update this union with the with the given String data item. - * The string is converted to a byte array using UTF8 encoding. - * If the string is null or empty no update attempt is made and the method returns. - * - *

              Note: this will not produce the same output hash values as the {@link #update(char[])} - * method and will generally be a little slower depending on the complexity of the UTF8 encoding. - *

              - * - *

              Note: this is not a Sketch Union operation. This treats the given string as a data item.

              - * - * @param datum The given String. - */ - public abstract void update(String datum); - - /** - * Update this union with the given byte array item. - * If the byte array is null or empty no update attempt is made and the method returns. - * - *

              Note: this is not a Sketch Union operation. This treats the given byte array as a data - * item.

              - * - * @param data The given byte array. - */ - public abstract void update(byte[] data); - - /** - * Update this union with the given ByteBuffer item. - * If the ByteBuffer is null or empty no update attempt is made and the method returns. - * - *

              Note: this is not a Sketch Union operation. This treats the given ByteBuffer as a data - * item.

              - * - * @param data The given ByteBuffer. - */ - public abstract void update(ByteBuffer data); - - /** - * Update this union with the given integer array item. - * If the integer array is null or empty no update attempt is made and the method returns. - * - *

              Note: this is not a Sketch Union operation. This treats the given integer array as a data - * item.

              - * - * @param data The given int array. - */ - public abstract void update(int[] data); - - /** - * Update this union with the given char array item. - * If the char array is null or empty no update attempt is made and the method returns. - * - *

              Note: this will not produce the same output hash values as the {@link #update(String)} - * method but will be a little faster as it avoids the complexity of the UTF8 encoding.

              - * - *

              Note: this is not a Sketch Union operation. This treats the given char array as a data - * item.

              - * - * @param data The given char array. - */ - public abstract void update(char[] data); - - /** - * Update this union with the given long array item. - * If the long array is null or empty no update attempt is made and the method returns. - * - *

              Note: this is not a Sketch Union operation. This treats the given char array as a data - * item.

              - * - * @param data The given long array. - */ - public abstract void update(long[] data); - -} diff --git a/src/main/java/org/apache/datasketches/theta2/UnionImpl.java b/src/main/java/org/apache/datasketches/theta2/UnionImpl.java deleted file mode 100644 index 1f8f19180..000000000 --- a/src/main/java/org/apache/datasketches/theta2/UnionImpl.java +++ /dev/null @@ -1,377 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import static java.lang.Math.min; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static org.apache.datasketches.common.QuickSelect.selectExcludingZeros; -import static org.apache.datasketches.theta2.PreambleUtil.UNION_THETA_LONG; -import static org.apache.datasketches.theta2.PreambleUtil.clearEmpty; -import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID; -import static org.apache.datasketches.theta2.PreambleUtil.extractUnionThetaLong; -import static org.apache.datasketches.theta2.PreambleUtil.insertUnionThetaLong; - -import java.lang.foreign.MemorySegment; -import java.nio.ByteBuffer; -import java.util.Objects; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.HashOperations; - -/** - * Shared code for the HeapUnion and DirectUnion implementations. - * - * @author Lee Rhodes - * @author Kevin Lang - */ -final class UnionImpl extends Union { - - /** - * Although the gadget object is initially an UpdateSketch, in the context of a Union it is used - * as a specialized buffer that happens to leverage much of the machinery of an UpdateSketch. - * However, in this context some of the key invariants of the sketch algorithm are intentionally - * violated as an optimization. As a result this object can not be considered as an UpdateSketch - * and should never be exported as an UpdateSketch. It's internal state is not necessarily - * finalized and may contain garbage. Also its internal concept of "nominal entries" or "k" can - * be meaningless. It is private for very good reasons. - */ - private final UpdateSketch gadget_; - private final short expectedSeedHash_; //eliminates having to compute the seedHash on every union. 
- private long unionThetaLong_; //when on-heap, this is the only copy - private boolean unionEmpty_; //when on-heap, this is the only copy - - private UnionImpl(final UpdateSketch gadget, final long seed) { - gadget_ = gadget; - expectedSeedHash_ = Util.computeSeedHash(seed); - } - - /** - * Construct a new Union SetOperation on the java heap. - * Called by SetOperationBuilder. - * - * @param lgNomLongs See lgNomLongs - * @param seed See seed - * @param p See Sampling Probability, p - * @param rf See Resize Factor - * @return instance of this sketch - */ - static UnionImpl initNewHeapInstance( - final int lgNomLongs, - final long seed, - final float p, - final ResizeFactor rf) { - final UpdateSketch gadget = //create with UNION family - new HeapQuickSelectSketch(lgNomLongs, seed, p, rf, true); - final UnionImpl unionImpl = new UnionImpl(gadget, seed); - unionImpl.unionThetaLong_ = gadget.getThetaLong(); - unionImpl.unionEmpty_ = gadget.isEmpty(); - return unionImpl; - } - - /** - * Construct a new Direct Union in the destination MemorySegment. - * Called by SetOperationBuilder. - * - * @param lgNomLongs See lgNomLongs. - * @param seed See seed - * @param p See Sampling Probability, p - * @param rf See Resize Factor - * @param dstSeg the given MemorySegment object destination. It will be cleared prior to use. - * @return this class - */ - static UnionImpl initNewDirectInstance( - final int lgNomLongs, - final long seed, - final float p, - final ResizeFactor rf, - final MemorySegment dstSeg) { - final UpdateSketch gadget = //create with UNION family - new DirectQuickSelectSketch(lgNomLongs, seed, p, rf, dstSeg, true); - final UnionImpl unionImpl = new UnionImpl(gadget, seed); - unionImpl.unionThetaLong_ = gadget.getThetaLong(); - unionImpl.unionEmpty_ = gadget.isEmpty(); - return unionImpl; - } - - /** - * Heapify a Union from a MemorySegment Union object containing data. - * Called by SetOperation. - * @param srcSeg The source MemorySegment Union object. 
- * @param expectedSeed the seed used to validate the given MemorySegment image. - * See seed - * @return this class - */ - static UnionImpl heapifyInstance(final MemorySegment srcSeg, final long expectedSeed) { - final MemorySegment srcSegRO = srcSeg.asReadOnly(); - Family.UNION.checkFamilyID(extractFamilyID(srcSegRO)); - final UpdateSketch gadget = HeapQuickSelectSketch.heapifyInstance(srcSegRO, expectedSeed); - final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed); - unionImpl.unionThetaLong_ = extractUnionThetaLong(srcSegRO); - unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcSegRO); - return unionImpl; - } - - /** - * Fast-wrap a Union object around a Union MemorySegment object containing data. - * This does NO validity checking of the given MemorySegment. - * @param srcSeg The source MemorySegment object. - * @param expectedSeed the seed used to validate the given MemorySegment image. - * See seed - * @return this class - */ - static UnionImpl fastWrapInstance(final MemorySegment srcSeg, final long expectedSeed) { - Family.UNION.checkFamilyID(extractFamilyID(srcSeg)); - final UpdateSketch gadget = srcSeg.isReadOnly() - ? DirectQuickSelectSketchR.fastReadOnlyWrap(srcSeg, expectedSeed) - : DirectQuickSelectSketch.fastWritableWrap(srcSeg, expectedSeed); - final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed); - unionImpl.unionThetaLong_ = extractUnionThetaLong(srcSeg); - unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcSeg); - return unionImpl; - } - - /** - * Wrap a Union object around a Union MemorySegment object containing data. - * Called by SetOperation. - * @param srcSeg The source MemorySegment object. - * @param expectedSeed the seed used to validate the given MemorySegment image. - * See seed - * @return this class - */ - static UnionImpl wrapInstance(final MemorySegment srcSeg, final long expectedSeed) { - Family.UNION.checkFamilyID(extractFamilyID(srcSeg)); - final UpdateSketch gadget = srcSeg.isReadOnly() - ? 
DirectQuickSelectSketchR.readOnlyWrap(srcSeg, expectedSeed) - : DirectQuickSelectSketch.writableWrap(srcSeg, expectedSeed); - final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed); - unionImpl.unionThetaLong_ = extractUnionThetaLong(srcSeg); - unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcSeg); - return unionImpl; - } - - @Override - public int getCurrentBytes() { - return gadget_.getCurrentBytes(); - } - - @Override - public int getMaxUnionBytes() { - final int lgK = gadget_.getLgNomLongs(); - return (16 << lgK) + (Family.UNION.getMaxPreLongs() << 3); - } - - @Override - MemorySegment getMemorySegment() { - return hasMemorySegment() ? gadget_.getMemorySegment() : null; - } - - @Override - public CompactSketch getResult() { - return getResult(true, null); - } - - @Override - public CompactSketch getResult(final boolean dstOrdered, final MemorySegment dstSeg) { - final int gadgetCurCount = gadget_.getRetainedEntries(true); - final int k = 1 << gadget_.getLgNomLongs(); - final long[] gadgetCacheCopy = - gadget_.hasMemorySegment() ? gadget_.getCache() : gadget_.getCache().clone(); - - //Pull back to k - final long curGadgetThetaLong = gadget_.getThetaLong(); - final long adjGadgetThetaLong = gadgetCurCount > k - ? selectExcludingZeros(gadgetCacheCopy, gadgetCurCount, k + 1) : curGadgetThetaLong; - - //Finalize Theta and curCount - final long unionThetaLong = gadget_.hasMemorySegment() - ? gadget_.getMemorySegment().get(JAVA_LONG_UNALIGNED, UNION_THETA_LONG) - : unionThetaLong_; - - final long minThetaLong = min(min(curGadgetThetaLong, adjGadgetThetaLong), unionThetaLong); - final int curCountOut = minThetaLong < curGadgetThetaLong - ? 
HashOperations.count(gadgetCacheCopy, minThetaLong) - : gadgetCurCount; - - //Compact the cache - final long[] compactCacheOut = - CompactOperations.compactCache(gadgetCacheCopy, curCountOut, minThetaLong, dstOrdered); - final boolean empty = gadget_.isEmpty() && unionEmpty_; - final short seedHash = gadget_.getSeedHash(); - return CompactOperations.componentsToCompact( - minThetaLong, curCountOut, seedHash, empty, true, dstOrdered, dstOrdered, dstSeg, compactCacheOut); - } - - @Override - public boolean hasMemorySegment() { - return gadget_.hasMemorySegment(); - } - - @Override - public boolean isDirect() { - return gadget_.isDirect(); - } - - @Override - public boolean isSameResource(final MemorySegment that) { - return gadget_.isSameResource(that); - } - - @Override - public void reset() { - gadget_.reset(); - unionThetaLong_ = gadget_.getThetaLong(); - unionEmpty_ = gadget_.isEmpty(); - } - - @Override - public byte[] toByteArray() { - final byte[] gadgetByteArr = gadget_.toByteArray(); - final MemorySegment seg = MemorySegment.ofArray(gadgetByteArr); - insertUnionThetaLong(seg, unionThetaLong_); - if (gadget_.isEmpty() != unionEmpty_) { - clearEmpty(seg); - unionEmpty_ = false; - } - return gadgetByteArr; - } - - @Override //Stateless Union - public CompactSketch union(final Sketch sketchA, final Sketch sketchB, final boolean dstOrdered, - final MemorySegment dstSeg) { - reset(); - union(sketchA); - union(sketchB); - final CompactSketch csk = getResult(dstOrdered, dstSeg); - reset(); - return csk; - } - - @Override - public void union(final Sketch sketchIn) { - //UNION Empty Rule: AND the empty states. - - if (sketchIn == null || sketchIn.isEmpty()) { - //null and empty is interpreted as (Theta = 1.0, count = 0, empty = T). 
Nothing changes - return; - } - //sketchIn is valid and not empty - Util.checkSeedHashes(expectedSeedHash_, sketchIn.getSeedHash()); - if (sketchIn instanceof SingleItemSketch) { - gadget_.hashUpdate(sketchIn.getCache()[0]); - return; - } - Sketch.checkSketchAndMemorySegmentFlags(sketchIn); - - unionThetaLong_ = min(min(unionThetaLong_, sketchIn.getThetaLong()), gadget_.getThetaLong()); //Theta rule - unionEmpty_ = false; - final boolean isOrdered = sketchIn.isOrdered(); - final HashIterator it = sketchIn.iterator(); - while (it.next()) { - final long hash = it.get(); - if (hash < unionThetaLong_ && hash < gadget_.getThetaLong()) { - gadget_.hashUpdate(hash); // backdoor update, hash function is bypassed - } else { - if (isOrdered) { break; } - } - } - unionThetaLong_ = min(unionThetaLong_, gadget_.getThetaLong()); //Theta rule with gadget - if (gadget_.hasMemorySegment()) { - final MemorySegment wseg = gadget_.getMemorySegment(); - PreambleUtil.insertUnionThetaLong(wseg, unionThetaLong_); - PreambleUtil.clearEmpty(wseg); - } - } - - @Override - public void union(final MemorySegment seg) { - Objects.requireNonNull(seg, "MemorySegment must be non-null"); - union(Sketch.wrap(seg.asReadOnly())); - } - - @Override - public void update(final long datum) { - gadget_.update(datum); - } - - @Override - public void update(final double datum) { - gadget_.update(datum); - } - - @Override - public void update(final String datum) { - gadget_.update(datum); - } - - @Override - public void update(final byte[] data) { - gadget_.update(data); - } - - @Override - public void update(final ByteBuffer data) { - gadget_.update(data); - } - - @Override - public void update(final char[] data) { - gadget_.update(data); - } - - @Override - public void update(final int[] data) { - gadget_.update(data); - } - - @Override - public void update(final long[] data) { - gadget_.update(data); - } - - //Restricted - - @Override - long[] getCache() { - return gadget_.getCache(); - } - - @Override - 
int getRetainedEntries() { - return gadget_.getRetainedEntries(true); - } - - @Override - short getSeedHash() { - return gadget_.getSeedHash(); - } - - @Override - long getThetaLong() { - return min(unionThetaLong_, gadget_.getThetaLong()); - } - - @Override - boolean isEmpty() { - return gadget_.isEmpty() && unionEmpty_; - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/UpdateReturnState.java b/src/main/java/org/apache/datasketches/theta2/UpdateReturnState.java deleted file mode 100644 index 6c12ca7c2..000000000 --- a/src/main/java/org/apache/datasketches/theta2/UpdateReturnState.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -/** - * See Update Return State - * - * @author Lee Rhodes - */ -public enum UpdateReturnState { - - /** - * The hash was accepted into the sketch and the retained count was incremented. - */ - InsertedCountIncremented, //all UpdateSketches - - /** - * The hash was accepted into the sketch, the retained count was incremented. - * The current cache was out of room and resized larger based on the Resize Factor. 
- */ - InsertedCountIncrementedResized, //used by HeapQuickSelectSketch - - /** - * The hash was accepted into the sketch, the retained count was incremented. - * The current cache was out of room and at maximum size, so the cache was rebuilt. - */ - InsertedCountIncrementedRebuilt, //used by HeapQuickSelectSketch - - /** - * The hash was accepted into the sketch and the retained count was not incremented. - */ - InsertedCountNotIncremented, //used by enhancedHashInsert for Alpha - - /** - * The hash was inserted into the local concurrent buffer, - * but has not yet been propagated to the concurrent shared sketch. - */ - ConcurrentBufferInserted, //used by ConcurrentHeapThetaBuffer - - /** - * The hash has been propagated to the concurrent shared sketch. - * This does not reflect the action taken by the shared sketch. - */ - ConcurrentPropagated, //used by ConcurrentHeapThetaBuffer - - /** - * The hash was rejected as a duplicate. - */ - RejectedDuplicate, //all UpdateSketches hashUpdate(), enhancedHashInsert - - /** - * The hash was rejected because it was null or empty. - */ - RejectedNullOrEmpty, //UpdateSketch.update(arr[]) - - /** - * The hash was rejected because the value was negative, zero or - * greater than theta. - */ - RejectedOverTheta; //all UpdateSketches.hashUpdate() - -} diff --git a/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java b/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java deleted file mode 100644 index e58a80bf5..000000000 --- a/src/main/java/org/apache/datasketches/theta2/UpdateSketch.java +++ /dev/null @@ -1,485 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; -import static org.apache.datasketches.common.Util.checkBounds; -import static org.apache.datasketches.hash.MurmurHash3.hash; -import static org.apache.datasketches.theta2.CompactOperations.componentsToCompact; -import static org.apache.datasketches.theta2.PreambleUtil.BIG_ENDIAN_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.COMPACT_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.READ_ONLY_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.SER_VER; -import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.checkSegmentSeedHash; -import static org.apache.datasketches.theta2.PreambleUtil.extractFamilyID; -import static org.apache.datasketches.theta2.PreambleUtil.extractFlags; -import static org.apache.datasketches.theta2.PreambleUtil.extractLgResizeFactor; -import static org.apache.datasketches.theta2.PreambleUtil.extractP; -import static org.apache.datasketches.theta2.PreambleUtil.extractSerVer; -import static 
org.apache.datasketches.theta2.PreambleUtil.extractThetaLong; -import static org.apache.datasketches.theta2.PreambleUtil.getSegBytes; -import static org.apache.datasketches.theta2.UpdateReturnState.RejectedNullOrEmpty; - -import java.lang.foreign.MemorySegment; -import java.nio.ByteBuffer; -import java.util.Objects; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.ThetaUtil; - -/** - * The parent class for the Update Sketch families, such as QuickSelect and Alpha. - * The primary task of an Update Sketch is to consider datums presented via the update() methods - * for inclusion in its internal cache. This is the sketch building process. - * - * @author Lee Rhodes - */ -public abstract class UpdateSketch extends Sketch { - - UpdateSketch() {} - - /** - * Wrap takes the writable sketch image in MemorySegment and refers to it directly. There is no data copying onto - * the java heap. Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have - * been explicitly stored as writable, direct objects can be wrapped. This method assumes the - * {@link org.apache.datasketches.common.Util#DEFAULT_UPDATE_SEED}. - * Default Update Seed. - * @param srcWSeg an image of a writable sketch where the image seed hash matches the default seed hash. - * It must have a size of at least 24 bytes. - * @return an UpdateSketch backed by the given MemorySegment - */ - public static UpdateSketch wrap(final MemorySegment srcWSeg) { - return wrap(srcWSeg, Util.DEFAULT_UPDATE_SEED); - } - - /** - * Wrap takes the sketch image in MemorySegment and refers to it directly. There is no data copying onto - * the java heap. Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have - * been explicitly stored as writable direct objects can be wrapped. 
- * An attempt to "wrap" earlier version sketches will result in a "heapified", normal - * Java Heap version of the sketch where all data will be copied to the heap. - * @param srcWSeg an image of a writable sketch where the image seed hash matches the given seed hash. - * It must have a size of at least 24 bytes. - * @param expectedSeed the seed used to validate the given MemorySegment image. - * See Update Hash Seed. - * Compact sketches store a 16-bit hash of the seed, but not the seed itself. - * @return a UpdateSketch backed by the given MemorySegment - */ - public static UpdateSketch wrap(final MemorySegment srcWSeg, final long expectedSeed) { - Objects.requireNonNull(srcWSeg, "Source MemorySeg e t must not be null"); - checkBounds(0, 24, srcWSeg.byteSize()); //need min 24 bytes - final int preLongs = srcWSeg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; - final int serVer = srcWSeg.get(JAVA_BYTE, SER_VER_BYTE) & 0XFF; - final int familyID = srcWSeg.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF; - final Family family = Family.idToFamily(familyID); - if (family != Family.QUICKSELECT) { - throw new SketchesArgumentException( - "A " + family + " sketch cannot be wrapped as an UpdateSketch."); - } - if ((serVer == 3) && (preLongs == 3)) { - return DirectQuickSelectSketch.writableWrap(srcWSeg, expectedSeed); - } else { - throw new SketchesArgumentException( - "Corrupted: An UpdateSketch image must have SerVer = 3 and preLongs = 3"); - } - } - - /** - * Instantiates an on-heap UpdateSketch from a MemorySegment. This method assumes the - * {@link org.apache.datasketches.common.Util#DEFAULT_UPDATE_SEED}. - * @param srcSeg the given MemorySegment with a sketch image. - * It must have a size of at least 24 bytes. - * @return an UpdateSketch - */ - public static UpdateSketch heapify(final MemorySegment srcSeg) { - return heapify(srcSeg, Util.DEFAULT_UPDATE_SEED); - } - - /** - * Instantiates an on-heap UpdateSketch from a MemorySegment. - * @param srcSeg the given MemorySegment. 
- * It must have a size of at least 24 bytes. - * @param expectedSeed the seed used to validate the given MemorySegment image. - * See Update Hash Seed. - * @return an UpdateSketch - */ - public static UpdateSketch heapify(final MemorySegment srcSeg, final long expectedSeed) { - Objects.requireNonNull(srcSeg, "Source MemorySegment must not be null"); - checkBounds(0, 24, srcSeg.byteSize()); //need min 24 bytes - final Family family = Family.idToFamily(srcSeg.get(JAVA_BYTE, FAMILY_BYTE)); - if (family.equals(Family.ALPHA)) { - return HeapAlphaSketch.heapifyInstance(srcSeg, expectedSeed); - } - return HeapQuickSelectSketch.heapifyInstance(srcSeg, expectedSeed); - } - - //Sketch interface - - @Override - public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstWSeg) { - return componentsToCompact(getThetaLong(), getRetainedEntries(true), getSeedHash(), isEmpty(), - false, false, dstOrdered, dstWSeg, getCache()); - } - - @Override - public int getCompactBytes() { - final int preLongs = getCompactPreambleLongs(); - final int dataLongs = getRetainedEntries(true); - return (preLongs + dataLongs) << 3; - } - - @Override - int getCurrentDataLongs() { - return 1 << getLgArrLongs(); - } - - @Override - public boolean hasMemorySegment() { - return (this instanceof DirectQuickSelectSketchR && ((DirectQuickSelectSketchR)this).hasMemorySegment()); - } - - @Override - public boolean isCompact() { - return false; - } - - @Override - public boolean isDirect() { - return (this instanceof DirectQuickSelectSketchR && ((DirectQuickSelectSketchR)this).isDirect()); - } - - @Override - public boolean isOrdered() { - return false; - } - - @Override - public boolean isSameResource(final MemorySegment that) { - return (this instanceof DirectQuickSelectSketchR && ((DirectQuickSelectSketchR)this).isSameResource(that)); - } - - //UpdateSketch interface - - /** - * Returns a new builder - * @return a new builder - */ - public static final UpdateSketchBuilder builder() { - 
return new UpdateSketchBuilder(); - } - - /** - * Returns the configured ResizeFactor - * @return the configured ResizeFactor - */ - public abstract ResizeFactor getResizeFactor(); - - /** - * Gets the configured sampling probability, p. - * See Sampling Probability, p - * @return the sampling probability, p - */ - abstract float getP(); - - /** - * Gets the configured seed - * @return the configured seed - */ - abstract long getSeed(); - - /** - * Resets this sketch back to a virgin empty state. - */ - public abstract void reset(); - - /** - * Rebuilds the hash table to remove dirty values or to reduce the size - * to nominal entries. - * @return this sketch - */ - public abstract UpdateSketch rebuild(); - - /** - * Present this sketch with a long. - * - * @param datum The given long datum. - * @return - * See Update Return State - */ - public UpdateReturnState update(final long datum) { - final long[] data = { datum }; - return hashUpdate(hash(data, getSeed())[0] >>> 1); - } - - /** - * Present this sketch with the given double (or float) datum. - * The double will be converted to a long using Double.doubleToLongBits(datum), - * which normalizes all NaN values to a single NaN representation. - * Plus and minus zero will be normalized to plus zero. - * The special floating-point values NaN and +/- Infinity are treated as distinct. - * - * @param datum The given double datum. - * @return - * See Update Return State - */ - public UpdateReturnState update(final double datum) { - final double d = (datum == 0.0) ? 0.0 : datum; // canonicalize -0.0, 0.0 - final long[] data = { Double.doubleToLongBits(d) };// canonicalize all NaN & +/- infinity forms - return hashUpdate(hash(data, getSeed())[0] >>> 1); - } - - /** - * Present this sketch with the given String. - * The string is converted to a byte array using UTF8 encoding. - * If the string is null or empty no update attempt is made and the method returns. - * - *

              Note: this will not produce the same output hash values as the {@link #update(char[])} - * method and will generally be a little slower depending on the complexity of the UTF8 encoding. - *

              - * - * @param datum The given String. - * @return - * See Update Return State - */ - public UpdateReturnState update(final String datum) { - if ((datum == null) || datum.isEmpty()) { - return RejectedNullOrEmpty; - } - final byte[] data = datum.getBytes(UTF_8); - return hashUpdate(hash(data, getSeed())[0] >>> 1); - } - - /** - * Present this sketch with the given byte array. - * If the byte array is null or empty no update attempt is made and the method returns. - * - * @param data The given byte array. - * @return - * See Update Return State - */ - public UpdateReturnState update(final byte[] data) { - if ((data == null) || (data.length == 0)) { - return RejectedNullOrEmpty; - } - return hashUpdate(hash(data, getSeed())[0] >>> 1); - } - - /** - * Present this sketch with the given ByteBuffer - * If the ByteBuffer is null or empty, no update attempt is made and the method returns. - * - * @param buffer the input ByteBuffer - * @return - * See Update Return State - */ - public UpdateReturnState update(final ByteBuffer buffer) { - if (buffer == null || buffer.hasRemaining() == false) { - return RejectedNullOrEmpty; - } - return hashUpdate(hash(buffer, getSeed())[0] >>> 1); - } - - /** - * Present this sketch with the given char array. - * If the char array is null or empty no update attempt is made and the method returns. - * - *

              Note: this will not produce the same output hash values as the {@link #update(String)} - * method but will be a little faster as it avoids the complexity of the UTF8 encoding.

              - * - * @param data The given char array. - * @return - * See Update Return State - */ - public UpdateReturnState update(final char[] data) { - if ((data == null) || (data.length == 0)) { - return RejectedNullOrEmpty; - } - return hashUpdate(hash(data, getSeed())[0] >>> 1); - } - - /** - * Present this sketch with the given integer array. - * If the integer array is null or empty no update attempt is made and the method returns. - * - * @param data The given int array. - * @return - * See Update Return State - */ - public UpdateReturnState update(final int[] data) { - if ((data == null) || (data.length == 0)) { - return RejectedNullOrEmpty; - } - return hashUpdate(hash(data, getSeed())[0] >>> 1); - } - - /** - * Present this sketch with the given long array. - * If the long array is null or empty no update attempt is made and the method returns. - * - * @param data The given long array. - * @return - * See Update Return State - */ - public UpdateReturnState update(final long[] data) { - if ((data == null) || (data.length == 0)) { - return RejectedNullOrEmpty; - } - return hashUpdate(hash(data, getSeed())[0] >>> 1); - } - - //restricted methods - - /** - * All potential updates converge here. - * - *

              Don't ever call this unless you really know what you are doing!

              - * - * @param hash the given input hash value. A hash of zero or Long.MAX_VALUE is ignored. - * A negative hash value will throw an exception. - * @return See Update Return State - */ - abstract UpdateReturnState hashUpdate(long hash); - - /** - * Gets the Log base 2 of the current size of the internal cache - * @return the Log base 2 of the current size of the internal cache - */ - abstract int getLgArrLongs(); - - /** - * Gets the Log base 2 of the configured nominal entries - * @return the Log base 2 of the configured nominal entries - */ - public abstract int getLgNomLongs(); - - /** - * Returns true if the internal cache contains "dirty" values that are greater than or equal - * to thetaLong. - * @return true if the internal cache is dirty. - */ - abstract boolean isDirty(); - - /** - * Returns true if numEntries (curCount) is greater than the hashTableThreshold. - * @param numEntries the given number of entries (or current count). - * @return true if numEntries (curCount) is greater than the hashTableThreshold. 
- */ - abstract boolean isOutOfSpace(int numEntries); - - static void checkUnionQuickSelectFamily(final MemorySegment seg, final int preambleLongs, - final int lgNomLongs) { - //Check Family - final int familyID = extractFamilyID(seg); //byte 2 - final Family family = Family.idToFamily(familyID); - if (family.equals(Family.UNION)) { - if (preambleLongs != Family.UNION.getMinPreLongs()) { - throw new SketchesArgumentException( - "Possible corruption: Invalid PreambleLongs value for UNION: " + preambleLongs); - } - } - else if (family.equals(Family.QUICKSELECT)) { - if (preambleLongs != Family.QUICKSELECT.getMinPreLongs()) { - throw new SketchesArgumentException( - "Possible corruption: Invalid PreambleLongs value for QUICKSELECT: " + preambleLongs); - } - } else { - throw new SketchesArgumentException( - "Possible corruption: Invalid Family: " + family.toString()); - } - - //Check lgNomLongs - if (lgNomLongs < ThetaUtil.MIN_LG_NOM_LONGS) { - throw new SketchesArgumentException( - "Possible corruption: Current MemorySegment lgNomLongs < min required size: " - + lgNomLongs + " < " + ThetaUtil.MIN_LG_NOM_LONGS); - } - } - - static void checkSegIntegrity(final MemorySegment srcSeg, final long expectedSeed, final int preambleLongs, - final int lgNomLongs, final int lgArrLongs) { - - //Check SerVer - final int serVer = extractSerVer(srcSeg); //byte 1 - if (serVer != SER_VER) { - throw new SketchesArgumentException( - "Possible corruption: Invalid Serialization Version: " + serVer); - } - - //Check flags - final int flags = extractFlags(srcSeg); //byte 5 - final int flagsMask = - ORDERED_FLAG_MASK | COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | BIG_ENDIAN_FLAG_MASK; - if ((flags & flagsMask) > 0) { - throw new SketchesArgumentException( - "Possible corruption: Input srcSeg cannot be: big-endian, compact, ordered, nor read-only"); - } - - //Check seed hashes - final short seedHash = checkSegmentSeedHash(srcSeg, expectedSeed); //byte 6,7 - Util.checkSeedHashes(seedHash, 
Util.computeSeedHash(expectedSeed)); - - //Check seg capacity, lgArrLongs - final long curCapBytes = srcSeg.byteSize(); - final int minReqBytes = getSegBytes(lgArrLongs, preambleLongs); - if (curCapBytes < minReqBytes) { - throw new SketchesArgumentException( - "Possible corruption: Current MemorySegment size < min required size: " - + curCapBytes + " < " + minReqBytes); - } - //check Theta, p - final float p = extractP(srcSeg); //bytes 12-15 - final long thetaLong = extractThetaLong(srcSeg); //bytes 16-23 - final double theta = thetaLong / LONG_MAX_VALUE_AS_DOUBLE; - //if (lgArrLongs <= lgNomLongs) the sketch is still resizing, thus theta cannot be < p. - if ((lgArrLongs <= lgNomLongs) && (theta < p) ) { - throw new SketchesArgumentException( - "Possible corruption: Theta cannot be < p and lgArrLongs <= lgNomLongs. " - + lgArrLongs + " <= " + lgNomLongs + ", Theta: " + theta + ", p: " + p); - } - } - - /** - * This checks to see if the MemorySegment RF factor was set correctly as early versions may not - * have set it. - * @param srcSeg the source MemorySegment - * @param lgNomLongs the current lgNomLongs - * @param lgArrLongs the current lgArrLongs - * @return true if the the MemorySegment RF factor is incorrect and the caller can either - * correct it or throw an error. 
- */ - static boolean isResizeFactorIncorrect(final MemorySegment srcSeg, final int lgNomLongs, - final int lgArrLongs) { - final int lgT = lgNomLongs + 1; - final int lgA = lgArrLongs; - final int lgR = extractLgResizeFactor(srcSeg); - if (lgR == 0) { return lgA != lgT; } - return !(((lgT - lgA) % lgR) == 0); - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java b/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java deleted file mode 100644 index 19abe7b13..000000000 --- a/src/main/java/org/apache/datasketches/theta2/UpdateSketchBuilder.java +++ /dev/null @@ -1,493 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import static org.apache.datasketches.common.Util.LS; -import static org.apache.datasketches.common.Util.TAB; -import static org.apache.datasketches.common.Util.ceilingPowerOf2; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.SketchesStateException; -import org.apache.datasketches.common.SuppressFBWarnings; -import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.ThetaUtil; - -/** - * For building a new UpdateSketch. - * - * @author Lee Rhodes - */ -public final class UpdateSketchBuilder { - private int bLgNomLongs; - private long bSeed; - private ResizeFactor bRF; - private Family bFam; - private float bP; - - //Fields for concurrent theta sketch - private int bNumPoolThreads; - private int bLocalLgNomLongs; - private boolean bPropagateOrderedCompact; - private double bMaxConcurrencyError; - private int bMaxNumLocalThreads; - - /** - * Constructor for building a new UpdateSketch. The default configuration is - *
                - *
              • Nominal Entries: {@value org.apache.datasketches.thetacommon2.ThetaUtil#DEFAULT_NOMINAL_ENTRIES}
              • - *
              • Seed: {@value org.apache.datasketches.common.Util#DEFAULT_UPDATE_SEED}
              • - *
              • Input Sampling Probability: 1.0
              • - *
              • Family: {@link org.apache.datasketches.common.Family#QUICKSELECT}
              • - *
              • Resize Factor: The default for sketches on the Java heap is {@link ResizeFactor#X8}. - * For direct sketches, which are targeted for off-heap, this value will - * be fixed at either {@link ResizeFactor#X1} or {@link ResizeFactor#X2}.
              • - *
              - * Parameters unique to the concurrent sketches only: - *
                - *
              • Concurrent NumPoolThreads: 3
              • - *
              • Number of local Nominal Entries: 4
              • - *
              • Concurrent PropagateOrderedCompact: true
              • - *
              • Concurrent MaxConcurrencyError: 0
              • - *
              • Concurrent MaxNumLocalThreads: 1
              • - *
              - */ - public UpdateSketchBuilder() { - bLgNomLongs = Integer.numberOfTrailingZeros(ThetaUtil.DEFAULT_NOMINAL_ENTRIES); - bSeed = Util.DEFAULT_UPDATE_SEED; - bP = (float) 1.0; - bRF = ResizeFactor.X8; - bFam = Family.QUICKSELECT; - // Default values for concurrent sketch - bNumPoolThreads = ConcurrentPropagationService.NUM_POOL_THREADS; - bLocalLgNomLongs = 4; //default is smallest legal QS sketch - bPropagateOrderedCompact = true; - bMaxConcurrencyError = 0; - bMaxNumLocalThreads = 1; - } - - /** - * Sets the Nominal Entries for this sketch. - * This value is also used for building a shared concurrent sketch. - * The minimum value is 16 (2^4) and the maximum value is 67,108,864 (2^26). - * Be aware that sketches as large as this maximum value may not have been - * thoroughly tested or characterized for performance. - * - * @param nomEntries Nominal Entries - * This will become the ceiling power of 2 if the given value is not. - * @return this UpdateSketchBuilder - */ - public UpdateSketchBuilder setNominalEntries(final int nomEntries) { - bLgNomLongs = ThetaUtil.checkNomLongs(nomEntries); - return this; - } - - /** - * Alternative method of setting the Nominal Entries for this sketch from the log_base2 value. - * This value is also used for building a shared concurrent sketch. - * The minimum value is 4 and the maximum value is 26. - * Be aware that sketches as large as this maximum value may not have been - * thoroughly characterized for performance. - * - * @param lgNomEntries the Log Nominal Entries. 
Also for the concurrent shared sketch - * @return this UpdateSketchBuilder - */ - public UpdateSketchBuilder setLogNominalEntries(final int lgNomEntries) { - bLgNomLongs = ThetaUtil.checkNomLongs(1 << lgNomEntries); - return this; - } - - /** - * Returns Log-base 2 Nominal Entries - * @return Log-base 2 Nominal Entries - */ - public int getLgNominalEntries() { - return bLgNomLongs; - } - - /** - * Sets the Nominal Entries for the concurrent local sketch. The minimum value is 16 and the - * maximum value is 67,108,864, which is 2^26. - * Be aware that sketches as large as this maximum - * value have not been thoroughly tested or characterized for performance. - * - * @param nomEntries Nominal Entries - * This will become the ceiling power of 2 if it is not. - * @return this UpdateSketchBuilder - */ - public UpdateSketchBuilder setLocalNominalEntries(final int nomEntries) { - bLocalLgNomLongs = Integer.numberOfTrailingZeros(ceilingPowerOf2(nomEntries)); - if ((bLocalLgNomLongs > ThetaUtil.MAX_LG_NOM_LONGS) || (bLocalLgNomLongs < ThetaUtil.MIN_LG_NOM_LONGS)) { - throw new SketchesArgumentException( - "Nominal Entries must be >= 16 and <= 67108864: " + nomEntries); - } - return this; - } - - /** - * Alternative method of setting the Nominal Entries for a local concurrent sketch from the - * log_base2 value. - * The minimum value is 4 and the maximum value is 26. - * Be aware that sketches as large as this maximum - * value have not been thoroughly tested or characterized for performance. 
- * - * @param lgNomEntries the Log Nominal Entries for a concurrent local sketch - * @return this UpdateSketchBuilder - */ - public UpdateSketchBuilder setLocalLogNominalEntries(final int lgNomEntries) { - bLocalLgNomLongs = lgNomEntries; - if ((bLocalLgNomLongs > ThetaUtil.MAX_LG_NOM_LONGS) || (bLocalLgNomLongs < ThetaUtil.MIN_LG_NOM_LONGS)) { - throw new SketchesArgumentException( - "Log Nominal Entries must be >= 4 and <= 26: " + lgNomEntries); - } - return this; - } - - /** - * Returns Log-base 2 Nominal Entries for the concurrent local sketch - * @return Log-base 2 Nominal Entries for the concurrent local sketch - */ - public int getLocalLgNominalEntries() { - return bLocalLgNomLongs; - } - - /** - * Sets the long seed value that is required by the hashing function. - * @param seed See seed - * @return this UpdateSketchBuilder - */ - public UpdateSketchBuilder setSeed(final long seed) { - bSeed = seed; - return this; - } - - /** - * Returns the seed - * @return the seed - */ - public long getSeed() { - return bSeed; - } - - /** - * Sets the upfront uniform sampling probability, p - * @param p See Sampling Probability, p - * @return this UpdateSketchBuilder - */ - public UpdateSketchBuilder setP(final float p) { - if ((p <= 0.0) || (p > 1.0)) { - throw new SketchesArgumentException("p must be > 0 and <= 1.0: " + p); - } - bP = p; - return this; - } - - /** - * Returns the pre-sampling probability p - * @return the pre-sampling probability p - */ - public float getP() { - return bP; - } - - /** - * Sets the cache Resize Factor. - * @param rf See Resize Factor - * @return this UpdateSketchBuilder - */ - public UpdateSketchBuilder setResizeFactor(final ResizeFactor rf) { - bRF = rf; - return this; - } - - /** - * Returns the Resize Factor - * @return the Resize Factor - */ - public ResizeFactor getResizeFactor() { - return bRF; - } - - /** - * Set the Family. 
- * @param family the family for this builder - * @return this UpdateSketchBuilder - */ - public UpdateSketchBuilder setFamily(final Family family) { - bFam = family; - return this; - } - - /** - * Returns the Family - * @return the Family - */ - public Family getFamily() { - return bFam; - } - - /** - * Sets the number of pool threads used for background propagation in the concurrent sketches. - * @param numPoolThreads the given number of pool threads - */ - public void setNumPoolThreads(final int numPoolThreads) { - bNumPoolThreads = numPoolThreads; - } - - /** - * Gets the number of background pool threads used for propagation in the concurrent sketches. - * @return the number of background pool threads - */ - public int getNumPoolThreads() { - return bNumPoolThreads; - } - - /** - * Sets the Propagate Ordered Compact flag to the given value. Used with concurrent sketches. - * - * @param prop the given value - * @return this UpdateSketchBuilder - */ - public UpdateSketchBuilder setPropagateOrderedCompact(final boolean prop) { - bPropagateOrderedCompact = prop; - return this; - } - - /** - * Gets the Propagate Ordered Compact flag used with concurrent sketches. - * @return the Propagate Ordered Compact flag - */ - public boolean getPropagateOrderedCompact() { - return bPropagateOrderedCompact; - } - - /** - * Sets the Maximum Concurrency Error. - * @param maxConcurrencyError the given Maximum Concurrency Error. - */ - public void setMaxConcurrencyError(final double maxConcurrencyError) { - bMaxConcurrencyError = maxConcurrencyError; - } - - /** - * Gets the Maximum Concurrency Error - * @return the Maximum Concurrency Error - */ - public double getMaxConcurrencyError() { - return bMaxConcurrencyError; - } - - /** - * Sets the Maximum Number of Local Threads. - * This is used to set the size of the local concurrent buffers. 
- * @param maxNumLocalThreads the given Maximum Number of Local Threads - */ - public void setMaxNumLocalThreads(final int maxNumLocalThreads) { - bMaxNumLocalThreads = maxNumLocalThreads; - } - - /** - * Gets the Maximum Number of Local Threads. - * @return the Maximum Number of Local Threads. - */ - public int getMaxNumLocalThreads() { - return bMaxNumLocalThreads; - } - - // BUILD FUNCTIONS - - /** - * Returns an UpdateSketch with the current configuration of this Builder. - * @return an UpdateSketch - */ - public UpdateSketch build() { - return build(null); - } - - /** - * Returns an UpdateSketch with the current configuration of this Builder - * with the specified backing destination MemorySegment store. - * Note: this cannot be used with the Alpha Family of sketches. - * @param dstSeg The destination MemorySegment. - * @return an UpdateSketch - */ - public UpdateSketch build(final MemorySegment dstSeg) { - UpdateSketch sketch = null; - switch (bFam) { - case ALPHA: { - if (dstSeg == null) { - sketch = HeapAlphaSketch.newHeapInstance(bLgNomLongs, bSeed, bP, bRF); - } - else { - throw new SketchesArgumentException("AlphaSketch cannot be backed by a MemorySegment."); - } - break; - } - case QUICKSELECT: { - if (dstSeg == null) { - sketch = new HeapQuickSelectSketch(bLgNomLongs, bSeed, bP, bRF, false); - } - else { - sketch = new DirectQuickSelectSketch( - bLgNomLongs, bSeed, bP, bRF, dstSeg, false); - } - break; - } - default: { - throw new SketchesArgumentException( - "Given Family cannot be built as a Theta Sketch: " + bFam.toString()); - } - } - return sketch; - } - - /** - * Returns an on-heap concurrent shared UpdateSketch with the current configuration of the - * Builder. - * - *

              The parameters unique to the shared concurrent sketch are: - *

                - *
              • Number of Pool Threads (default is 3)
              • - *
              • Maximum Concurrency Error
              • - *
              - * - *

              Key parameters that are in common with other Theta sketches: - *

                - *
              • Nominal Entries or Log Nominal Entries (for the shared concurrent sketch)
              • - *
              - * - * @return an on-heap concurrent UpdateSketch with the current configuration of the Builder. - */ - public UpdateSketch buildShared() { - return buildShared(null); - } - - /** - * Returns a concurrent shared UpdateSketch with the current - * configuration of the Builder and the given destination MemorySegment. If the destination - * MemorySegment is null, this defaults to an on-heap concurrent shared UpdateSketch. - * - *

              The parameters unique to the shared concurrent sketch are: - *

                - *
              • Number of Pool Threads (default is 3)
              • - *
              • Maximum Concurrency Error
              • - *
              - * - *

              Key parameters that are in common with other Theta sketches: - *

                - *
              • Nominal Entries or Log Nominal Entries (for the shared concurrent sketch)
              • - *
              • Destination MemorySegment (if not null, returned sketch is Direct. Default is null.)
              • - *
              - * - * @param dstSeg the given MemorySegment for Direct, otherwise null. - * @return a concurrent UpdateSketch with the current configuration of the Builder - * and the given destination MemorySegment. - */ - @SuppressFBWarnings(value = "ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD", - justification = "Harmless in Builder, fix later") - public UpdateSketch buildShared(final MemorySegment dstSeg) { - ConcurrentPropagationService.NUM_POOL_THREADS = bNumPoolThreads; - if (dstSeg == null) { - return new ConcurrentHeapQuickSelectSketch(bLgNomLongs, bSeed, bMaxConcurrencyError); - } else { - return new ConcurrentDirectQuickSelectSketch(bLgNomLongs, bSeed, bMaxConcurrencyError, dstSeg); - } - } - - /** - * Returns a direct (potentially off-heap) concurrent shared UpdateSketch with the current - * configuration of the Builder, the data from the given sketch, and the given destination - * MemorySegment. If the destination MemorySegment is null, this defaults to an on-heap - * concurrent shared UpdateSketch. - * - *

              The parameters unique to the shared concurrent sketch are: - *

                - *
              • Number of Pool Threads (default is 3)
              • - *
              • Maximum Concurrency Error
              • - *
              - * - *

              Key parameters that are in common with other Theta sketches: - *

                - *
              • Nominal Entries or Log Nominal Entries (for the shared concurrent sketch)
              • - *
              • Destination MemorySegment (if not null, returned sketch is Direct. Default is null.)
              • - *
              - * - * @param sketch a given UpdateSketch from which the data is used to initialize the returned - * shared sketch. - * @param dstSeg the given MemorySegment for Direct, otherwise null. - * @return a concurrent UpdateSketch with the current configuration of the Builder - * and the given destination MemorySegment. - */ - @SuppressFBWarnings(value = "ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD", - justification = "Harmless in Builder, fix later") - public UpdateSketch buildSharedFromSketch(final UpdateSketch sketch, final MemorySegment dstSeg) { - ConcurrentPropagationService.NUM_POOL_THREADS = bNumPoolThreads; - if (dstSeg == null) { - return new ConcurrentHeapQuickSelectSketch(sketch, bSeed, bMaxConcurrencyError); - } else { - return new ConcurrentDirectQuickSelectSketch(sketch, bSeed, bMaxConcurrencyError, dstSeg); - } - } - - /** - * Returns a local, on-heap, concurrent UpdateSketch to be used as a per-thread local buffer - * along with the given concurrent shared UpdateSketch and the current configuration of this - * Builder. - * - *

              The parameters unique to the local concurrent sketch are: - *

                - *
              • Local Nominal Entries or Local Log Nominal Entries
              • - *
              • Propagate Ordered Compact flag
              • - *
              - * - * @param shared the concurrent shared sketch to be accessed via the concurrent local sketch. - * @return an UpdateSketch to be used as a per-thread local buffer. - */ - public UpdateSketch buildLocal(final UpdateSketch shared) { - if ((shared == null) || !(shared instanceof ConcurrentSharedThetaSketch)) { - throw new SketchesStateException("The concurrent shared sketch must be built first."); - } - return new ConcurrentHeapThetaBuffer(bLocalLgNomLongs, bSeed, - (ConcurrentSharedThetaSketch) shared, bPropagateOrderedCompact, bMaxNumLocalThreads); - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder(); - sb.append("UpdateSketchBuilder configuration:").append(LS); - sb.append("LgK:").append(TAB).append(bLgNomLongs).append(LS); - sb.append("K:").append(TAB).append(1 << bLgNomLongs).append(LS); - sb.append("LgLocalK:").append(TAB).append(bLocalLgNomLongs).append(LS); - sb.append("LocalK:").append(TAB).append(1 << bLocalLgNomLongs).append(LS); - sb.append("Seed:").append(TAB).append(bSeed).append(LS); - sb.append("p:").append(TAB).append(bP).append(LS); - sb.append("ResizeFactor:").append(TAB).append(bRF).append(LS); - sb.append("Family:").append(TAB).append(bFam).append(LS); - sb.append("Propagate Ordered Compact").append(TAB).append(bPropagateOrderedCompact).append(LS); - sb.append("NumPoolThreads").append(TAB).append(bNumPoolThreads).append(LS); - sb.append("MaxConcurrencyError").append(TAB).append(bMaxConcurrencyError).append(LS); - sb.append("MaxNumLocalThreads").append(TAB).append(bMaxNumLocalThreads).append(LS); - return sb.toString(); - } - -} diff --git a/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java b/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java deleted file mode 100644 index 8973d76ff..000000000 --- a/src/main/java/org/apache/datasketches/theta2/WrappedCompactCompressedSketch.java +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Licensed to 
the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import static org.apache.datasketches.theta2.PreambleUtil.wholeBytesToHoldBits; -import static org.apache.datasketches.theta2.PreambleUtil.ENTRY_BITS_BYTE_V4; -import static org.apache.datasketches.theta2.PreambleUtil.NUM_ENTRIES_BYTES_BYTE_V4; -import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; - -import org.apache.datasketches.common.ByteArrayUtil; -import org.apache.datasketches.common.Util; - -/** - * Wrapper around a serialized compact compressed read-only sketch. It is not empty, not a single item. - * - *

              This sketch can only be associated with a Serialization Version 4 format binary image.

              - */ -final class WrappedCompactCompressedSketch extends WrappedCompactSketch { - - /** - * Construct this sketch with the given bytes. - * @param bytes containing serialized compact compressed sketch. - */ - WrappedCompactCompressedSketch(final byte[] bytes) { - super(bytes); - } - - /** - * Wraps the given bytes, which must be a SerVer 4 compressed CompactSketch image. - * @param bytes representation of serialized compressed compact sketch. - * @param seedHash The update seedHash. - * See Seed Hash. - * @return this sketch - */ - static WrappedCompactCompressedSketch wrapInstance(final byte[] bytes, final short seedHash) { - Util.checkSeedHashes(ByteArrayUtil.getShortLE(bytes, PreambleUtil.SEED_HASH_SHORT), seedHash); - return new WrappedCompactCompressedSketch(bytes); - } - - //Sketch Overrides - - @Override - public int getCurrentBytes() { - final int preLongs = bytes_[PREAMBLE_LONGS_BYTE]; - final int entryBits = bytes_[ENTRY_BITS_BYTE_V4]; - final int numEntriesBytes = bytes_[NUM_ENTRIES_BYTES_BYTE_V4]; - return preLongs * Long.BYTES + numEntriesBytes + wholeBytesToHoldBits(getRetainedEntries() * entryBits); - } - - private static final int START_PACKED_DATA_EXACT_MODE = 8; - private static final int START_PACKED_DATA_ESTIMATION_MODE = 16; - - @Override - public int getRetainedEntries(final boolean valid) { //compact is always valid - // number of entries is stored using variable length encoding - // most significant bytes with all zeros are not stored - // one byte in the preamble has the number of non-zero bytes used - final int preLongs = bytes_[PREAMBLE_LONGS_BYTE]; // if > 1 then the second long has theta - final int numEntriesBytes = bytes_[NUM_ENTRIES_BYTES_BYTE_V4]; - int offsetBytes = preLongs > 1 ? 
START_PACKED_DATA_ESTIMATION_MODE : START_PACKED_DATA_EXACT_MODE; - int numEntries = 0; - for (int i = 0; i < numEntriesBytes; i++) { - numEntries |= Byte.toUnsignedInt(bytes_[offsetBytes++]) << (i << 3); - } - return numEntries; - } - - @Override - public long getThetaLong() { - final int preLongs = bytes_[PREAMBLE_LONGS_BYTE]; - return (preLongs > 1) ? ByteArrayUtil.getLongLE(bytes_, 8) : Long.MAX_VALUE; - } - - @Override - public boolean isEmpty() { - return false; - } - - @Override - public boolean isOrdered() { - return true; - } - - @Override - public HashIterator iterator() { - return new BytesCompactCompressedHashIterator( - bytes_, - (bytes_[PREAMBLE_LONGS_BYTE] > 1 ? START_PACKED_DATA_ESTIMATION_MODE : START_PACKED_DATA_EXACT_MODE) - + bytes_[NUM_ENTRIES_BYTES_BYTE_V4], - bytes_[ENTRY_BITS_BYTE_V4], - getRetainedEntries() - ); - } -} diff --git a/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java b/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java deleted file mode 100644 index a1f65b3e2..000000000 --- a/src/main/java/org/apache/datasketches/theta2/WrappedCompactSketch.java +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import static org.apache.datasketches.common.ByteArrayUtil.getIntLE; -import static org.apache.datasketches.common.ByteArrayUtil.getLongLE; -import static org.apache.datasketches.common.ByteArrayUtil.getShortLE; -import static org.apache.datasketches.theta2.CompactOperations.segmentToCompact; -import static org.apache.datasketches.theta2.PreambleUtil.EMPTY_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.FLAGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.RETAINED_ENTRIES_INT; -import static org.apache.datasketches.theta2.PreambleUtil.SEED_HASH_SHORT; -import static org.apache.datasketches.theta2.PreambleUtil.THETA_LONG; - -import java.lang.foreign.MemorySegment; -import java.util.Arrays; - -import org.apache.datasketches.common.Util; - -/** - * Wrapper around a serialized compact read-only sketch. It is not empty, not a single item. - * - *

              This sketch can only be associated with a Serialization Version 3 format binary image.

              - */ -class WrappedCompactSketch extends CompactSketch { - final byte[] bytes_; - - /** - * Construct this sketch with the given bytes. - * @param bytes containing serialized compact sketch. - */ - WrappedCompactSketch(final byte[] bytes) { - bytes_ = bytes; - } - - /** - * Wraps the given byteArray, which must be a SerVer 3 CompactSketch image. - * @param bytes representation of serialized compressed compact sketch. - * @param seedHash The update seedHash. - * See Seed Hash. - * @return this sketch - */ - static WrappedCompactSketch wrapInstance(final byte[] bytes, final short seedHash) { - Util.checkSeedHashes(getShortLE(bytes, SEED_HASH_SHORT), seedHash); - return new WrappedCompactSketch(bytes); - } - - //Sketch Overrides - - @Override - public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) { - return segmentToCompact(MemorySegment.ofArray(bytes_), dstOrdered, dstSeg); - } - - @Override - public int getCurrentBytes() { - final int preLongs = bytes_[PreambleUtil.PREAMBLE_LONGS_BYTE]; - final int numEntries = (preLongs == 1) ? 0 : getIntLE(bytes_, RETAINED_ENTRIES_INT); - return (preLongs + numEntries) << 3; - } - - @Override - public int getRetainedEntries(final boolean valid) { //compact is always valid - final int preLongs = bytes_[PREAMBLE_LONGS_BYTE]; - return (preLongs == 1) ? 0 : getIntLE(bytes_, RETAINED_ENTRIES_INT); - } - - @Override - public long getThetaLong() { - final int preLongs = bytes_[PREAMBLE_LONGS_BYTE]; - return (preLongs > 2) ? 
getLongLE(bytes_, THETA_LONG) : Long.MAX_VALUE; - } - - @Override - public boolean isEmpty() { - return (bytes_[FLAGS_BYTE] & EMPTY_FLAG_MASK) > 0; - } - - @Override - public boolean isOrdered() { - return (bytes_[FLAGS_BYTE] & ORDERED_FLAG_MASK) > 0; - } - - @Override - public HashIterator iterator() { - return new BytesCompactHashIterator( - bytes_, - bytes_[PREAMBLE_LONGS_BYTE] << 3, - getRetainedEntries() - ); - } - - @Override - public byte[] toByteArray() { - return Arrays.copyOf(bytes_, getCurrentBytes()); - } - - //restricted methods - - @Override - long[] getCache() { - final long[] cache = new long[getRetainedEntries()]; - int i = 0; - final HashIterator it = iterator(); - while (it.next()) { - cache[i++] = it.get(); - } - return cache; - } - - @Override - int getCompactPreambleLongs() { - return bytes_[PREAMBLE_LONGS_BYTE]; - } - - @Override - int getCurrentPreambleLongs() { - return bytes_[PREAMBLE_LONGS_BYTE]; - } - - @Override - short getSeedHash() { - return getShortLE(bytes_, SEED_HASH_SHORT); - } -} diff --git a/src/main/java/org/apache/datasketches/theta2/package-info.java b/src/main/java/org/apache/datasketches/theta2/package-info.java deleted file mode 100644 index 71c333bb5..000000000 --- a/src/main/java/org/apache/datasketches/theta2/package-info.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/** - * The theta package contains the basic sketch classes that are members of the - * Theta Sketch Framework. - * - *

              There is a separate Tuple package for many of the sketches that are derived from the - * same algorithms defined in the Theta Sketch Framework paper.

              - */ -package org.apache.datasketches.theta2; diff --git a/src/main/java/org/apache/datasketches/thetacommon/HashOperations.java b/src/main/java/org/apache/datasketches/thetacommon/HashOperations.java index 79bff95a1..7044551f0 100644 --- a/src/main/java/org/apache/datasketches/thetacommon/HashOperations.java +++ b/src/main/java/org/apache/datasketches/thetacommon/HashOperations.java @@ -19,13 +19,14 @@ package org.apache.datasketches.thetacommon; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static java.lang.Math.max; import static org.apache.datasketches.common.Util.ceilingPowerOf2; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; /** * Helper class for the common hash table methods. @@ -183,22 +184,22 @@ public static int hashArrayInsert(final long[] srcArr, final long[] hashTable, return count; } - //With Memory or WritableMemory + //With MemorySegment /** - * This is a classical Knuth-style Open Addressing, Double Hash (OADH) search scheme for Memory. - * Returns the index if found, -1 if not found. + * This is a classical Knuth-style Open Addressing, Double Hash (OADH) search scheme for MemorySegment. + * Returns the index if found, -1 if not found. The input MemorySegment may be read only. * - * @param mem The Memory containing the hash table to search. + * @param seg The MemorySegment containing the hash table to search. * The hash table portion must be a power of 2 in size. * @param lgArrLongs The log_base2(hashTable.length). * See lgArrLongs. * @param hash The hash value to search for. Must not be zero. 
- * @param memOffsetBytes offset in the memory where the hashTable starts + * @param segOffsetBytes offset in the MemorySegment where the hashTable starts * @return Current probe index if found, -1 if not found. */ - public static int hashSearchMemory(final Memory mem, final int lgArrLongs, final long hash, - final int memOffsetBytes) { + public static int hashSearchMemorySegment(final MemorySegment seg, final int lgArrLongs, final long hash, + final int segOffsetBytes) { if (hash == 0) { throw new SketchesArgumentException("Given hash must not be zero: " + hash); } @@ -207,8 +208,8 @@ public static int hashSearchMemory(final Memory mem, final int lgArrLongs, final int curProbe = (int) (hash & arrayMask); final int loopIndex = curProbe; do { - final int curProbeOffsetBytes = (curProbe << 3) + memOffsetBytes; - final long curArrayHash = mem.getLong(curProbeOffsetBytes); + final int curProbeOffsetBytes = (curProbe << 3) + segOffsetBytes; + final long curArrayHash = seg.get(JAVA_LONG_UNALIGNED, curProbeOffsetBytes); if (curArrayHash == EMPTY) { return -1; } else if (curArrayHash == hash) { return curProbe; } curProbe = (curProbe + stride) & arrayMask; @@ -217,21 +218,21 @@ public static int hashSearchMemory(final Memory mem, final int lgArrLongs, final } /** - * This is a classical Knuth-style Open Addressing, Double Hash (OADH) insert scheme for Memory. + * This is a classical Knuth-style Open Addressing, Double Hash (OADH) insert scheme for MemorySegment. * This method assumes that the input hash is not a duplicate. * Useful for rebuilding tables to avoid unnecessary comparisons. * Returns the index of insertion, which is always positive or zero. * Throws an exception if table has no empty slot. * - * @param wmem The WritableMemory that contains the hashTable to insert into. + * @param wseg The writable MemorySegment that contains the hashTable to insert into. * The size of the hashTable portion must be a power of 2. 
* @param lgArrLongs The log_base2(hashTable.length. * See lgArrLongs. * @param hash value that must not be zero and will be inserted into the array into an empty slot. - * @param memOffsetBytes offset in the WritableMemory where the hashTable starts + * @param memOffsetBytes offset in the writable MemorySegment where the hashTable starts * @return index of insertion. Always positive or zero. */ - public static int hashInsertOnlyMemory(final WritableMemory wmem, final int lgArrLongs, + public static int hashInsertOnlyMemorySegment(final MemorySegment wseg, final int lgArrLongs, final long hash, final int memOffsetBytes) { final int arrayMask = (1 << lgArrLongs) - 1; // current Size -1 final int stride = getStride(hash, lgArrLongs); @@ -240,9 +241,9 @@ public static int hashInsertOnlyMemory(final WritableMemory wmem, final int lgAr final int loopIndex = curProbe; do { final int curProbeOffsetBytes = (curProbe << 3) + memOffsetBytes; - final long curArrayHash = wmem.getLong(curProbeOffsetBytes); + final long curArrayHash = wseg.get(JAVA_LONG_UNALIGNED, curProbeOffsetBytes); if (curArrayHash == EMPTY) { - wmem.putLong(curProbeOffsetBytes, hash); + wseg.set(JAVA_LONG_UNALIGNED, curProbeOffsetBytes, hash); return curProbe; } curProbe = (curProbe + stride) & arrayMask; @@ -252,19 +253,19 @@ public static int hashInsertOnlyMemory(final WritableMemory wmem, final int lgAr /** * This is a classical Knuth-style Open Addressing, Double Hash insert scheme, but inserts - * values directly into a Memory. + * values directly into a writable MemorySegment. * Returns index ≥ 0 if found (duplicate); < 0 if inserted, inserted at -(index + 1). * Throws an exception if the value is not found and table has no empty slot. * - * @param wmem The WritableMemory that contains the hashTable to insert into. + * @param wseg The writable MemorySegment that contains the hashTable to insert into. * @param lgArrLongs The log_base2(hashTable.length). * See lgArrLongs. 
* @param hash The hash value to be potentially inserted into an empty slot only if it is not * a duplicate of any other hash value in the table. It must not be zero. - * @param memOffsetBytes offset in the WritableMemory where the hash array starts + * @param memOffsetBytes offset in the writable MemorySegment where the hash array starts * @return index ≥ 0 if found (duplicate); < 0 if inserted, inserted at -(index + 1). */ - public static int hashSearchOrInsertMemory(final WritableMemory wmem, final int lgArrLongs, + public static int hashSearchOrInsertMemorySegment(final MemorySegment wseg, final int lgArrLongs, final long hash, final int memOffsetBytes) { final int arrayMask = (1 << lgArrLongs) - 1; // current Size -1 final int stride = getStride(hash, lgArrLongs); @@ -273,9 +274,9 @@ public static int hashSearchOrInsertMemory(final WritableMemory wmem, final int final int loopIndex = curProbe; do { final int curProbeOffsetBytes = (curProbe << 3) + memOffsetBytes; - final long curArrayHash = wmem.getLong(curProbeOffsetBytes); + final long curArrayHash = wseg.get(JAVA_LONG_UNALIGNED, curProbeOffsetBytes); if (curArrayHash == EMPTY) { - wmem.putLong(curProbeOffsetBytes, hash); + wseg.set(JAVA_LONG_UNALIGNED, curProbeOffsetBytes, hash); return ~curProbe; } else if (curArrayHash == hash) { return curProbe; } // curArrayHash is a duplicate // curArrayHash is not a duplicate and not zero, continue searching @@ -283,7 +284,7 @@ public static int hashSearchOrInsertMemory(final WritableMemory wmem, final int } while (curProbe != loopIndex); throw new SketchesArgumentException("Key not found and no empty slot in table!"); } - + //Other related methods /** diff --git a/src/main/java/org/apache/datasketches/thetacommon2/BinomialBoundsN.java b/src/main/java/org/apache/datasketches/thetacommon2/BinomialBoundsN.java deleted file mode 100644 index 233eae34f..000000000 --- a/src/main/java/org/apache/datasketches/thetacommon2/BinomialBoundsN.java +++ /dev/null @@ -1,284 +0,0 @@ 
-/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.thetacommon2; - -import org.apache.datasketches.common.SketchesArgumentException; - -/** - * This class enables the estimation of error bounds given a sample set size, the sampling - * probability theta, the number of standard deviations and a simple noDataSeen flag. This can - * be used to estimate error bounds for fixed threshold sampling as well as the error bounds - * calculations for sketches. - * - * @author Kevin Lang - */ -// BTW, the suffixes "NStar", "NPrimeB", and "NPrimeF" correspond to variables in the formal -// writeup of this scheme. 
-public final class BinomialBoundsN { - - private BinomialBoundsN() {} - - private static final double[] deltaOfNumSDev = - { - 0.5000000000000000000, // = 0.5 (1 + erf(0) - 0.1586553191586026479, // = 0.5 (1 + erf((-1/sqrt(2)))) - 0.0227502618904135701, // = 0.5 (1 + erf((-2/sqrt(2)))) - 0.0013498126861731796 // = 0.5 (1 + erf((-3/sqrt(2)))) - }; - - // our "classic" bounds, but now with continuity correction - - private static double contClassicLB(final double numSamplesF, final double theta, - final double numSDev) { - final double nHat = (numSamplesF - 0.5) / theta; - final double b = numSDev * Math.sqrt((1.0 - theta) / theta); - final double d = 0.5 * b * Math.sqrt((b * b) + (4.0 * nHat)); - final double center = nHat + (0.5 * (b * b)); - return (center - d); - } - - private static double contClassicUB(final double numSamplesF, final double theta, - final double numSDev) { - final double nHat = (numSamplesF + 0.5) / theta; - final double b = numSDev * Math.sqrt((1.0 - theta) / theta); - final double d = 0.5 * b * Math.sqrt((b * b) + (4.0 * nHat)); - final double center = nHat + (0.5 * (b * b)); - return (center + d); - } - - // This is a special purpose calculator for NStar, using a computational - // strategy inspired by its Bayesian definition. It is only appropriate - // for a very limited set of inputs. However, the procedure computeApproxBinoLB () - // below does in fact only call it for suitably limited inputs. - // Outside of this limited range, two different bad things will happen. - // First, because we are not using logarithms, the values of intermediate - // quantities will exceed the dynamic range of doubles. Second, even if that - // problem were fixed, the running time of this procedure is essentially linear - // in est = (numSamples / p), and that can be Very, Very Big. 
- - private static long specialNStar(final long numSamplesI, final double p, final double delta) { - final double q, numSamplesF; - double tot, curTerm; - long m; - assertTrue(numSamplesI >= 1); - assertTrue((0.0 < p) && (p < 1.0)); - assertTrue((0.0 < delta) && (delta < 1.0)); - q = 1.0 - p; - numSamplesF = numSamplesI; - // Use a different algorithm if the following isn't true; this one will be too slow, or worse. - assertTrue((numSamplesF / p) < 500.0); - curTerm = Math.pow(p, numSamplesF); // curTerm = posteriorProbability (k, k, p) - assertTrue(curTerm > 1e-100); // sanity check for non-use of logarithms - tot = curTerm; - m = numSamplesI; - while (tot <= delta) { // this test can fail even the first time - curTerm = (curTerm * q * (m)) / ((m + 1) - numSamplesI); - tot += curTerm; - m += 1; - } - // we have reached a state where tot > delta, so back up one - return (m - 1); - } - - // The following procedure has very limited applicability. - // The above remarks about specialNStar() also apply here. - private static long specialNPrimeB(final long numSamplesI, final double p, final double delta) { - final double q, numSamplesF, oneMinusDelta; - double tot, curTerm; - long m; - assertTrue(numSamplesI >= 1); - assertTrue((0.0 < p) && (p < 1.0)); - assertTrue((0.0 < delta) && (delta < 1.0)); - q = 1.0 - p; - oneMinusDelta = 1.0 - delta; - numSamplesF = numSamplesI; - curTerm = Math.pow(p, numSamplesF); // curTerm = posteriorProbability (k, k, p) - assertTrue(curTerm > 1e-100); // sanity check for non-use of logarithms - tot = curTerm; - m = numSamplesI; - while (tot < oneMinusDelta) { - curTerm = (curTerm * q * (m)) / ((m + 1) - numSamplesI); - tot += curTerm; - m += 1; - } - return (m); // don't need to back up - } - - private static long specialNPrimeF(final long numSamplesI, final double p, final double delta) { - // Use a different algorithm if the following isn't true; this one will be too slow, or worse. 
- assertTrue(((numSamplesI) / p) < 500.0); //A super-small delta could also make it slow. - return (specialNPrimeB(numSamplesI + 1, p, delta)); - } - - // The following computes an approximation to the lower bound of - // a Frequentist confidence interval based on the tails of the Binomial distribution. - private static double computeApproxBinoLB(final long numSamplesI, final double theta, - final int numSDev) { - if (theta == 1.0) { - return (numSamplesI); - } - - else if (numSamplesI == 0) { - return (0.0); - } - - else if (numSamplesI == 1) { - final double delta = deltaOfNumSDev[numSDev]; - final double rawLB = (Math.log(1.0 - delta)) / (Math.log(1.0 - theta)); - return (Math.floor(rawLB)); // round down - } - - else if (numSamplesI > 120) { - // plenty of samples, so gaussian approximation to binomial distribution isn't too bad - final double rawLB = contClassicLB( numSamplesI, theta, numSDev); - return (rawLB - 0.5); // fake round down - } - - // at this point we know 2 <= numSamplesI <= 120 - - else if (theta > (1.0 - 1e-5)) { // empirically-determined threshold - return (numSamplesI); - } - - else if (theta < ((numSamplesI) / 360.0)) { // empirically-determined threshold - // here we use the gaussian approximation, but with a modified "numSDev" - final int index; - final double rawLB; - index = (3 * ((int) numSamplesI)) + (numSDev - 1); - rawLB = contClassicLB(numSamplesI, theta, EquivTables.getLB(index)); - return (rawLB - 0.5); // fake round down - } - - else { // This is the most difficult range to approximate; we will compute an "exact" LB. - // We know that est <= 360, so specialNStar() shouldn't be ridiculously slow. - final double delta = deltaOfNumSDev[numSDev]; - final long nstar = specialNStar(numSamplesI, theta, delta); - return (nstar); // don't need to round - } - } - - // The following computes an approximation to the upper bound of - // a Frequentist confidence interval based on the tails of the Binomial distribution. 
- private static double computeApproxBinoUB(final long numSamplesI, final double theta, - final int numSDev) { - if (theta == 1.0) { - return (numSamplesI); - } - - else if (numSamplesI == 0) { - final double delta = deltaOfNumSDev[numSDev]; - final double rawUB = (Math.log(delta)) / (Math.log(1.0 - theta)); - return (Math.ceil(rawUB)); // round up - } - - else if (numSamplesI > 120) { - // plenty of samples, so gaussian approximation to binomial distribution isn't too bad - final double rawUB = contClassicUB(numSamplesI, theta, numSDev); - return (rawUB + 0.5); // fake round up - } - - // at this point we know 1 <= numSamplesI <= 120 - - else if (theta > (1.0 - 1e-5)) { // empirically-determined threshold - return (numSamplesI + 1); - } - - else if (theta < ((numSamplesI) / 360.0)) { // empirically-determined threshold - // here we use the gaussian approximation, but with a modified "numSDev" - final int index; - final double rawUB; - index = (3 * ((int) numSamplesI)) + (numSDev - 1); - rawUB = contClassicUB(numSamplesI, theta, EquivTables.getUB(index)); - return (rawUB + 0.5); // fake round up - } - - else { // This is the most difficult range to approximate; we will compute an "exact" UB. - // We know that est <= 360, so specialNPrimeF() shouldn't be ridiculously slow. - final double delta = deltaOfNumSDev[numSDev]; - final long nprimef = specialNPrimeF(numSamplesI, theta, delta); - return (nprimef); // don't need to round - } - } - - // The following two procedures enforce some extra rules that help - // to prevent the return of bounds that might be confusing to users. - /** - * Returns the approximate lower bound value - * @param numSamples the number of samples in the sample set - * @param theta the sampling probability - * @param numSDev the number of "standard deviations" from the mean for the tail bounds. - * This must be an integer value of 1, 2 or 3. - * @param noDataSeen this is normally false. 
However, in the case where you have zero samples - * and a theta < 1.0, this flag enables the distinction between a virgin case when no actual - * data has been seen and the case where the estimate may be zero but an upper error bound may - * still exist. - * @return the approximate lower bound value - */ - public static double getLowerBound(final long numSamples, final double theta, final int numSDev, - final boolean noDataSeen) { - //in earlier code numSamples was called numSamplesI - if (noDataSeen) { return 0.0; } - checkArgs(numSamples, theta, numSDev); - final double lb = computeApproxBinoLB(numSamples, theta, numSDev); - final double numSamplesF = numSamples; - final double est = numSamplesF / theta; - return (Math.min(est, Math.max(numSamplesF, lb))); - } - - /** - * Returns the approximate upper bound value - * @param numSamples the number of samples in the sample set - * @param theta the sampling probability - * @param numSDev the number of "standard deviations" from the mean for the tail bounds. - * This must be an integer value of 1, 2 or 3. - * @param noDataSeen this is normally false. However, in the case where you have zero samples - * and a theta < 1.0, this flag enables the distinction between a virgin case when no actual - * data has been seen and the case where the estimate may be zero but an upper error bound may - * still exist. 
- * @return the approximate upper bound value - */ - public static double getUpperBound(final long numSamples, final double theta, final int numSDev, - final boolean noDataSeen) { - //in earlier code numSamples was called numSamplesI - if (noDataSeen) { return 0.0; } - checkArgs(numSamples, theta, numSDev); - final double ub = computeApproxBinoUB(numSamples, theta, numSDev); - final double numSamplesF = numSamples; - final double est = numSamplesF / theta; - return (Math.max(est, ub)); - } - - //exposed only for test - static void checkArgs(final long numSamples, final double theta, final int numSDev) { - if ((numSDev | (numSDev - 1) | (3 - numSDev) | numSamples) < 0) { - throw new SketchesArgumentException( - "numSDev must only be 1,2, or 3 and numSamples must >= 0: numSDev=" - + numSDev + ", numSamples=" + numSamples); - } - if ((theta < 0.0) || (theta > 1.0)) { - throw new SketchesArgumentException("0.0 < theta <= 1.0: " + theta); - } - } - - private static void assertTrue(final boolean truth) { - assert (truth); - } - -} // end of class "BinomialBoundsN" diff --git a/src/main/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInThetaSketchedSets.java b/src/main/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInThetaSketchedSets.java deleted file mode 100644 index 2ae14747c..000000000 --- a/src/main/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInThetaSketchedSets.java +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.thetacommon2; - -import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; - -import org.apache.datasketches.common.BoundsOnRatiosInSampledSets; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.theta2.Sketch; - -/** - * This class is used to compute the bounds on the estimate of the ratio B / A, where: - *
                - *
              • A is a Theta Sketch of population PopA.
              • - *
              • B is a Theta Sketch of population PopB that is a subset of A, - * obtained by an intersection of A with some other Theta Sketch C, - * which acts like a predicate or selection clause.
              • - *
              • The estimate of the ratio PopB/PopA is - * BoundsOnRatiosInThetaSketchedSets.getEstimateOfBoverA(A, B).
              • - *
              • The Upper Bound estimate on the ratio PopB/PopA is - * BoundsOnRatiosInThetaSketchedSets.getUpperBoundForBoverA(A, B).
              • - *
              • The Lower Bound estimate on the ratio PopB/PopA is - * BoundsOnRatiosInThetaSketchedSets.getLowerBoundForBoverA(A, B).
              • - *
              - * Note: The theta of A cannot be greater than the theta of B. - * If B is formed as an intersection of A and some other set C, - * then the theta of B is guaranteed to be less than or equal to the theta of B. - * - * @author Kevin Lang - * @author Lee Rhodes - */ -public final class BoundsOnRatiosInThetaSketchedSets { - - private BoundsOnRatiosInThetaSketchedSets() {} - - /** - * Gets the approximate lower bound for B over A based on a 95% confidence interval - * @param sketchA the sketch A - * @param sketchB the sketch B - * @return the approximate lower bound for B over A - */ - public static double getLowerBoundForBoverA(final Sketch sketchA, final Sketch sketchB) { - final long thetaLongA = sketchA.getThetaLong(); - final long thetaLongB = sketchB.getThetaLong(); - checkThetas(thetaLongA, thetaLongB); - - final int countB = sketchB.getRetainedEntries(true); - final int countA = (thetaLongB == thetaLongA) - ? sketchA.getRetainedEntries(true) - : sketchA.getCountLessThanThetaLong(thetaLongB); - - if (countA <= 0) { return 0; } - final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; - return BoundsOnRatiosInSampledSets.getLowerBoundForBoverA(countA, countB, f); - } - - /** - * Gets the approximate upper bound for B over A based on a 95% confidence interval - * @param sketchA the sketch A - * @param sketchB the sketch B - * @return the approximate upper bound for B over A - */ - public static double getUpperBoundForBoverA(final Sketch sketchA, final Sketch sketchB) { - final long thetaLongA = sketchA.getThetaLong(); - final long thetaLongB = sketchB.getThetaLong(); - checkThetas(thetaLongA, thetaLongB); - - final int countB = sketchB.getRetainedEntries(true); - final int countA = (thetaLongB == thetaLongA) - ? 
sketchA.getRetainedEntries(true) - : sketchA.getCountLessThanThetaLong(thetaLongB); - - if (countA <= 0) { return 1.0; } - final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; - return BoundsOnRatiosInSampledSets.getUpperBoundForBoverA(countA, countB, f); - } - - /** - * Gets the estimate for B over A - * @param sketchA the sketch A - * @param sketchB the sketch B - * @return the estimate for B over A - */ - public static double getEstimateOfBoverA(final Sketch sketchA, final Sketch sketchB) { - final long thetaLongA = sketchA.getThetaLong(); - final long thetaLongB = sketchB.getThetaLong(); - checkThetas(thetaLongA, thetaLongB); - - final int countB = sketchB.getRetainedEntries(true); - final int countA = (thetaLongB == thetaLongA) - ? sketchA.getRetainedEntries(true) - : sketchA.getCountLessThanThetaLong(thetaLongB); - - if (countA <= 0) { return 0.5; } - - return (double) countB / (double) countA; - } - - static void checkThetas(final long thetaLongA, final long thetaLongB) { - if (thetaLongB > thetaLongA) { - throw new SketchesArgumentException("ThetaLongB cannot be > ThetaLongA."); - } - } -} diff --git a/src/main/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInTupleSketchedSets.java b/src/main/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInTupleSketchedSets.java deleted file mode 100644 index 1b16a383b..000000000 --- a/src/main/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInTupleSketchedSets.java +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.thetacommon2; - -import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; - -import org.apache.datasketches.common.BoundsOnRatiosInSampledSets; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.tuple2.Sketch; -import org.apache.datasketches.tuple2.Summary; - -/** - * This class is used to compute the bounds on the estimate of the ratio B / A, where: - *
                - *
              • A is a Tuple Sketch of population PopA.
              • - *
              • B is a Tuple or Theta Sketch of population PopB that is a subset of A, - * obtained by an intersection of A with some other Tuple or Theta Sketch C, - * which acts like a predicate or selection clause.
              • - *
              • The estimate of the ratio PopB/PopA is - * BoundsOnRatiosInThetaSketchedSets.getEstimateOfBoverA(A, B).
              • - *
              • The Upper Bound estimate on the ratio PopB/PopA is - * BoundsOnRatiosInThetaSketchedSets.getUpperBoundForBoverA(A, B).
              • - *
              • The Lower Bound estimate on the ratio PopB/PopA is - * BoundsOnRatiosInThetaSketchedSets.getLowerBoundForBoverA(A, B).
              • - *
              - * Note: The theta of A cannot be greater than the theta of B. - * If B is formed as an intersection of A and some other set C, - * then the theta of B is guaranteed to be less than or equal to the theta of B. - * - * @author Kevin Lang - * @author Lee Rhodes - * @author David Cromberge - */ -public final class BoundsOnRatiosInTupleSketchedSets { - - private BoundsOnRatiosInTupleSketchedSets() {} - - /** - * Gets the approximate lower bound for B over A based on a 95% confidence interval - * @param sketchA the Tuple sketch A with summary type S - * @param sketchB the Tuple sketch B with summary type S - * @param Summary - * @return the approximate lower bound for B over A - */ - public static double getLowerBoundForBoverA( - final Sketch sketchA, - final Sketch sketchB) { - final long thetaLongA = sketchA.getThetaLong(); - final long thetaLongB = sketchB.getThetaLong(); - checkThetas(thetaLongA, thetaLongB); - - final int countB = sketchB.getRetainedEntries(); - final int countA = thetaLongB == thetaLongA - ? sketchA.getRetainedEntries() - : sketchA.getCountLessThanThetaLong(thetaLongB); - - if (countA <= 0) { return 0; } - final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; - return BoundsOnRatiosInSampledSets.getLowerBoundForBoverA(countA, countB, f); - } - - /** - * Gets the approximate lower bound for B over A based on a 95% confidence interval - * @param sketchA the Tuple sketch A with summary type S - * @param sketchB the Theta sketch B - * @param Summary - * @return the approximate lower bound for B over A - */ - public static double getLowerBoundForBoverA( - final Sketch sketchA, - final org.apache.datasketches.theta2.Sketch sketchB) { - final long thetaLongA = sketchA.getThetaLong(); - final long thetaLongB = sketchB.getThetaLong(); - checkThetas(thetaLongA, thetaLongB); - - final int countB = sketchB.getRetainedEntries(); - final int countA = thetaLongB == thetaLongA - ? 
sketchA.getRetainedEntries() - : sketchA.getCountLessThanThetaLong(thetaLongB); - - if (countA <= 0) { return 0; } - final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; - return BoundsOnRatiosInSampledSets.getLowerBoundForBoverA(countA, countB, f); - } - - /** - * Gets the approximate upper bound for B over A based on a 95% confidence interval - * @param sketchA the Tuple sketch A with summary type S - * @param sketchB the Tuple sketch B with summary type S - * @param Summary - * @return the approximate upper bound for B over A - */ - public static double getUpperBoundForBoverA( - final Sketch sketchA, - final Sketch sketchB) { - final long thetaLongA = sketchA.getThetaLong(); - final long thetaLongB = sketchB.getThetaLong(); - checkThetas(thetaLongA, thetaLongB); - - final int countB = sketchB.getRetainedEntries(); - final int countA = thetaLongB == thetaLongA - ? sketchA.getRetainedEntries() - : sketchA.getCountLessThanThetaLong(thetaLongB); - - if (countA <= 0) { return 1.0; } - final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; - return BoundsOnRatiosInSampledSets.getUpperBoundForBoverA(countA, countB, f); - } - - /** - * Gets the approximate upper bound for B over A based on a 95% confidence interval - * @param sketchA the Tuple sketch A with summary type S - * @param sketchB the Theta sketch B - * @param Summary - * @return the approximate upper bound for B over A - */ - public static double getUpperBoundForBoverA( - final Sketch sketchA, - final org.apache.datasketches.theta2.Sketch sketchB) { - final long thetaLongA = sketchA.getThetaLong(); - final long thetaLongB = sketchB.getThetaLong(); - checkThetas(thetaLongA, thetaLongB); - - final int countB = sketchB.getRetainedEntries(true); - final int countA = thetaLongB == thetaLongA - ? 
sketchA.getRetainedEntries() - : sketchA.getCountLessThanThetaLong(thetaLongB); - - if (countA <= 0) { return 1.0; } - final double f = thetaLongB / LONG_MAX_VALUE_AS_DOUBLE; - return BoundsOnRatiosInSampledSets.getUpperBoundForBoverA(countA, countB, f); - } - - /** - * Gets the estimate for B over A - * @param sketchA the Tuple sketch A with summary type S - * @param sketchB the Tuple sketch B with summary type S - * @param Summary - * @return the estimate for B over A - */ - public static double getEstimateOfBoverA( - final Sketch sketchA, - final Sketch sketchB) { - final long thetaLongA = sketchA.getThetaLong(); - final long thetaLongB = sketchB.getThetaLong(); - checkThetas(thetaLongA, thetaLongB); - - final int countB = sketchB.getRetainedEntries(); - final int countA = thetaLongB == thetaLongA - ? sketchA.getRetainedEntries() - : sketchA.getCountLessThanThetaLong(thetaLongB); - - if (countA <= 0) { return 0.5; } - - return (double) countB / (double) countA; - } - - /** - * Gets the estimate for B over A - * @param sketchA the Tuple sketch A with summary type S - * @param sketchB the Theta sketch B - * @param Summary - * @return the estimate for B over A - */ - public static double getEstimateOfBoverA( - final Sketch sketchA, - final org.apache.datasketches.theta2.Sketch sketchB) { - final long thetaLongA = sketchA.getThetaLong(); - final long thetaLongB = sketchB.getThetaLong(); - checkThetas(thetaLongA, thetaLongB); - - final int countB = sketchB.getRetainedEntries(true); - final int countA = thetaLongB == thetaLongA - ? 
sketchA.getRetainedEntries() - : sketchA.getCountLessThanThetaLong(thetaLongB); - - if (countA <= 0) { return 0.5; } - - return (double) countB / (double) countA; - } - - static void checkThetas(final long thetaLongA, final long thetaLongB) { - if (thetaLongB > thetaLongA) { - throw new SketchesArgumentException("ThetaLongB cannot be > ThetaLongA."); - } - } -} diff --git a/src/main/java/org/apache/datasketches/thetacommon2/EquivTables.java b/src/main/java/org/apache/datasketches/thetacommon2/EquivTables.java deleted file mode 100644 index e824444a4..000000000 --- a/src/main/java/org/apache/datasketches/thetacommon2/EquivTables.java +++ /dev/null @@ -1,292 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.thetacommon2; - -/** - * Tables for BinomialBoundsN calculations. - * - *

              These equivTables contain modified values for numSDevs that if used - * WHEN THETA IS VERY SMALL will cause the continuity-corrected version - * of our "classic" confidence intervals to be very close to "exact" confidence - * intervals based on the tails of the actual binomial distirbution.

              - * - * @author Kevin Lang - */ -final class EquivTables { - - private EquivTables() {} - - static double getLB(final int index) { - return lbEquivTable[index]; - } - - static double getUB(final int index) { - return ubEquivTable[index]; - } - - private static double[] lbEquivTable = { - 1.0, 2.0, 3.0, // fake values for k = 0 - 0.78733703534118149, 3.14426768537558132, 13.56789685109913535, // k = 1 - 0.94091379266077979, 2.64699271711145911, 6.29302733018320737, // k = 2 - 0.96869128474958188, 2.46531676590527127, 4.97375283467403051, // k = 3 - 0.97933572521046131, 2.37418810664669877, 4.44899975481712318, // k = 4 - 0.98479165917274258, 2.31863116255024693, 4.16712379778553554, // k = 5 - 0.98806033915698777, 2.28075536565225434, 3.99010556144099837, // k = 6 - 0.99021896790580399, 2.25302005857281529, 3.86784477136922078, // k = 7 - 0.99174267079089873, 2.23168103978522936, 3.77784896945266269, // k = 8 - 0.99287147837287648, 2.21465899260871879, 3.70851932988722410, // k = 9 - 0.99373900046805375, 2.20070155496262032, 3.65326029076638292, // k = 10 - 0.99442519013851438, 2.18900651202670815, 3.60803817612955413, // k = 11 - 0.99498066823221620, 2.17903457780744247, 3.57024330407946877, // k = 12 - 0.99543899410224412, 2.17040883161922693, 3.53810982030634591, // k = 13 - 0.99582322541263579, 2.16285726913676513, 3.51039837124298515, // k = 14 - 0.99614973311747690, 2.15617827879603396, 3.48621230377099778, // k = 15 - 0.99643042892560629, 2.15021897666090922, 3.46488605693562590, // k = 16 - 0.99667418783778317, 2.14486114872480016, 3.44591466064832730, // k = 17 - 0.99688774875812669, 2.14001181420209718, 3.42890765690452781, // k = 18 - 0.99707632299691795, 2.13559675336844634, 3.41355809420343803, // k = 19 - 0.99724399084971083, 2.13155592217421486, 3.39962113251016262, // k = 20 - 0.99739400151915447, 2.12784018863251845, 3.38689892877548004, // k = 21 - 0.99752896842633731, 2.12440890875851096, 3.37522975271599535, // k = 22 - 
0.99765101725122918, 2.12122815311133195, 3.36448003577621080, // k = 23 - 0.99776189496810730, 2.11826934724291505, 3.35453840911279144, // k = 24 - 0.99786304821586214, 2.11550823850916458, 3.34531123809287578, // k = 25 - 0.99795568665180667, 2.11292409529477254, 3.33671916527694634, // k = 26 - 0.99804083063483517, 2.11049908609763293, 3.32869446834217797, // k = 27 - 0.99811933910984862, 2.10821776918189130, 3.32117898316676019, // k = 28 - 0.99819195457286014, 2.10606671027090897, 3.31412243534683171, // k = 29 - 0.99825930555178388, 2.10403415237001923, 3.30748113008135647, // k = 30 - 0.99832193858154028, 2.10210975877822648, 3.30121691946897045, // k = 31 - 0.99838032666573895, 2.10028440670842542, 3.29529629751144171, // k = 32 - 0.99843488390555990, 2.09855000145353188, 3.28968974413223236, // k = 33 - 0.99848596721417948, 2.09689934193824001, 3.28437111460505093, // k = 34 - 0.99853390005924325, 2.09532599155502908, 3.27931717312372939, // k = 35 - 0.99857895741078551, 2.09382418262592296, 3.27450718840060517, // k = 36 - 0.99862138880970974, 2.09238872751677718, 3.26992261182860489, // k = 37 - 0.99866141580770318, 2.09101494715108061, 3.26554677962434425, // k = 38 - 0.99869923565267982, 2.08969860402822860, 3.26136468165239535, // k = 39 - 0.99873502010169091, 2.08843585627218431, 3.25736275677081721, // k = 40 - 0.99876893292508839, 2.08722321436752623, 3.25352872241415980, // k = 41 - 0.99880111078502409, 2.08605749165553789, 3.24985141664350863, // k = 42 - 0.99883168573342118, 2.08493577529222307, 3.24632068399498053, // k = 43 - 0.99886077231613513, 2.08385540129560809, 3.24292724848112357, // k = 44 - 0.99888847451828155, 2.08281392374021834, 3.23966263299664092, // k = 45 - 0.99891488795844907, 2.08180908991394631, 3.23651906111521726, // k = 46 - 0.99894010085196783, 2.08083882998420222, 3.23348939240611344, // k = 47 - 0.99896419358239541, 2.07990122528650545, 3.23056705515594444, // k = 48 - 0.99898723510594323, 2.07899450946285924, 
3.22774598963252402, // k = 49 - 0.99900929266780736, 2.07811704477046533, 3.22502059972006805, // k = 50 - 0.99903043086155208, 2.07726730587160091, 3.22238570890294795, // k = 51 - 0.99905070073845081, 2.07644388314946582, 3.21983651940365689, // k = 52 - 0.99907015770423868, 2.07564546080757850, 3.21736857351049821, // k = 53 - 0.99908884779227947, 2.07487081196367740, 3.21497773796417619, // k = 54 - 0.99910681586905525, 2.07411879634256024, 3.21266015316183484, // k = 55 - 0.99912410177549305, 2.07338834403498140, 3.21041222805715165, // k = 56 - 0.99914074347179849, 2.07267845454973099, 3.20823061166797174, // k = 57 - 0.99915677607464204, 2.07198819052374006, 3.20611216970604573, // k = 58 - 0.99917223149395795, 2.07131667846186929, 3.20405396962596001, // k = 59 - 0.99918714153457699, 2.07066309019154460, 3.20205326110445299, // k = 60 - 0.99920153247185794, 2.07002665203046377, 3.20010746990493544, // k = 61 - 0.99921543193525508, 2.06940663431663552, 3.19821417453343315, // k = 62 - 0.99922886570365677, 2.06880235245998279, 3.19637109973109546, // k = 63 - 0.99924185357357942, 2.06821315729285971, 3.19457610621114441, // k = 64 - 0.99925441845175555, 2.06763843812092318, 3.19282717869864996, // k = 65 - 0.99926658263325407, 2.06707761824370095, 3.19112241228646099, // k = 66 - 0.99927836173816331, 2.06653015295219689, 3.18946001739936946, // k = 67 - 0.99928977431994781, 2.06599552505539918, 3.18783829446098821, // k = 68 - 0.99930083753795884, 2.06547324585920933, 3.18625564538041317, // k = 69 - 0.99931156864562354, 2.06496285191821016, 3.18471055124089730, // k = 70 - 0.99932197985521043, 2.06446390392778767, 3.18320157510865442, // k = 71 - 0.99933208559809827, 2.06397598606787369, 3.18172735837393361, // k = 72 - 0.99934190032416836, 2.06349869971447220, 3.18028661102792398, // k = 73 - 0.99935143390791836, 2.06303166975550312, 3.17887810481605015, // k = 74 - 0.99936070171270330, 2.06257453607466346, 3.17750067581857820, // k = 75 - 
0.99936971103502970, 2.06212696042919674, 3.17615321728274580, // k = 76 - 0.99937847392385493, 2.06168861430600714, 3.17483467831510779, // k = 77 - 0.99938700168914352, 2.06125918927764928, 3.17354405480557489, // k = 78 - 0.99939530099953799, 2.06083838987589729, 3.17228039269048168, // k = 79 - 0.99940338278830154, 2.06042593411496000, 3.17104278166036124, // k = 80 - 0.99941125463777780, 2.06002155276328835, 3.16983035274597569, // k = 81 - 0.99941892470027938, 2.05962498741951094, 3.16864227952240185, // k = 82 - 0.99942640059737187, 2.05923599161263837, 3.16747776846497686, // k = 83 - 0.99943368842187397, 2.05885433061945378, 3.16633606416374391, // k = 84 - 0.99944079790603269, 2.05847977868873500, 3.16521644518826406, // k = 85 - 0.99944773295734990, 2.05811212058944193, 3.16411821883858124, // k = 86 - 0.99945450059186669, 2.05775114781260982, 3.16304072400711789, // k = 87 - 0.99946110646314423, 2.05739666442039493, 3.16198332650733960, // k = 88 - 0.99946755770463369, 2.05704847678819647, 3.16094541781455973, // k = 89 - 0.99947385746861528, 2.05670640500335367, 3.15992641851471490, // k = 90 - 0.99948001256305474, 2.05637027420314666, 3.15892576988736096, // k = 91 - 0.99948602689656241, 2.05603991286400856, 3.15794293484717059, // k = 92 - 0.99949190674294641, 2.05571516158917689, 3.15697740043813724, // k = 93 - 0.99949765436329585, 2.05539586490317561, 3.15602867309343083, // k = 94 - 0.99950327557880314, 2.05508187237845164, 3.15509627710042651, // k = 95 - 0.99950877461972709, 2.05477304104951486, 3.15417975753007340, // k = 96 - 0.99951415481862682, 2.05446923022574879, 3.15327867462917766, // k = 97 - 0.99951942042375208, 2.05417030908833453, 3.15239260700215596, // k = 98 - 0.99952457390890004, 2.05387614661762541, 3.15152114915238712, // k = 99 - 0.99952962005008317, 2.05358662050909402, 3.15066390921020911, // k = 100 - 0.99953456216121594, 2.05330161104427589, 3.14982051097524618, // k = 101 - 0.99953940176368405, 2.05302100378725072, 
3.14899059183684926, // k = 102 - 0.99954414373920031, 2.05274468493067275, 3.14817379948561893, // k = 103 - 0.99954879047621148, 2.05247255013657082, 3.14736979964868624, // k = 104 - 0.99955334485656522, 2.05220449388099269, 3.14657826610371671, // k = 105 - 0.99955780993869325, 2.05194041831310869, 3.14579888316276879, // k = 106 - 0.99956218652590678, 2.05168022402710903, 3.14503134811607765, // k = 107 - 0.99956647932785359, 2.05142381889103831, 3.14427536967733090, // k = 108 - 0.99957069025060719, 2.05117111251445294, 3.14353066260227365, // k = 109 - 0.99957482032178291, 2.05092201793428330, 3.14279695558593630, // k = 110 - 0.99957887261450651, 2.05067645094720774, 3.14207398336887422, // k = 111 - 0.99958284988383639, 2.05043432833224415, 3.14136149076028914, // k = 112 - 0.99958675435604505, 2.05019557189746138, 3.14065923143530767, // k = 113 - 0.99959058650074439, 2.04996010556124020, 3.13996696426707445, // k = 114 - 0.99959434898201494, 2.04972785368377686, 3.13928445867830419, // k = 115 - 0.99959804437042976, 2.04949874512311681, 3.13861149103462367, // k = 116 - 0.99960167394553423, 2.04927271043337100, 3.13794784369528656, // k = 117 - 0.99960523957651048, 2.04904968140490951, 3.13729330661277572, // k = 118 - 0.99960874253329735, 2.04882959397491504, 3.13664767767019725, // k = 119 - 0.99961218434327748, 2.04861238220240693, 3.13601075688413289 // k = 120 - }; - - private static double[] ubEquivTable = { - 1.0, 2.0, 3.0, // fake values for k = 0 - 0.99067760836669549, 1.75460517119302040, 2.48055626001627161, // k = 1 - 0.99270518097577565, 1.78855957509907171, 2.53863835259832626, // k = 2 - 0.99402032633599902, 1.81047286499563143, 2.57811676180597260, // k = 3 - 0.99492607629539975, 1.82625928017762362, 2.60759550546498531, // k = 4 - 0.99558653966013821, 1.83839160339161367, 2.63086812358551470, // k = 5 - 0.99608981951632813, 1.84812399034444752, 2.64993712523727254, // k = 6 - 0.99648648035983456, 1.85617372053235385, 2.66598485907860550, 
// k = 7 - 0.99680750790483330, 1.86298655802610824, 2.67976541374471822, // k = 8 - 0.99707292880049181, 1.86885682585270274, 2.69178781407745760, // k = 9 - 0.99729614928489241, 1.87398826101983218, 2.70241106542158604, // k = 10 - 0.99748667952445658, 1.87852708449801753, 2.71189717290596377, // k = 11 - 0.99765127712748836, 1.88258159501103250, 2.72044290303773550, // k = 12 - 0.99779498340305395, 1.88623391878036273, 2.72819957382063194, // k = 13 - 0.99792160418357412, 1.88954778748873764, 2.73528576807902368, // k = 14 - 0.99803398604944960, 1.89257337682371940, 2.74179612106766513, // k = 15 - 0.99813449883217231, 1.89535099316557876, 2.74780718300419835, // k = 16 - 0.99822494122659577, 1.89791339232732525, 2.75338173141955167, // k = 17 - 0.99830679915913834, 1.90028752122407241, 2.75857186416826039, // k = 18 - 0.99838117410831728, 1.90249575897183831, 2.76342117562634826, // k = 19 - 0.99844913407071090, 1.90455689090418900, 2.76796659454200267, // k = 20 - 0.99851147736424650, 1.90648682834171268, 2.77223944710058845, // k = 21 - 0.99856879856019987, 1.90829917277082473, 2.77626682032629901, // k = 22 - 0.99862183849734265, 1.91000561415842185, 2.78007199816156003, // k = 23 - 0.99867096266018507, 1.91161621560812023, 2.78367524259661536, // k = 24 - 0.99871656986212543, 1.91313978579765376, 2.78709435016625662, // k = 25 - 0.99875907577771272, 1.91458400425526065, 2.79034488416175463, // k = 26 - 0.99879885565047744, 1.91595563175945927, 2.79344064132371273, // k = 27 - 0.99883610756373287, 1.91726064301425936, 2.79639384757751941, // k = 28 - 0.99887095169674467, 1.91850441099725799, 2.79921543574803877, // k = 29 - 0.99890379414739527, 1.91969155477030995, 2.80191513182441554, // k = 30 - 0.99893466279047516, 1.92082633358913313, 2.80450167352080371, // k = 31 - 0.99896392088177777, 1.92191254955568525, 2.80698295731653502, // k = 32 - 0.99899147889385631, 1.92295362479495680, 2.80936614404217266, // k = 33 - 0.99901764688726757, 
1.92395267400968351, 2.81165765979318394, // k = 34 - 0.99904238606342233, 1.92491244978191389, 2.81386337393604435, // k = 35 - 0.99906590152386343, 1.92583552644848055, 2.81598868034527072, // k = 36 - 0.99908829040739988, 1.92672418013918900, 2.81803841726804194, // k = 37 - 0.99910959420023460, 1.92758051694144683, 2.82001709302821268, // k = 38 - 0.99912996403594434, 1.92840654943159961, 2.82192875763732332, // k = 39 - 0.99914930224576892, 1.92920397044028391, 2.82377730628954282, // k = 40 - 0.99916781270195543, 1.92997447498220254, 2.82556612075063640, // k = 41 - 0.99918553179077207, 1.93071949211818605, 2.82729843191989971, // k = 42 - 0.99920250730914972, 1.93144048613876862, 2.82897728689417249, // k = 43 - 0.99921873345181211, 1.93213870990595638, 2.83060537017752267, // k = 44 - 0.99923435180002684, 1.93281536508689555, 2.83218527795750674, // k = 45 - 0.99924930425362390, 1.93347145882316340, 2.83371938965598247, // k = 46 - 0.99926370394567243, 1.93410820221384938, 2.83520990872793277, // k = 47 - 0.99927750755296074, 1.93472643138986200, 2.83665891945119597, // k = 48 - 0.99929082941537217, 1.93532697329771963, 2.83806833931606661, // k = 49 - 0.99930366295501472, 1.93591074716263734, 2.83943997143404658, // k = 50 - 0.99931598804721489, 1.93647857274021362, 2.84077557836653227, // k = 51 - 0.99932789059798210, 1.93703110239354714, 2.84207662106302905, // k = 52 - 0.99933946180485123, 1.93756904936378760, 2.84334468086129277, // k = 53 - 0.99935053819703512, 1.93809302131219852, 2.84458116874117195, // k = 54 - 0.99936126637970801, 1.93860365411038060, 2.84578731838604426, // k = 55 - 0.99937166229284458, 1.93910149816429112, 2.84696443486512862, // k = 56 - 0.99938169190727422, 1.93958709548454067, 2.84811369085281285, // k = 57 - 0.99939136927613959, 1.94006085573701625, 2.84923617230361970, // k = 58 - 0.99940074328745254, 1.94052339623206649, 2.85033291216254270, // k = 59 - 0.99940993070470086, 1.94097508636855309, 2.85140492437699322, // k = 
60 - 0.99941868577388959, 1.94141633372043998, 2.85245314430358121, // k = 61 - 0.99942734443487780, 1.94184757038001976, 2.85347839582286156, // k = 62 - 0.99943556385736088, 1.94226915100517772, 2.85448160365493209, // k = 63 - 0.99944374522542034, 1.94268143723749631, 2.85546346373061510, // k = 64 - 0.99945159955424856, 1.94308482059116727, 2.85642486111805738, // k = 65 - 0.99945915301904620, 1.94347956957849988, 2.85736639994965458, // k = 66 - 0.99946660663832176, 1.94386600964031686, 2.85828887832701639, // k = 67 - 0.99947383703224091, 1.94424436597356021, 2.85919278275500233, // k = 68 - 0.99948075442870277, 1.94461502153473020, 2.86007887186090670, // k = 69 - 0.99948766082269458, 1.94497821937304138, 2.86094774077355396, // k = 70 - 0.99949422748713346, 1.94533411296001191, 2.86179981848076181, // k = 71 - 0.99950070756119658, 1.94568300035135167, 2.86263579405672886, // k = 72 - 0.99950704321753392, 1.94602523449961495, 2.86345610449197352, // k = 73 - 0.99951320334216121, 1.94636083782822311, 2.86426125541271404, // k = 74 - 0.99951920293474927, 1.94669011080745236, 2.86505169255406145, // k = 75 - 0.99952501670378524, 1.94701327348536779, 2.86582788270862920, // k = 76 - 0.99953071209267819, 1.94733044372333097, 2.86659027602854621, // k = 77 - 0.99953632734991515, 1.94764180764266825, 2.86733927778843167, // k = 78 - 0.99954171164873173, 1.94794766430732125, 2.86807526143834934, // k = 79 - 0.99954699274462655, 1.94824807472994621, 2.86879864789403882, // k = 80 - 0.99955216611081710, 1.94854317889829076, 2.86950970901679625, // k = 81 - 0.99955730019613043, 1.94883320227168610, 2.87020887436986527, // k = 82 - 0.99956213770650493, 1.94911826561721568, 2.87089648477021342, // k = 83 - 0.99956704264963037, 1.94939848545763539, 2.87157281693902178, // k = 84 - 0.99957166306481327, 1.94967401618316671, 2.87223821840905202, // k = 85 - 0.99957632713136491, 1.94994497791333288, 2.87289293193450135, // k = 86 - 0.99958087233392234, 1.95021155752212394, 
2.87353731228213860, // k = 87 - 0.99958532555996271, 1.95047376805584349, 2.87417154907075201, // k = 88 - 0.99958956246481989, 1.95073180380688882, 2.87479599765507032, // k = 89 - 0.99959389351869277, 1.95098572880579013, 2.87541081987382086, // k = 90 - 0.99959807862052230, 1.95123574036898617, 2.87601637401948551, // k = 91 - 0.99960214057801977, 1.95148186921983324, 2.87661283691068093, // k = 92 - 0.99960607527256684, 1.95172415829728152, 2.87720042968334155, // k = 93 - 0.99960996433179616, 1.95196280898670693, 2.87777936649376898, // k = 94 - 0.99961379137860717, 1.95219787713926962, 2.87834989933620022, // k = 95 - 0.99961756088146103, 1.95242944583677058, 2.87891216133900230, // k = 96 - 0.99962125605327401, 1.95265762420910960, 2.87946647367488140, // k = 97 - 0.99962486179100551, 1.95288245314810638, 2.88001290210658567, // k = 98 - 0.99962843240297161, 1.95310404286672679, 2.88055166523392359, // k = 99 - 0.99963187276145504, 1.95332251980147475, 2.88108300006589957, // k = 100 - 0.99963525453173929, 1.95353785898848287, 2.88160703591438505, // k = 101 - 0.99963855412988778, 1.95375019354571577, 2.88212393551896184, // k = 102 - 0.99964190254169694, 1.95395953472205974, 2.88263389761985422, // k = 103 - 0.99964506565942202, 1.95416607430155409, 2.88313700661564098, // k = 104 - 0.99964834424233118, 1.95436972855640079, 2.88363350163803034, // k = 105 - 0.99965136548857458, 1.95457068540693513, 2.88412349413960101, // k = 106 - 0.99965436594726498, 1.95476896383092935, 2.88460710620208260, // k = 107 - 0.99965736463468602, 1.95496457504532373, 2.88508450078833789, // k = 108 - 0.99966034130443404, 1.95515761150707590, 2.88555580586194083, // k = 109 - 0.99966326130828520, 1.95534810382198998, 2.88602118761679094, // k = 110 - 0.99966601446035952, 1.95553622237747504, 2.88648066384146773, // k = 111 - 0.99966887679593697, 1.95572186728168163, 2.88693444915907094, // k = 112 - 0.99967161286551232, 1.95590523410490391, 2.88738271495714116, // k = 113 - 
0.99967435412270333, 1.95608626483223702, 2.88782540459769166, // k = 114 - 0.99967701261934394, 1.95626497627117146, 2.88826277189363623, // k = 115 - 0.99967963265157778, 1.95644153684824573, 2.88869486674335008, // k = 116 - 0.99968216317182623, 1.95661589936000269, 2.88912184353694101, // k = 117 - 0.99968479674396349, 1.95678821614791332, 2.88954376359643561, // k = 118 - 0.99968729031337489, 1.95695842061650183, 2.88996069422501023, // k = 119 - 0.99968963358631413, 1.95712651709766305, 2.89037285320668502 // k = 120 - }; - -} diff --git a/src/main/java/org/apache/datasketches/thetacommon2/HashOperations.java b/src/main/java/org/apache/datasketches/thetacommon2/HashOperations.java deleted file mode 100644 index 57b8f1ab2..000000000 --- a/src/main/java/org/apache/datasketches/thetacommon2/HashOperations.java +++ /dev/null @@ -1,401 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.thetacommon2; - -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static java.lang.Math.max; -import static org.apache.datasketches.common.Util.ceilingPowerOf2; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.SketchesStateException; - -/** - * Helper class for the common hash table methods. - * - * @author Lee Rhodes - * @author Kevin Lang - */ -public final class HashOperations { - private static final int STRIDE_HASH_BITS = 7; - private static final int EMPTY = 0; - - /** - * The stride mask for the Open Address, Double Hashing (OADH) hash table algorithm. - */ - public static final int STRIDE_MASK = (1 << STRIDE_HASH_BITS) - 1; - - private HashOperations() {} - - //Make odd and independent of index assuming lgArrLongs lowest bits of the hash were used for - // index. This results in a 8 bit value that is always odd. - private static int getStride(final long hash, final int lgArrLongs) { - return (2 * (int) ((hash >>> lgArrLongs) & STRIDE_MASK) ) + 1; - } - - //ON-HEAP - - /** - * This is a classical Knuth-style Open Addressing, Double Hash (OADH) search scheme for on-heap. - * Returns the index if found, -1 if not found. - * - * @param hashTable The hash table to search. Its size must be a power of 2. - * @param lgArrLongs The log_base2(hashTable.length). - * See lgArrLongs. - * @param hash The hash value to search for. It must not be zero. - * @return Current probe index if found, -1 if not found. 
- */ - public static int hashSearch(final long[] hashTable, final int lgArrLongs, final long hash) { - if (hash == 0) { - throw new SketchesArgumentException("Given hash must not be zero: " + hash); - } - final int arrayMask = (1 << lgArrLongs) - 1; // current Size -1 - final int stride = getStride(hash, lgArrLongs); - int curProbe = (int) (hash & arrayMask); - - // search for duplicate or empty slot - final int loopIndex = curProbe; - do { - final long arrVal = hashTable[curProbe]; - if (arrVal == EMPTY) { - return -1; // not found - } else if (arrVal == hash) { - return curProbe; // found - } - curProbe = (curProbe + stride) & arrayMask; - } while (curProbe != loopIndex); - return -1; - } - - /** - * This is a classical Knuth-style Open Addressing, Double Hash (OADH) insert scheme for on-heap. - * This method assumes that the input hash is not a duplicate. - * Useful for rebuilding tables to avoid unnecessary comparisons. - * Returns the index of insertion, which is always positive or zero. - * Throws an exception if the table has no empty slot. - * - * @param hashTable the hash table to insert into. Its size must be a power of 2. - * @param lgArrLongs The log_base2(hashTable.length). - * See lgArrLongs. - * @param hash The hash value to be potentially inserted into an empty slot. It must not be zero. - * @return index of insertion. Always positive or zero. 
- */ - public static int hashInsertOnly(final long[] hashTable, final int lgArrLongs, final long hash) { - final int arrayMask = (1 << lgArrLongs) - 1; // current Size -1 - final int stride = getStride(hash, lgArrLongs); - int curProbe = (int) (hash & arrayMask); - - final long loopIndex = curProbe; - do { - final long arrVal = hashTable[curProbe]; - if (arrVal == EMPTY) { - hashTable[curProbe] = hash; - return curProbe; - } - curProbe = (curProbe + stride) & arrayMask; - } while (curProbe != loopIndex); - throw new SketchesArgumentException("No empty slot in table!"); - } - - /** - * This is a classical Knuth-style Open Addressing, Double Hash (OADH) insert scheme for on-heap. - * Returns index ≥ 0 if found (duplicate); < 0 if inserted, inserted at -(index + 1). - * Throws an exception if the value is not found and table has no empty slot. - * - * @param hashTable The hash table to insert into. Its size must be a power of 2. - * @param lgArrLongs The log_base2(hashTable.length). - * See lgArrLongs. - * @param hash The hash value to be potentially inserted into an empty slot only if it is not - * a duplicate of any other hash value in the table. It must not be zero. - * @return index ≥ 0 if found (duplicate); < 0 if inserted, inserted at -(index + 1). 
- */ - public static int hashSearchOrInsert(final long[] hashTable, final int lgArrLongs, - final long hash) { - final int arrayMask = (1 << lgArrLongs) - 1; // current Size -1 - final int stride = getStride(hash, lgArrLongs); - int curProbe = (int) (hash & arrayMask); - - // search for duplicate or zero - final int loopIndex = curProbe; - do { - final long arrVal = hashTable[curProbe]; - if (arrVal == EMPTY) { - hashTable[curProbe] = hash; // insert value - return ~curProbe; - } else if (arrVal == hash) { - return curProbe; // found a duplicate - } - curProbe = (curProbe + stride) & arrayMask; - } while (curProbe != loopIndex); - throw new SketchesArgumentException("Hash not found and no empty slots!"); - } - - /** - * Inserts the given long array into the given OADH hashTable of the target size, - * ignores duplicates and counts the values inserted. - * The hash values must not be negative, zero values and values ≥ thetaLong are ignored. - * The given hash table may have values, but they must have been inserted by this method or one - * of the other OADH insert methods in this class. - * This method performs additional checks against potentially invalid hash values or theta values. - * Returns the count of values actually inserted. - * - * @param srcArr the source hash array to be potentially inserted - * @param hashTable The hash table to insert into. Its size must be a power of 2. - * @param lgArrLongs The log_base2(hashTable.length). - * See lgArrLongs. - * @param thetaLong The theta value that all input hash values are compared against. - * It must greater than zero. 
- * See Theta Long - * @return the count of values actually inserted - */ - public static int hashArrayInsert(final long[] srcArr, final long[] hashTable, - final int lgArrLongs, final long thetaLong) { - int count = 0; - final int arrLen = srcArr.length; - checkThetaCorruption(thetaLong); - for (int i = 0; i < arrLen; i++ ) { // scan source array, build target array - final long hash = srcArr[i]; - checkHashCorruption(hash); - if (continueCondition(thetaLong, hash) ) { - continue; - } - if (hashSearchOrInsert(hashTable, lgArrLongs, hash) < 0) { - count++ ; - } - } - return count; - } - - //With MemorySegment - - /** - * This is a classical Knuth-style Open Addressing, Double Hash (OADH) search scheme for MemorySegment. - * Returns the index if found, -1 if not found. The input MemorySegment may be read only. - * - * @param seg The MemorySegment containing the hash table to search. - * The hash table portion must be a power of 2 in size. - * @param lgArrLongs The log_base2(hashTable.length). - * See lgArrLongs. - * @param hash The hash value to search for. Must not be zero. - * @param segOffsetBytes offset in the MemorySegment where the hashTable starts - * @return Current probe index if found, -1 if not found. 
- */ - public static int hashSearchMemorySegment(final MemorySegment seg, final int lgArrLongs, final long hash, - final int segOffsetBytes) { - if (hash == 0) { - throw new SketchesArgumentException("Given hash must not be zero: " + hash); - } - final int arrayMask = (1 << lgArrLongs) - 1; - final int stride = getStride(hash, lgArrLongs); - int curProbe = (int) (hash & arrayMask); - final int loopIndex = curProbe; - do { - final int curProbeOffsetBytes = (curProbe << 3) + segOffsetBytes; - final long curArrayHash = seg.get(JAVA_LONG_UNALIGNED, curProbeOffsetBytes); - if (curArrayHash == EMPTY) { return -1; } - else if (curArrayHash == hash) { return curProbe; } - curProbe = (curProbe + stride) & arrayMask; - } while (curProbe != loopIndex); - return -1; - } - - /** - * This is a classical Knuth-style Open Addressing, Double Hash (OADH) insert scheme for MemorySegment. - * This method assumes that the input hash is not a duplicate. - * Useful for rebuilding tables to avoid unnecessary comparisons. - * Returns the index of insertion, which is always positive or zero. - * Throws an exception if table has no empty slot. - * - * @param wseg The writable MemorySegment that contains the hashTable to insert into. - * The size of the hashTable portion must be a power of 2. - * @param lgArrLongs The log_base2(hashTable.length. - * See lgArrLongs. - * @param hash value that must not be zero and will be inserted into the array into an empty slot. - * @param memOffsetBytes offset in the writable MemorySegment where the hashTable starts - * @return index of insertion. Always positive or zero. 
- */ - public static int hashInsertOnlyMemorySegment(final MemorySegment wseg, final int lgArrLongs, - final long hash, final int memOffsetBytes) { - final int arrayMask = (1 << lgArrLongs) - 1; // current Size -1 - final int stride = getStride(hash, lgArrLongs); - int curProbe = (int) (hash & arrayMask); - // search for duplicate or zero - final int loopIndex = curProbe; - do { - final int curProbeOffsetBytes = (curProbe << 3) + memOffsetBytes; - final long curArrayHash = wseg.get(JAVA_LONG_UNALIGNED, curProbeOffsetBytes); - if (curArrayHash == EMPTY) { - wseg.set(JAVA_LONG_UNALIGNED, curProbeOffsetBytes, hash); - return curProbe; - } - curProbe = (curProbe + stride) & arrayMask; - } while (curProbe != loopIndex); - throw new SketchesArgumentException("No empty slot in table!"); - } - - /** - * This is a classical Knuth-style Open Addressing, Double Hash insert scheme, but inserts - * values directly into a writable MemorySegment. - * Returns index ≥ 0 if found (duplicate); < 0 if inserted, inserted at -(index + 1). - * Throws an exception if the value is not found and table has no empty slot. - * - * @param wseg The writable MemorySegment that contains the hashTable to insert into. - * @param lgArrLongs The log_base2(hashTable.length). - * See lgArrLongs. - * @param hash The hash value to be potentially inserted into an empty slot only if it is not - * a duplicate of any other hash value in the table. It must not be zero. - * @param memOffsetBytes offset in the writable MemorySegment where the hash array starts - * @return index ≥ 0 if found (duplicate); < 0 if inserted, inserted at -(index + 1). 
- */ - public static int hashSearchOrInsertMemorySegment(final MemorySegment wseg, final int lgArrLongs, - final long hash, final int memOffsetBytes) { - final int arrayMask = (1 << lgArrLongs) - 1; // current Size -1 - final int stride = getStride(hash, lgArrLongs); - int curProbe = (int) (hash & arrayMask); - // search for duplicate or zero - final int loopIndex = curProbe; - do { - final int curProbeOffsetBytes = (curProbe << 3) + memOffsetBytes; - final long curArrayHash = wseg.get(JAVA_LONG_UNALIGNED, curProbeOffsetBytes); - if (curArrayHash == EMPTY) { - wseg.set(JAVA_LONG_UNALIGNED, curProbeOffsetBytes, hash); - return ~curProbe; - } else if (curArrayHash == hash) { return curProbe; } // curArrayHash is a duplicate - // curArrayHash is not a duplicate and not zero, continue searching - curProbe = (curProbe + stride) & arrayMask; - } while (curProbe != loopIndex); - throw new SketchesArgumentException("Key not found and no empty slot in table!"); - } - - //Other related methods - - /** - * Checks that the given theta is not negative nor zero. - * @param thetaLong must be greater than zero otherwise throws an exception. - * See Theta Long - */ - public static void checkThetaCorruption(final long thetaLong) { - //if any one of the groups go negative it fails. - if (( thetaLong | (thetaLong - 1) ) < 0L ) { - throw new SketchesStateException( - "Data Corruption: thetaLong was negative or zero: " + "ThetaLong: " + thetaLong); - } - } - - /** - * Checks that the given hash value is not negative. - * @param hash must be greater than -1 otherwise throws an exception. - * Note a hash of zero is normally ignored, but a negative hash is never allowed. 
- */ - public static void checkHashCorruption(final long hash) { - if ( hash < 0L ) { - throw new SketchesArgumentException( - "Data Corruption: hash was negative: " + "Hash: " + hash); - } - } - - /** - * Return true (continue) if hash is greater than or equal to thetaLong, or if hash == 0, - * or if hash == Long.MAX_VALUE. - * @param thetaLong must be greater than the hash value - * See Theta Long - * @param hash must be less than thetaLong and not less than or equal to zero. - * @return true (continue) if hash is greater than or equal to thetaLong, or if hash == 0, - * or if hash == Long.MAX_VALUE. - */ - public static boolean continueCondition(final long thetaLong, final long hash) { - //if any one of the groups go negative it returns true - return (( (hash - 1L) | (thetaLong - hash - 1L)) < 0L ); - } - - /** - * Converts the given array to a hash table. - * @param hashArr The given array of hashes. Gaps are OK. - * @param count The number of valid hashes in the array - * @param thetaLong Any hashes equal to or greater than thetaLong will be ignored - * @param rebuildThreshold The fill fraction for the hash table forcing a rebuild or resize. - * @return a HashTable - */ - public static long[] convertToHashTable( - final long[] hashArr, - final int count, - final long thetaLong, - final double rebuildThreshold) { - final int lgArrLongs = minLgHashTableSize(count, rebuildThreshold); - final int arrLongs = 1 << lgArrLongs; - final long[] hashTable = new long[arrLongs]; - hashArrayInsert(hashArr, hashTable, lgArrLongs, thetaLong); - return hashTable; - } - - /** - * Returns the smallest log hash table size given the count of items and the rebuild threshold. - * @param count the given count of items - * @param rebuild_threshold the rebuild threshold as a fraction between zero and one. 
- * @return the smallest log hash table size - */ - public static int minLgHashTableSize(final int count, final double rebuild_threshold) { - final int upperCount = (int) Math.ceil(count / rebuild_threshold); - final int arrLongs = max(ceilingPowerOf2(upperCount), 1 << ThetaUtil.MIN_LG_ARR_LONGS); - final int newLgArrLongs = Integer.numberOfTrailingZeros(arrLongs); - return newLgArrLongs; - } - - /** - * Counts the cardinality of the first Log2 values of the given source array. - * @param srcArr the given source array - * @param lgArrLongs See lgArrLongs - * @param thetaLong See Theta Long - * @return the cardinality - */ - public static int countPart(final long[] srcArr, final int lgArrLongs, final long thetaLong) { - int cnt = 0; - final int len = 1 << lgArrLongs; - for (int i = len; i-- > 0;) { - final long hash = srcArr[i]; - if (continueCondition(thetaLong, hash) ) { - continue; - } - cnt++ ; - } - return cnt; - } - - /** - * Counts the cardinality of the given source array. - * @param srcArr the given source array - * @param thetaLong See Theta Long - * @return the cardinality - */ - public static int count(final long[] srcArr, final long thetaLong) { - int cnt = 0; - final int len = srcArr.length; - for (int i = len; i-- > 0;) { - final long hash = srcArr[i]; - if (continueCondition(thetaLong, hash) ) { - continue; - } - cnt++ ; - } - return cnt; - } - -} diff --git a/src/main/java/org/apache/datasketches/thetacommon2/SetOperationCornerCases.java b/src/main/java/org/apache/datasketches/thetacommon2/SetOperationCornerCases.java deleted file mode 100644 index f559cb9e8..000000000 --- a/src/main/java/org/apache/datasketches/thetacommon2/SetOperationCornerCases.java +++ /dev/null @@ -1,313 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.thetacommon2; - -import java.util.HashMap; -import java.util.Map; - -import org.apache.datasketches.common.SketchesArgumentException; - -/** - * Simplifies and speeds up set operations by resolving specific corner cases. - * @author Lee Rhodes - */ - -public class SetOperationCornerCases { - private static final long MAX = Long.MAX_VALUE; - - /** Intersection actions */ - public enum IntersectAction { - /** Degenerate{MinTheta, 0, F} */ - DEGEN_MIN_0_F("D", "Degenerate{MinTheta, 0, F}"), - /** Empty{1.0, 0, T */ - EMPTY_1_0_T("E", "Empty{1.0, 0, T}"), - /** Full Intersect */ - FULL_INTERSECT("I", "Full Intersect"); - - private String actionId; - private String actionDescription; - - private IntersectAction(final String actionId, final String actionDescription) { - this.actionId = actionId; - this.actionDescription = actionDescription; - } - - /** - * Gets the Action ID - * @return the actionId - */ - public String getActionId() { - return actionId; - } - - /** - * Gets the Action Description - * @return the actionDescription - */ - public String getActionDescription() { - return actionDescription; - } - } - - /** A not B actions */ - public enum AnotbAction { - /** Sketch A Exact */ - SKETCH_A("A", "Sketch A Exactly"), - /** Trim Sketch A by MinTheta */ - TRIM_A("TA", "Trim Sketch A by MinTheta"), - /** Degenerate{MinTheta, 0, F} */ - 
DEGEN_MIN_0_F("D", "Degenerate{MinTheta, 0, F}"), - /** Degenerate{ThetaA, 0, F} */ - DEGEN_THA_0_F("DA", "Degenerate{ThetaA, 0, F}"), - /** Empty{1.0, 0, T} */ - EMPTY_1_0_T("E", "Empty{1.0, 0, T}"), - /** Full AnotB */ - FULL_ANOTB("N", "Full AnotB"); - - private String actionId; - private String actionDescription; - - private AnotbAction(final String actionId, final String actionDescription) { - this.actionId = actionId; - this.actionDescription = actionDescription; - } - - /** - * Gets the Action ID - * @return the actionId - */ - public String getActionId() { - return actionId; - } - - /** - * Gets the action description - * @return the action description - */ - public String getActionDescription() { - return actionDescription; - } - } - - /** List of union actions */ - public enum UnionAction { - /** Sketch A Exactly */ - SKETCH_A("A", "Sketch A Exactly"), - /** Trim Sketch A by MinTheta */ - TRIM_A("TA", "Trim Sketch A by MinTheta"), - /** Sketch B Exactly */ - SKETCH_B("B", "Sketch B Exactly"), - /** Trim Sketch B by MinTheta */ - TRIM_B("TB", "Trim Sketch B by MinTheta"), - /** Degenerate{MinTheta, 0, F} */ - DEGEN_MIN_0_F("D", "Degenerate{MinTheta, 0, F}"), - /** Degenerate{ThetaA, 0, F} */ - DEGEN_THA_0_F("DA", "Degenerate{ThetaA, 0, F}"), - /** Degenerate{ThetaB, 0, F} */ - DEGEN_THB_0_F("DB", "Degenerate{ThetaB, 0, F}"), - /** Empty{1.0, 0, T} */ - EMPTY_1_0_T("E", "Empty{1.0, 0, T}"), - /** Full Union */ - FULL_UNION("N", "Full Union"); - - private String actionId; - private String actionDescription; - - private UnionAction(final String actionId, final String actionDescription) { - this.actionId = actionId; - this.actionDescription = actionDescription; - } - - /** - * Gets the action ID - * @return the actionId - */ - public String getActionId() { - return actionId; - } - - /** - * Gets the action description - * @return the actionDescription - */ - public String getActionDescription() { - return actionDescription; - } - } - - /** List of corner cases 
*/ - public enum CornerCase { - /** Empty Empty */ - Empty_Empty(055, "A{ 1.0, 0, T} ; B{ 1.0, 0, T}", - IntersectAction.EMPTY_1_0_T, AnotbAction.EMPTY_1_0_T, UnionAction.EMPTY_1_0_T), - /** Empty Exact */ - Empty_Exact(056, "A{ 1.0, 0, T} ; B{ 1.0,>0, F}", - IntersectAction.EMPTY_1_0_T, AnotbAction.EMPTY_1_0_T, UnionAction.SKETCH_B), - /** Empty Estimation */ - Empty_Estimation(052, "A{ 1.0, 0, T} ; B{<1.0,>0, F", - IntersectAction.EMPTY_1_0_T, AnotbAction.EMPTY_1_0_T, UnionAction.SKETCH_B), - /** Empty Degen */ - Empty_Degen(050, "A{ 1.0, 0, T} ; B{<1.0, 0, F}", - IntersectAction.EMPTY_1_0_T, AnotbAction.EMPTY_1_0_T, UnionAction.DEGEN_THB_0_F), - - /** Exact Empty */ - Exact_Empty(065, "A{ 1.0,>0, F} ; B{ 1.0, 0, T}", - IntersectAction.EMPTY_1_0_T, AnotbAction.SKETCH_A, UnionAction.SKETCH_A), - /** Exact Exact */ - Exact_Exact(066, "A{ 1.0,>0, F} ; B{ 1.0,>0, F}", - IntersectAction.FULL_INTERSECT, AnotbAction.FULL_ANOTB, UnionAction.FULL_UNION), - /** Exact Estimation */ - Exact_Estimation(062, "A{ 1.0,>0, F} ; B{<1.0,>0, F}", - IntersectAction.FULL_INTERSECT, AnotbAction.FULL_ANOTB, UnionAction.FULL_UNION), - /** Exact Degen */ - Exact_Degen(060, "A{ 1.0,>0, F} ; B{<1.0, 0, F}", - IntersectAction.DEGEN_MIN_0_F, AnotbAction.TRIM_A, UnionAction.TRIM_A), - - /** Estimation_Empty */ - Estimation_Empty(025, "A{<1.0,>0, F} ; B{ 1.0, 0, T}", - IntersectAction.EMPTY_1_0_T, AnotbAction.SKETCH_A, UnionAction.SKETCH_A), - /** Estimation_Exact */ - Estimation_Exact(026, "A{<1.0,>0, F} ; B{ 1.0,>0, F}", - IntersectAction.FULL_INTERSECT, AnotbAction.FULL_ANOTB, UnionAction.FULL_UNION), - /** Estimation_Estimation */ - Estimation_Estimation(022, "A{<1.0,>0, F} ; B{<1.0,>0, F}", - IntersectAction.FULL_INTERSECT, AnotbAction.FULL_ANOTB, UnionAction.FULL_UNION), - /** Estimation_Degen */ - Estimation_Degen(020, "A{<1.0,>0, F} ; B{<1.0, 0, F}", - IntersectAction.DEGEN_MIN_0_F, AnotbAction.TRIM_A, UnionAction.TRIM_A), - - /** Degen_Empty */ - Degen_Empty(005, "A{<1.0, 0, F} ; B{ 
1.0, 0, T}", - IntersectAction.EMPTY_1_0_T, AnotbAction.DEGEN_THA_0_F, UnionAction.DEGEN_THA_0_F), - /** Degen_Exact */ - Degen_Exact(006, "A{<1.0, 0, F} ; B{ 1.0,>0, F}", - IntersectAction.DEGEN_MIN_0_F, AnotbAction.DEGEN_THA_0_F, UnionAction.TRIM_B), - /** Degen_Estimation */ - Degen_Estimation(002, "A{<1.0, 0, F} ; B{<1.0,>0, F}", - IntersectAction.DEGEN_MIN_0_F, AnotbAction.DEGEN_MIN_0_F, UnionAction.TRIM_B), - /** Degen_Degen */ - Degen_Degen(000, "A{<1.0, 0, F} ; B{<1.0, 0, F}", - IntersectAction.DEGEN_MIN_0_F, AnotbAction.DEGEN_MIN_0_F, UnionAction.DEGEN_MIN_0_F); - - private static final Map caseIdToCornerCaseMap = new HashMap<>(); - private int caseId; - private String caseDescription; - private IntersectAction intersectAction; - private AnotbAction anotbAction; - private UnionAction unionAction; - - static { - for (final CornerCase cc : values()) { - caseIdToCornerCaseMap.put(cc.getId(), cc); - } - } - - private CornerCase(final int caseId, final String caseDescription, - final IntersectAction intersectAction, final AnotbAction anotbAction, final UnionAction unionAction) { - this.caseId = caseId; - this.caseDescription = caseDescription; - this.intersectAction = intersectAction; - this.anotbAction = anotbAction; - this.unionAction = unionAction; - } - - /** - * Gets the case ID - * @return the caseId - */ - public int getId() { - return caseId; - } - - /** - * Gets the case description - * @return the caseDescription - */ - public String getCaseDescription() { - return caseDescription; - } - - /** - * Gets the intersect action - * @return the intersectAction - */ - public IntersectAction getIntersectAction() { - return intersectAction; - } - - /** - * Gets the AnotB action - * @return the anotbAction - */ - public AnotbAction getAnotbAction() { - return anotbAction; - } - - /** - * Gets the union action - * @return the unionAction - */ - public UnionAction getUnionAction() { - return unionAction; - } - - //See checkById test in /tuple/MiscTest. 
- /** - * Converts caseId to CornerCaseId - * @param id the case ID - * @return the Corner Case ID - */ - public static CornerCase caseIdToCornerCase(final int id) { - final CornerCase cc = caseIdToCornerCaseMap.get(id); - if (cc == null) { - throw new SketchesArgumentException("Possible Corruption: Illegal CornerCase ID: " + Integer.toOctalString(id)); - } - return cc; - } - } //end of enum CornerCase - - /** - * Creates the CornerCase ID - * @param thetaLongA the theta of A as a long - * @param countA the count of A - * @param emptyA true if A is empty - * @param thetaLongB the theta of B as a long - * @param countB the count of B - * @param emptyB true if B is empty - * @return the Corner Case ID - */ - public static int createCornerCaseId( - final long thetaLongA, final int countA, final boolean emptyA, - final long thetaLongB, final int countB, final boolean emptyB) { - return (sketchStateId(emptyA, countA, thetaLongA) << 3) | sketchStateId(emptyB, countB, thetaLongB); - } - - /** - * Returns the sketch state ID - * @param isEmpty true if empty - * @param numRetained the number of items retained - * @param thetaLong the value of theta as a long - * @return the sketch state ID - */ - public static int sketchStateId(final boolean isEmpty, final int numRetained, final long thetaLong) { - // assume thetaLong = MAX if empty - return (((thetaLong == MAX) || isEmpty) ? 4 : 0) | ((numRetained > 0) ? 2 : 0) | (isEmpty ? 1 : 0); - } -} diff --git a/src/main/java/org/apache/datasketches/thetacommon2/ThetaUtil.java b/src/main/java/org/apache/datasketches/thetacommon2/ThetaUtil.java deleted file mode 100644 index 585efec6f..000000000 --- a/src/main/java/org/apache/datasketches/thetacommon2/ThetaUtil.java +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.thetacommon2; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; - -/** - * Utility methods for the Theta Family of sketches - * @author Lee Rhodes - * - */ -public final class ThetaUtil { - - /** - * The smallest Log2 nom entries allowed: 4. - */ - public static final int MIN_LG_NOM_LONGS = 4; - /** - * The largest Log2 nom entries allowed: 26. - */ - public static final int MAX_LG_NOM_LONGS = 26; - /** - * The hash table rebuild threshold = 15.0/16.0. - */ - public static final double REBUILD_THRESHOLD = 15.0 / 16.0; - /** - * The resize threshold = 0.5; tuned for speed. - */ - public static final double RESIZE_THRESHOLD = 0.5; - /** - * The default nominal entries is provided as a convenience for those cases where the - * nominal sketch size in number of entries is not provided. - * A sketch of 4096 entries has a Relative Standard Error (RSE) of +/- 1.56% at a confidence of - * 68%; or equivalently, a Relative Error of +/- 3.1% at a confidence of 95.4%. - * See Default Nominal Entries - */ - public static final int DEFAULT_NOMINAL_ENTRIES = 4096; - - private ThetaUtil() {} - - /** - * The smallest Log2 cache size allowed: 5. 
- */ - public static final int MIN_LG_ARR_LONGS = 5; - - /** - * Gets the smallest allowed exponent of 2 that it is a sub-multiple of the target by zero, - * one or more resize factors. - * - * @param lgTarget Log2 of the target size - * @param lgRF Log_base2 of Resize Factor. - * See Resize Factor - * @param lgMin Log2 of the minimum allowed starting size - * @return The Log2 of the starting size - */ - public static int startingSubMultiple(final int lgTarget, final int lgRF, - final int lgMin) { - return lgTarget <= lgMin ? lgMin : lgRF == 0 ? lgTarget : (lgTarget - lgMin) % lgRF + lgMin; - } - - /** - * Checks that the given nomLongs is within bounds and returns the Log2 of the ceiling power of 2 - * of the given nomLongs. - * @param nomLongs the given number of nominal longs. This can be any value from 16 to - * 67108864, inclusive. - * @return The Log2 of the ceiling power of 2 of the given nomLongs. - */ - public static int checkNomLongs(final int nomLongs) { - final int lgNomLongs = Integer.numberOfTrailingZeros(Util.ceilingPowerOf2(nomLongs)); - if (lgNomLongs > MAX_LG_NOM_LONGS || lgNomLongs < MIN_LG_NOM_LONGS) { - throw new SketchesArgumentException("Nominal Entries must be >= 16 and <= 67108864: " - + nomLongs); - } - return lgNomLongs; - } - -} - diff --git a/src/main/java/org/apache/datasketches/thetacommon2/package-info.java b/src/main/java/org/apache/datasketches/thetacommon2/package-info.java deleted file mode 100644 index 46fd1110b..000000000 --- a/src/main/java/org/apache/datasketches/thetacommon2/package-info.java +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/** - * This package contains common tools and methods for the theta, - * tuple, tuple/* and fdt packages. - */ -package org.apache.datasketches.thetacommon2; diff --git a/src/main/java/org/apache/datasketches/tuple/AnotB.java b/src/main/java/org/apache/datasketches/tuple/AnotB.java index 908e8f6be..fed710bdd 100644 --- a/src/main/java/org/apache/datasketches/tuple/AnotB.java +++ b/src/main/java/org/apache/datasketches/tuple/AnotB.java @@ -31,9 +31,9 @@ import org.apache.datasketches.common.SketchesStateException; import org.apache.datasketches.common.SuppressFBWarnings; import org.apache.datasketches.thetacommon.SetOperationCornerCases; +import org.apache.datasketches.thetacommon.ThetaUtil; import org.apache.datasketches.thetacommon.SetOperationCornerCases.AnotbAction; import org.apache.datasketches.thetacommon.SetOperationCornerCases.CornerCase; -import org.apache.datasketches.thetacommon.ThetaUtil; /** * Computes a set difference, A-AND-NOT-B, of two generic tuple sketches. 
diff --git a/src/main/java/org/apache/datasketches/tuple/CompactSketch.java b/src/main/java/org/apache/datasketches/tuple/CompactSketch.java index ec58a5e76..20eac81fb 100644 --- a/src/main/java/org/apache/datasketches/tuple/CompactSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/CompactSketch.java @@ -19,15 +19,18 @@ package org.apache.datasketches.tuple; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static org.apache.datasketches.thetacommon.HashOperations.count; +import java.lang.foreign.MemorySegment; import java.lang.reflect.Array; import java.nio.ByteOrder; import org.apache.datasketches.common.ByteArrayUtil; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; /** * CompactSketches are never created directly. They are created as a result of @@ -69,24 +72,24 @@ private enum Flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDER /** * This is to create an instance of a CompactSketch given a serialized form * - * @param mem Memory object with serialized CompactSketch + * @param seg MemorySegment object with serialized CompactSketch * @param deserializer the SummaryDeserializer */ - CompactSketch(final Memory mem, final SummaryDeserializer deserializer) { + CompactSketch(final MemorySegment seg, final SummaryDeserializer deserializer) { super(Long.MAX_VALUE, true, null); int offset = 0; - final byte preambleLongs = mem.getByte(offset++); - final byte version = mem.getByte(offset++); - final byte familyId = mem.getByte(offset++); + final byte preambleLongs = seg.get(JAVA_BYTE, offset++); + final byte version = seg.get(JAVA_BYTE, offset++); + final byte familyId = seg.get(JAVA_BYTE, offset++); SerializerDeserializer.validateFamily(familyId, preambleLongs); if (version > serialVersionUID) { 
throw new SketchesArgumentException( "Unsupported serial version. Expected: " + serialVersionUID + " or lower, actual: " + version); } SerializerDeserializer - .validateType(mem.getByte(offset++), SerializerDeserializer.SketchType.CompactSketch); + .validateType(seg.get(JAVA_BYTE, offset++), SerializerDeserializer.SketchType.CompactSketch); if (version <= serialVersionUIDLegacy) { // legacy serial format - final byte flags = mem.getByte(offset++); + final byte flags = seg.get(JAVA_BYTE, offset++); final boolean isBigEndian = (flags & 1 << FlagsLegacy.IS_BIG_ENDIAN.ordinal()) > 0; if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { throw new SketchesArgumentException("Byte order mismatch"); @@ -94,7 +97,7 @@ private enum Flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDER empty_ = (flags & 1 << FlagsLegacy.IS_EMPTY.ordinal()) > 0; final boolean isThetaIncluded = (flags & 1 << FlagsLegacy.IS_THETA_INCLUDED.ordinal()) > 0; if (isThetaIncluded) { - thetaLong_ = mem.getLong(offset); + thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, offset); offset += Long.BYTES; } else { thetaLong_ = Long.MAX_VALUE; @@ -103,9 +106,9 @@ private enum Flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDER if (hasEntries) { int classNameLength = 0; if (version == serialVersionWithSummaryClassNameUID) { - classNameLength = mem.getByte(offset++); + classNameLength = seg.get(JAVA_BYTE, offset++); } - final int count = mem.getInt(offset); + final int count = seg.get(JAVA_INT_UNALIGNED, offset); offset += Integer.BYTES; if (version == serialVersionWithSummaryClassNameUID) { offset += classNameLength; @@ -113,11 +116,11 @@ private enum Flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDER hashArr_ = new long[count]; for (int i = 0; i < count; i++) { - hashArr_[i] = mem.getLong(offset); + hashArr_[i] = seg.get(JAVA_LONG_UNALIGNED, offset); offset += Long.BYTES; } for (int i = 0; i < count; i++) { - offset += readSummary(mem, offset, i, 
count, deserializer); + offset += readSummary(seg, offset, i, count, deserializer); } } else { hashArr_ = new long[0]; @@ -125,7 +128,7 @@ private enum Flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDER } } else { // current serial format offset++; //skip unused byte - final byte flags = mem.getByte(offset++); + final byte flags = seg.get(JAVA_BYTE, offset++); offset += 2; //skip 2 unused bytes empty_ = (flags & 1 << Flags.IS_EMPTY.ordinal()) > 0; thetaLong_ = Long.MAX_VALUE; @@ -134,11 +137,11 @@ private enum Flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDER if (preambleLongs == 1) { count = 1; } else { - count = mem.getInt(offset); + count = seg.get(JAVA_INT_UNALIGNED, offset); offset += Integer.BYTES; offset += 4; // unused if (preambleLongs > 2) { - thetaLong_ = mem.getLong(offset); + thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, offset); offset += Long.BYTES; } } @@ -146,18 +149,18 @@ private enum Flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDER hashArr_ = new long[count]; for (int i = 0; i < count; i++) { - hashArr_[i] = mem.getLong(offset); + hashArr_[i] = seg.get(JAVA_LONG_UNALIGNED, offset); offset += Long.BYTES; - offset += readSummary(mem, offset, i, count, deserializer); + offset += readSummary(seg, offset, i, count, deserializer); } } } @SuppressWarnings({"unchecked"}) - private int readSummary(final Memory mem, final int offset, final int i, final int count, + private int readSummary(final MemorySegment seg, final int offset, final int i, final int count, final SummaryDeserializer deserializer) { - final Memory memRegion = mem.region(offset, mem.getCapacity() - offset); - final DeserializeResult result = deserializer.heapifySummary(memRegion); + final MemorySegment segRegion = seg.asSlice(offset, seg.byteSize() - offset); + final DeserializeResult result = deserializer.heapifySummary(segRegion); final S summary = result.getObject(); final Class summaryType = (Class) result.getObject().getClass(); if 
(summaryArr_ == null) { diff --git a/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java index 756e99e8b..708486df1 100644 --- a/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java @@ -19,11 +19,16 @@ package org.apache.datasketches.tuple; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static org.apache.datasketches.common.Util.ceilingPowerOf2; import static org.apache.datasketches.common.Util.checkBounds; import static org.apache.datasketches.common.Util.exactLog2OfLong; import static org.apache.datasketches.thetacommon.HashOperations.count; +import java.lang.foreign.MemorySegment; import java.lang.reflect.Array; import java.nio.ByteOrder; import java.util.Objects; @@ -33,7 +38,6 @@ import org.apache.datasketches.common.QuickSelect; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; import org.apache.datasketches.thetacommon.HashOperations; import org.apache.datasketches.thetacommon.ThetaUtil; @@ -167,7 +171,7 @@ private QuickSelectSketch( /** * This is to create an instance of a QuickSelectSketch given a serialized form - * @param mem Memory object with serialized QuickSelectSketch + * @param seg MemorySegment object with serialized QuickSelectSketch * @param deserializer the SummaryDeserializer * @param summaryFactory the SummaryFactory * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated. 
@@ -176,26 +180,26 @@ private QuickSelectSketch( */ @Deprecated QuickSelectSketch( - final Memory mem, + final MemorySegment seg, final SummaryDeserializer deserializer, final SummaryFactory summaryFactory) { - this(new Validate<>(), mem, deserializer, summaryFactory); + this(new Validate<>(), seg, deserializer, summaryFactory); } /* * This private constructor is used to protect against "Finalizer attacks". * The private static inner class Validate performs validation and deserialization - * from the input Memory and may throw exceptions. In order to protect against the attack, we must + * from the input MemorySegment and may throw exceptions. In order to protect against the attack, we must * perform this validation prior to the constructor's super reaches the Object class. * Making QuickSelectSketch final won't work here because UpdatableSketch is a subclass. * Using an empty final finalizer() is not recommended and is deprecated as of Java9. */ private QuickSelectSketch( final Validate val, - final Memory mem, + final MemorySegment seg, final SummaryDeserializer deserializer, final SummaryFactory summaryFactory) { - super(val.validate(mem, deserializer), val.myEmpty, summaryFactory); + super(val.validate(seg, deserializer), val.myEmpty, summaryFactory); nomEntries_ = val.myNomEntries; lgResizeFactor_ = val.myLgResizeFactor; samplingProbability_ = val.mySamplingProbability; @@ -222,43 +226,43 @@ private static final class Validate { @SuppressWarnings("unchecked") long validate( - final Memory mem, + final MemorySegment seg, final SummaryDeserializer deserializer) { - Objects.requireNonNull(mem, "SourceMemory must not be null."); + Objects.requireNonNull(seg, "Source MemorySegment must not be null."); Objects.requireNonNull(deserializer, "Deserializer must not be null."); - checkBounds(0, 8, mem.getCapacity()); + checkBounds(0, 8, seg.byteSize()); int offset = 0; - final byte preambleLongs = mem.getByte(offset++); //byte 0 PreLongs - final byte version = 
mem.getByte(offset++); //byte 1 SerVer - final byte familyId = mem.getByte(offset++); //byte 2 FamID + final byte preambleLongs = seg.get(JAVA_BYTE, offset++); //byte 0 PreLongs + final byte version = seg.get(JAVA_BYTE, offset++); //byte 1 SerVer + final byte familyId = seg.get(JAVA_BYTE, offset++); //byte 2 FamID SerializerDeserializer.validateFamily(familyId, preambleLongs); if (version > serialVersionUID) { throw new SketchesArgumentException( "Unsupported serial version. Expected: " + serialVersionUID + " or lower, actual: " + version); } - SerializerDeserializer.validateType(mem.getByte(offset++), //byte 3 + SerializerDeserializer.validateType(seg.get(JAVA_BYTE, offset++), //byte 3 SerializerDeserializer.SketchType.QuickSelectSketch); - final byte flags = mem.getByte(offset++); //byte 4 + final byte flags = seg.get(JAVA_BYTE, offset++); //byte 4 final boolean isBigEndian = (flags & 1 << Flags.IS_BIG_ENDIAN.ordinal()) > 0; if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { throw new SketchesArgumentException("Endian byte order mismatch"); } - myNomEntries = 1 << mem.getByte(offset++); //byte 5 - myLgCurrentCapacity = mem.getByte(offset++); //byte 6 - myLgResizeFactor = mem.getByte(offset++); //byte 7 + myNomEntries = 1 << seg.get(JAVA_BYTE, offset++); //byte 5 + myLgCurrentCapacity = seg.get(JAVA_BYTE, offset++); //byte 6 + myLgResizeFactor = seg.get(JAVA_BYTE, offset++); //byte 7 - checkBounds(0, preambleLongs * 8L, mem.getCapacity()); + checkBounds(0, preambleLongs * 8L, seg.byteSize()); final boolean isInSamplingMode = (flags & 1 << Flags.IS_IN_SAMPLING_MODE.ordinal()) > 0; - mySamplingProbability = isInSamplingMode ? mem.getFloat(offset) : 1f; //bytes 8 - 11 + mySamplingProbability = isInSamplingMode ? 
seg.get(JAVA_FLOAT_UNALIGNED, offset) : 1f; //bytes 8 - 11 if (isInSamplingMode) { offset += Float.BYTES; } final boolean isThetaIncluded = (flags & 1 << Flags.IS_THETA_INCLUDED.ordinal()) > 0; if (isThetaIncluded) { - myThetaLong = mem.getLong(offset); + myThetaLong = seg.get(JAVA_LONG_UNALIGNED, offset); offset += Long.BYTES; } else { myThetaLong = (long) (Long.MAX_VALUE * (double) mySamplingProbability); @@ -267,16 +271,16 @@ long validate( int count = 0; final boolean hasEntries = (flags & (1 << Flags.HAS_ENTRIES.ordinal())) > 0; if (hasEntries) { - count = mem.getInt(offset); + count = seg.get(JAVA_INT_UNALIGNED, offset); offset += Integer.BYTES; } final int currentCapacity = 1 << myLgCurrentCapacity; myHashTable = new long[currentCapacity]; for (int i = 0; i < count; i++) { - final long hash = mem.getLong(offset); + final long hash = seg.get(JAVA_LONG_UNALIGNED, offset); offset += Long.BYTES; - final Memory memRegion = mem.region(offset, mem.getCapacity() - offset); - final DeserializeResult summaryResult = deserializer.heapifySummary(memRegion); + final MemorySegment segRegion = seg.asSlice(offset, seg.byteSize() - offset); + final DeserializeResult summaryResult = deserializer.heapifySummary(segRegion); final S summary = (S) summaryResult.getObject(); offset += summaryResult.getSize(); //in-place equivalent to insert(hash, summary): diff --git a/src/main/java/org/apache/datasketches/tuple/SerializerDeserializer.java b/src/main/java/org/apache/datasketches/tuple/SerializerDeserializer.java index a30d47edf..2ca7c29df 100644 --- a/src/main/java/org/apache/datasketches/tuple/SerializerDeserializer.java +++ b/src/main/java/org/apache/datasketches/tuple/SerializerDeserializer.java @@ -19,9 +19,12 @@ package org.apache.datasketches.tuple; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; + +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import 
org.apache.datasketches.memory.Memory; /** * Multipurpose serializer-deserializer for a collection of sketches defined by the enum. @@ -77,12 +80,12 @@ public static void validateType(final byte sketchTypeByte, final SketchType expe } /** - * Gets the sketch type byte from the given Memory image - * @param mem the given Memory image + * Gets the sketch type byte from the given MemorySegment image + * @param seg the given MemorySegment image * @return the SketchType */ - public static SketchType getSketchType(final Memory mem) { - final byte sketchTypeByte = mem.getByte(TYPE_BYTE_OFFSET); + public static SketchType getSketchType(final MemorySegment seg) { + final byte sketchTypeByte = seg.get(JAVA_BYTE, TYPE_BYTE_OFFSET); return getSketchType(sketchTypeByte); } diff --git a/src/main/java/org/apache/datasketches/tuple/Sketch.java b/src/main/java/org/apache/datasketches/tuple/Sketch.java index f0003bf3c..8fd81f78c 100644 --- a/src/main/java/org/apache/datasketches/tuple/Sketch.java +++ b/src/main/java/org/apache/datasketches/tuple/Sketch.java @@ -24,7 +24,7 @@ import org.apache.datasketches.thetacommon.BinomialBoundsN; /** - * This is an equivalent to org.apache.datasketches.theta.Sketch with + * This is an equivalent to org.apache.datasketches.theta2.Sketch with * addition of a user-defined Summary object associated with every unique entry * in the sketch. * @param Type of Summary diff --git a/src/main/java/org/apache/datasketches/tuple/Sketches.java b/src/main/java/org/apache/datasketches/tuple/Sketches.java index fba8ab604..6eef7f88d 100644 --- a/src/main/java/org/apache/datasketches/tuple/Sketches.java +++ b/src/main/java/org/apache/datasketches/tuple/Sketches.java @@ -19,7 +19,7 @@ package org.apache.datasketches.tuple; -import org.apache.datasketches.memory.Memory; +import java.lang.foreign.MemorySegment; /** * Convenient static methods to instantiate generic tuple sketches. 
@@ -37,36 +37,36 @@ public static Sketch createEmptySketch() { } /** - * Instantiate a Sketch from a given Memory. + * Instantiate a Sketch from a given MemorySegment. * @param Type of Summary - * @param mem Memory object representing a Sketch + * @param seg MemorySegment object representing a Sketch * @param deserializer instance of SummaryDeserializer - * @return Sketch created from its Memory representation + * @return Sketch created from its MemorySegment representation */ public static Sketch heapifySketch( - final Memory mem, + final MemorySegment seg, final SummaryDeserializer deserializer) { - final SerializerDeserializer.SketchType sketchType = SerializerDeserializer.getSketchType(mem); + final SerializerDeserializer.SketchType sketchType = SerializerDeserializer.getSketchType(seg); if (sketchType == SerializerDeserializer.SketchType.QuickSelectSketch) { - return new QuickSelectSketch<>(mem, deserializer, null); + return new QuickSelectSketch<>(seg, deserializer, null); } - return new CompactSketch<>(mem, deserializer); + return new CompactSketch<>(seg, deserializer); } /** - * Instantiate UpdatableSketch from a given Memory + * Instantiate UpdatableSketch from a given MemorySegment * @param Type of update value * @param Type of Summary - * @param mem Memory object representing a Sketch + * @param seg MemorySegment object representing a Sketch * @param deserializer instance of SummaryDeserializer * @param summaryFactory instance of SummaryFactory - * @return Sketch created from its Memory representation + * @return Sketch created from its MemorySegment representation */ public static > UpdatableSketch heapifyUpdatableSketch( - final Memory mem, + final MemorySegment seg, final SummaryDeserializer deserializer, final SummaryFactory summaryFactory) { - return new UpdatableSketch<>(mem, deserializer, summaryFactory); + return new UpdatableSketch<>(seg, deserializer, summaryFactory); } } diff --git 
a/src/main/java/org/apache/datasketches/tuple/SummaryDeserializer.java b/src/main/java/org/apache/datasketches/tuple/SummaryDeserializer.java index 8edbc3318..6393019b1 100644 --- a/src/main/java/org/apache/datasketches/tuple/SummaryDeserializer.java +++ b/src/main/java/org/apache/datasketches/tuple/SummaryDeserializer.java @@ -19,7 +19,7 @@ package org.apache.datasketches.tuple; -import org.apache.datasketches.memory.Memory; +import java.lang.foreign.MemorySegment; /** * Interface for deserializing user-defined Summary @@ -29,14 +29,14 @@ public interface SummaryDeserializer { /** * This is to create an instance of a Summary given a serialized representation. - * The user may assume that the start of the given Memory is the correct place to start + * The user may assume that the start of the given MemorySegment is the correct place to start * deserializing. However, the user must be able to determine the number of bytes required to - * deserialize the summary as the capacity of the given Memory may + * deserialize the summary as the capacity of the given MemorySegment may * include multiple such summaries and may be much larger than required for a single summary. 
- * @param mem Memory object with serialized representation of a Summary - * @return DeserializedResult object, which contains a Summary object and number of bytes read - * from the Memory + * @param seg MemorySegment object with serialized representation of a Summary + * @return DeserializedResult object, which contains a Summary object and number of bytes read + * from the MemorySegment */ - public DeserializeResult heapifySummary(Memory mem); + public DeserializeResult heapifySummary(MemorySegment seg); } diff --git a/src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java b/src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java index f7a552add..b706ca11e 100644 --- a/src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java @@ -21,10 +21,10 @@ import static org.apache.datasketches.common.Util.DEFAULT_UPDATE_SEED; +import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; import org.apache.datasketches.hash.MurmurHash3; -import org.apache.datasketches.memory.Memory; /** * An extension of QuickSelectSketch<S>, which can be updated with many types of keys. @@ -64,7 +64,7 @@ public UpdatableSketch(final int nomEntries, final int lgResizeFactor, /** * This is to create an instance of a sketch given a serialized form - * @param srcMem Memory object with data of a serialized UpdatableSketch + * @param srcSeg MemorySegment object with data of a serialized UpdatableSketch * @param deserializer instance of SummaryDeserializer * @param summaryFactory instance of SummaryFactory * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated. 
@@ -73,10 +73,10 @@ public UpdatableSketch(final int nomEntries, final int lgResizeFactor, */ @Deprecated public UpdatableSketch( - final Memory srcMem, + final MemorySegment srcSeg, final SummaryDeserializer deserializer, final SummaryFactory summaryFactory) { - super(srcMem, deserializer, summaryFactory); + super(srcSeg, deserializer, summaryFactory); } /** diff --git a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSketch.java b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSketch.java index 1bb9edeca..d51451cf5 100644 --- a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSketch.java @@ -19,8 +19,9 @@ package org.apache.datasketches.tuple.adouble; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.memory.Memory; import org.apache.datasketches.tuple.UpdatableSketch; /** @@ -59,17 +60,17 @@ public DoubleSketch(final int lgK, final int lgResizeFactor, final float samplin } /** - * Constructs this sketch from a Memory image, which must be from an DoubleSketch, and + * Constructs this sketch from a MemorySegment image, which must be from an DoubleSketch, and * usually with data. - * @param mem the given Memory + * @param seg the given MemorySegment * @param mode The DoubleSummary mode to be used * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated. * This capability will be removed in a future release. * Heapifying a CompactSketch is not deprecated. 
*/ @Deprecated - public DoubleSketch(final Memory mem, final DoubleSummary.Mode mode) { - super(mem, new DoubleSummaryDeserializer(), new DoubleSummaryFactory(mode)); + public DoubleSketch(final MemorySegment seg, final DoubleSummary.Mode mode) { + super(seg, new DoubleSummaryDeserializer(), new DoubleSummaryFactory(mode)); } @Override diff --git a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java index f678fba06..e7268ffef 100644 --- a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java +++ b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java @@ -19,8 +19,12 @@ package org.apache.datasketches.tuple.adouble; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED; + +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.ByteArrayUtil; -import org.apache.datasketches.memory.Memory; import org.apache.datasketches.tuple.DeserializeResult; import org.apache.datasketches.tuple.UpdatableSummary; @@ -146,13 +150,13 @@ public byte[] toByteArray() { /** * Creates an instance of the DoubleSummary given a serialized representation - * @param mem Memory object with serialized DoubleSummary + * @param seg MemorySegment object with serialized DoubleSummary * @return DeserializedResult object, which contains a DoubleSummary object and number of bytes - * read from the Memory + * read from the MemorySegment */ - public static DeserializeResult fromMemory(final Memory mem) { - return new DeserializeResult<>(new DoubleSummary(mem.getDouble(VALUE_INDEX), - Mode.values()[mem.getByte(MODE_BYTE_INDEX)]), SERIALIZED_SIZE_BYTES); + public static DeserializeResult fromMemorySegment(final MemorySegment seg) { + return new DeserializeResult<>(new DoubleSummary(seg.get(JAVA_DOUBLE_UNALIGNED, VALUE_INDEX), + Mode.values()[seg.get(JAVA_BYTE, MODE_BYTE_INDEX)]), 
SERIALIZED_SIZE_BYTES); } } diff --git a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummaryDeserializer.java b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummaryDeserializer.java index 90e01b72b..b105a6b16 100644 --- a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummaryDeserializer.java +++ b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummaryDeserializer.java @@ -19,7 +19,8 @@ package org.apache.datasketches.tuple.adouble; -import org.apache.datasketches.memory.Memory; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.tuple.DeserializeResult; import org.apache.datasketches.tuple.SummaryDeserializer; @@ -30,8 +31,8 @@ public class DoubleSummaryDeserializer implements SummaryDeserializer { @Override - public DeserializeResult heapifySummary(final Memory mem) { - return DoubleSummary.fromMemory(mem); + public DeserializeResult heapifySummary(final MemorySegment seg) { + return DoubleSummary.fromMemorySegment(seg); } } diff --git a/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSketch.java b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSketch.java index 0bad790cc..5fdfe270e 100644 --- a/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSketch.java @@ -19,8 +19,9 @@ package org.apache.datasketches.tuple.aninteger; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.memory.Memory; import org.apache.datasketches.tuple.UpdatableSketch; /** @@ -59,17 +60,17 @@ public IntegerSketch(final int lgK, final int lgResizeFactor, final float sampli } /** - * Constructs this sketch from a Memory image, which must be from an IntegerSketch, and + * Constructs this sketch from a MemorySegment image, which must be from an IntegerSketch, and * usually with data. 
- * @param mem the given Memory + * @param seg the given MemorySegment * @param mode The IntegerSummary mode to be used * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated. * This capability will be removed in a future release. * Heapifying a CompactSketch is not deprecated. */ @Deprecated - public IntegerSketch(final Memory mem, final IntegerSummary.Mode mode) { - super(mem, new IntegerSummaryDeserializer(), new IntegerSummaryFactory(mode)); + public IntegerSketch(final MemorySegment seg, final IntegerSummary.Mode mode) { + super(seg, new IntegerSummaryDeserializer(), new IntegerSummaryFactory(mode)); } @Override diff --git a/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java index a8cfdab56..b8c7c23fb 100644 --- a/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java +++ b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java @@ -19,8 +19,12 @@ package org.apache.datasketches.tuple.aninteger; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; + +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.ByteArrayUtil; -import org.apache.datasketches.memory.Memory; import org.apache.datasketches.tuple.DeserializeResult; import org.apache.datasketches.tuple.UpdatableSummary; @@ -146,13 +150,13 @@ public byte[] toByteArray() { /** * Creates an instance of the IntegerSummary given a serialized representation - * @param mem Memory object with serialized IntegerSummary + * @param seg MemorySegment object with serialized IntegerSummary * @return DeserializedResult object, which contains a IntegerSummary object and number of bytes - * read from the Memory + * read from the MemorySegment */ - public static DeserializeResult fromMemory(final Memory mem) { - return new DeserializeResult<>(new 
IntegerSummary(mem.getInt(VALUE_INDEX), - Mode.values()[mem.getByte(MODE_BYTE_INDEX)]), SERIALIZED_SIZE_BYTES); + public static DeserializeResult fromMemorySegment(final MemorySegment seg) { + return new DeserializeResult<>(new IntegerSummary(seg.get(JAVA_INT_UNALIGNED, VALUE_INDEX), + Mode.values()[seg.get(JAVA_BYTE, MODE_BYTE_INDEX)]), SERIALIZED_SIZE_BYTES); } } diff --git a/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummaryDeserializer.java b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummaryDeserializer.java index 68d468785..0941a1088 100644 --- a/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummaryDeserializer.java +++ b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummaryDeserializer.java @@ -19,7 +19,8 @@ package org.apache.datasketches.tuple.aninteger; -import org.apache.datasketches.memory.Memory; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.tuple.DeserializeResult; import org.apache.datasketches.tuple.SummaryDeserializer; @@ -30,8 +31,8 @@ public class IntegerSummaryDeserializer implements SummaryDeserializer { @Override - public DeserializeResult heapifySummary(final Memory mem) { - return IntegerSummary.fromMemory(mem); + public DeserializeResult heapifySummary(final MemorySegment seg) { + return IntegerSummary.fromMemorySegment(seg); } } diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesAnotB.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesAnotB.java index 5fda000bc..670038c01 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesAnotB.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesAnotB.java @@ -19,7 +19,7 @@ package org.apache.datasketches.tuple.arrayofdoubles; -import org.apache.datasketches.memory.WritableMemory; +import java.lang.foreign.MemorySegment; /** * Computes a set difference of two tuple sketches of type 
ArrayOfDoubles @@ -48,10 +48,10 @@ public abstract class ArrayOfDoublesAnotB { /** * Gets the result of this operation in the form of a ArrayOfDoublesCompactSketch - * @param mem memory for the result (can be null) - * @return compact sketch representing the result of the operation (off-heap if memory is + * @param seg MemorySegment for the result (can be null) + * @return compact sketch representing the result of the operation (off-heap if MemorySegment is * provided) */ - public abstract ArrayOfDoublesCompactSketch getResult(WritableMemory mem); + public abstract ArrayOfDoublesCompactSketch getResult(MemorySegment seg); } diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesAnotBImpl.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesAnotBImpl.java index b4552cc73..8d98b8e3a 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesAnotBImpl.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesAnotBImpl.java @@ -26,15 +26,16 @@ import static org.apache.datasketches.thetacommon.HashOperations.count; import static org.apache.datasketches.thetacommon.HashOperations.hashSearch; +import java.lang.foreign.MemorySegment; import java.util.Arrays; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SuppressFBWarnings; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon.SetOperationCornerCases; +import org.apache.datasketches.thetacommon.ThetaUtil; import org.apache.datasketches.thetacommon.SetOperationCornerCases.AnotbAction; import org.apache.datasketches.thetacommon.SetOperationCornerCases.CornerCase; -import org.apache.datasketches.thetacommon.ThetaUtil; /** * Computes a set difference, A-AND-NOT-B, of two ArrayOfDoublesSketches. 
@@ -59,7 +60,7 @@ public class ArrayOfDoublesAnotBImpl extends ArrayOfDoublesAnotB { ArrayOfDoublesAnotBImpl(final int numValues, final long seed) { numValues_ = numValues; - seedHash_ = org.apache.datasketches.common.Util.computeSeedHash(seed); + seedHash_ = Util.computeSeedHash(seed); } @Override @@ -153,8 +154,8 @@ public ArrayOfDoublesCompactSketch getResult() { } @Override - public ArrayOfDoublesCompactSketch getResult(final WritableMemory dstMem) { - return new DirectArrayOfDoublesCompactSketch(keys_, values_, thetaLong_, empty_, numValues_, seedHash_, dstMem); + public ArrayOfDoublesCompactSketch getResult(final MemorySegment dstSeg) { + return new DirectArrayOfDoublesCompactSketch(keys_, values_, thetaLong_, empty_, numValues_, seedHash_, dstSeg); } private static DataArrays getResultArrays( diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesIntersection.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesIntersection.java index 26f0f2275..7c474ee45 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesIntersection.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesIntersection.java @@ -21,9 +21,11 @@ import static java.lang.Math.min; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.common.Util; /** * Computes the intersection of two or more tuple sketches of type ArrayOfDoubles. @@ -47,7 +49,7 @@ public abstract class ArrayOfDoublesIntersection { * @param seed the hash function update seed. 
*/ ArrayOfDoublesIntersection(final int numValues, final long seed) { - seedHash_ = org.apache.datasketches.common.Util.computeSeedHash(seed); + seedHash_ = Util.computeSeedHash(seed); numValues_ = numValues; hashTables_ = null; empty_ = false; @@ -63,7 +65,7 @@ public abstract class ArrayOfDoublesIntersection { */ public void intersect(final ArrayOfDoublesSketch tupleSketch, final ArrayOfDoublesCombiner combiner) { if (tupleSketch == null) { throw new SketchesArgumentException("Sketch must not be null"); } - org.apache.datasketches.common.Util.checkSeedHashes(seedHash_, tupleSketch.getSeedHash()); + Util.checkSeedHashes(seedHash_, tupleSketch.getSeedHash()); if (tupleSketch.numValues_ != numValues_) { throw new SketchesArgumentException( "Input tupleSketch cannot have different numValues from the internal numValues."); @@ -115,10 +117,10 @@ public ArrayOfDoublesCompactSketch getResult() { /** * Gets the result of stateful intersections so far. - * @param dstMem Memory for the compact sketch (can be null). + * @param dstSeg MemorySegment for the compact sketch (can be null). * @return Result of the intersections so far as a compact sketch. */ - public ArrayOfDoublesCompactSketch getResult(final WritableMemory dstMem) { + public ArrayOfDoublesCompactSketch getResult(final MemorySegment dstSeg) { if (firstCall_) { throw new SketchesStateException( "getResult() with no intervening intersections is not a legal result."); @@ -149,11 +151,11 @@ public ArrayOfDoublesCompactSketch getResult(final WritableMemory dstMem) { } } - return (dstMem == null) + return (dstSeg == null) ? 
new HeapArrayOfDoublesCompactSketch(hashArrOut, valuesArrOut, thetaLong_, empty_, numValues_, seedHash_) : new DirectArrayOfDoublesCompactSketch(hashArrOut, valuesArrOut, - thetaLong_, empty_, numValues_, seedHash_, dstMem); + thetaLong_, empty_, numValues_, seedHash_, dstSeg); } /** diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java index a1dba3512..ddf102451 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java @@ -21,9 +21,10 @@ import static org.apache.datasketches.common.Util.ceilingPowerOf2; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.QuickSelect; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.ThetaUtil; /** @@ -85,7 +86,7 @@ abstract class ArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesUpdatableSk abstract int getSerializedSizeBytes(); - abstract void serializeInto(WritableMemory mem); + abstract void serializeInto(MemorySegment seg); @Override public void trim() { diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java index e866a3191..14177b499 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java @@ -19,8 +19,9 @@ package org.apache.datasketches.tuple.arrayofdoubles; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Util; -import 
org.apache.datasketches.memory.WritableMemory; /** * Builds set operations object for tuple sketches of type ArrayOfDoubles. @@ -83,7 +84,7 @@ public ArrayOfDoublesSetOperationBuilder setSeed(final long seed) { /** * Creates an instance of ArrayOfDoublesUnion based on the current configuration of the builder. - * The new instance is allocated on the heap if the memory is not provided. + * The new instance is allocated on the heap. * @return an instance of ArrayOfDoublesUnion */ public ArrayOfDoublesUnion buildUnion() { @@ -92,18 +93,18 @@ public ArrayOfDoublesUnion buildUnion() { /** * Creates an instance of ArrayOfDoublesUnion based on the current configuration of the builder - * and the given destination memory. - * @param dstMem destination memory to be used by the sketch + * and the given destination MemorySegment. + * @param dstSeg destination MemorySegment to be used by the sketch * @return an instance of ArrayOfDoublesUnion */ - public ArrayOfDoublesUnion buildUnion(final WritableMemory dstMem) { - return new DirectArrayOfDoublesUnion(nomEntries_, numValues_, seed_, dstMem); + public ArrayOfDoublesUnion buildUnion(final MemorySegment dstSeg) { + return new DirectArrayOfDoublesUnion(nomEntries_, numValues_, seed_, dstSeg); } /** * Creates an instance of ArrayOfDoublesIntersection based on the current configuration of the * builder. - * The new instance is allocated on the heap if the memory is not provided. + * The new instance is allocated on the heap. * The number of nominal entries is not relevant to this, so it is ignored. * @return an instance of ArrayOfDoublesIntersection */ @@ -112,20 +113,18 @@ public ArrayOfDoublesIntersection buildIntersection() { } /** - * Creates an instance of ArrayOfDoublesIntersection based on the current configuration of the - * builder. - * The new instance is allocated on the heap if the memory is not provided. 
+ * Creates an instance of ArrayOfDoublesIntersection in the given MemorySegment and based on the + * current configuration of the builder. * The number of nominal entries is not relevant to this, so it is ignored. - * @param dstMem destination memory to be used by the sketch + * @param dstSeg destination MemorySegment to be used by the sketch * @return an instance of ArrayOfDoublesIntersection */ - public ArrayOfDoublesIntersection buildIntersection(final WritableMemory dstMem) { - return new DirectArrayOfDoublesIntersection(numValues_, seed_, dstMem); + public ArrayOfDoublesIntersection buildIntersection(final MemorySegment dstSeg) { + return new DirectArrayOfDoublesIntersection(numValues_, seed_, dstSeg); } /** * Creates an instance of ArrayOfDoublesAnotB based on the current configuration of the builder. - * The memory is not relevant to this, so it is ignored if set. * The number of nominal entries is not relevant to this, so it is ignored. * @return an instance of ArrayOfDoublesAnotB */ diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSketch.java index 5dd0548f4..c901f81fd 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSketch.java @@ -21,9 +21,9 @@ import static org.apache.datasketches.common.Util.LS; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.BinomialBoundsN; import org.apache.datasketches.tuple.SerializerDeserializer; @@ -71,49 +71,51 @@ static enum Flags { IS_BIG_ENDIAN, IS_IN_SAMPLING_MODE, IS_EMPTY, HAS_ENTRIES } } /** - * Heapify the given Memory as an ArrayOfDoublesSketch - * @param mem the given Memory + * Heapify 
the given MemorySegment as an ArrayOfDoublesSketch + * @param seg the given MemorySegment * @return an ArrayOfDoublesSketch */ - public static ArrayOfDoublesSketch heapify(final Memory mem) { - return heapify(mem, Util.DEFAULT_UPDATE_SEED); + public static ArrayOfDoublesSketch heapify(final MemorySegment seg) { + return heapify(seg, Util.DEFAULT_UPDATE_SEED); } /** - * Heapify the given Memory and seed as a ArrayOfDoublesSketch - * @param mem the given Memory + * Heapify the given MemorySegment and seed as an ArrayOfDoublesSketch + * @param seg the given MemorySegment * @param seed the given seed * @return an ArrayOfDoublesSketch */ - public static ArrayOfDoublesSketch heapify(final Memory mem, final long seed) { - final SerializerDeserializer.SketchType sketchType = SerializerDeserializer.getSketchType(mem); + public static ArrayOfDoublesSketch heapify(final MemorySegment seg, final long seed) { + final SerializerDeserializer.SketchType sketchType = SerializerDeserializer.getSketchType(seg); if (sketchType == SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch) { - return new HeapArrayOfDoublesQuickSelectSketch(mem, seed); + return new HeapArrayOfDoublesQuickSelectSketch(seg, seed); } - return new HeapArrayOfDoublesCompactSketch(mem, seed); + return new HeapArrayOfDoublesCompactSketch(seg, seed); } /** - * Wrap the given Memory as an ArrayOfDoublesSketch - * @param mem the given Memory + * Wrap the given MemorySegment as an ArrayOfDoublesSketch. + * If the given source MemorySegment is read-only, the returned sketch object will also be read-only. 
+ * @param seg the given MemorySegment * @return an ArrayOfDoublesSketch */ - public static ArrayOfDoublesSketch wrap(final Memory mem) { - return wrap(mem, Util.DEFAULT_UPDATE_SEED); + public static ArrayOfDoublesSketch wrap(final MemorySegment seg) { + return wrap(seg, Util.DEFAULT_UPDATE_SEED); } /** - * Wrap the given Memory and seed as a ArrayOfDoublesSketch - * @param mem the given Memory + * Wrap the given MemorySegment and seed as an ArrayOfDoublesSketch. + * If the given source MemorySegment is read-only, the returned sketch object will also be read-only. + * @param seg the given MemorySegment * @param seed the given seed * @return an ArrayOfDoublesSketch */ - public static ArrayOfDoublesSketch wrap(final Memory mem, final long seed) { - final SerializerDeserializer.SketchType sketchType = SerializerDeserializer.getSketchType(mem); + public static ArrayOfDoublesSketch wrap(final MemorySegment seg, final long seed) { + final SerializerDeserializer.SketchType sketchType = SerializerDeserializer.getSketchType(seg); if (sketchType == SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch) { - return new DirectArrayOfDoublesQuickSelectSketchR(mem, seed); + return new DirectArrayOfDoublesQuickSelectSketchR(seg, seed); } - return new DirectArrayOfDoublesCompactSketch(mem, seed); + return new DirectArrayOfDoublesCompactSketch(seg, seed); } /** @@ -152,16 +154,16 @@ public double getLowerBound(final int numStdDev) { } /** - * Returns true if this sketch's data structure is backed by Memory or WritableMemory. - * @return true if this sketch's data structure is backed by Memory or WritableMemory. + * Returns true if this sketch's data structure is backed by MemorySegment. + * @return true if this sketch's data structure is backed by MemorySegment. */ - public abstract boolean hasMemory(); + public abstract boolean hasMemorySegment(); /** - * Returns the Memory object if it exists, otherwise null. - * @return the Memory object if it exists, otherwise null. 
+ * Returns the MemorySegment object if it exists, otherwise null. + * @return the MemorySegment object if it exists, otherwise null. */ - abstract Memory getMemory(); + abstract MemorySegment getMemorySegment(); /** * See Empty @@ -256,10 +258,10 @@ public ArrayOfDoublesCompactSketch compact() { /** * Returns this sketch in compact form, which is immutable. - * @param dstMem the destination WritableMemory + * @param dstSeg the destination MemorySegment * @return this sketch in compact form, which is immutable. */ - public abstract ArrayOfDoublesCompactSketch compact(WritableMemory dstMem); + public abstract ArrayOfDoublesCompactSketch compact(MemorySegment dstSeg); @Override public String toString() { diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSketches.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSketches.java index 575d1d7d7..53a145beb 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSketches.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSketches.java @@ -19,9 +19,9 @@ package org.apache.datasketches.tuple.arrayofdoubles; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; /** * Convenient static methods to instantiate tuple sketches of type ArrayOfDoubles. 
@@ -29,136 +29,123 @@ public final class ArrayOfDoublesSketches { /** - * Heapify the given Memory as an ArrayOfDoublesSketch - * @param srcMem the given source Memory + * Heapify the given MemorySegment as an ArrayOfDoublesSketch + * @param srcSeg the given source MemorySegment * @return an ArrayOfDoublesSketch */ - public static ArrayOfDoublesSketch heapifySketch(final Memory srcMem) { - return heapifySketch(srcMem, Util.DEFAULT_UPDATE_SEED); + public static ArrayOfDoublesSketch heapifySketch(final MemorySegment srcSeg) { + return heapifySketch(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** - * Heapify the given Memory and seed as a ArrayOfDoublesSketch - * @param srcMem the given source Memory + * Heapify the given MemorySegment and seed as a ArrayOfDoublesSketch + * @param srcSeg the given source MemorySegment * @param seed the given seed * @return an ArrayOfDoublesSketch */ - public static ArrayOfDoublesSketch heapifySketch(final Memory srcMem, final long seed) { - return ArrayOfDoublesSketch.heapify(srcMem, seed); + public static ArrayOfDoublesSketch heapifySketch(final MemorySegment srcSeg, final long seed) { + return ArrayOfDoublesSketch.heapify(srcSeg, seed); } /** - * Heapify the given Memory as an ArrayOfDoublesUpdatableSketch - * @param srcMem the given source Memory + * Heapify the given MemorySegment as an ArrayOfDoublesUpdatableSketch + * @param srcSeg the given source MemorySegment * @return an ArrayOfDoublesUpdatableSketch */ - public static ArrayOfDoublesUpdatableSketch heapifyUpdatableSketch(final Memory srcMem) { - return heapifyUpdatableSketch(srcMem, Util.DEFAULT_UPDATE_SEED); + public static ArrayOfDoublesUpdatableSketch heapifyUpdatableSketch(final MemorySegment srcSeg) { + return heapifyUpdatableSketch(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** - * Heapify the given Memory and seed as a ArrayOfDoublesUpdatableSketch - * @param srcMem the given source Memory + * Heapify the given MemorySegment and seed as a ArrayOfDoublesUpdatableSketch + * @param 
srcSeg the given source MemorySegment * @param seed the given seed * @return an ArrayOfDoublesUpdatableSketch */ - public static ArrayOfDoublesUpdatableSketch heapifyUpdatableSketch(final Memory srcMem, final long seed) { - return ArrayOfDoublesUpdatableSketch.heapify(srcMem, seed); + public static ArrayOfDoublesUpdatableSketch heapifyUpdatableSketch(final MemorySegment srcSeg, final long seed) { + return ArrayOfDoublesUpdatableSketch.heapify(srcSeg, seed); } /** - * Wrap the given Memory as an ArrayOfDoublesSketch - * @param srcMem the given source Memory + * Wrap the given MemorySegment as an ArrayOfDoublesSketch. + * If the given source MemorySegment is read-only, the returned sketch object will also be read-only. + * @param srcSeg the given source MemorySegment * @return an ArrayOfDoublesSketch */ - public static ArrayOfDoublesSketch wrapSketch(final Memory srcMem) { - return wrapSketch(srcMem, Util.DEFAULT_UPDATE_SEED); + public static ArrayOfDoublesSketch wrapSketch(final MemorySegment srcSeg) { + return wrapSketch(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** - * Wrap the given Memory and seed as a ArrayOfDoublesSketch - * @param srcMem the given source Memory + * Wrap the given MemorySegment and seed as a ArrayOfDoublesSketch. + * If the given source MemorySegment is read-only, the returned sketch object will also be read-only. + * @param srcSeg the given source MemorySegment * @param seed the given seed * @return an ArrayOfDoublesSketch */ - public static ArrayOfDoublesSketch wrapSketch(final Memory srcMem, final long seed) { - return ArrayOfDoublesSketch.wrap(srcMem, seed); + public static ArrayOfDoublesSketch wrapSketch(final MemorySegment srcSeg, final long seed) { + return ArrayOfDoublesSketch.wrap(srcSeg, seed); } /** - * Wrap the given WritableMemory as an ArrayOfDoublesUpdatableSketch - * @param srcMem the given source Memory + * Wrap the given MemorySegment as an ArrayOfDoublesUpdatableSketch.
+ * If the given source MemorySegment is read-only, the returned sketch object will also be read-only. + * @param srcSeg the given source MemorySegment * @return an ArrayOfDoublesUpdatableSketch */ - public static ArrayOfDoublesUpdatableSketch wrapUpdatableSketch(final WritableMemory srcMem) { - return wrapUpdatableSketch(srcMem, Util.DEFAULT_UPDATE_SEED); + public static ArrayOfDoublesUpdatableSketch wrapUpdatableSketch(final MemorySegment srcSeg) { + return wrapUpdatableSketch(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** - * Wrap the given WritableMemory and seed as a ArrayOfDoublesUpdatableSketch - * @param srcMem the given source Memory + * Wrap the given MemorySegment and seed as a ArrayOfDoublesUpdatableSketch. + * If the given source MemorySegment is read-only, the returned sketch object will also be read-only. + * @param srcSeg the given source MemorySegment * @param seed the given seed * @return an ArrayOfDoublesUpdatableSketch */ - public static ArrayOfDoublesUpdatableSketch wrapUpdatableSketch(final WritableMemory srcMem, final long seed) { - return ArrayOfDoublesUpdatableSketch.wrap(srcMem, seed); - } - - /** - * Heapify the given Memory as an ArrayOfDoublesUnion - * @param srcMem the given source Memory - * @return an ArrayOfDoublesUnion - */ - public static ArrayOfDoublesUnion heapifyUnion(final Memory srcMem) { - return heapifyUnion(srcMem, Util.DEFAULT_UPDATE_SEED); - } - - /** - * Heapify the given Memory and seed as an ArrayOfDoublesUnion - * @param srcMem the given source Memory - * @param seed the given seed - * @return an ArrayOfDoublesUnion - */ - public static ArrayOfDoublesUnion heapifyUnion(final Memory srcMem, final long seed) { - return ArrayOfDoublesUnion.heapify(srcMem, seed); + public static ArrayOfDoublesUpdatableSketch wrapUpdatableSketch(final MemorySegment srcSeg, final long seed) { + return ArrayOfDoublesUpdatableSketch.wrap(srcSeg, seed); } /** - * Wrap the given Memory as an ArrayOfDoublesUnion - * @param srcMem the given source
Memory + * Heapify the given MemorySegment as an ArrayOfDoublesUnion + * @param srcSeg the given source MemorySegment * @return an ArrayOfDoublesUnion */ - public static ArrayOfDoublesUnion wrapUnion(final Memory srcMem) { - return wrapUnion(srcMem, Util.DEFAULT_UPDATE_SEED); + public static ArrayOfDoublesUnion heapifyUnion(final MemorySegment srcSeg) { + return heapifyUnion(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** - * Wrap the given Memory and seed as an ArrayOfDoublesUnion - * @param srcMem the given source Memory + * Heapify the given MemorySegment and seed as an ArrayOfDoublesUnion + * @param srcSeg the given source MemorySegment * @param seed the given seed * @return an ArrayOfDoublesUnion */ - public static ArrayOfDoublesUnion wrapUnion(final Memory srcMem, final long seed) { - return ArrayOfDoublesUnion.wrap(srcMem, seed); + public static ArrayOfDoublesUnion heapifyUnion(final MemorySegment srcSeg, final long seed) { + return ArrayOfDoublesUnion.heapify(srcSeg, seed); } /** - * Wrap the given Memory as an ArrayOfDoublesUnion - * @param srcMem the given source Memory + * Wrap the given MemorySegment as an ArrayOfDoublesUnion + * If the given source MemorySegment is read-only, the returned Union object will also be read-only. + * @param srcSeg the given source MemorySegment * @return an ArrayOfDoublesUnion */ - public static ArrayOfDoublesUnion wrapUnion(final WritableMemory srcMem) { - return wrapUnion(srcMem, Util.DEFAULT_UPDATE_SEED); + public static ArrayOfDoublesUnion wrapUnion(final MemorySegment srcSeg) { + return wrapUnion(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** - * Wrap the given Memory and seed as an ArrayOfDoublesUnion - * @param srcMem the given source Memory + * Wrap the given MemorySegment and seed as an ArrayOfDoublesUnion + * If the given source MemorySegment is read-only, the returned Union object will also be read-only. 
+ * @param srcSeg the given source MemorySegment * @param seed the given seed * @return an ArrayOfDoublesUnion */ - public static ArrayOfDoublesUnion wrapUnion(final WritableMemory srcMem, final long seed) { - return ArrayOfDoublesUnion.wrap(srcMem, seed); + public static ArrayOfDoublesUnion wrapUnion(final MemorySegment srcSeg, final long seed) { + return ArrayOfDoublesUnion.wrap(srcSeg, seed); } } diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUnion.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUnion.java index b0a95eecf..58a907702 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUnion.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUnion.java @@ -19,13 +19,15 @@ package org.apache.datasketches.tuple.arrayofdoubles; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static java.lang.Math.min; -import static org.apache.datasketches.common.Util.DEFAULT_UPDATE_SEED; + +import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.common.Util; import org.apache.datasketches.tuple.SerializerDeserializer; /** @@ -58,60 +60,43 @@ public abstract class ArrayOfDoublesUnion { } /** - * Heapify the given Memory as an ArrayOfDoublesUnion - * @param srcMem the given source Memory - * @return an ArrayOfDoublesUnion - */ - public static ArrayOfDoublesUnion heapify(final Memory srcMem) { - return heapify(srcMem, DEFAULT_UPDATE_SEED); - } - - /** - * Heapify the given Memory and seed as an ArrayOfDoublesUnion - * @param srcMem the given source Memory - * @param seed the given seed - * @return an ArrayOfDoublesUnion - */ - public static 
ArrayOfDoublesUnion heapify(final Memory srcMem, final long seed) { - return HeapArrayOfDoublesUnion.heapifyUnion(srcMem, seed); - } - - /** - * Wrap the given Memory as an ArrayOfDoublesUnion - * @param srcMem the given source Memory + * Heapify the given MemorySegment as an ArrayOfDoublesUnion. + * @param srcSeg the given source MemorySegment * @return an ArrayOfDoublesUnion */ - public static ArrayOfDoublesUnion wrap(final Memory srcMem) { - return wrap(srcMem, DEFAULT_UPDATE_SEED); + public static ArrayOfDoublesUnion heapify(final MemorySegment srcSeg) { + return heapify(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** - * Wrap the given Memory and seed as an ArrayOfDoublesUnion - * @param srcMem the given source Memory + * Heapify the given MemorySegment and seed as an ArrayOfDoublesUnion. + * @param srcSeg the given source MemorySegment * @param seed the given seed * @return an ArrayOfDoublesUnion */ - public static ArrayOfDoublesUnion wrap(final Memory srcMem, final long seed) { - return DirectArrayOfDoublesUnion.wrapUnion((WritableMemory) srcMem, seed, false); + public static ArrayOfDoublesUnion heapify(final MemorySegment srcSeg, final long seed) { + return HeapArrayOfDoublesUnion.heapifyUnion(srcSeg, seed); } /** - * Wrap the given WritableMemory as an ArrayOfDoublesUnion - * @param srcMem the given source Memory + * Wrap the given MemorySegment as an ArrayOfDoublesUnion. + * If the given source MemorySegment is read-only, the returned Union object will also be read-only. 
+ * @param srcSeg the given source MemorySegment * @return an ArrayOfDoublesUnion */ - public static ArrayOfDoublesUnion wrap(final WritableMemory srcMem) { - return wrap(srcMem, DEFAULT_UPDATE_SEED); + public static ArrayOfDoublesUnion wrap(final MemorySegment srcSeg) { + return wrap(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** - * Wrap the given WritableMemory and seed as an ArrayOfDoublesUnion - * @param srcMem the given source Memory + * Wrap the given MemorySegment and seed as an ArrayOfDoublesUnion. + * If the given source MemorySegment is read-only, the returned Union object will also be read-only. + * @param srcSeg the given source MemorySegment * @param seed the given seed * @return an ArrayOfDoublesUnion */ - public static ArrayOfDoublesUnion wrap(final WritableMemory srcMem, final long seed) { - return DirectArrayOfDoublesUnion.wrapUnion(srcMem, seed, true); + public static ArrayOfDoublesUnion wrap(final MemorySegment srcSeg, final long seed) { + return DirectArrayOfDoublesUnion.wrapUnion(srcSeg, seed, !srcSeg.isReadOnly()); } /** @@ -120,11 +105,11 @@ public static ArrayOfDoublesUnion wrap(final WritableMemory srcMem, final long s * *

              Nulls and empty sketches are ignored.

              * - * @param tupleSketch sketch to add to the union + * @param tupleSketch sketch to add to the union. */ public void union(final ArrayOfDoublesSketch tupleSketch) { if (tupleSketch == null) { return; } - org.apache.datasketches.common.Util.checkSeedHashes(gadget_.getSeedHash(), tupleSketch.getSeedHash()); + Util.checkSeedHashes(gadget_.getSeedHash(), tupleSketch.getSeedHash()); if (gadget_.getNumValues() != tupleSketch.getNumValues()) { throw new SketchesArgumentException("Incompatible sketches: number of values mismatch " + gadget_.getNumValues() + " and " + tupleSketch.getNumValues()); @@ -150,18 +135,18 @@ public void union(final ArrayOfDoublesSketch tupleSketch) { /** * Returns the resulting union in the form of a compact sketch - * @param dstMem memory for the result (can be null) - * @return compact sketch representing the union (off-heap if memory is provided) + * @param dstSeg MemorySegment for the result (can be null) + * @return compact sketch representing the union (off-heap if MemorySegment is provided) */ - public ArrayOfDoublesCompactSketch getResult(final WritableMemory dstMem) { + public ArrayOfDoublesCompactSketch getResult(final MemorySegment dstSeg) { long unionThetaLong = unionThetaLong_; if (gadget_.getRetainedEntries() > gadget_.getNominalEntries()) { unionThetaLong = Math.min(unionThetaLong, gadget_.getNewThetaLong()); } - if (dstMem == null) { + if (dstSeg == null) { return new HeapArrayOfDoublesCompactSketch(gadget_, unionThetaLong); } - return new DirectArrayOfDoublesCompactSketch(gadget_, unionThetaLong, dstMem); + return new DirectArrayOfDoublesCompactSketch(gadget_, unionThetaLong, dstSeg); } /** @@ -194,14 +179,14 @@ public void reset() { public byte[] toByteArray() { final int sizeBytes = PREAMBLE_SIZE_BYTES + gadget_.getSerializedSizeBytes(); final byte[] byteArray = new byte[sizeBytes]; - final WritableMemory mem = WritableMemory.writableWrap(byteArray); - mem.putByte(PREAMBLE_LONGS_BYTE, (byte) 1); // unused, always 
1 - mem.putByte(SERIAL_VERSION_BYTE, serialVersionUID); - mem.putByte(FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); - mem.putByte(SKETCH_TYPE_BYTE, (byte) SerializerDeserializer.SketchType.ArrayOfDoublesUnion.ordinal()); + final MemorySegment seg = MemorySegment.ofArray(byteArray); + seg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1); // unused, always 1 + seg.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID); + seg.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); + seg.set(JAVA_BYTE, SKETCH_TYPE_BYTE, (byte) SerializerDeserializer.SketchType.ArrayOfDoublesUnion.ordinal()); //byte 4-7 automatically zero - mem.putLong(THETA_LONG, unionThetaLong_); - gadget_.serializeInto(mem.writableRegion(PREAMBLE_SIZE_BYTES, mem.getCapacity() - PREAMBLE_SIZE_BYTES)); + seg.set(JAVA_LONG_UNALIGNED, THETA_LONG, unionThetaLong_); + gadget_.serializeInto(seg.asSlice(PREAMBLE_SIZE_BYTES, seg.byteSize() - PREAMBLE_SIZE_BYTES)); return byteArray; } diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java index 9521f85d6..dfeffcc4f 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java @@ -19,14 +19,14 @@ package org.apache.datasketches.tuple.arrayofdoubles; +import static org.apache.datasketches.common.Util.computeSeedHash; import static org.apache.datasketches.common.Util.DEFAULT_UPDATE_SEED; +import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.hash.MurmurHash3; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.tuple.Util; /** @@ -42,41 +42,43 @@ public abstract class ArrayOfDoublesUpdatableSketch 
extends ArrayOfDoublesSketch } /** - * Heapify the given Memory as an ArrayOfDoublesUpdatableSketch - * @param mem the given Memory + * Heapify the given MemorySegment as an ArrayOfDoublesUpdatableSketch + * @param seg the given MemorySegment * @return an ArrayOfDoublesUpdatableSketch */ - public static ArrayOfDoublesUpdatableSketch heapify(final Memory mem) { - return heapify(mem, DEFAULT_UPDATE_SEED); + public static ArrayOfDoublesUpdatableSketch heapify(final MemorySegment seg) { + return heapify(seg, DEFAULT_UPDATE_SEED); } /** - * Heapify the given Memory and seed as a ArrayOfDoublesUpdatableSketch - * @param mem the given Memory + * Heapify the given MemorySegment and seed as a ArrayOfDoublesUpdatableSketch + * @param seg the given MemorySegment * @param seed the given seed * @return an ArrayOfDoublesUpdatableSketch */ - public static ArrayOfDoublesUpdatableSketch heapify(final Memory mem, final long seed) { - return new HeapArrayOfDoublesQuickSelectSketch(mem, seed); + public static ArrayOfDoublesUpdatableSketch heapify(final MemorySegment seg, final long seed) { + return new HeapArrayOfDoublesQuickSelectSketch(seg, seed); } /** - * Wrap the given WritableMemory as an ArrayOfDoublesUpdatableSketch - * @param mem the given Memory + * Wrap the given MemorySegment as an ArrayOfDoublesUpdatableSketch. + * If the given source MemorySegment is read-only, the returned sketch object will also be read-only. + * @param seg the given MemorySegment * @return an ArrayOfDoublesUpdatableSketch */ - public static ArrayOfDoublesUpdatableSketch wrap(final WritableMemory mem) { - return wrap(mem, DEFAULT_UPDATE_SEED); + public static ArrayOfDoublesUpdatableSketch wrap(final MemorySegment seg) { + return wrap(seg, DEFAULT_UPDATE_SEED); } /** - * Wrap the given WritableMemory and seed as a ArrayOfDoublesUpdatableSketch - * @param mem the given Memory + * Wrap the given MemorySegment and seed as a ArrayOfDoublesUpdatableSketch.
+ * If the given source MemorySegment is read-only, the returned sketch object will also be read-only. + * @param seg the given MemorySegment * @param seed the given seed * @return an ArrayOfDoublesUpdatableSketch */ - public static ArrayOfDoublesUpdatableSketch wrap(final WritableMemory mem, final long seed) { - return new DirectArrayOfDoublesQuickSelectSketch(mem, seed); + public static ArrayOfDoublesUpdatableSketch wrap(final MemorySegment seg, final long seed) { + return new DirectArrayOfDoublesQuickSelectSketch(seg, seed); } /** @@ -198,16 +200,16 @@ public ArrayOfDoublesCompactSketch compact() { } /** - * Gets an off-heap compact representation of the sketch using the given memory - * @param dstMem memory for the compact sketch (can be null) - * @return compact sketch (off-heap if memory is provided) + * Gets an off-heap compact representation of the sketch using the given MemorySegment + * @param dstSeg MemorySegment for the compact sketch (can be null) + * @return compact sketch (off-heap if MemorySegment is provided) */ @Override - public ArrayOfDoublesCompactSketch compact(final WritableMemory dstMem) { - if (dstMem == null) { + public ArrayOfDoublesCompactSketch compact(final MemorySegment dstSeg) { + if (dstSeg == null) { return new HeapArrayOfDoublesCompactSketch(this); } - return new DirectArrayOfDoublesCompactSketch(this, dstMem); + return new DirectArrayOfDoublesCompactSketch(this, dstSeg); } abstract int getCurrentCapacity(); @@ -218,7 +220,7 @@ long getSeed() { @Override short getSeedHash() { - return org.apache.datasketches.common.Util.computeSeedHash(seed_); + return computeSeedHash(seed_); } /** diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java index 3ce9bac7b..185186975 100644 ---
a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java @@ -19,10 +19,11 @@ package org.apache.datasketches.tuple.arrayofdoubles; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.ThetaUtil; /** @@ -120,12 +121,12 @@ public ArrayOfDoublesUpdatableSketch build() { /** * Returns an ArrayOfDoublesUpdatableSketch with the current configuration of this Builder. - * @param dstMem instance of Memory to be used by the sketch + * @param dstSeg instance of MemorySegment to be used by the sketch * @return an ArrayOfDoublesUpdatableSketch */ - public ArrayOfDoublesUpdatableSketch build(final WritableMemory dstMem) { + public ArrayOfDoublesUpdatableSketch build(final MemorySegment dstSeg) { return new DirectArrayOfDoublesQuickSelectSketch(nomEntries_, resizeFactor_.lg(), - samplingProbability_, numValues_, seed_, dstMem); + samplingProbability_, numValues_, seed_, dstSeg); } } diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java index 2e0dc3628..083b5e06a 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java @@ -19,37 +19,40 @@ package org.apache.datasketches.tuple.arrayofdoubles; -import static org.apache.datasketches.common.Util.checkSeedHashes; -import static org.apache.datasketches.common.Util.computeSeedHash; +import static 
java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; +import java.lang.foreign.MemorySegment; import java.nio.ByteOrder; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.common.Util; import org.apache.datasketches.tuple.SerializerDeserializer; /** * Direct Compact Sketch of type ArrayOfDoubles. * - *

              This implementation uses data in a given Memory that is owned and managed by the caller. - * This Memory can be off-heap, which if managed properly will greatly reduce the need for + *

              This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

              */ final class DirectArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketch { // this value exists only on heap, never serialized - private Memory mem_; + private MemorySegment seg_; /** * Converts the given UpdatableArrayOfDoublesSketch to this compact form. * @param sketch the given UpdatableArrayOfDoublesSketch - * @param dstMem the given destination Memory. + * @param dstSeg the given destination MemorySegment. */ DirectArrayOfDoublesCompactSketch(final ArrayOfDoublesUpdatableSketch sketch, - final WritableMemory dstMem) { - this(sketch, sketch.getThetaLong(), dstMem); + final MemorySegment dstSeg) { + this(sketch, sketch.getThetaLong(), dstSeg); } /** @@ -57,30 +60,30 @@ final class DirectArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketc * trimming if necessary according to given theta * @param sketch the given UpdatableArrayOfDoublesSketch * @param thetaLong new value of thetaLong - * @param dstMem the given destination Memory. + * @param dstSeg the given destination MemorySegment. 
*/ DirectArrayOfDoublesCompactSketch(final ArrayOfDoublesUpdatableSketch sketch, - final long thetaLong, final WritableMemory dstMem) { + final long thetaLong, final MemorySegment dstSeg) { super(sketch.getNumValues()); - checkIfEnoughMemory(dstMem, sketch.getRetainedEntries(), sketch.getNumValues()); - mem_ = dstMem; - dstMem.putByte(PREAMBLE_LONGS_BYTE, (byte) 1); - dstMem.putByte(SERIAL_VERSION_BYTE, serialVersionUID); - dstMem.putByte(FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); - dstMem.putByte(SKETCH_TYPE_BYTE, (byte) + checkMemorySegmentSize(dstSeg, sketch.getRetainedEntries(), sketch.getNumValues()); + seg_ = dstSeg; + dstSeg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1); + dstSeg.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID); + dstSeg.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); + dstSeg.set(JAVA_BYTE, SKETCH_TYPE_BYTE, (byte) SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch.ordinal()); final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN); isEmpty_ = sketch.isEmpty(); final int count = sketch.getRetainedEntries(); - dstMem.putByte(FLAGS_BYTE, (byte) ( + dstSeg.set(JAVA_BYTE, FLAGS_BYTE, (byte) ( (isBigEndian ? 1 << Flags.IS_BIG_ENDIAN.ordinal() : 0) | (isEmpty_ ? 1 << Flags.IS_EMPTY.ordinal() : 0) | (count > 0 ? 
1 << Flags.HAS_ENTRIES.ordinal() : 0) )); - dstMem.putByte(NUM_VALUES_BYTE, (byte) numValues_); - dstMem.putShort(SEED_HASH_SHORT, org.apache.datasketches.common.Util.computeSeedHash(sketch.getSeed())); + dstSeg.set(JAVA_BYTE, NUM_VALUES_BYTE, (byte) numValues_); + dstSeg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, Util.computeSeedHash(sketch.getSeed())); thetaLong_ = Math.min(sketch.getThetaLong(), thetaLong); - dstMem.putLong(THETA_LONG, thetaLong_); + dstSeg.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); if (count > 0) { int keyOffset = ENTRIES_START; int valuesOffset = keyOffset + (SIZE_OF_KEY_BYTES * sketch.getRetainedEntries()); @@ -88,14 +91,14 @@ final class DirectArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketc int actualCount = 0; while (it.next()) { if (it.getKey() < thetaLong_) { - dstMem.putLong(keyOffset, it.getKey()); - dstMem.putDoubleArray(valuesOffset, it.getValues(), 0, numValues_); + dstSeg.set(JAVA_LONG_UNALIGNED, keyOffset, it.getKey()); + MemorySegment.copy(it.getValues(), 0, dstSeg, JAVA_DOUBLE_UNALIGNED, valuesOffset, numValues_); keyOffset += SIZE_OF_KEY_BYTES; valuesOffset += SIZE_OF_VALUE_BYTES * numValues_; actualCount++; } } - dstMem.putInt(RETAINED_ENTRIES_INT, actualCount); + dstSeg.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, actualCount); } } @@ -103,109 +106,108 @@ final class DirectArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketc * Creates an instance from components */ DirectArrayOfDoublesCompactSketch(final long[] keys, final double[] values, final long thetaLong, - final boolean isEmpty, final int numValues, final short seedHash, final WritableMemory dstMem) { + final boolean isEmpty, final int numValues, final short seedHash, final MemorySegment dstSeg) { super(numValues); - checkIfEnoughMemory(dstMem, values.length, numValues); - mem_ = dstMem; - dstMem.putByte(PREAMBLE_LONGS_BYTE, (byte) 1); - dstMem.putByte(SERIAL_VERSION_BYTE, serialVersionUID); - dstMem.putByte(FAMILY_ID_BYTE, (byte) 
Family.TUPLE.getID()); - dstMem.putByte(SKETCH_TYPE_BYTE, (byte) + checkMemorySegmentSize(dstSeg, values.length, numValues); + seg_ = dstSeg; + dstSeg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1); + dstSeg.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID); + dstSeg.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); + dstSeg.set(JAVA_BYTE, SKETCH_TYPE_BYTE, (byte) SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch.ordinal()); final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN); isEmpty_ = isEmpty; final int count = keys.length; - dstMem.putByte(FLAGS_BYTE, (byte) ( + dstSeg.set(JAVA_BYTE, FLAGS_BYTE, (byte) ( (isBigEndian ? 1 << Flags.IS_BIG_ENDIAN.ordinal() : 0) | (isEmpty_ ? 1 << Flags.IS_EMPTY.ordinal() : 0) | (count > 0 ? 1 << Flags.HAS_ENTRIES.ordinal() : 0) )); - dstMem.putByte(NUM_VALUES_BYTE, (byte) numValues_); - dstMem.putShort(SEED_HASH_SHORT, seedHash); + dstSeg.set(JAVA_BYTE, NUM_VALUES_BYTE, (byte) numValues_); + dstSeg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, seedHash); thetaLong_ = thetaLong; - dstMem.putLong(THETA_LONG, thetaLong_); + dstSeg.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); if (count > 0) { - dstMem.putInt(RETAINED_ENTRIES_INT, count); - dstMem.putLongArray(ENTRIES_START, keys, 0, count); - dstMem.putDoubleArray( - ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * count), values, 0, values.length); + dstSeg.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, count); + MemorySegment.copy(keys, 0, dstSeg, JAVA_LONG_UNALIGNED, ENTRIES_START, count); + MemorySegment.copy(values, 0, dstSeg, JAVA_DOUBLE_UNALIGNED, ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * count), values.length); } } /** - * Wraps the given Memory. - * @param mem See Memory + * Wraps the given MemorySegment. 
+ * @param seg the given MemorySegment */ - DirectArrayOfDoublesCompactSketch(final Memory mem) { - super(mem.getByte(NUM_VALUES_BYTE)); - mem_ = mem; - SerializerDeserializer.validateFamily(mem.getByte(FAMILY_ID_BYTE), - mem.getByte(PREAMBLE_LONGS_BYTE)); - SerializerDeserializer.validateType(mem_.getByte(SKETCH_TYPE_BYTE), + DirectArrayOfDoublesCompactSketch(final MemorySegment seg) { + super(seg.get(JAVA_BYTE, NUM_VALUES_BYTE)); + seg_ = seg; + SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE), + seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE)); + SerializerDeserializer.validateType(seg_.get(JAVA_BYTE, SKETCH_TYPE_BYTE), SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch); - final byte version = mem_.getByte(SERIAL_VERSION_BYTE); + final byte version = seg_.get(JAVA_BYTE, SERIAL_VERSION_BYTE); if (version != serialVersionUID) { throw new SketchesArgumentException("Serial version mismatch. Expected: " + serialVersionUID + ", actual: " + version); } final boolean isBigEndian = - (mem.getByte(FLAGS_BYTE) & (1 << Flags.IS_BIG_ENDIAN.ordinal())) != 0; + (seg.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_BIG_ENDIAN.ordinal())) != 0; if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { throw new SketchesArgumentException("Byte order mismatch"); } - isEmpty_ = (mem_.getByte(FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; - thetaLong_ = mem_.getLong(THETA_LONG); + isEmpty_ = (seg_.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; + thetaLong_ = seg_.get(JAVA_LONG_UNALIGNED, THETA_LONG); } /** - * Wraps the given Memory. - * @param mem See Memory + * Wraps the given MemorySegment. + * @param seg the given MemorySegment. 
* @param seed See seed */ - DirectArrayOfDoublesCompactSketch(final Memory mem, final long seed) { - super(mem.getByte(NUM_VALUES_BYTE)); - mem_ = mem; - SerializerDeserializer.validateFamily(mem.getByte(FAMILY_ID_BYTE), - mem.getByte(PREAMBLE_LONGS_BYTE)); - SerializerDeserializer.validateType(mem_.getByte(SKETCH_TYPE_BYTE), + DirectArrayOfDoublesCompactSketch(final MemorySegment seg, final long seed) { + super(seg.get(JAVA_BYTE, NUM_VALUES_BYTE)); + seg_ = seg; + SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE), + seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE)); + SerializerDeserializer.validateType(seg_.get(JAVA_BYTE, SKETCH_TYPE_BYTE), SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch); - final byte version = mem_.getByte(SERIAL_VERSION_BYTE); + final byte version = seg_.get(JAVA_BYTE, SERIAL_VERSION_BYTE); if (version != serialVersionUID) { throw new SketchesArgumentException("Serial version mismatch. Expected: " + serialVersionUID + ", actual: " + version); } final boolean isBigEndian = - (mem.getByte(FLAGS_BYTE) & (1 << Flags.IS_BIG_ENDIAN.ordinal())) != 0; + (seg.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_BIG_ENDIAN.ordinal())) != 0; if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { throw new SketchesArgumentException("Byte order mismatch"); } - checkSeedHashes(mem.getShort(SEED_HASH_SHORT), computeSeedHash(seed)); - isEmpty_ = (mem_.getByte(FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; - thetaLong_ = mem_.getLong(THETA_LONG); + Util.checkSeedHashes(seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT), Util.computeSeedHash(seed)); + isEmpty_ = (seg_.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; + thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, THETA_LONG); } @Override - public ArrayOfDoublesCompactSketch compact(final WritableMemory dstMem) { - if (dstMem == null) { + public ArrayOfDoublesCompactSketch compact(final MemorySegment dstSeg) { + if (dstSeg == null) { return new 
HeapArrayOfDoublesCompactSketch(getKeys(), getValuesAsOneDimension(), thetaLong_, isEmpty_, numValues_, getSeedHash()); } else { - mem_.copyTo(0, dstMem, 0, mem_.getCapacity()); - return new DirectArrayOfDoublesCompactSketch(dstMem); + MemorySegment.copy(seg_, 0, dstSeg, 0, seg_.byteSize()); + return new DirectArrayOfDoublesCompactSketch(dstSeg); } } @Override public int getRetainedEntries() { final boolean hasEntries = - (mem_.getByte(FLAGS_BYTE) & (1 << Flags.HAS_ENTRIES.ordinal())) != 0; - return (hasEntries ? mem_.getInt(RETAINED_ENTRIES_INT) : 0); + (seg_.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.HAS_ENTRIES.ordinal())) != 0; + return (hasEntries ? seg_.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT) : 0); } @Override - //converts compact Memory array of double[] to compact double[][] + //converts compact MemorySegment array of double[] to compact double[][] public double[][] getValues() { final int count = getRetainedEntries(); final double[][] values = new double[count][]; @@ -213,7 +215,7 @@ public double[][] getValues() { int valuesOffset = ENTRIES_START + (SIZE_OF_KEY_BYTES * count); for (int i = 0; i < count; i++) { final double[] array = new double[numValues_]; - mem_.getDoubleArray(valuesOffset, array, 0, numValues_); + MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, valuesOffset, array, 0, numValues_); values[i] = array; valuesOffset += SIZE_OF_VALUE_BYTES * numValues_; } @@ -222,26 +224,26 @@ public double[][] getValues() { } @Override - //converts compact Memory array of double[] to compact double[] + //converts compact MemorySegment array of double[] to compact double[] double[] getValuesAsOneDimension() { final int count = getRetainedEntries(); final int numDoubles = count * numValues_; final double[] values = new double[numDoubles]; if (count > 0) { final int valuesOffset = ENTRIES_START + (SIZE_OF_KEY_BYTES * count); - mem_.getDoubleArray(valuesOffset, values, 0, numDoubles); + MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, valuesOffset, values, 
0, numDoubles); } return values; } @Override - //converts compact Memory array of long[] to compact long[] + //converts compact MemorySegment array of long[] to compact long[] long[] getKeys() { final int count = getRetainedEntries(); final long[] keys = new long[count]; if (count > 0) { for (int i = 0; i < count; i++) { - mem_.getLongArray(ENTRIES_START, keys, 0, count); + MemorySegment.copy(seg_, JAVA_LONG_UNALIGNED, ENTRIES_START, keys, 0, count); } } return keys; @@ -251,35 +253,35 @@ long[] getKeys() { public byte[] toByteArray() { final int sizeBytes = getCurrentBytes(); final byte[] byteArray = new byte[sizeBytes]; - final WritableMemory mem = WritableMemory.writableWrap(byteArray); - mem_.copyTo(0, mem, 0, sizeBytes); + final MemorySegment seg = MemorySegment.ofArray(byteArray); + MemorySegment.copy(seg_, 0, seg, 0, sizeBytes); return byteArray; } @Override public ArrayOfDoublesSketchIterator iterator() { return new DirectArrayOfDoublesSketchIterator( - mem_, ENTRIES_START, getRetainedEntries(), numValues_); + seg_, ENTRIES_START, getRetainedEntries(), numValues_); } @Override short getSeedHash() { - return mem_.getShort(SEED_HASH_SHORT); + return seg_.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT); } @Override - public boolean hasMemory() { return true; } + public boolean hasMemorySegment() { return true; } @Override - Memory getMemory() { return mem_; } + MemorySegment getMemorySegment() { return seg_; } - private static void checkIfEnoughMemory(final Memory mem, final int numEntries, + private static void checkMemorySegmentSize(final MemorySegment seg, final int numEntries, final int numValues) { final int sizeNeeded = ENTRIES_START + ((SIZE_OF_KEY_BYTES + (SIZE_OF_VALUE_BYTES * numValues)) * numEntries); - if (sizeNeeded > mem.getCapacity()) { - throw new SketchesArgumentException("Not enough memory: need " + sizeNeeded - + " bytes, got " + mem.getCapacity() + " bytes"); + if (sizeNeeded > seg.byteSize()) { + throw new SketchesArgumentException("Not enough 
space: need " + sizeNeeded + + " bytes, got " + seg.byteSize() + " bytes"); } } diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java index 7c1b1bf07..1aa34dbba 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java @@ -19,34 +19,34 @@ package org.apache.datasketches.tuple.arrayofdoubles; -import org.apache.datasketches.memory.WritableMemory; +import java.lang.foreign.MemorySegment; /** * Direct Intersection operation for tuple sketches of type ArrayOfDoubles. * - *

              This implementation uses data in a given Memory that is owned and managed by the caller. - * This Memory can be off-heap, which if managed properly will greatly reduce the need for + *

              This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

              */ final class DirectArrayOfDoublesIntersection extends ArrayOfDoublesIntersection { - private WritableMemory mem_; + private MemorySegment seg_; /** * Creates an instance of a DirectArrayOfDoublesIntersection with a custom update seed * @param numValues number of double values associated with each key * @param seed See seed - * @param dstMem See Memory + * @param dstSeg the destination MemorySegment */ - DirectArrayOfDoublesIntersection(final int numValues, final long seed, final WritableMemory dstMem) { + DirectArrayOfDoublesIntersection(final int numValues, final long seed, final MemorySegment dstSeg) { super(numValues, seed); - mem_ = dstMem; + seg_ = dstSeg; } @Override protected ArrayOfDoublesQuickSelectSketch createSketch(final int nomEntries, final int numValues, final long seed) { - return new DirectArrayOfDoublesQuickSelectSketch(nomEntries, 0, 1f, numValues, seed, mem_); + return new DirectArrayOfDoublesQuickSelectSketch(nomEntries, 0, 1f, numValues, seed, seg_); } } diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java index 2730dc391..0bd6ee65f 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java @@ -19,17 +19,25 @@ package org.apache.datasketches.tuple.arrayofdoubles; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; import static 
org.apache.datasketches.common.Util.checkSeedHashes; +import static org.apache.datasketches.common.Util.clear; +import static org.apache.datasketches.common.Util.clearBits; import static org.apache.datasketches.common.Util.computeSeedHash; +import static org.apache.datasketches.common.Util.setBits; +import java.lang.foreign.MemorySegment; import java.nio.ByteOrder; import java.util.Arrays; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.HashOperations; import org.apache.datasketches.tuple.SerializerDeserializer; import org.apache.datasketches.tuple.Util; @@ -37,20 +45,20 @@ /** * Direct QuickSelect tuple sketch of type ArrayOfDoubles. * - *

              This implementation uses data in a given Memory that is owned and managed by the caller. - * This Memory can be off-heap, which if managed properly will greatly reduce the need for + *

              This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

              */ class DirectArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelectSketch { // these values exist only on heap, never serialized - private WritableMemory mem_; - // these can be derived from the mem_ contents, but are kept here for performance + private MemorySegment seg_; + // these can be derived from the seg_ contents, but are kept here for performance private int keysOffset_; private int valuesOffset_; /** - * Construct a new sketch using the given Memory as its backing store. + * Construct a new sketch using the given MemorySegment as its backing store. * * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than * given value. @@ -63,7 +71,7 @@ class DirectArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelectSke * See Sampling Probability * @param numValues Number of double values to keep for each key. * @param seed See seed - * @param dstMem See Memory + * @param dstSeg the destination MemorySegment. */ DirectArrayOfDoublesQuickSelectSketch( final int nomEntries, @@ -71,8 +79,8 @@ class DirectArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelectSke final float samplingProbability, final int numValues, final long seed, - final WritableMemory dstMem) { - this(checkMemory(nomEntries, lgResizeFactor, numValues, dstMem), + final MemorySegment dstSeg) { + this(checkMemorySegment(nomEntries, lgResizeFactor, numValues, dstSeg), //SpotBugs CT_CONSTRUCTOR_THROW is false positive. 
//this construction scheme is compliant with SEI CERT Oracle Coding Standard for Java / OBJ11-J nomEntries, @@ -80,7 +88,7 @@ class DirectArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelectSke samplingProbability, numValues, seed, - dstMem); + dstSeg); } private DirectArrayOfDoublesQuickSelectSketch( @@ -90,89 +98,89 @@ private DirectArrayOfDoublesQuickSelectSketch( final float samplingProbability, final int numValues, final long seed, - final WritableMemory dstMem) { + final MemorySegment dstSeg) { super(numValues, seed); - mem_ = dstMem; + seg_ = dstSeg; final int startingCapacity = Util.getStartingCapacity(nomEntries, lgResizeFactor); - mem_.putByte(PREAMBLE_LONGS_BYTE, (byte) 1); - mem_.putByte(SERIAL_VERSION_BYTE, serialVersionUID); - mem_.putByte(FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); - mem_.putByte(SKETCH_TYPE_BYTE, (byte) + seg_.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1); + seg_.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID); + seg_.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); + seg_.set(JAVA_BYTE, SKETCH_TYPE_BYTE, (byte) SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch.ordinal()); final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN); - mem_.putByte(FLAGS_BYTE, (byte) ( + seg_.set(JAVA_BYTE, FLAGS_BYTE, (byte) ( (isBigEndian ? 1 << Flags.IS_BIG_ENDIAN.ordinal() : 0) | (samplingProbability < 1f ? 
1 << Flags.IS_IN_SAMPLING_MODE.ordinal() : 0) | (1 << Flags.IS_EMPTY.ordinal()) )); - mem_.putByte(NUM_VALUES_BYTE, (byte) numValues); - mem_.putShort(SEED_HASH_SHORT, org.apache.datasketches.common.Util.computeSeedHash(seed)); + seg_.set(JAVA_BYTE, NUM_VALUES_BYTE, (byte) numValues); + seg_.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, computeSeedHash(seed)); thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability); - mem_.putLong(THETA_LONG, thetaLong_); - mem_.putByte(LG_NOM_ENTRIES_BYTE, (byte) Integer.numberOfTrailingZeros(nomEntries)); - mem_.putByte(LG_CUR_CAPACITY_BYTE, (byte) Integer.numberOfTrailingZeros(startingCapacity)); - mem_.putByte(LG_RESIZE_FACTOR_BYTE, (byte) lgResizeFactor); - mem_.putFloat(SAMPLING_P_FLOAT, samplingProbability); - mem_.putInt(RETAINED_ENTRIES_INT, 0); + seg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); + seg_.set(JAVA_BYTE, LG_NOM_ENTRIES_BYTE, (byte) Integer.numberOfTrailingZeros(nomEntries)); + seg_.set(JAVA_BYTE, LG_CUR_CAPACITY_BYTE, (byte) Integer.numberOfTrailingZeros(startingCapacity)); + seg_.set(JAVA_BYTE, LG_RESIZE_FACTOR_BYTE, (byte) lgResizeFactor); + seg_.set(JAVA_FLOAT_UNALIGNED, SAMPLING_P_FLOAT, samplingProbability); + seg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, 0); keysOffset_ = ENTRIES_START; valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * startingCapacity); - mem_.clear(keysOffset_, (long) SIZE_OF_KEY_BYTES * startingCapacity); // clear keys only + clear(seg_, keysOffset_, (long) SIZE_OF_KEY_BYTES * startingCapacity); //clear keys only lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingCapacity); setRebuildThreshold(); } - private static final boolean checkMemory( + private static final boolean checkMemorySegment( final int nomEntries, final int lgResizeFactor, final int numValues, - final WritableMemory dstMem) { + final MemorySegment dstSeg) { final int startingCapacity = Util.getStartingCapacity(nomEntries, lgResizeFactor); - checkIfEnoughMemory(dstMem, startingCapacity, 
numValues); + checkMemorySegmentSize(dstSeg, startingCapacity, numValues); return true; } /** - * Wraps the given Memory. - * @param mem See Memory + * Wraps the given MemorySegment. + * @param seg the given MemorySegment * @param seed update seed */ DirectArrayOfDoublesQuickSelectSketch( - final WritableMemory mem, + final MemorySegment seg, final long seed) { - this(checkSerVer_Endianness(mem), mem, seed); + this(checkSerVer_Endianness(seg), seg, seed); //SpotBugs CT_CONSTRUCTOR_THROW is false positive. //this construction scheme is compliant with SEI CERT Oracle Coding Standard for Java / OBJ11-J } private DirectArrayOfDoublesQuickSelectSketch( final boolean secure, //required part of Finalizer Attack prevention - final WritableMemory mem, + final MemorySegment seg, final long seed) { - super(mem.getByte(NUM_VALUES_BYTE), seed); - mem_ = mem; - SerializerDeserializer.validateFamily(mem.getByte(FAMILY_ID_BYTE), - mem.getByte(PREAMBLE_LONGS_BYTE)); - SerializerDeserializer.validateType(mem_.getByte(SKETCH_TYPE_BYTE), + super(seg.get(JAVA_BYTE, NUM_VALUES_BYTE), seed); + seg_ = seg; + SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE), + seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE)); + SerializerDeserializer.validateType(seg_.get(JAVA_BYTE, SKETCH_TYPE_BYTE), SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch); - checkSeedHashes(mem.getShort(SEED_HASH_SHORT), computeSeedHash(seed)); + checkSeedHashes(seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT), computeSeedHash(seed)); keysOffset_ = ENTRIES_START; valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * getCurrentCapacity()); // to do: make parent take care of its own parts lgCurrentCapacity_ = Integer.numberOfTrailingZeros(getCurrentCapacity()); - thetaLong_ = mem_.getLong(THETA_LONG); - isEmpty_ = (mem_.getByte(FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; + thetaLong_ = seg_.get(JAVA_LONG_UNALIGNED, THETA_LONG); + isEmpty_ = (seg_.get(JAVA_BYTE, FLAGS_BYTE) & (1 << 
Flags.IS_EMPTY.ordinal())) != 0; setRebuildThreshold(); } - private static final boolean checkSerVer_Endianness(final Memory mem) { - final byte version = mem.getByte(SERIAL_VERSION_BYTE); + private static final boolean checkSerVer_Endianness(final MemorySegment seg) { + final byte version = seg.get(JAVA_BYTE, SERIAL_VERSION_BYTE); if (version != serialVersionUID) { throw new SketchesArgumentException("Serial version mismatch. Expected: " + serialVersionUID + ", actual: " + version); } final boolean isBigEndian = - (mem.getByte(FLAGS_BYTE) & (1 << Flags.IS_BIG_ENDIAN.ordinal())) != 0; + (seg.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_BIG_ENDIAN.ordinal())) != 0; if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { throw new SketchesArgumentException("Byte order mismatch"); } @@ -180,7 +188,7 @@ private static final boolean checkSerVer_Endianness(final Memory mem) { } @Override - //converts Memory hashTable of double[] to compacted double[][] + //converts MemorySegment hashTable of double[] to compacted double[][] public double[][] getValues() { final int count = getRetainedEntries(); final double[][] values = new double[count][]; @@ -189,9 +197,9 @@ public double[][] getValues() { long valuesOffset = valuesOffset_; int cnt = 0; for (int j = 0; j < getCurrentCapacity(); j++) { - if (mem_.getLong(keyOffset) != 0) { + if (seg_.get(JAVA_LONG_UNALIGNED, keyOffset) != 0) { final double[] array = new double[numValues_]; - mem_.getDoubleArray(valuesOffset, array, 0, numValues_); + MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, valuesOffset, array, 0, numValues_); values[cnt++] = array; } keyOffset += SIZE_OF_KEY_BYTES; @@ -212,8 +220,8 @@ public double[][] getValues() { long valuesOffsetBytes = valuesOffset_; int cnt = 0; for (int j = 0; j < cap; j++) { - if (mem_.getLong(keyOffsetBytes) != 0) { - mem_.getDoubleArray(valuesOffsetBytes, values, cnt++ * numValues_, numValues_); + if (seg_.get(JAVA_LONG_UNALIGNED, keyOffsetBytes) != 0) { + 
MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, valuesOffsetBytes, values, cnt++ * numValues_, numValues_); } keyOffsetBytes += SIZE_OF_KEY_BYTES; valuesOffsetBytes += (long)SIZE_OF_VALUE_BYTES * numValues_; @@ -234,7 +242,7 @@ long[] getKeys() { int cnt = 0; for (int j = 0; j < cap; j++) { final long key; - if ((key = mem_.getLong(keyOffsetBytes)) != 0) { + if ((key = seg_.get(JAVA_LONG_UNALIGNED, keyOffsetBytes)) != 0) { keys[cnt++] = key; } keyOffsetBytes += SIZE_OF_KEY_BYTES; @@ -246,44 +254,43 @@ long[] getKeys() { @Override public int getRetainedEntries() { - return mem_.getInt(RETAINED_ENTRIES_INT); + return seg_.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT); } @Override public int getNominalEntries() { - return 1 << mem_.getByte(LG_NOM_ENTRIES_BYTE); + return 1 << seg_.get(JAVA_BYTE, LG_NOM_ENTRIES_BYTE); } @Override public ResizeFactor getResizeFactor() { - return ResizeFactor.getRF(mem_.getByte(LG_RESIZE_FACTOR_BYTE)); + return ResizeFactor.getRF(seg_.get(JAVA_BYTE, LG_RESIZE_FACTOR_BYTE)); } @Override public float getSamplingProbability() { - return mem_.getFloat(SAMPLING_P_FLOAT); + return seg_.get(JAVA_FLOAT_UNALIGNED, SAMPLING_P_FLOAT); } @Override public byte[] toByteArray() { final int sizeBytes = getSerializedSizeBytes(); final byte[] byteArray = new byte[sizeBytes]; - final WritableMemory mem = WritableMemory.writableWrap(byteArray); - serializeInto(mem); + final MemorySegment seg = MemorySegment.ofArray(byteArray); + serializeInto(seg); return byteArray; } @Override public ArrayOfDoublesSketchIterator iterator() { - return new DirectArrayOfDoublesSketchIterator(mem_, keysOffset_, getCurrentCapacity(), - numValues_); + return new DirectArrayOfDoublesSketchIterator(seg_, keysOffset_, getCurrentCapacity(), numValues_); } @Override - public boolean hasMemory() { return true; } + public boolean hasMemorySegment() { return true; } @Override - WritableMemory getMemory() { return mem_; } + MemorySegment getMemorySegment() { return seg_; } @Override int 
getSerializedSizeBytes() { @@ -291,60 +298,60 @@ int getSerializedSizeBytes() { } @Override - void serializeInto(final WritableMemory mem) { - mem_.copyTo(0, mem, 0, mem.getCapacity()); + void serializeInto(final MemorySegment seg) { + MemorySegment.copy(seg_, 0, seg, 0, seg.byteSize()); } @Override public void reset() { if (!isEmpty_) { isEmpty_ = true; - mem_.setBits(FLAGS_BYTE, (byte) (1 << Flags.IS_EMPTY.ordinal())); + setBits(seg_, FLAGS_BYTE, (byte) (1 << Flags.IS_EMPTY.ordinal())); } - final int lgResizeFactor = mem_.getByte(LG_RESIZE_FACTOR_BYTE); - final float samplingProbability = mem_.getFloat(SAMPLING_P_FLOAT); + final int lgResizeFactor = seg_.get(JAVA_BYTE, LG_RESIZE_FACTOR_BYTE); + final float samplingProbability = seg_.get(JAVA_FLOAT_UNALIGNED, SAMPLING_P_FLOAT); final int startingCapacity = Util.getStartingCapacity(getNominalEntries(), lgResizeFactor); thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability); - mem_.putLong(THETA_LONG, thetaLong_); - mem_.putByte(LG_CUR_CAPACITY_BYTE, (byte) Integer.numberOfTrailingZeros(startingCapacity)); - mem_.putInt(RETAINED_ENTRIES_INT, 0); + seg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); + seg_.set(JAVA_BYTE, LG_CUR_CAPACITY_BYTE, (byte) Integer.numberOfTrailingZeros(startingCapacity)); + seg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, 0); keysOffset_ = ENTRIES_START; valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * startingCapacity); - mem_.clear(keysOffset_, (long) SIZE_OF_KEY_BYTES * startingCapacity); // clear keys only + clear(seg_, keysOffset_, (long) SIZE_OF_KEY_BYTES * startingCapacity); //clear keys only lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingCapacity); setRebuildThreshold(); } @Override protected long getKey(final int index) { - return mem_.getLong(keysOffset_ + ((long) SIZE_OF_KEY_BYTES * index)); + return seg_.get(JAVA_LONG_UNALIGNED, keysOffset_ + ((long) SIZE_OF_KEY_BYTES * index)); } @Override protected void incrementCount() { - final int count = 
mem_.getInt(RETAINED_ENTRIES_INT); + final int count = seg_.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT); if (count == 0) { - mem_.setBits(FLAGS_BYTE, (byte) (1 << Flags.HAS_ENTRIES.ordinal())); + setBits(seg_, FLAGS_BYTE, (byte) (1 << Flags.HAS_ENTRIES.ordinal())); } - mem_.putInt(RETAINED_ENTRIES_INT, count + 1); + seg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, count + 1); } @Override protected final int getCurrentCapacity() { - return 1 << mem_.getByte(LG_CUR_CAPACITY_BYTE); + return 1 << seg_.get(JAVA_BYTE, LG_CUR_CAPACITY_BYTE); } @Override protected void setThetaLong(final long thetaLong) { thetaLong_ = thetaLong; - mem_.putLong(THETA_LONG, thetaLong_); + seg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); } @Override protected void setValues(final int index, final double[] values) { long offset = valuesOffset_ + ((long) SIZE_OF_VALUE_BYTES * numValues_ * index); for (int i = 0; i < numValues_; i++) { - mem_.putDouble(offset, values[i]); + seg_.set(JAVA_DOUBLE_UNALIGNED, offset, values[i]); offset += SIZE_OF_VALUE_BYTES; } } @@ -353,7 +360,7 @@ protected void setValues(final int index, final double[] values) { protected void updateValues(final int index, final double[] values) { long offset = valuesOffset_ + ((long) SIZE_OF_VALUE_BYTES * numValues_ * index); for (int i = 0; i < numValues_; i++) { - mem_.putDouble(offset, mem_.getDouble(offset) + values[i]); + seg_.set(JAVA_DOUBLE_UNALIGNED, offset, seg_.get(JAVA_DOUBLE_UNALIGNED, offset) + values[i]); offset += SIZE_OF_VALUE_BYTES; } } @@ -362,29 +369,30 @@ protected void updateValues(final int index, final double[] values) { protected void setNotEmpty() { if (isEmpty_) { isEmpty_ = false; - mem_.clearBits(FLAGS_BYTE, (byte) (1 << Flags.IS_EMPTY.ordinal())); + clearBits(seg_, FLAGS_BYTE, (byte) (1 << Flags.IS_EMPTY.ordinal())); + } } @Override protected boolean isInSamplingMode() { - return (mem_.getByte(FLAGS_BYTE) & (1 << Flags.IS_IN_SAMPLING_MODE.ordinal())) != 0; + return (seg_.get(JAVA_BYTE, 
FLAGS_BYTE) & (1 << Flags.IS_IN_SAMPLING_MODE.ordinal())) != 0; } - // rebuild in the same memory + // rebuild in the same MemorySegment @Override protected void rebuild(final int newCapacity) { final int numValues = getNumValues(); - checkIfEnoughMemory(mem_, newCapacity, numValues); + checkMemorySegmentSize(seg_, newCapacity, numValues); final int currCapacity = getCurrentCapacity(); final long[] keys = new long[currCapacity]; final double[] values = new double[currCapacity * numValues]; - mem_.getLongArray(keysOffset_, keys, 0, currCapacity); - mem_.getDoubleArray(valuesOffset_, values, 0, currCapacity * numValues); - mem_.clear(keysOffset_, - ((long) SIZE_OF_KEY_BYTES * newCapacity) + ((long) SIZE_OF_VALUE_BYTES * newCapacity * numValues)); - mem_.putInt(RETAINED_ENTRIES_INT, 0); - mem_.putByte(LG_CUR_CAPACITY_BYTE, (byte)Integer.numberOfTrailingZeros(newCapacity)); + MemorySegment.copy(seg_, JAVA_LONG_UNALIGNED, keysOffset_, keys, 0, currCapacity); + MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, valuesOffset_, values, 0, currCapacity * numValues); + + clear(seg_, keysOffset_, ((long) SIZE_OF_KEY_BYTES * newCapacity) + ((long) SIZE_OF_VALUE_BYTES * newCapacity * numValues)); + seg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, 0); + seg_.set(JAVA_BYTE, LG_CUR_CAPACITY_BYTE, (byte)Integer.numberOfTrailingZeros(newCapacity)); valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * newCapacity); lgCurrentCapacity_ = Integer.numberOfTrailingZeros(newCapacity); for (int i = 0; i < keys.length; i++) { @@ -397,30 +405,30 @@ protected void rebuild(final int newCapacity) { @Override protected int insertKey(final long key) { - return HashOperations.hashInsertOnlyMemory(mem_, lgCurrentCapacity_, key, ENTRIES_START); + return HashOperations.hashInsertOnlyMemorySegment(seg_, lgCurrentCapacity_, key, ENTRIES_START); } @Override protected int findOrInsertKey(final long key) { - return HashOperations.hashSearchOrInsertMemory(mem_, lgCurrentCapacity_, key, ENTRIES_START); + return 
HashOperations.hashSearchOrInsertMemorySegment(seg_, lgCurrentCapacity_, key, ENTRIES_START); } @Override protected double[] find(final long key) { - final int index = HashOperations.hashSearchMemory(mem_, lgCurrentCapacity_, key, ENTRIES_START); + final int index = HashOperations.hashSearchMemorySegment(seg_, lgCurrentCapacity_, key, ENTRIES_START); if (index == -1) { return null; } final double[] array = new double[numValues_]; - mem_.getDoubleArray(valuesOffset_ + ((long) SIZE_OF_VALUE_BYTES * numValues_ * index), - array, 0, numValues_); + MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, valuesOffset_ + + ((long) SIZE_OF_VALUE_BYTES * numValues_ * index), array, 0, numValues_); return array; } - private static void checkIfEnoughMemory(final Memory mem, final int numEntries, final int numValues) { + private static void checkMemorySegmentSize(final MemorySegment seg, final int numEntries, final int numValues) { final int sizeNeeded = ENTRIES_START + ((SIZE_OF_KEY_BYTES + (SIZE_OF_VALUE_BYTES * numValues)) * numEntries); - if (sizeNeeded > mem.getCapacity()) { - throw new SketchesArgumentException("Not enough memory: need " - + sizeNeeded + " bytes, got " + mem.getCapacity() + " bytes"); + if (sizeNeeded > seg.byteSize()) { + throw new SketchesArgumentException("Not enough space: need " + + sizeNeeded + " bytes, got " + seg.byteSize() + " bytes"); } } diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchR.java index f168c7284..3ff1a260c 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchR.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchR.java @@ -19,14 +19,14 @@ package org.apache.datasketches.tuple.arrayofdoubles; +import java.lang.foreign.MemorySegment; + import 
org.apache.datasketches.common.SketchesReadOnlyException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; final class DirectArrayOfDoublesQuickSelectSketchR extends DirectArrayOfDoublesQuickSelectSketch { - DirectArrayOfDoublesQuickSelectSketchR(final Memory mem, final long seed) { - super((WritableMemory) mem, seed); + DirectArrayOfDoublesQuickSelectSketchR(final MemorySegment seg, final long seed) { + super(seg, seed); } @Override diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java index dcdab1313..35f172c3f 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java @@ -19,18 +19,21 @@ package org.apache.datasketches.tuple.arrayofdoubles; -import org.apache.datasketches.memory.Memory; +import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; + +import java.lang.foreign.MemorySegment; /** * Iterator over the off-heap, Direct tuple sketch of type ArrayOfDoubles (compact or hash table). * - *

              This implementation uses data in a given Memory that is owned and managed by the caller. - * This Memory can be off-heap, which if managed properly will greatly reduce the need for + *

              This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

              */ final class DirectArrayOfDoublesSketchIterator implements ArrayOfDoublesSketchIterator { - private Memory mem_; + private MemorySegment seg_; private int offset_; private int numEntries_; private int numValues_; @@ -38,9 +41,9 @@ final class DirectArrayOfDoublesSketchIterator implements ArrayOfDoublesSketchIt private static final int SIZE_OF_KEY_BYTES = 8; private static final int SIZE_OF_VALUE_BYTES = 8; - DirectArrayOfDoublesSketchIterator(final Memory mem, final int offset, final int numEntries, + DirectArrayOfDoublesSketchIterator(final MemorySegment seg, final int offset, final int numEntries, final int numValues) { - mem_ = mem; + seg_ = seg; offset_ = offset; numEntries_ = numEntries; numValues_ = numValues; @@ -51,7 +54,8 @@ final class DirectArrayOfDoublesSketchIterator implements ArrayOfDoublesSketchIt public boolean next() { i_++; while (i_ < numEntries_) { - if (mem_.getLong(offset_ + ((long) SIZE_OF_KEY_BYTES * i_)) != 0) { return true; } + final long off = offset_ + ((long) SIZE_OF_KEY_BYTES * i_); + if (seg_.get(JAVA_LONG_UNALIGNED, off) != 0) { return true; } i_++; } return false; @@ -59,19 +63,20 @@ public boolean next() { @Override public long getKey() { - return mem_.getLong(offset_ + ((long) SIZE_OF_KEY_BYTES * i_)); + final long off = offset_ + ((long) SIZE_OF_KEY_BYTES * i_); + return seg_.get(JAVA_LONG_UNALIGNED, off); } @Override public double[] getValues() { + long off; if (numValues_ == 1) { - return new double[] { - mem_.getDouble(offset_ + ((long) SIZE_OF_KEY_BYTES * numEntries_) - + ((long) SIZE_OF_VALUE_BYTES * i_)) }; + off = offset_ + ((long) SIZE_OF_KEY_BYTES * numEntries_) + ((long) SIZE_OF_VALUE_BYTES * i_); + return new double[] { seg_.get(JAVA_DOUBLE_UNALIGNED, off) }; } final double[] array = new double[numValues_]; - mem_.getDoubleArray(offset_ + ((long) SIZE_OF_KEY_BYTES * numEntries_) - + ((long) SIZE_OF_VALUE_BYTES * i_ * numValues_), array, 0, numValues_); + off = offset_ + ((long) SIZE_OF_KEY_BYTES * 
numEntries_) + ((long) SIZE_OF_VALUE_BYTES * i_ * numValues_); + MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, off, array, 0, numValues_); return array; } diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnion.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnion.java index 734019632..0c7242088 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnion.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnion.java @@ -19,73 +19,74 @@ package org.apache.datasketches.tuple.arrayofdoubles; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; + +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.tuple.SerializerDeserializer; /** * Direct Union operation for tuple sketches of type ArrayOfDoubles. * - *

              This implementation uses data in a given Memory that is owned and managed by the caller. - * This Memory can be off-heap, which if managed properly will greatly reduce the need for + *

              This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

              */ class DirectArrayOfDoublesUnion extends ArrayOfDoublesUnion { - final WritableMemory mem_; + final MemorySegment seg_; /** * Creates an instance of DirectArrayOfDoublesUnion - * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than - * given value. + * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than given value. * @param numValues Number of double values to keep for each key. * @param seed See seed - * @param dstMem See Memory + * @param dstSeg the destination MemorySegment */ DirectArrayOfDoublesUnion(final int nomEntries, final int numValues, final long seed, - final WritableMemory dstMem) { + final MemorySegment dstSeg) { super(new DirectArrayOfDoublesQuickSelectSketch(nomEntries, 3, 1f, numValues, seed, - dstMem.writableRegion(PREAMBLE_SIZE_BYTES, dstMem.getCapacity() - PREAMBLE_SIZE_BYTES))); - mem_ = dstMem; - mem_.putByte(PREAMBLE_LONGS_BYTE, (byte) 1); // unused, always 1 - mem_.putByte(SERIAL_VERSION_BYTE, serialVersionUID); - mem_.putByte(FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); - mem_.putByte(SKETCH_TYPE_BYTE, (byte) SerializerDeserializer.SketchType.ArrayOfDoublesUnion.ordinal()); - mem_.putLong(THETA_LONG, gadget_.getThetaLong()); + dstSeg.asSlice(PREAMBLE_SIZE_BYTES, dstSeg.byteSize() - PREAMBLE_SIZE_BYTES))); + seg_ = dstSeg; + seg_.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1); // unused, always 1 + seg_.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID); + seg_.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); + seg_.set(JAVA_BYTE, SKETCH_TYPE_BYTE, (byte) SerializerDeserializer.SketchType.ArrayOfDoublesUnion.ordinal()); + seg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, gadget_.getThetaLong()); } //Called from wrapUnion below and extended by DirectArrayOfDoublesUnionR - DirectArrayOfDoublesUnion(final ArrayOfDoublesQuickSelectSketch gadget, final WritableMemory mem) { + DirectArrayOfDoublesUnion(final ArrayOfDoublesQuickSelectSketch gadget, 
final MemorySegment seg) { super(gadget); - mem_ = mem; - unionThetaLong_ = mem.getLong(THETA_LONG); + seg_ = seg; + unionThetaLong_ = seg.get(JAVA_LONG_UNALIGNED, THETA_LONG); } @Override void setUnionThetaLong(final long thetaLong) { super.setUnionThetaLong(thetaLong); - mem_.putLong(THETA_LONG, thetaLong); + seg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong); } - static ArrayOfDoublesUnion wrapUnion(final WritableMemory mem, final long seed, final boolean isWritable) { - final byte version = mem.getByte(ArrayOfDoublesUnion.SERIAL_VERSION_BYTE); + static ArrayOfDoublesUnion wrapUnion(final MemorySegment seg, final long seed, final boolean isWritable) { + final byte version = seg.get(JAVA_BYTE, ArrayOfDoublesUnion.SERIAL_VERSION_BYTE); if (version != ArrayOfDoublesUnion.serialVersionUID) { throw new SketchesArgumentException("Serial version mismatch. Expected: " + serialVersionUID + ", actual: " + version); } - SerializerDeserializer.validateFamily(mem.getByte(FAMILY_ID_BYTE), mem.getByte(PREAMBLE_LONGS_BYTE)); - SerializerDeserializer.validateType(mem.getByte(SKETCH_TYPE_BYTE), + SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE), seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE)); + SerializerDeserializer.validateType(seg.get(JAVA_BYTE, SKETCH_TYPE_BYTE), SerializerDeserializer.SketchType.ArrayOfDoublesUnion); if (isWritable) { - final WritableMemory sketchMem = mem.writableRegion(PREAMBLE_SIZE_BYTES, - mem.getCapacity() - PREAMBLE_SIZE_BYTES); - return new DirectArrayOfDoublesUnion(new DirectArrayOfDoublesQuickSelectSketch(sketchMem, seed), mem); + final MemorySegment sketchSeg = seg.asSlice(PREAMBLE_SIZE_BYTES, seg.byteSize() - PREAMBLE_SIZE_BYTES); + return new DirectArrayOfDoublesUnion(new DirectArrayOfDoublesQuickSelectSketch(sketchSeg, seed), seg); } - final Memory sketchMem = mem.region(PREAMBLE_SIZE_BYTES, mem.getCapacity() - PREAMBLE_SIZE_BYTES); - return new DirectArrayOfDoublesUnionR(new DirectArrayOfDoublesQuickSelectSketchR(sketchMem, 
seed), mem); + final MemorySegment sketchSeg = seg.asSlice(PREAMBLE_SIZE_BYTES, seg.byteSize() - PREAMBLE_SIZE_BYTES); + return new DirectArrayOfDoublesUnionR(new DirectArrayOfDoublesQuickSelectSketchR(sketchSeg, seed), seg); } } diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnionR.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnionR.java index f865d823b..164526c96 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnionR.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnionR.java @@ -19,18 +19,19 @@ package org.apache.datasketches.tuple.arrayofdoubles; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.SketchesReadOnlyException; -import org.apache.datasketches.memory.WritableMemory; final class DirectArrayOfDoublesUnionR extends DirectArrayOfDoublesUnion { /** - * Wraps the given Memory. + * Wraps the given MemorySegment. 
* @param gadget the ArrayOfDoublesQuickSelectSketch - * @param mem See Memory + * @param seg the destination MemorySegment */ - DirectArrayOfDoublesUnionR(final ArrayOfDoublesQuickSelectSketch gadget, final WritableMemory mem) { - super(gadget, mem); + DirectArrayOfDoublesUnionR(final ArrayOfDoublesQuickSelectSketch gadget, final MemorySegment seg) { + super(gadget, seg); } @Override diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java index e57dd9552..b8f6fd863 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java @@ -19,15 +19,19 @@ package org.apache.datasketches.tuple.arrayofdoubles; -import static org.apache.datasketches.common.Util.DEFAULT_UPDATE_SEED; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; +import java.lang.foreign.MemorySegment; import java.nio.ByteOrder; import java.util.Arrays; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.common.Util; import org.apache.datasketches.tuple.SerializerDeserializer; /** @@ -57,7 +61,7 @@ final class HeapArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketch super(sketch.getNumValues()); isEmpty_ = sketch.isEmpty(); thetaLong_ = Math.min(sketch.getThetaLong(), thetaLong); - seedHash_ = 
org.apache.datasketches.common.Util.computeSeedHash(sketch.getSeed()); + seedHash_ = Util.computeSeedHash(sketch.getSeed()); final int count = sketch.getRetainedEntries(); if (count > 0) { keys_ = new long[count]; @@ -100,57 +104,57 @@ final class HeapArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketch /** * This is to create an instance given a serialized form - * @param mem See Memory + * @param seg the destination segment */ - HeapArrayOfDoublesCompactSketch(final Memory mem) { - this(mem, DEFAULT_UPDATE_SEED); + HeapArrayOfDoublesCompactSketch(final MemorySegment seg) { + this(seg, Util.DEFAULT_UPDATE_SEED); } /** * This is to create an instance given a serialized form - * @param mem See Memory + * @param seg the source MemorySegment * @param seed See seed */ - HeapArrayOfDoublesCompactSketch(final Memory mem, final long seed) { - super(mem.getByte(NUM_VALUES_BYTE)); - seedHash_ = mem.getShort(SEED_HASH_SHORT); - SerializerDeserializer.validateFamily(mem.getByte(FAMILY_ID_BYTE), - mem.getByte(PREAMBLE_LONGS_BYTE)); - SerializerDeserializer.validateType(mem.getByte(SKETCH_TYPE_BYTE), + HeapArrayOfDoublesCompactSketch(final MemorySegment seg, final long seed) { + super(seg.get(JAVA_BYTE, NUM_VALUES_BYTE)); + seedHash_ = seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT); + SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE), + seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE)); + SerializerDeserializer.validateType(seg.get(JAVA_BYTE, SKETCH_TYPE_BYTE), SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch); - final byte version = mem.getByte(SERIAL_VERSION_BYTE); + final byte version = seg.get(JAVA_BYTE, SERIAL_VERSION_BYTE); if (version != serialVersionUID) { throw new SketchesArgumentException( "Serial version mismatch. 
Expected: " + serialVersionUID + ", actual: " + version); } final boolean isBigEndian = - (mem.getByte(FLAGS_BYTE) & (1 << Flags.IS_BIG_ENDIAN.ordinal())) != 0; + (seg.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_BIG_ENDIAN.ordinal())) != 0; if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { throw new SketchesArgumentException("Byte order mismatch"); } - org.apache.datasketches.common.Util.checkSeedHashes(seedHash_, org.apache.datasketches.common.Util.computeSeedHash(seed)); - isEmpty_ = (mem.getByte(FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; - thetaLong_ = mem.getLong(THETA_LONG); + Util.checkSeedHashes(seedHash_, Util.computeSeedHash(seed)); + isEmpty_ = (seg.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; + thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, THETA_LONG); final boolean hasEntries = - (mem.getByte(FLAGS_BYTE) & (1 << Flags.HAS_ENTRIES.ordinal())) != 0; + (seg.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.HAS_ENTRIES.ordinal())) != 0; if (hasEntries) { - final int count = mem.getInt(RETAINED_ENTRIES_INT); + final int count = seg.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT); keys_ = new long[count]; values_ = new double[count * numValues_]; - mem.getLongArray(ENTRIES_START, keys_, 0, count); - mem.getDoubleArray(ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * count), values_, 0, values_.length); + MemorySegment.copy(seg, JAVA_LONG_UNALIGNED, ENTRIES_START, keys_, 0, count); + MemorySegment.copy(seg, JAVA_DOUBLE_UNALIGNED, ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * count), values_, 0, values_.length); } } @Override - public ArrayOfDoublesCompactSketch compact(final WritableMemory dstMem) { - if (dstMem == null) { + public ArrayOfDoublesCompactSketch compact(final MemorySegment dstSeg) { + if (dstSeg == null) { return new HeapArrayOfDoublesCompactSketch(keys_.clone(), values_.clone(), thetaLong_, isEmpty_, numValues_, seedHash_); } else { final byte[] byteArr = this.toByteArray(); - dstMem.putByteArray(0, byteArr, 0, 
byteArr.length); - return new DirectArrayOfDoublesCompactSketch(dstMem); + MemorySegment.copy(byteArr, 0, dstSeg, JAVA_BYTE, 0, byteArr.length); + return new DirectArrayOfDoublesCompactSketch(dstSeg); } } @@ -164,25 +168,24 @@ public byte[] toByteArray() { final int count = getRetainedEntries(); final int sizeBytes = getCurrentBytes(); final byte[] bytes = new byte[sizeBytes]; - final WritableMemory mem = WritableMemory.writableWrap(bytes); - mem.putByte(PREAMBLE_LONGS_BYTE, (byte) 1); - mem.putByte(SERIAL_VERSION_BYTE, serialVersionUID); - mem.putByte(FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); - mem.putByte(SKETCH_TYPE_BYTE, - (byte) SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch.ordinal()); + final MemorySegment seg = MemorySegment.ofArray(bytes); + seg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1); + seg.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID); + seg.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); + seg.set(JAVA_BYTE, SKETCH_TYPE_BYTE, (byte) SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch.ordinal()); final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN); - mem.putByte(FLAGS_BYTE, (byte) ( + seg.set(JAVA_BYTE, FLAGS_BYTE, (byte) ( ((isBigEndian ? 1 : 0) << Flags.IS_BIG_ENDIAN.ordinal()) | ((isEmpty() ? 1 : 0) << Flags.IS_EMPTY.ordinal()) | ((count > 0 ? 
1 : 0) << Flags.HAS_ENTRIES.ordinal()) )); - mem.putByte(NUM_VALUES_BYTE, (byte) numValues_); - mem.putShort(SEED_HASH_SHORT, seedHash_); - mem.putLong(THETA_LONG, thetaLong_); + seg.set(JAVA_BYTE, NUM_VALUES_BYTE, (byte) numValues_); + seg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, seedHash_); + seg.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); if (count > 0) { - mem.putInt(RETAINED_ENTRIES_INT, count); - mem.putLongArray(ENTRIES_START, keys_, 0, count); - mem.putDoubleArray(ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * count), values_, 0, values_.length); + seg.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, count); + MemorySegment.copy(keys_, 0, seg, JAVA_LONG_UNALIGNED, ENTRIES_START, count); + MemorySegment.copy(values_, 0, seg, JAVA_DOUBLE_UNALIGNED, ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * count), values_.length); } return bytes; } @@ -222,8 +225,8 @@ short getSeedHash() { } @Override - public boolean hasMemory() { return false; } + public boolean hasMemorySegment() { return false; } @Override - Memory getMemory() { return null; } + MemorySegment getMemorySegment() { return null; } } diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesIntersection.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesIntersection.java index 39b8fb340..a87b349bb 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesIntersection.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesIntersection.java @@ -35,8 +35,7 @@ final class HeapArrayOfDoublesIntersection extends ArrayOfDoublesIntersection { } @Override - protected ArrayOfDoublesQuickSelectSketch createSketch(final int nomEntries, final int numValues, - final long seed) { + protected ArrayOfDoublesQuickSelectSketch createSketch(final int nomEntries, final int numValues, final long seed) { return new HeapArrayOfDoublesQuickSelectSketch(nomEntries, 0, 1f, numValues, seed); } diff --git 
a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java index 0aad041ec..beab988d3 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java @@ -19,19 +19,24 @@ package org.apache.datasketches.tuple.arrayofdoubles; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; import static org.apache.datasketches.common.Util.ceilingPowerOf2; import static org.apache.datasketches.common.Util.checkSeedHashes; import static org.apache.datasketches.common.Util.computeSeedHash; import static org.apache.datasketches.common.Util.exactLog2OfLong; +import java.lang.foreign.MemorySegment; import java.nio.ByteOrder; import java.util.Arrays; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.HashOperations; import org.apache.datasketches.tuple.SerializerDeserializer; import org.apache.datasketches.tuple.Util; @@ -81,40 +86,41 @@ final class HeapArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelec /** * This is to create an instance given a serialized form - * @param mem See Memory + * @param seg the source MemorySegment * @param seed See seed */ - HeapArrayOfDoublesQuickSelectSketch(final 
Memory mem, final long seed) { - super(mem.getByte(NUM_VALUES_BYTE), seed); - SerializerDeserializer.validateFamily(mem.getByte(FAMILY_ID_BYTE), - mem.getByte(PREAMBLE_LONGS_BYTE)); - SerializerDeserializer.validateType(mem.getByte(SKETCH_TYPE_BYTE), + HeapArrayOfDoublesQuickSelectSketch(final MemorySegment seg, final long seed) { + super(seg.get(JAVA_BYTE, NUM_VALUES_BYTE), seed); + SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE), + seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE)); + SerializerDeserializer.validateType(seg.get(JAVA_BYTE, SKETCH_TYPE_BYTE), SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch); - final byte version = mem.getByte(SERIAL_VERSION_BYTE); + final byte version = seg.get(JAVA_BYTE, SERIAL_VERSION_BYTE); if (version != serialVersionUID) { throw new SketchesArgumentException("Serial version mismatch. Expected: " + serialVersionUID + ", actual: " + version); } - final byte flags = mem.getByte(FLAGS_BYTE); + final byte flags = seg.get(JAVA_BYTE, FLAGS_BYTE); final boolean isBigEndian = (flags & (1 << Flags.IS_BIG_ENDIAN.ordinal())) > 0; if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { throw new SketchesArgumentException("Byte order mismatch"); } - checkSeedHashes(mem.getShort(SEED_HASH_SHORT), computeSeedHash(seed)); + checkSeedHashes(seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT), computeSeedHash(seed)); isEmpty_ = (flags & (1 << Flags.IS_EMPTY.ordinal())) > 0; - lgNomEntries_ = mem.getByte(LG_NOM_ENTRIES_BYTE); - thetaLong_ = mem.getLong(THETA_LONG); - final int currentCapacity = 1 << mem.getByte(LG_CUR_CAPACITY_BYTE); - lgResizeFactor_ = mem.getByte(LG_RESIZE_FACTOR_BYTE); - samplingProbability_ = mem.getFloat(SAMPLING_P_FLOAT); + lgNomEntries_ = seg.get(JAVA_BYTE, LG_NOM_ENTRIES_BYTE); + thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, THETA_LONG); + final int currentCapacity = 1 << seg.get(JAVA_BYTE, LG_CUR_CAPACITY_BYTE); + lgResizeFactor_ = seg.get(JAVA_BYTE, LG_RESIZE_FACTOR_BYTE); + 
samplingProbability_ = seg.get(JAVA_FLOAT_UNALIGNED, SAMPLING_P_FLOAT); keys_ = new long[currentCapacity]; values_ = new double[currentCapacity * numValues_]; final boolean hasEntries = (flags & (1 << Flags.HAS_ENTRIES.ordinal())) > 0; - count_ = hasEntries ? mem.getInt(RETAINED_ENTRIES_INT) : 0; + count_ = hasEntries ? seg.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT) : 0; if (count_ > 0) { - mem.getLongArray(ENTRIES_START, keys_, 0, currentCapacity); - mem.getDoubleArray(ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * currentCapacity), values_, 0, - currentCapacity * numValues_); + MemorySegment.copy(seg, JAVA_LONG_UNALIGNED, ENTRIES_START, keys_, 0, currentCapacity); + final long off = ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * currentCapacity); + MemorySegment.copy(seg, JAVA_DOUBLE_UNALIGNED, off, values_, 0, currentCapacity * numValues_); + } setRebuildThreshold(); lgCurrentCapacity_ = Integer.numberOfTrailingZeros(currentCapacity); @@ -193,8 +199,8 @@ public ResizeFactor getResizeFactor() { @Override public byte[] toByteArray() { final byte[] byteArray = new byte[getSerializedSizeBytes()]; - final WritableMemory mem = WritableMemory.writableWrap(byteArray); - serializeInto(mem); + final MemorySegment seg = MemorySegment.ofArray(byteArray); + serializeInto(seg); return byteArray; } @@ -226,38 +232,39 @@ int getSerializedSizeBytes() { // || Values Array doubles * values[] Length | @Override - void serializeInto(final WritableMemory mem) { - mem.putByte(PREAMBLE_LONGS_BYTE, (byte) 1); - mem.putByte(SERIAL_VERSION_BYTE, serialVersionUID); - mem.putByte(FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); - mem.putByte(SKETCH_TYPE_BYTE, + void serializeInto(final MemorySegment seg) { + seg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1); + seg.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID); + seg.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); + seg.set(JAVA_BYTE, SKETCH_TYPE_BYTE, (byte) 
SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch.ordinal()); final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN); - mem.putByte(FLAGS_BYTE, (byte)( + seg.set(JAVA_BYTE, FLAGS_BYTE, (byte)( (isBigEndian ? 1 << Flags.IS_BIG_ENDIAN.ordinal() : 0) | (isInSamplingMode() ? 1 << Flags.IS_IN_SAMPLING_MODE.ordinal() : 0) | (isEmpty_ ? 1 << Flags.IS_EMPTY.ordinal() : 0) | (count_ > 0 ? 1 << Flags.HAS_ENTRIES.ordinal() : 0) )); - mem.putByte(NUM_VALUES_BYTE, (byte) numValues_); - mem.putShort(SEED_HASH_SHORT, org.apache.datasketches.common.Util.computeSeedHash(seed_)); - mem.putLong(THETA_LONG, thetaLong_); - mem.putByte(LG_NOM_ENTRIES_BYTE, (byte) lgNomEntries_); - mem.putByte(LG_CUR_CAPACITY_BYTE, (byte) Integer.numberOfTrailingZeros(keys_.length)); - mem.putByte(LG_RESIZE_FACTOR_BYTE, (byte) lgResizeFactor_); - mem.putFloat(SAMPLING_P_FLOAT, samplingProbability_); - mem.putInt(RETAINED_ENTRIES_INT, count_); + seg.set(JAVA_BYTE, NUM_VALUES_BYTE, (byte) numValues_); + seg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, computeSeedHash(seed_)); + seg.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); + seg.set(JAVA_BYTE, LG_NOM_ENTRIES_BYTE, (byte) lgNomEntries_); + seg.set(JAVA_BYTE, LG_CUR_CAPACITY_BYTE, (byte) Integer.numberOfTrailingZeros(keys_.length)); + seg.set(JAVA_BYTE, LG_RESIZE_FACTOR_BYTE, (byte) lgResizeFactor_); + seg.set(JAVA_FLOAT_UNALIGNED, SAMPLING_P_FLOAT, samplingProbability_); + seg.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, count_); if (count_ > 0) { - mem.putLongArray(ENTRIES_START, keys_, 0, keys_.length); - mem.putDoubleArray(ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * keys_.length), values_, 0, values_.length); + MemorySegment.copy(keys_, 0, seg, JAVA_LONG_UNALIGNED, ENTRIES_START, keys_.length); + final long off = ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * keys_.length); + MemorySegment.copy(values_, 0, seg, JAVA_DOUBLE_UNALIGNED, off, values_.length); } } @Override - public boolean hasMemory() { return 
false; } + public boolean hasMemorySegment() { return false; } @Override - Memory getMemory() { return null; } + MemorySegment getMemorySegment() { return null; } @Override public void reset() { diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesUnion.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesUnion.java index f112f1823..7a9b7fd75 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesUnion.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesUnion.java @@ -19,8 +19,12 @@ package org.apache.datasketches.tuple.arrayofdoubles; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; + +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; import org.apache.datasketches.tuple.SerializerDeserializer; /** @@ -47,23 +51,23 @@ final class HeapArrayOfDoublesUnion extends ArrayOfDoublesUnion { /** * This is to create an instance given a serialized form and a custom seed - * @param mem See Memory + * @param seg the source MemorySegment * @param seed See seed * @return a ArrayOfDoublesUnion on the Java heap */ - static ArrayOfDoublesUnion heapifyUnion(final Memory mem, final long seed) { - final byte version = mem.getByte(SERIAL_VERSION_BYTE); + static ArrayOfDoublesUnion heapifyUnion(final MemorySegment seg, final long seed) { + final byte version = seg.get(JAVA_BYTE, SERIAL_VERSION_BYTE); if (version != serialVersionUID) { throw new SketchesArgumentException("Serial version mismatch. 
Expected: " + serialVersionUID + ", actual: " + version); } - SerializerDeserializer.validateFamily(mem.getByte(FAMILY_ID_BYTE), mem.getByte(PREAMBLE_LONGS_BYTE)); - SerializerDeserializer.validateType(mem.getByte(SKETCH_TYPE_BYTE), + SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE), seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE)); + SerializerDeserializer.validateType(seg.get(JAVA_BYTE, SKETCH_TYPE_BYTE), SerializerDeserializer.SketchType.ArrayOfDoublesUnion); - final Memory sketchMem = mem.region(PREAMBLE_SIZE_BYTES, mem.getCapacity() - PREAMBLE_SIZE_BYTES); - final ArrayOfDoublesQuickSelectSketch sketch = new HeapArrayOfDoublesQuickSelectSketch(sketchMem, seed); - return new HeapArrayOfDoublesUnion(sketch, mem.getLong(THETA_LONG)); + final MemorySegment sketchSeg = seg.asSlice(PREAMBLE_SIZE_BYTES, seg.byteSize() - PREAMBLE_SIZE_BYTES); + final ArrayOfDoublesQuickSelectSketch sketch = new HeapArrayOfDoublesQuickSelectSketch(sketchSeg, seed); + return new HeapArrayOfDoublesUnion(sketch, seg.get(JAVA_LONG_UNALIGNED, THETA_LONG)); } } diff --git a/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketch.java b/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketch.java index 99ddeee16..66ba4f96e 100644 --- a/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketch.java @@ -21,8 +21,9 @@ import static org.apache.datasketches.tuple.Util.stringArrHash; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.memory.Memory; import org.apache.datasketches.tuple.UpdatableSketch; /** @@ -62,16 +63,16 @@ public ArrayOfStringsSketch(final int lgK, final ResizeFactor rf, final float p) } /** - * Constructs this sketch from a Memory image, which must be from an ArrayOfStringsSketch, and + * Constructs this sketch from a MemorySegment image, which must be from 
an ArrayOfStringsSketch, and * usually with data. - * @param mem the given Memory + * @param seg the given MemorySegment * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated. * This capability will be removed in a future release. * Heapifying a CompactSketch is not deprecated. */ @Deprecated - public ArrayOfStringsSketch(final Memory mem) { - super(mem, new ArrayOfStringsSummaryDeserializer(), new ArrayOfStringsSummaryFactory()); + public ArrayOfStringsSketch(final MemorySegment seg) { + super(seg, new ArrayOfStringsSummaryDeserializer(), new ArrayOfStringsSummaryFactory()); } /** diff --git a/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary.java b/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary.java index 48b2d6d6a..12d587398 100644 --- a/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary.java +++ b/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary.java @@ -19,15 +19,15 @@ package org.apache.datasketches.tuple.strings; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.datasketches.tuple.Util.stringArrHash; import static org.apache.datasketches.tuple.Util.stringConcat; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Buffer; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableBuffer; -import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.tuple.UpdatableSummary; /** @@ -36,55 +36,76 @@ */ public final class ArrayOfStringsSummary implements UpdatableSummary { - private String[] nodesArr = null; + private String[] stringArr = null; ArrayOfStringsSummary() { //required for ArrayOfStringsSummaryFactory - nodesArr = null; + stringArr = null; 
} //Used by copy() and in test - ArrayOfStringsSummary(final String[] nodesArr) { - this.nodesArr = nodesArr.clone(); - checkNumNodes(nodesArr.length); + ArrayOfStringsSummary(final String[] stringArr) { + this.stringArr = stringArr.clone(); + checkNumNodes(stringArr.length); } - //used by fromMemory and in test - ArrayOfStringsSummary(final Memory mem) { - final Buffer buf = mem.asBuffer(); - final int totBytes = buf.getInt(); - checkInBytes(mem, totBytes); - final int nodes = buf.getByte(); + //used by fromMemorySegment and in test + /** + * This reads a MemorySegment that has a layout similar to the C struct: + * {@snippet : + * typedef struct { + * int totBytes; + * byte nodes; //number of Nodes. + * Node[nodes] = { Node[0], Node[1], ... } + * } + * } + * Where a Node has a layout similar to the C struct: + * {@snippet : + * typedef struct { + * int numBytes; + * byte[] byteArray; //UTF-8 byte array. Not null terminated. + * } + * } + * @param seg the MemorySegment containing the Summary data + */ + ArrayOfStringsSummary(final MemorySegment seg) { + int pos = 0; + final int totBytes = seg.get(JAVA_INT_UNALIGNED, pos); pos += Integer.BYTES; + checkInBytes(seg, totBytes); + final int nodes = seg.get(JAVA_BYTE, pos); pos += Byte.BYTES; checkNumNodes(nodes); - final String[] nodesArr = new String[nodes]; + final String[] stringArr = new String[nodes]; for (int i = 0; i < nodes; i++) { - final int len = buf.getInt(); + final int len = seg.get(JAVA_INT_UNALIGNED, pos); pos += Integer.BYTES; final byte[] byteArr = new byte[len]; - buf.getByteArray(byteArr, 0, len); - nodesArr[i] = new String(byteArr, UTF_8); + MemorySegment.copy(seg, JAVA_BYTE, pos, byteArr, 0, len); pos += len; + stringArr[i] = new String(byteArr, UTF_8); } - this.nodesArr = nodesArr; + assert pos == totBytes; + this.stringArr = stringArr; } @Override public ArrayOfStringsSummary copy() { - final ArrayOfStringsSummary nodes = new ArrayOfStringsSummary(nodesArr); + final ArrayOfStringsSummary nodes = 
new ArrayOfStringsSummary(stringArr); return nodes; } @Override public byte[] toByteArray() { - final ComputeBytes cb = new ComputeBytes(nodesArr); + final ComputeBytes cb = new ComputeBytes(stringArr); final int totBytes = cb.totBytes_; final byte[] out = new byte[totBytes]; - final WritableMemory wmem = WritableMemory.writableWrap(out); - final WritableBuffer wbuf = wmem.asWritableBuffer(); - wbuf.putInt(totBytes); - wbuf.putByte(cb.numNodes_); - for (int i = 0; i < cb.numNodes_; i++) { - wbuf.putInt(cb.nodeLengthsArr_[i]); - wbuf.putByteArray(cb.nodeBytesArr_[i], 0, cb.nodeLengthsArr_[i]); + final MemorySegment wseg = MemorySegment.ofArray(out); + int pos = 0; + wseg.set(JAVA_INT_UNALIGNED, pos, totBytes); pos += Integer.BYTES; + final int numNodes = cb.numNodes_; + wseg.set(JAVA_BYTE, pos, (byte)numNodes); pos += Byte.BYTES; + for (int i = 0; i < numNodes; i++) { + final int nodeLen = cb.nodeLengthsArr_[i]; + wseg.set(JAVA_INT_UNALIGNED, pos, nodeLen); pos += Integer.BYTES; + MemorySegment.copy(cb.nodeBytesArr_[i], 0, wseg, JAVA_BYTE, pos, nodeLen); pos += nodeLen; } - assert wbuf.getPosition() == totBytes; + assert pos == totBytes; return out; } @@ -92,8 +113,8 @@ public byte[] toByteArray() { @Override public ArrayOfStringsSummary update(final String[] value) { - if (nodesArr == null) { - nodesArr = value.clone(); + if (stringArr == null) { + stringArr = value.clone(); } return this; } @@ -102,7 +123,7 @@ public ArrayOfStringsSummary update(final String[] value) { @Override public int hashCode() { - return (int) stringArrHash(nodesArr); + return (int) stringArrHash(stringArr); } @Override @@ -110,8 +131,8 @@ public boolean equals(final Object summary) { if (summary == null || !(summary instanceof ArrayOfStringsSummary)) { return false; } - final String thatStr = stringConcat(((ArrayOfStringsSummary) summary).nodesArr); - final String thisStr = stringConcat(nodesArr); + final String thatStr = stringConcat(((ArrayOfStringsSummary) summary).stringArr); + final 
String thisStr = stringConcat(stringArr); return thisStr.equals(thatStr); } @@ -120,37 +141,40 @@ public boolean equals(final Object summary) { * @return the nodes array for this summary. */ public String[] getValue() { - return nodesArr.clone(); + return stringArr.clone(); } //also used in test static void checkNumNodes(final int numNodes) { - if (numNodes > 127) { - throw new SketchesArgumentException("Number of nodes cannot exceed 127."); + if (numNodes > 127 || numNodes < 0) { + throw new SketchesArgumentException("Number of nodes cannot exceed 127 or be negative."); } } //also used in test - static void checkInBytes(final Memory mem, final int totBytes) { - if (mem.getCapacity() < totBytes) { - throw new SketchesArgumentException("Incoming Memory has insufficient capacity."); + static void checkInBytes(final MemorySegment seg, final int totBytes) { + if (seg.byteSize() < totBytes) { + throw new SketchesArgumentException("Incoming MemorySegment has insufficient capacity."); } } + /** + * Computes total bytes and number of nodes from the given string array. 
+ */ private static class ComputeBytes { final byte numNodes_; final int[] nodeLengthsArr_; final byte[][] nodeBytesArr_; final int totBytes_; - ComputeBytes(final String[] nodesArr) { - numNodes_ = (byte) nodesArr.length; + ComputeBytes(final String[] stringArr) { + numNodes_ = (byte) stringArr.length; checkNumNodes(numNodes_); nodeLengthsArr_ = new int[numNodes_]; nodeBytesArr_ = new byte[numNodes_][]; int sumNodeBytes = 0; for (int i = 0; i < numNodes_; i++) { - nodeBytesArr_[i] = nodesArr[i].getBytes(UTF_8); + nodeBytesArr_[i] = stringArr[i].getBytes(UTF_8); nodeLengthsArr_[i] = nodeBytesArr_[i].length; sumNodeBytes += nodeLengthsArr_[i]; } diff --git a/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummaryDeserializer.java b/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummaryDeserializer.java index 16467f338..5363605d4 100644 --- a/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummaryDeserializer.java +++ b/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummaryDeserializer.java @@ -19,7 +19,10 @@ package org.apache.datasketches.tuple.strings; -import org.apache.datasketches.memory.Memory; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; + +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.tuple.DeserializeResult; import org.apache.datasketches.tuple.SummaryDeserializer; @@ -30,18 +33,18 @@ public class ArrayOfStringsSummaryDeserializer implements SummaryDeserializer { @Override - public DeserializeResult heapifySummary(final Memory mem) { - return ArrayOfStringsSummaryDeserializer.fromMemory(mem); + public DeserializeResult heapifySummary(final MemorySegment seg) { + return ArrayOfStringsSummaryDeserializer.fromMemorySegment(seg); } /** * Also used in test. 
- * @param mem the given memory + * @param seg the given MemorySegment * @return the DeserializeResult */ - static DeserializeResult fromMemory(final Memory mem) { - final ArrayOfStringsSummary nsum = new ArrayOfStringsSummary(mem); - final int totBytes = mem.getInt(0); + static DeserializeResult fromMemorySegment(final MemorySegment seg) { + final ArrayOfStringsSummary nsum = new ArrayOfStringsSummary(seg); + final int totBytes = seg.get(JAVA_INT_UNALIGNED, 0); return new DeserializeResult<>(nsum, totBytes); } diff --git a/src/main/java/org/apache/datasketches/tuple2/AnotB.java b/src/main/java/org/apache/datasketches/tuple2/AnotB.java deleted file mode 100644 index 72a94dfc3..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/AnotB.java +++ /dev/null @@ -1,636 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2; - -import static java.lang.Math.min; -import static org.apache.datasketches.common.Util.exactLog2OfLong; -import static org.apache.datasketches.thetacommon2.HashOperations.convertToHashTable; -import static org.apache.datasketches.thetacommon2.HashOperations.hashSearch; - -import java.lang.reflect.Method; -import java.util.Arrays; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.SketchesStateException; -import org.apache.datasketches.common.SuppressFBWarnings; -import org.apache.datasketches.thetacommon2.SetOperationCornerCases; -import org.apache.datasketches.thetacommon2.SetOperationCornerCases.AnotbAction; -import org.apache.datasketches.thetacommon2.SetOperationCornerCases.CornerCase; -import org.apache.datasketches.thetacommon2.ThetaUtil; - -/** - * Computes a set difference, A-AND-NOT-B, of two generic tuple sketches. - * This class includes both stateful and stateless operations. - * - *

              The stateful operation is as follows:

              - *
              
              - * AnotB anotb = new AnotB();
              - *
              - * anotb.setA(Sketch skA); //The first argument.
              - * anotb.notB(Sketch skB); //The second (subtraction) argument.
              - * anotb.notB(Sketch skC); // ...any number of additional subtractions...
              - * anotb.getResult(false); //Get an interim result.
              - * anotb.notB(Sketch skD); //Additional subtractions.
              - * anotb.getResult(true);  //Final result and resets the AnotB operator.
              - * 
              - * - *

              The stateless operation is as follows:

              - *
              
              - * AnotB anotb = new AnotB();
              - *
              - * CompactSketch csk = anotb.aNotB(Sketch skA, Sketch skB);
              - * 
              - * - *

              Calling the setA operation a second time essentially clears the internal state and loads - * the new sketch.

              - * - *

              The stateless and stateful operations are independent of each other.

              - * - * @param Type of Summary - * - * @author Lee Rhodes - */ -@SuppressFBWarnings(value = "DP_DO_INSIDE_DO_PRIVILEGED", justification = "Defer fix") -public final class AnotB { - private boolean empty_ = true; - private long thetaLong_ = Long.MAX_VALUE; - private long[] hashArr_ = null; //always in compact form, not necessarily sorted - private S[] summaryArr_ = null; //always in compact form, not necessarily sorted - private int curCount_ = 0; - - private static final Method GET_CACHE; - - static { - try { - GET_CACHE = org.apache.datasketches.theta2.Sketch.class.getDeclaredMethod("getCache"); - GET_CACHE.setAccessible(true); - } catch (final Exception e) { - throw new SketchesStateException("Could not reflect getCache(): " + e); - } - } - - /** - * This is part of a multistep, stateful AnotB operation and sets the given Tuple sketch as the - * first argument A of A-AND-NOT-B. This overwrites the internal state of this - * AnotB operator with the contents of the given sketch. - * This sets the stage for multiple following notB steps. - * - *

              An input argument of null will throw an exception.

              - * - *

              Rationale: In mathematics a "null set" is a set with no members, which we call an empty set. - * That is distinctly different from the java null, which represents a nonexistent object. - * In most cases it is a programming error due to some object that was not properly initialized. - * With a null as the first argument, we cannot know what the user's intent is. - * Since it is very likely that a null is a programming error, we throw a an exception.

              - * - *

              An empty input argument will set the internal state to empty.

              - * - *

              Rationale: An empty set is a mathematically legal concept. Although it makes any subsequent, - * valid argument for B irrelevant, we must allow this and assume the user knows what they are - * doing.

              - * - *

              Performing {@link #getResult(boolean)} just after this step will return a compact form of - * the given argument.

              - * - * @param skA The incoming sketch for the first argument, A. - */ - public void setA(final Sketch skA) { - if (skA == null) { - reset(); - throw new SketchesArgumentException("The input argument A may not be null"); - } - - empty_ = skA.isEmpty(); - thetaLong_ = skA.getThetaLong(); - final DataArrays da = getCopyOfDataArraysTuple(skA); - summaryArr_ = da.summaryArr; //it may be null - hashArr_ = da.hashArr; //it may be null - curCount_ = (hashArr_ == null) ? 0 : hashArr_.length; - } - - /** - * This is part of a multistep, stateful AnotB operation and sets the given Tuple sketch as the - * second (or n+1th) argument B of A-AND-NOT-B. - * Performs an AND NOT operation with the existing internal state of this AnotB operator. - * - *

              An input argument of null or empty is ignored.

              - * - *

              Rationale: A null for the second or following arguments is more tolerable because - * A NOT null is still A even if we don't know exactly what the null represents. It - * clearly does not have any content that overlaps with A. Also, because this can be part of - * a multistep operation with multiple notB steps. Other following steps can still produce - * a valid result.

              - * - *

              Use {@link #getResult(boolean)} to obtain the result.

              - * - * @param skB The incoming Tuple sketch for the second (or following) argument B. - */ - public void notB(final Sketch skB) { - if (skB == null) { return; } //ignore - - final long thetaLongB = skB.getThetaLong(); - final int countB = skB.getRetainedEntries(); - final boolean emptyB = skB.isEmpty(); - - final int id = - SetOperationCornerCases.createCornerCaseId(thetaLong_, curCount_, empty_, thetaLongB, countB, emptyB); - final CornerCase cCase = CornerCase.caseIdToCornerCase(id); - final AnotbAction anotbAction = cCase.getAnotbAction(); - - switch (anotbAction) { - case EMPTY_1_0_T: { - reset(); - break; - } - case DEGEN_MIN_0_F: { - reset(); - thetaLong_ = min(thetaLong_, thetaLongB); - empty_ = false; - break; - } - case DEGEN_THA_0_F: { - empty_ = false; - curCount_ = 0; - //thetaLong_ is ok - break; - } - case TRIM_A: { - thetaLong_ = min(thetaLong_, thetaLongB); - final DataArrays da = trimAndCopyDataArrays(hashArr_, summaryArr_, thetaLong_, true); - hashArr_ = da.hashArr; - curCount_ = (hashArr_ == null) ? 0 : hashArr_.length; - summaryArr_ = da.summaryArr; - //empty_ = is whatever SkA is, - break; - } - case SKETCH_A: { - break; //result is already in A - } - case FULL_ANOTB: { //both A and B should have valid entries. - thetaLong_ = min(thetaLong_, thetaLongB); - final DataArrays daR = getCopyOfResultArraysTuple(thetaLong_, curCount_, hashArr_, summaryArr_, skB); - hashArr_ = daR.hashArr; - curCount_ = (hashArr_ == null) ? 0 : hashArr_.length; - summaryArr_ = daR.summaryArr; - //empty_ = is whatever SkA is, - } - //default: not possible - } - } - - /** - * This is part of a multistep, stateful AnotB operation and sets the given Theta sketch as the - * second (or n+1th) argument B of A-AND-NOT-B. - * Performs an AND NOT operation with the existing internal state of this AnotB operator. - * Calls to this method can be intermingled with calls to - * {@link #notB(org.apache.datasketches.theta2.Sketch)}. - * - *

              An input argument of null or empty is ignored.

              - * - *

              Rationale: A null for the second or following arguments is more tolerable because - * A NOT null is still A even if we don't know exactly what the null represents. It - * clearly does not have any content that overlaps with A. Also, because this can be part of - * a multistep operation with multiple notB steps. Other following steps can still produce - * a valid result.

              - * - *

              Use {@link #getResult(boolean)} to obtain the result.

              - * - * @param skB The incoming Theta sketch for the second (or following) argument B. - */ - public void notB(final org.apache.datasketches.theta2.Sketch skB) { - if (skB == null) { return; } //ignore - - final long thetaLongB = skB.getThetaLong(); - final int countB = skB.getRetainedEntries(); - final boolean emptyB = skB.isEmpty(); - - final int id = - SetOperationCornerCases.createCornerCaseId(thetaLong_, curCount_, empty_, thetaLongB, countB, emptyB); - final CornerCase cCase = CornerCase.caseIdToCornerCase(id); - final AnotbAction anotbAction = cCase.getAnotbAction(); - - switch (anotbAction) { - case EMPTY_1_0_T: { - reset(); - break; - } - case DEGEN_MIN_0_F: { - reset(); - thetaLong_ = min(thetaLong_, thetaLongB); - empty_ = false; - break; - } - case DEGEN_THA_0_F: { - empty_ = false; - curCount_ = 0; - //thetaLong_ is ok - break; - } - case TRIM_A: { - thetaLong_ = min(thetaLong_, thetaLongB); - final DataArrays da = trimAndCopyDataArrays(hashArr_, summaryArr_,thetaLong_, true); - hashArr_ = da.hashArr; - curCount_ = (hashArr_ == null) ? 0 : hashArr_.length; - summaryArr_ = da.summaryArr; - break; - } - case SKETCH_A: { - break; //result is already in A - } - case FULL_ANOTB: { //both A and B should have valid entries. - thetaLong_ = min(thetaLong_, thetaLongB); - final DataArrays daB = getCopyOfResultArraysTheta(thetaLong_, curCount_, hashArr_, summaryArr_, skB); - hashArr_ = daB.hashArr; - curCount_ = (hashArr_ == null) ? 0 : hashArr_.length; - summaryArr_ = daB.summaryArr; - //empty_ = is whatever SkA is, - } - //default: not possible - } - } - - /** - * Gets the result of the multistep, stateful operation AnotB that have been executed with calls - * to {@link #setA(Sketch)} and ({@link #notB(Sketch)} or - * {@link #notB(org.apache.datasketches.theta2.Sketch)}). - * - * @param reset If true, clears this operator to the empty state after this result is - * returned. Set this to false if you wish to obtain an intermediate result. 
- * @return the result of this operation as an unordered {@link CompactSketch}. - */ - public CompactSketch getResult(final boolean reset) { - final CompactSketch result; - if (curCount_ == 0) { - result = new CompactSketch<>(null, null, thetaLong_, thetaLong_ == Long.MAX_VALUE); - } else { - - result = new CompactSketch<>(hashArr_, Util.copySummaryArray(summaryArr_), thetaLong_, false); - } - if (reset) { reset(); } - return result; - } - - /** - * Returns the A-and-not-B set operation on the two given Tuple sketches. - * - *

              This a stateless operation and has no impact on the internal state of this operator. - * Thus, this is not an accumulating update and is independent of the {@link #setA(Sketch)}, - * {@link #notB(Sketch)}, {@link #notB(org.apache.datasketches.theta2.Sketch)}, and - * {@link #getResult(boolean)} methods.

              - * - *

              If either argument is null an exception is thrown.

              - * - *

              Rationale: In mathematics a "null set" is a set with no members, which we call an empty set. - * That is distinctly different from the java null, which represents a nonexistent object. - * In most cases it is a programming error due to some object that was not properly initialized. - * With a null as the first argument, we cannot know what the user's intent is. - * With a null as the second argument, we can't ignore it as we must return a result and there is - * no following possible viable arguments for the second argument. - * Since it is very likely that a null is a programming error, we throw an exception.

              - * - * @param skA The incoming Tuple sketch for the first argument - * @param skB The incoming Tuple sketch for the second argument - * @param Type of Summary - * @return the result as an unordered {@link CompactSketch} - */ - @SuppressFBWarnings(value = "UWF_FIELD_NOT_INITIALIZED_IN_CONSTRUCTOR", - justification = "hashArr and summaryArr are guaranteed to be valid due to the switch on CornerCase") - public static CompactSketch aNotB( - final Sketch skA, - final Sketch skB) { - if (skA == null || skB == null) { - throw new SketchesArgumentException("Neither argument may be null for this stateless operation."); - } - - final long thetaLongA = skA.getThetaLong(); - final int countA = skA.getRetainedEntries(); - final boolean emptyA = skA.isEmpty(); - - final long thetaLongB = skB.getThetaLong(); - final int countB = skB.getRetainedEntries(); - final boolean emptyB = skB.isEmpty(); - - final int id = - SetOperationCornerCases.createCornerCaseId(thetaLongA, countA, emptyA, thetaLongB, countB, emptyB); - final CornerCase cCase = CornerCase.caseIdToCornerCase(id); - final AnotbAction anotbAction = cCase.getAnotbAction(); - - CompactSketch result = null; - - switch (anotbAction) { - case EMPTY_1_0_T: { - result = new CompactSketch<>(null, null, Long.MAX_VALUE, true); - break; - } - case DEGEN_MIN_0_F: { - final long thetaLong = min(thetaLongA, thetaLongB); - result = new CompactSketch<>(null, null, thetaLong, false); - break; - } - case DEGEN_THA_0_F: { - result = new CompactSketch<>(null, null, thetaLongA, false); - break; - } - case TRIM_A: { - final DataArrays daA = getCopyOfDataArraysTuple(skA); - final long[] hashArrA = daA.hashArr; - final S[] summaryArrA = daA.summaryArr; - final long minThetaLong = min(thetaLongA, thetaLongB); - final DataArrays da = trimAndCopyDataArrays(hashArrA, summaryArrA, minThetaLong, false); - result = new CompactSketch<>(da.hashArr, da.summaryArr, minThetaLong, skA.empty_); - break; - } - case SKETCH_A: { - final DataArrays 
daA = getCopyOfDataArraysTuple(skA); - result = new CompactSketch<>(daA.hashArr, daA.summaryArr, thetaLongA, skA.empty_); - break; - } - case FULL_ANOTB: { //both A and B should have valid entries. - final DataArrays daA = getCopyOfDataArraysTuple(skA); - final long minThetaLong = min(thetaLongA, thetaLongB); - final DataArrays daR = - getCopyOfResultArraysTuple(minThetaLong, daA.hashArr.length, daA.hashArr, daA.summaryArr, skB); - final int countR = (daR.hashArr == null) ? 0 : daR.hashArr.length; - if (countR == 0) { - result = new CompactSketch<>(null, null, minThetaLong, minThetaLong == Long.MAX_VALUE); - } else { - result = new CompactSketch<>(daR.hashArr, daR.summaryArr, minThetaLong, false); - } - } - //default: not possible - } - return result; - } - - /** - * Returns the A-and-not-B set operation on a Tuple sketch and a Theta sketch. - * - *

              This a stateless operation and has no impact on the internal state of this operator. - * Thus, this is not an accumulating update and is independent of the {@link #setA(Sketch)}, - * {@link #notB(Sketch)}, {@link #notB(org.apache.datasketches.theta2.Sketch)}, and - * {@link #getResult(boolean)} methods.

              - * - *

              If either argument is null an exception is thrown.

              - * - *

              Rationale: In mathematics a "null set" is a set with no members, which we call an empty set. - * That is distinctly different from the java null, which represents a nonexistent object. - * In most cases it is a programming error due to some object that was not properly initialized. - * With a null as the first argument, we cannot know what the user's intent is. - * With a null as the second argument, we can't ignore it as we must return a result and there is - * no following possible viable arguments for the second argument. - * Since it is very likely that a null is a programming error for either argument - * we throw a an exception.

              - * - * @param skA The incoming Tuple sketch for the first argument - * @param skB The incoming Theta sketch for the second argument - * @param Type of Summary - * @return the result as an unordered {@link CompactSketch} - */ - @SuppressFBWarnings(value = "UWF_FIELD_NOT_INITIALIZED_IN_CONSTRUCTOR", - justification = "hashArr and summaryArr are guaranteed to be valid due to the switch on CornerCase") - public static CompactSketch aNotB( - final Sketch skA, - final org.apache.datasketches.theta2.Sketch skB) { - if (skA == null || skB == null) { - throw new SketchesArgumentException("Neither argument may be null for this stateless operation."); - } - - final long thetaLongA = skA.getThetaLong(); - final int countA = skA.getRetainedEntries(); - final boolean emptyA = skA.isEmpty(); - - final long thetaLongB = skB.getThetaLong(); - final int countB = skB.getRetainedEntries(); - final boolean emptyB = skB.isEmpty(); - - final int id = - SetOperationCornerCases.createCornerCaseId(thetaLongA, countA, emptyA, thetaLongB, countB, emptyB); - final CornerCase cCase = CornerCase.caseIdToCornerCase(id); - final AnotbAction anotbAction = cCase.getAnotbAction(); - - CompactSketch result = null; - - switch (anotbAction) { - case EMPTY_1_0_T: { - result = new CompactSketch<>(null, null, Long.MAX_VALUE, true); - break; - } - case DEGEN_MIN_0_F: { - final long thetaLong = min(thetaLongA, thetaLongB); - result = new CompactSketch<>(null, null, thetaLong, false); - break; - } - case DEGEN_THA_0_F: { - result = new CompactSketch<>(null, null, thetaLongA, false); - break; - } - case TRIM_A: { - final DataArrays daA = getCopyOfDataArraysTuple(skA); - final long[] hashArrA = daA.hashArr; - final S[] summaryArrA = daA.summaryArr; - final long minThetaLong = min(thetaLongA, thetaLongB); - final DataArrays da = trimAndCopyDataArrays(hashArrA, summaryArrA, minThetaLong, false); - result = new CompactSketch<>(da.hashArr, da.summaryArr, minThetaLong, skA.empty_); - break; - } - case 
SKETCH_A: { - final DataArrays daA = getCopyOfDataArraysTuple(skA); - result = new CompactSketch<>(daA.hashArr, daA.summaryArr, thetaLongA, skA.empty_); - break; - } - case FULL_ANOTB: { //both A and B have valid entries. - final DataArrays daA = getCopyOfDataArraysTuple(skA); - final long minThetaLong = min(thetaLongA, thetaLongB); - @SuppressFBWarnings(value = "UWF_FIELD_NOT_INITIALIZED_IN_CONSTRUCTOR", - justification = "hashArr and summaryArr are guaranteed to be valid due to the switch on CornerCase") - final DataArrays daR = - getCopyOfResultArraysTheta(minThetaLong, daA.hashArr.length, daA.hashArr, daA.summaryArr, skB); - final int countR = (daR.hashArr == null) ? 0 : daR.hashArr.length; - if (countR == 0) { - result = new CompactSketch<>(null, null, minThetaLong, minThetaLong == Long.MAX_VALUE); - } else { - result = new CompactSketch<>(daR.hashArr, daR.summaryArr, minThetaLong, false); - } - } - //default: not possible - } - return result; - } - - //restricted - - static class DataArrays { - DataArrays() {} - - long[] hashArr; - S[] summaryArr; - } - - private static DataArrays getCopyOfDataArraysTuple( - final Sketch sk) { - final CompactSketch csk; - final DataArrays da = new DataArrays<>(); - if (sk instanceof CompactSketch) { - csk = (CompactSketch) sk; - } else { - csk = ((QuickSelectSketch)sk).compact(); - } - final int count = csk.getRetainedEntries(); - if (count == 0) { - da.hashArr = null; - da.summaryArr = null; - } else { - da.hashArr = csk.getHashArr().clone(); //deep copy, may not be sorted - da.summaryArr = Util.copySummaryArray(csk.getSummaryArr()); - } - return da; - } - - @SuppressWarnings("unchecked") - //Both skA and skB must have entries (count > 0) - private static DataArrays getCopyOfResultArraysTuple( - final long minThetaLong, - final int countA, - final long[] hashArrA, - final S[] summaryArrA, - final Sketch skB) { - final DataArrays daR = new DataArrays<>(); - - //Rebuild/get hashtable of skB - final long[] hashTableB; - - if 
(skB instanceof CompactSketch) { - final CompactSketch cskB = (CompactSketch) skB; - final int countB = skB.getRetainedEntries(); - hashTableB = convertToHashTable(cskB.getHashArr(), countB, minThetaLong, ThetaUtil.REBUILD_THRESHOLD); - } else { - final QuickSelectSketch qskB = (QuickSelectSketch) skB; - hashTableB = qskB.getHashTable(); - } - - //build temporary arrays of skA - final long[] tmpHashArrA = new long[countA]; - final S[] tmpSummaryArrA = Util.newSummaryArray(summaryArrA, countA); - - //search for non matches and build temp arrays - final int lgHTBLen = exactLog2OfLong(hashTableB.length); - int nonMatches = 0; - for (int i = 0; i < countA; i++) { - final long hash = hashArrA[i]; - if (hash != 0 && hash < minThetaLong) { //skips hashes of A >= minTheta - final int index = hashSearch(hashTableB, lgHTBLen, hash); - if (index == -1) { - tmpHashArrA[nonMatches] = hash; - tmpSummaryArrA[nonMatches] = (S) summaryArrA[i].copy(); - nonMatches++; - } - } - } - daR.hashArr = Arrays.copyOfRange(tmpHashArrA, 0, nonMatches); - daR.summaryArr = Arrays.copyOfRange(tmpSummaryArrA, 0, nonMatches); - return daR; - } - - @SuppressWarnings("unchecked") - private static DataArrays getCopyOfResultArraysTheta( - final long minThetaLong, - final int countA, - final long[] hashArrA, - final S[] summaryArrA, - final org.apache.datasketches.theta2.Sketch skB) { - final DataArrays daB = new DataArrays<>(); - - //Rebuild/get hashtable of skB - final long[] hashTableB; //read only - - final long[] hashCacheB; - try { hashCacheB = (long[])GET_CACHE.invoke(skB); - } catch (final Exception e) { throw new SketchesStateException("Reflection Exception " + e); } - - if (skB instanceof org.apache.datasketches.theta2.CompactSketch) { - final int countB = skB.getRetainedEntries(true); - hashTableB = convertToHashTable(hashCacheB, countB, minThetaLong, ThetaUtil.REBUILD_THRESHOLD); - } else { - hashTableB = hashCacheB; - } - - //build temporary result arrays of skA - final long[] tmpHashArrA = 
new long[countA]; - final S[] tmpSummaryArrA = Util.newSummaryArray(summaryArrA, countA); - - //search for non matches and build temp arrays - final int lgHTBLen = exactLog2OfLong(hashTableB.length); - int nonMatches = 0; - for (int i = 0; i < countA; i++) { - final long hash = hashArrA[i]; - if (hash != 0 && hash < minThetaLong) { //skips hashes of A >= minTheta - final int index = hashSearch(hashTableB, lgHTBLen, hash); - if (index == -1) { //not found - tmpHashArrA[nonMatches] = hash; - tmpSummaryArrA[nonMatches] = (S) summaryArrA[i].copy(); - nonMatches++; - } - } - } - //trim the arrays - daB.hashArr = Arrays.copyOfRange(tmpHashArrA, 0, nonMatches); - daB.summaryArr = Arrays.copyOfRange(tmpSummaryArrA, 0, nonMatches); - return daB; - } - - @SuppressWarnings("unchecked") - private static DataArrays trimAndCopyDataArrays( - final long[] hashArr, - final S[] summaryArr, - final long minThetaLong, - final boolean copy) { - - //build temporary arrays - final int countIn = hashArr.length; - final long[] tmpHashArr = new long[countIn]; - final S[] tmpSummaryArr = Util.newSummaryArray(summaryArr, countIn); - int countResult = 0; - for (int i = 0; i < countIn; i++) { - final long hash = hashArr[i]; - if (hash < minThetaLong) { - tmpHashArr[countResult] = hash; - tmpSummaryArr[countResult] = (S) (copy ? summaryArr[i].copy() : summaryArr[i]); - countResult++; - } else { continue; } - } - //Remove empty slots - final DataArrays da = new DataArrays<>(); - da.hashArr = Arrays.copyOfRange(tmpHashArr, 0, countResult); - da.summaryArr = Arrays.copyOfRange(tmpSummaryArr, 0, countResult); - return da; - } - - /** - * Resets this operation back to the empty state. 
- */ - public void reset() { - empty_ = true; - thetaLong_ = Long.MAX_VALUE; - hashArr_ = null; - summaryArr_ = null; - curCount_ = 0; - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/CompactSketch.java b/src/main/java/org/apache/datasketches/tuple2/CompactSketch.java deleted file mode 100644 index 63d1b07ba..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/CompactSketch.java +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static org.apache.datasketches.thetacommon2.HashOperations.count; - -import java.lang.foreign.MemorySegment; -import java.lang.reflect.Array; -import java.nio.ByteOrder; - -import org.apache.datasketches.common.ByteArrayUtil; -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.SketchesArgumentException; - -/** - * CompactSketches are never created directly. 
They are created as a result of - * the compact() method of an UpdatableSketch or as a result of the getResult() - * method of a set operation like Union, Intersection or AnotB. CompactSketch - * consists of a compact list (i.e. no intervening spaces) of hash values, - * corresponding list of Summaries, and a value for theta. The lists may or may - * not be ordered. CompactSketch is read-only. - * - * @param type of Summary - */ -public final class CompactSketch extends Sketch { - private static final byte serialVersionWithSummaryClassNameUID = 1; - private static final byte serialVersionUIDLegacy = 2; - private static final byte serialVersionUID = 3; - private static final short defaultSeedHash = (short) 37836; // for compatibility with C++ - private final long[] hashArr_; - private S[] summaryArr_; - - private enum FlagsLegacy { IS_BIG_ENDIAN, IS_EMPTY, HAS_ENTRIES, IS_THETA_INCLUDED } - - private enum Flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED } - - /** - * Create a CompactSketch from correct components - * @param hashArr compacted hash array - * @param summaryArr compacted summary array - * @param thetaLong long value of theta - * @param empty empty flag - */ - CompactSketch(final long[] hashArr, final S[] summaryArr, final long thetaLong, final boolean empty) { - super(thetaLong, empty, null); - super.thetaLong_ = thetaLong; - super.empty_ = empty; - hashArr_ = hashArr; - summaryArr_ = summaryArr; - } - - /** - * This is to create an instance of a CompactSketch given a serialized form - * - * @param seg MemorySegment object with serialized CompactSketch - * @param deserializer the SummaryDeserializer - */ - CompactSketch(final MemorySegment seg, final SummaryDeserializer deserializer) { - super(Long.MAX_VALUE, true, null); - int offset = 0; - final byte preambleLongs = seg.get(JAVA_BYTE, offset++); - final byte version = seg.get(JAVA_BYTE, offset++); - final byte familyId = seg.get(JAVA_BYTE, offset++); - 
SerializerDeserializer.validateFamily(familyId, preambleLongs); - if (version > serialVersionUID) { - throw new SketchesArgumentException( - "Unsupported serial version. Expected: " + serialVersionUID + " or lower, actual: " + version); - } - SerializerDeserializer - .validateType(seg.get(JAVA_BYTE, offset++), SerializerDeserializer.SketchType.CompactSketch); - if (version <= serialVersionUIDLegacy) { // legacy serial format - final byte flags = seg.get(JAVA_BYTE, offset++); - final boolean isBigEndian = (flags & 1 << FlagsLegacy.IS_BIG_ENDIAN.ordinal()) > 0; - if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { - throw new SketchesArgumentException("Byte order mismatch"); - } - empty_ = (flags & 1 << FlagsLegacy.IS_EMPTY.ordinal()) > 0; - final boolean isThetaIncluded = (flags & 1 << FlagsLegacy.IS_THETA_INCLUDED.ordinal()) > 0; - if (isThetaIncluded) { - thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, offset); - offset += Long.BYTES; - } else { - thetaLong_ = Long.MAX_VALUE; - } - final boolean hasEntries = (flags & 1 << FlagsLegacy.HAS_ENTRIES.ordinal()) > 0; - if (hasEntries) { - int classNameLength = 0; - if (version == serialVersionWithSummaryClassNameUID) { - classNameLength = seg.get(JAVA_BYTE, offset++); - } - final int count = seg.get(JAVA_INT_UNALIGNED, offset); - offset += Integer.BYTES; - if (version == serialVersionWithSummaryClassNameUID) { - offset += classNameLength; - } - hashArr_ = new long[count]; - - for (int i = 0; i < count; i++) { - hashArr_[i] = seg.get(JAVA_LONG_UNALIGNED, offset); - offset += Long.BYTES; - } - for (int i = 0; i < count; i++) { - offset += readSummary(seg, offset, i, count, deserializer); - } - } else { - hashArr_ = new long[0]; - summaryArr_ = null; - } - } else { // current serial format - offset++; //skip unused byte - final byte flags = seg.get(JAVA_BYTE, offset++); - offset += 2; //skip 2 unused bytes - empty_ = (flags & 1 << Flags.IS_EMPTY.ordinal()) > 0; - thetaLong_ = Long.MAX_VALUE; - int count = 
0; - if (!empty_) { - if (preambleLongs == 1) { - count = 1; - } else { - count = seg.get(JAVA_INT_UNALIGNED, offset); - offset += Integer.BYTES; - offset += 4; // unused - if (preambleLongs > 2) { - thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, offset); - offset += Long.BYTES; - } - } - } - hashArr_ = new long[count]; - - for (int i = 0; i < count; i++) { - hashArr_[i] = seg.get(JAVA_LONG_UNALIGNED, offset); - offset += Long.BYTES; - offset += readSummary(seg, offset, i, count, deserializer); - } - } - } - - @SuppressWarnings({"unchecked"}) - private int readSummary(final MemorySegment seg, final int offset, final int i, final int count, - final SummaryDeserializer deserializer) { - final MemorySegment segRegion = seg.asSlice(offset, seg.byteSize() - offset); - final DeserializeResult result = deserializer.heapifySummary(segRegion); - final S summary = result.getObject(); - final Class summaryType = (Class) result.getObject().getClass(); - if (summaryArr_ == null) { - summaryArr_ = (S[]) Array.newInstance(summaryType, count); - } - summaryArr_[i] = summary; - return result.getSize(); - } - - @Override - public CompactSketch compact() { - return this; - } - - long[] getHashArr() { - return hashArr_; - } - - S[] getSummaryArr() { - return summaryArr_; - } - - @Override - public int getRetainedEntries() { - return hashArr_ == null ? 0 : hashArr_.length; - } - - @Override - public int getCountLessThanThetaLong(final long thetaLong) { - return count(hashArr_, thetaLong); - } - - // Layout of first 8 bytes: - // Long || Start Byte Adr: - // Adr: - // || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | - // 0 || seed hash | Flags | unused | SkType | FamID | SerVer | Preamble_Longs | - @Override - public byte[] toByteArray() { - final int count = getRetainedEntries(); - final boolean isSingleItem = count == 1 && !isEstimationMode(); - final int preambleLongs = isEmpty() || isSingleItem ? 1 : isEstimationMode() ? 
3 : 2; - - int summariesSizeBytes = 0; - final byte[][] summariesBytes = new byte[count][]; - if (count > 0) { - for (int i = 0; i < count; i++) { - summariesBytes[i] = summaryArr_[i].toByteArray(); - summariesSizeBytes += summariesBytes[i].length; - } - } - - final int sizeBytes = Long.BYTES * preambleLongs + Long.BYTES * count + summariesSizeBytes; - final byte[] bytes = new byte[sizeBytes]; - int offset = 0; - bytes[offset++] = (byte) preambleLongs; - bytes[offset++] = serialVersionUID; - bytes[offset++] = (byte) Family.TUPLE.getID(); - bytes[offset++] = (byte) SerializerDeserializer.SketchType.CompactSketch.ordinal(); - offset++; // unused - bytes[offset++] = (byte) ( - (1 << Flags.IS_COMPACT.ordinal()) - | (1 << Flags.IS_READ_ONLY.ordinal()) - | (isEmpty() ? 1 << Flags.IS_EMPTY.ordinal() : 0) - ); - ByteArrayUtil.putShortLE(bytes, offset, defaultSeedHash); - offset += Short.BYTES; - if (!isEmpty()) { - if (!isSingleItem) { - ByteArrayUtil.putIntLE(bytes, offset, count); - offset += Integer.BYTES; - offset += 4; // unused - if (isEstimationMode()) { - ByteArrayUtil.putLongLE(bytes, offset, thetaLong_); - offset += Long.BYTES; - } - } - } - for (int i = 0; i < count; i++) { - ByteArrayUtil.putLongLE(bytes, offset, hashArr_[i]); - offset += Long.BYTES; - System.arraycopy(summariesBytes[i], 0, bytes, offset, summariesBytes[i].length); - offset += summariesBytes[i].length; - } - return bytes; - } - - @Override - public TupleSketchIterator iterator() { - return new TupleSketchIterator<>(hashArr_, summaryArr_); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/DeserializeResult.java b/src/main/java/org/apache/datasketches/tuple2/DeserializeResult.java deleted file mode 100644 index a5fc38c31..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/DeserializeResult.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2; - -/** - * Returns an object and its size in bytes as a result of a deserialize operation - * @param Type of object - */ -public class DeserializeResult { - private final T object; - private final int size; - - /** - * Creates an instance. - * @param object Deserialized object. - * @param size Deserialized size in bytes. - */ - public DeserializeResult(final T object, final int size) { - this.object = object; - this.size = size; - } - - /** - * Returns Deserialized object - * @return Deserialized object - */ - public T getObject() { - return object; - } - - /** - * Returns size in bytes occupied by the object in the serialized form - * @return size in bytes occupied by the object in the serialized form - */ - public int getSize() { - return size; - } -} diff --git a/src/main/java/org/apache/datasketches/tuple2/Filter.java b/src/main/java/org/apache/datasketches/tuple2/Filter.java deleted file mode 100644 index dbd61b576..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/Filter.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2; - -import java.lang.reflect.Array; -import java.util.Arrays; -import java.util.function.Predicate; - -/** - * Class for filtering entries from a {@link Sketch} given a {@link Summary} - * - * @param Summary type against which apply the {@link Predicate} - */ -public class Filter { - private final Predicate predicate; - - /** - * Filter constructor with a {@link Predicate} - * @param predicate Predicate to use in this filter. If the Predicate returns False, the - * element is discarded. 
If the Predicate returns True, then the element is kept in the - * {@link Sketch} - */ - public Filter(final Predicate predicate) { - this.predicate = predicate; - } - - /** - * Filters elements on the provided {@link Sketch} - * - * @param sketchIn The sketch against which apply the {@link Predicate} - * @return A new Sketch with some of the entries filtered out based on the {@link Predicate} - */ - @SuppressWarnings("unchecked") - public CompactSketch filter(final Sketch sketchIn) { - if (sketchIn == null) { - return new CompactSketch<>(null, null, Long.MAX_VALUE, true); - } - final long[] hashes = new long[sketchIn.getRetainedEntries()]; - T[] summaries = null; // lazy init to get class from the first entry - int i = 0; - final TupleSketchIterator it = sketchIn.iterator(); - while (it.next()) { - final T summary = it.getSummary(); - if (predicate.test(summary)) { - hashes[i] = it.getHash(); - if (summaries == null) { - summaries = (T[]) Array.newInstance(summary.getClass(), sketchIn.getRetainedEntries()); - } - summaries[i++] = (T) summary.copy(); - } - } - final boolean isEmpty = i == 0 && !sketchIn.isEstimationMode(); - if (i == 0) { - return new CompactSketch<>(null, null, sketchIn.getThetaLong(), isEmpty); - } - return new CompactSketch<>(Arrays.copyOf(hashes, i), Arrays.copyOf(summaries, i), sketchIn.getThetaLong(), isEmpty); - } -} - diff --git a/src/main/java/org/apache/datasketches/tuple2/HashTables.java b/src/main/java/org/apache/datasketches/tuple2/HashTables.java deleted file mode 100644 index be9ad48e3..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/HashTables.java +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2; - -import static java.lang.Math.ceil; -import static java.lang.Math.max; -import static java.lang.Math.min; -import static org.apache.datasketches.common.Util.ceilingPowerOf2; -import static org.apache.datasketches.thetacommon2.HashOperations.hashInsertOnly; -import static org.apache.datasketches.thetacommon2.HashOperations.hashSearch; - -import java.lang.reflect.Array; - -import org.apache.datasketches.thetacommon2.ThetaUtil; - -@SuppressWarnings("unchecked") -class HashTables { - long[] hashTable = null; - S[] summaryTable = null; - int lgTableSize = 0; - int numKeys = 0; - - HashTables() { } - - //must have valid entries - void fromSketch(final Sketch sketch) { - numKeys = sketch.getRetainedEntries(); - lgTableSize = getLgTableSize(numKeys); - - hashTable = new long[1 << lgTableSize]; - final TupleSketchIterator it = sketch.iterator(); - while (it.next()) { - final long hash = it.getHash(); - final int index = hashInsertOnly(hashTable, lgTableSize, hash); - final S mySummary = (S)it.getSummary().copy(); - if (summaryTable == null) { - summaryTable = (S[]) Array.newInstance(mySummary.getClass(), 1 << lgTableSize); - } - summaryTable[index] = mySummary; - } - } - - //must have valid entries - void fromSketch(final org.apache.datasketches.theta2.Sketch sketch, final S summary) { - numKeys = sketch.getRetainedEntries(true); - lgTableSize = 
getLgTableSize(numKeys); - - hashTable = new long[1 << lgTableSize]; - final org.apache.datasketches.theta2.HashIterator it = sketch.iterator(); - while (it.next()) { - final long hash = it.get(); - final int index = hashInsertOnly(hashTable, lgTableSize, hash); - final S mySummary = (S)summary.copy(); - if (summaryTable == null) { - summaryTable = (S[]) Array.newInstance(mySummary.getClass(), 1 << lgTableSize); - } - summaryTable[index] = mySummary; - } - } - - private void fromArrays(final long[] hashArr, final S[] summaryArr, final int count) { - numKeys = count; - lgTableSize = getLgTableSize(count); - - summaryTable = null; - hashTable = new long[1 << lgTableSize]; - for (int i = 0; i < count; i++) { - final long hash = hashArr[i]; - final int index = hashInsertOnly(hashTable, lgTableSize, hash); - final S mySummary = summaryArr[i]; - if (summaryTable == null) { - summaryTable = (S[]) Array.newInstance(mySummary.getClass(), 1 << lgTableSize); - } - summaryTable[index] = summaryArr[i]; - } - } - - //For Tuple Sketches - HashTables getIntersectHashTables( - final Sketch nextTupleSketch, - final long thetaLong, - final SummarySetOperations summarySetOps) { - - //Match nextSketch data with local instance data, filtering by theta - final int maxMatchSize = min(numKeys, nextTupleSketch.getRetainedEntries()); - final long[] matchHashArr = new long[maxMatchSize]; - final S[] matchSummariesArr = Util.newSummaryArray(summaryTable, maxMatchSize); - int matchCount = 0; - final TupleSketchIterator it = nextTupleSketch.iterator(); - - while (it.next()) { - final long hash = it.getHash(); - if (hash >= thetaLong) { continue; } - final int index = hashSearch(hashTable, lgTableSize, hash); - if (index < 0) { continue; } - //Copy the intersecting items from local hashTables_ - // sequentially into local matchHashArr_ and matchSummaries_ - matchHashArr[matchCount] = hash; - matchSummariesArr[matchCount] = summarySetOps.intersection(summaryTable[index], it.getSummary()); - 
matchCount++; - } - final HashTables resultHT = new HashTables<>(); - resultHT.fromArrays(matchHashArr, matchSummariesArr, matchCount); - return resultHT; - } - - //For Theta Sketches - HashTables getIntersectHashTables( - final org.apache.datasketches.theta2.Sketch nextThetaSketch, - final long thetaLong, - final SummarySetOperations summarySetOps, - final S summary) { - - final Class summaryType = (Class) summary.getClass(); - - //Match nextSketch data with local instance data, filtering by theta - final int maxMatchSize = min(numKeys, nextThetaSketch.getRetainedEntries()); - final long[] matchHashArr = new long[maxMatchSize]; - final S[] matchSummariesArr = (S[]) Array.newInstance(summaryType, maxMatchSize); - int matchCount = 0; - final org.apache.datasketches.theta2.HashIterator it = nextThetaSketch.iterator(); - - //scan B & search A(hashTable) for match - while (it.next()) { - final long hash = it.get(); - if (hash >= thetaLong) { continue; } - final int index = hashSearch(hashTable, lgTableSize, hash); - if (index < 0) { continue; } - //Copy the intersecting items from local hashTables_ - // sequentially into local matchHashArr_ and matchSummaries_ - matchHashArr[matchCount] = hash; - matchSummariesArr[matchCount] = summarySetOps.intersection(summaryTable[index], summary); - matchCount++; - } - final HashTables resultHT = new HashTables<>(); - resultHT.fromArrays(matchHashArr, matchSummariesArr, matchCount); - return resultHT; - } - - void clear() { - hashTable = null; - summaryTable = null; - lgTableSize = 0; - numKeys = 0; - } - - static int getLgTableSize(final int count) { - final int tableSize = max(ceilingPowerOf2((int) ceil(count / 0.75)), 1 << ThetaUtil.MIN_LG_NOM_LONGS); - return Integer.numberOfTrailingZeros(tableSize); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/Intersection.java b/src/main/java/org/apache/datasketches/tuple2/Intersection.java deleted file mode 100644 index f3e0dde3c..000000000 --- 
a/src/main/java/org/apache/datasketches/tuple2/Intersection.java +++ /dev/null @@ -1,254 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2; - -import static java.lang.Math.ceil; -import static java.lang.Math.max; -import static java.lang.Math.min; -import static org.apache.datasketches.common.Util.ceilingPowerOf2; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.SketchesStateException; -import org.apache.datasketches.thetacommon2.ThetaUtil; - - -/** - * Computes an intersection of two or more generic tuple sketches or generic tuple sketches - * combined with theta sketches. - * A new instance represents the Universal Set. Because the Universal Set - * cannot be realized a getResult() on a new instance will produce an error. - * Every update() computes an intersection with the internal state, which will never - * grow larger and may be reduced to zero. 
- * - * @param Type of Summary - */ -@SuppressWarnings("unchecked") -public class Intersection { - private final SummarySetOperations summarySetOps_; - private boolean empty_; - private long thetaLong_; - private HashTables hashTables_; - private boolean firstCall_; - - /** - * Creates new Intersection instance with instructions on how to process two summaries that - * intersect. - * @param summarySetOps instance of SummarySetOperations - */ - public Intersection(final SummarySetOperations summarySetOps) { - summarySetOps_ = summarySetOps; - empty_ = false; // universal set at the start - thetaLong_ = Long.MAX_VALUE; - hashTables_ = new HashTables<>(); - firstCall_ = true; - } - - /** - * Perform a stateless intersect set operation on the two given tuple sketches and returns the - * result as an unordered CompactSketch on the heap. - * @param tupleSketchA The first sketch argument. It must not be null. - * @param tupleSketchB The second sketch argument. It must not be null. - * @return an unordered CompactSketch on the heap - */ - public CompactSketch intersect( - final Sketch tupleSketchA, - final Sketch tupleSketchB) { - reset(); - intersect(tupleSketchA); - intersect(tupleSketchB); - final CompactSketch csk = getResult(); - reset(); - return csk; - } - - /** - * Perform a stateless intersect set operation on a tuple sketch and a theta sketch and returns the - * result as an unordered CompactSketch on the heap. - * @param tupleSketch The first sketch argument. It must not be null. - * @param thetaSketch The second sketch argument. It must not be null. - * @param summary the given proxy summary for the theta sketch, which doesn't have one. - * This must not be null. 
- * @return an unordered CompactSketch on the heap - */ - public CompactSketch intersect( - final Sketch tupleSketch, - final org.apache.datasketches.theta2.Sketch - thetaSketch, final S summary) { - reset(); - intersect(tupleSketch); - intersect(thetaSketch, summary); - final CompactSketch csk = getResult(); - reset(); - return csk; - } - - /** - * Performs a stateful intersection of the internal set with the given tupleSketch. - * @param tupleSketch input sketch to intersect with the internal state. It must not be null. - */ - public void intersect(final Sketch tupleSketch) { - if (tupleSketch == null) { throw new SketchesArgumentException("Sketch must not be null"); } - - final boolean firstCall = firstCall_; - firstCall_ = false; - - // input sketch could be first or next call - - final boolean emptyIn = tupleSketch.isEmpty(); - if (empty_ || emptyIn) { //empty rule - //Whatever the current internal state, we make our local empty. - resetToEmpty(); - return; - } - - final long thetaLongIn = tupleSketch.getThetaLong(); - thetaLong_ = min(thetaLong_, thetaLongIn); //Theta rule - - if (tupleSketch.getRetainedEntries() == 0) { - hashTables_.clear(); - return; - } - // input sketch will have valid entries > 0 - - if (firstCall) { - //Copy firstSketch data into local instance hashTables_ - hashTables_.fromSketch(tupleSketch); - } - - //Next Call - else { - if (hashTables_.numKeys == 0) { return; } - //process intersect with current hashTables - hashTables_ = hashTables_.getIntersectHashTables(tupleSketch, thetaLong_, summarySetOps_); - } - } - - /** - * Performs a stateful intersection of the internal set with the given thetaSketch by combining entries - * using the hashes from the theta sketch and summary values from the given summary and rules - * from the summarySetOps defined by the Intersection constructor. - * @param thetaSketch input theta sketch to intersect with the internal state. It must not be null. 
- * @param summary the given proxy summary for the theta sketch, which doesn't have one. - * It will be copied for each matching index. It must not be null. - */ - public void intersect(final org.apache.datasketches.theta2.Sketch thetaSketch, final S summary) { - if (thetaSketch == null) { throw new SketchesArgumentException("Sketch must not be null"); } - if (summary == null) { throw new SketchesArgumentException("Summary cannot be null."); } - final boolean firstCall = firstCall_; - firstCall_ = false; - // input sketch is not null, could be first or next call - - final boolean emptyIn = thetaSketch.isEmpty(); - if (empty_ || emptyIn) { //empty rule - //Whatever the current internal state, we make our local empty. - resetToEmpty(); - return; - } - - final long thetaLongIn = thetaSketch.getThetaLong(); - thetaLong_ = min(thetaLong_, thetaLongIn); //Theta rule - - final int countIn = thetaSketch.getRetainedEntries(); - if (countIn == 0) { - hashTables_.clear(); - return; - } - // input sketch will have valid entries > 0 - - if (firstCall) { - final org.apache.datasketches.theta2.Sketch firstSketch = thetaSketch; - //Copy firstSketch data into local instance hashTables_ - hashTables_.fromSketch(firstSketch, summary); - } - - //Next Call - else { - if (hashTables_.numKeys == 0) { return; } - hashTables_ = hashTables_.getIntersectHashTables(thetaSketch, thetaLongIn, summarySetOps_, summary); - } - } - - /** - * Gets the internal set as an unordered CompactSketch - * @return result of the intersections so far - */ - public CompactSketch getResult() { - if (firstCall_) { - throw new SketchesStateException( - "getResult() with no intervening intersections is not a legal result."); - } - final int countIn = hashTables_.numKeys; - if (countIn == 0) { - return new CompactSketch<>(null, null, thetaLong_, empty_); - } - - final int tableSize = hashTables_.hashTable.length; - - final long[] hashArr = new long[countIn]; - final S[] summaryArr = 
Util.newSummaryArray(hashTables_.summaryTable, countIn); - - //compact the arrays - int cnt = 0; - for (int i = 0; i < tableSize; i++) { - final long hash = hashTables_.hashTable[i]; - if (hash == 0 || hash > thetaLong_) { continue; } - hashArr[cnt] = hash; - summaryArr[cnt] = (S) hashTables_.summaryTable[i].copy(); - cnt++; - } - assert cnt == countIn; - return new CompactSketch<>(hashArr, summaryArr, thetaLong_, empty_); - } - - /** - * Returns true if there is a valid intersection result available - * @return true if there is a valid intersection result available - */ - public boolean hasResult() { - return !firstCall_; - } - - /** - * Resets the internal set to the initial state, which represents the Universal Set - */ - public void reset() { - hardReset(); - } - - private void hardReset() { - empty_ = false; - thetaLong_ = Long.MAX_VALUE; - hashTables_.clear(); - firstCall_ = true; - } - - private void resetToEmpty() { - empty_ = true; - thetaLong_ = Long.MAX_VALUE; - hashTables_.clear(); - firstCall_ = false; - } - - static int getLgTableSize(final int count) { - final int tableSize = max(ceilingPowerOf2((int) ceil(count / 0.75)), 1 << ThetaUtil.MIN_LG_NOM_LONGS); - return Integer.numberOfTrailingZeros(tableSize); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/JaccardSimilarity.java b/src/main/java/org/apache/datasketches/tuple2/JaccardSimilarity.java deleted file mode 100644 index 79065453f..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/JaccardSimilarity.java +++ /dev/null @@ -1,370 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2; - -import static java.lang.Math.max; -import static java.lang.Math.min; -import static org.apache.datasketches.common.Util.ceilingPowerOf2; -import static org.apache.datasketches.thetacommon2.BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA; -import static org.apache.datasketches.thetacommon2.BoundsOnRatiosInTupleSketchedSets.getLowerBoundForBoverA; -import static org.apache.datasketches.thetacommon2.BoundsOnRatiosInTupleSketchedSets.getUpperBoundForBoverA; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon2.ThetaUtil; - -/** - * Jaccard similarity of two Tuple Sketches, or alternatively, of a Tuple and Theta Sketch. - * - *

              Note: only retained hash values are compared, and the Tuple summary values are not accounted for in the - * similarity measure.

              - * - * @author Lee Rhodes - * @author David Cromberge - */ -public final class JaccardSimilarity { - private static final double[] ZEROS = {0.0, 0.0, 0.0}; // LB, Estimate, UB - private static final double[] ONES = {1.0, 1.0, 1.0}; - - /** - * Computes the Jaccard similarity index with upper and lower bounds. The Jaccard similarity index - * J(A,B) = (A ^ B)/(A U B) is used to measure how similar the two sketches are to each - * other. If J = 1.0, the sketches are considered equal. If J = 0, the two sketches are - * distinct from each other. A Jaccard of .95 means the overlap between the two - * populations is 95% of the union of the two populations. - * - *

              Note: For very large pairs of sketches, where the configured nominal entries of the sketches - * are 2^25 or 2^26, this method may produce unpredictable results. - * - * @param sketchA The first argument, a Tuple sketch with summary type S - * @param sketchB The second argument, a Tuple sketch with summary type S - * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries. - * @param Summary - * @return a double array {LowerBound, Estimate, UpperBound} of the Jaccard index. - * The Upper and Lower bounds are for a confidence interval of 95.4% or +/- 2 standard deviations. - */ - public static double[] jaccard( - final Sketch sketchA, - final Sketch sketchB, - final SummarySetOperations summarySetOps) { - //Corner case checks - if (sketchA == null || sketchB == null) { return ZEROS.clone(); } - if (sketchA.isEmpty() && sketchB.isEmpty()) { return ONES.clone(); } - if (sketchA.isEmpty() || sketchB.isEmpty()) { return ZEROS.clone(); } - - final int countA = sketchA.getRetainedEntries(); - final int countB = sketchB.getRetainedEntries(); - - //Create the Union - final int minK = 1 << ThetaUtil.MIN_LG_NOM_LONGS; - final int maxK = 1 << ThetaUtil.MAX_LG_NOM_LONGS; - final int newK = max(min(ceilingPowerOf2(countA + countB), maxK), minK); - final Union union = new Union<>(newK, summarySetOps); - union.union(sketchA); - union.union(sketchB); - - final Sketch unionAB = union.getResult(); - final long thetaLongUAB = unionAB.getThetaLong(); - final long thetaLongA = sketchA.getThetaLong(); - final long thetaLongB = sketchB.getThetaLong(); - final int countUAB = unionAB.getRetainedEntries(); - - //Check for identical data - if (countUAB == countA && countUAB == countB - && thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) { - return ONES.clone(); - } - - //Create the Intersection - final Intersection inter = new Intersection<>(summarySetOps); - inter.intersect(sketchA); - inter.intersect(sketchB); - 
inter.intersect(unionAB); //ensures that intersection is a subset of the union - final Sketch interABU = inter.getResult(); - - final double lb = getLowerBoundForBoverA(unionAB, interABU); - final double est = getEstimateOfBoverA(unionAB, interABU); - final double ub = getUpperBoundForBoverA(unionAB, interABU); - return new double[] {lb, est, ub}; - } - - /** - * Computes the Jaccard similarity index with upper and lower bounds. The Jaccard similarity index - * J(A,B) = (A ^ B)/(A U B) is used to measure how similar the two sketches are to each - * other. If J = 1.0, the sketches are considered equal. If J = 0, the two sketches are - * distinct from each other. A Jaccard of .95 means the overlap between the two - * populations is 95% of the union of the two populations. - * - *

              Note: For very large pairs of sketches, where the configured nominal entries of the sketches - * are 2^25 or 2^26, this method may produce unpredictable results. - * - * @param sketchA The first argument, a Tuple sketch with summary type S - * @param sketchB The second argument, a Theta sketch - * @param summary the given proxy summary for the theta sketch, which doesn't have one. - * This may not be null. - * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries. - * @param Summary - * @return a double array {LowerBound, Estimate, UpperBound} of the Jaccard index. - * The Upper and Lower bounds are for a confidence interval of 95.4% or +/- 2 standard deviations. - */ - public static double[] jaccard( - final Sketch sketchA, - final org.apache.datasketches.theta2.Sketch sketchB, - final S summary, final SummarySetOperations summarySetOps) { - // Null case checks - if (summary == null) { - throw new SketchesArgumentException("Summary cannot be null."); } - - //Corner case checks - if (sketchA == null || sketchB == null) { return ZEROS.clone(); } - if (sketchA.isEmpty() && sketchB.isEmpty()) { return ONES.clone(); } - if (sketchA.isEmpty() || sketchB.isEmpty()) { return ZEROS.clone(); } - - final int countA = sketchA.getRetainedEntries(); - final int countB = sketchB.getRetainedEntries(true); - - //Create the Union - final int minK = 1 << ThetaUtil.MIN_LG_NOM_LONGS; - final int maxK = 1 << ThetaUtil.MAX_LG_NOM_LONGS; - final int newK = max(min(ceilingPowerOf2(countA + countB), maxK), minK); - final Union union = new Union<>(newK, summarySetOps); - union.union(sketchA); - union.union(sketchB, summary); - - final Sketch unionAB = union.getResult(); - final long thetaLongUAB = unionAB.getThetaLong(); - final long thetaLongA = sketchA.getThetaLong(); - final long thetaLongB = sketchB.getThetaLong(); - final int countUAB = unionAB.getRetainedEntries(); - - //Check for identical data - if (countUAB == countA && countUAB == 
countB - && thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) { - return ONES.clone(); - } - - //Create the Intersection - final Intersection inter = new Intersection<>(summarySetOps); - inter.intersect(sketchA); - inter.intersect(sketchB, summary); - inter.intersect(unionAB); //ensures that intersection is a subset of the union - final Sketch interABU = inter.getResult(); - - final double lb = getLowerBoundForBoverA(unionAB, interABU); - final double est = getEstimateOfBoverA(unionAB, interABU); - final double ub = getUpperBoundForBoverA(unionAB, interABU); - return new double[] {lb, est, ub}; - } - - /** - * Returns true if the two given sketches have exactly the same hash values and the same - * theta values. Thus, they are equivalent. - * @param sketchA The first argument, a Tuple sketch with summary type S - * @param sketchB The second argument, a Tuple sketch with summary type S - * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries. - * @param Summary - * @return true if the two given sketches have exactly the same hash values and the same - * theta values. 
- */ - public static boolean exactlyEqual( - final Sketch sketchA, - final Sketch sketchB, - final SummarySetOperations summarySetOps) { - //Corner case checks - if (sketchA == null || sketchB == null) { return false; } - if (sketchA == sketchB) { return true; } - if (sketchA.isEmpty() && sketchB.isEmpty()) { return true; } - if (sketchA.isEmpty() || sketchB.isEmpty()) { return false; } - - final int countA = sketchA.getRetainedEntries(); - final int countB = sketchB.getRetainedEntries(); - - //Create the Union - final Union union = new Union<>(ceilingPowerOf2(countA + countB), summarySetOps); - union.union(sketchA); - union.union(sketchB); - final Sketch unionAB = union.getResult(); - final long thetaLongUAB = unionAB.getThetaLong(); - final long thetaLongA = sketchA.getThetaLong(); - final long thetaLongB = sketchB.getThetaLong(); - final int countUAB = unionAB.getRetainedEntries(); - - //Check for identical counts and thetas - if (countUAB == countA && countUAB == countB - && thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) { - return true; - } - return false; - } - - /** - * Returns true if the two given sketches have exactly the same hash values and the same - * theta values. Thus, they are equivalent. - * @param sketchA The first argument, a Tuple sketch with summary type S - * @param sketchB The second argument, a Theta sketch - * @param summary the given proxy summary for the theta sketch, which doesn't have one. - * This may not be null. - * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries. - * @param Summary - * @return true if the two given sketches have exactly the same hash values and the same - * theta values. 
- */ - public static boolean exactlyEqual( - final Sketch sketchA, - final org.apache.datasketches.theta2.Sketch sketchB, - final S summary, final SummarySetOperations summarySetOps) { - // Null case checks - if (summary == null) { - throw new SketchesArgumentException("Summary cannot be null."); } - - //Corner case checks - if (sketchA == null || sketchB == null) { return false; } - if (sketchA.isEmpty() && sketchB.isEmpty()) { return true; } - if (sketchA.isEmpty() || sketchB.isEmpty()) { return false; } - - final int countA = sketchA.getRetainedEntries(); - final int countB = sketchB.getRetainedEntries(true); - - //Create the Union - final Union union = new Union<>(ceilingPowerOf2(countA + countB), summarySetOps); - union.union(sketchA); - union.union(sketchB, summary); - final Sketch unionAB = union.getResult(); - final long thetaLongUAB = unionAB.getThetaLong(); - final long thetaLongA = sketchA.getThetaLong(); - final long thetaLongB = sketchB.getThetaLong(); - final int countUAB = unionAB.getRetainedEntries(); - - //Check for identical counts and thetas - if (countUAB == countA && countUAB == countB - && thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) { - return true; - } - return false; - } - - /** - * Tests similarity of a measured Sketch against an expected Sketch. - * Computes the lower bound of the Jaccard index JLB of the measured and - * expected sketches. - * if JLB ≥ threshold, then the sketches are considered to be - * similar with a confidence of 97.7%. - * - * @param measured a Tuple sketch with summary type S to be tested - * @param expected the reference Tuple sketch with summary type S that is considered to be correct. - * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries. - * @param threshold a real value between zero and one. - * @param Summary - * @return if true, the similarity of the two sketches is greater than the given threshold - * with at least 97.7% confidence. 
- */ - public static boolean similarityTest( - final Sketch measured, final Sketch expected, - final SummarySetOperations summarySetOps, - final double threshold) { - //index 0: the lower bound - //index 1: the mean estimate - //index 2: the upper bound - final double jRatioLB = jaccard(measured, expected, summarySetOps)[0]; //choosing the lower bound - return jRatioLB >= threshold; - } - - /** - * Tests similarity of a measured Sketch against an expected Sketch. - * Computes the lower bound of the Jaccard index JLB of the measured and - * expected sketches. - * if JLB ≥ threshold, then the sketches are considered to be - * similar with a confidence of 97.7%. - * - * @param measured a Tuple sketch with summary type S to be tested - * @param expected the reference Theta sketch that is considered to be correct. - * @param summary the given proxy summary for the theta sketch, which doesn't have one. - * This may not be null. - * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries. - * @param threshold a real value between zero and one. - * @param Summary - * @return if true, the similarity of the two sketches is greater than the given threshold - * with at least 97.7% confidence. - */ - public static boolean similarityTest( - final Sketch measured, final org.apache.datasketches.theta2.Sketch expected, - final S summary, final SummarySetOperations summarySetOps, - final double threshold) { - //index 0: the lower bound - //index 1: the mean estimate - //index 2: the upper bound - final double jRatioLB = jaccard(measured, expected, summary, summarySetOps)[0]; //choosing the lower bound - return jRatioLB >= threshold; - } - - /** - * Tests dissimilarity of a measured Sketch against an expected Sketch. - * Computes the upper bound of the Jaccard index JUB of the measured and - * expected sketches. - * if JUB ≤ threshold, then the sketches are considered to be - * dissimilar with a confidence of 97.7%. 
- * - * @param measured a Tuple sketch with summary type S to be tested - * @param expected the reference Theta sketch that is considered to be correct. - * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries. - * @param threshold a real value between zero and one. - * @param Summary - * @return if true, the dissimilarity of the two sketches is greater than the given threshold - * with at least 97.7% confidence. - */ - public static boolean dissimilarityTest( - final Sketch measured, final Sketch expected, - final SummarySetOperations summarySetOps, - final double threshold) { - //index 0: the lower bound - //index 1: the mean estimate - //index 2: the upper bound - final double jRatioUB = jaccard(measured, expected, summarySetOps)[2]; //choosing the upper bound - return jRatioUB <= threshold; - } - - /** - * Tests dissimilarity of a measured Sketch against an expected Sketch. - * Computes the upper bound of the Jaccard index JUB of the measured and - * expected sketches. - * if JUB ≤ threshold, then the sketches are considered to be - * dissimilar with a confidence of 97.7%. - * - * @param measured a Tuple sketch with summary type S to be tested - * @param expected the reference Theta sketch that is considered to be correct. - * @param summary the given proxy summary for the theta sketch, which doesn't have one. - * This may not be null. - * @param summarySetOps instance of SummarySetOperations used to unify or intersect summaries. - * @param threshold a real value between zero and one. - * @param Summary - * @return if true, the dissimilarity of the two sketches is greater than the given threshold - * with at least 97.7% confidence. 
- */ - public static boolean dissimilarityTest( - final Sketch measured, final org.apache.datasketches.theta2.Sketch expected, - final S summary, final SummarySetOperations summarySetOps, - final double threshold) { - //index 0: the lower bound - //index 1: the mean estimate - //index 2: the upper bound - final double jRatioUB = jaccard(measured, expected, summary, summarySetOps)[2]; //choosing the upper bound - return jRatioUB <= threshold; - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/QuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple2/QuickSelectSketch.java deleted file mode 100644 index 1babf6ab2..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/QuickSelectSketch.java +++ /dev/null @@ -1,621 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static org.apache.datasketches.common.Util.ceilingPowerOf2; -import static org.apache.datasketches.common.Util.checkBounds; -import static org.apache.datasketches.common.Util.exactLog2OfLong; -import static org.apache.datasketches.thetacommon2.HashOperations.count; - -import java.lang.foreign.MemorySegment; -import java.lang.reflect.Array; -import java.nio.ByteOrder; -import java.util.Objects; - -import org.apache.datasketches.common.ByteArrayUtil; -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.QuickSelect; -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon2.HashOperations; -import org.apache.datasketches.thetacommon2.ThetaUtil; - -/** - * A generic tuple sketch using the QuickSelect algorithm. - * - * @param type of Summary - */ -class QuickSelectSketch extends Sketch { - private static final byte serialVersionUID = 2; - - private enum Flags { IS_BIG_ENDIAN, IS_IN_SAMPLING_MODE, IS_EMPTY, HAS_ENTRIES, IS_THETA_INCLUDED } - - private static final int DEFAULT_LG_RESIZE_FACTOR = ResizeFactor.X8.lg(); - private final int nomEntries_; - private final int lgResizeFactor_; - private final float samplingProbability_; - private int lgCurrentCapacity_; - private int retEntries_; - private int rebuildThreshold_; - private long[] hashTable_; - S[] summaryTable_; - - /** - * This is to create a new instance of a QuickSelectSketch with default resize factor. - * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than - * given value. 
- * @param summaryFactory An instance of a SummaryFactory. - */ - QuickSelectSketch( - final int nomEntries, - final SummaryFactory summaryFactory) { - this(nomEntries, DEFAULT_LG_RESIZE_FACTOR, summaryFactory); - } - - /** - * This is to create a new instance of a QuickSelectSketch with custom resize factor - * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than - * given value. - * @param lgResizeFactor log2(resizeFactor) - value from 0 to 3: - *

              -   * 0 - no resizing (max size allocated),
              -   * 1 - double internal hash table each time it reaches a threshold
              -   * 2 - grow four times
              -   * 3 - grow eight times (default)
              -   * 
              - * @param summaryFactory An instance of a SummaryFactory. - */ - QuickSelectSketch( - final int nomEntries, - final int lgResizeFactor, - final SummaryFactory summaryFactory) { - this(nomEntries, lgResizeFactor, 1f, summaryFactory); - } - - /** - * This is to create a new instance of a QuickSelectSketch with custom resize factor and sampling - * probability - * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than - * or equal to the given value. - * @param lgResizeFactor log2(resizeFactor) - value from 0 to 3: - *
              -   * 0 - no resizing (max size allocated),
              -   * 1 - double internal hash table each time it reaches a threshold
              -   * 2 - grow four times
              -   * 3 - grow eight times (default)
              -   * 
              - * @param samplingProbability the given sampling probability - * @param summaryFactory An instance of a SummaryFactory. - */ - QuickSelectSketch( - final int nomEntries, - final int lgResizeFactor, - final float samplingProbability, - final SummaryFactory summaryFactory) { - this( - nomEntries, - lgResizeFactor, - samplingProbability, - summaryFactory, - Util.getStartingCapacity(nomEntries, lgResizeFactor) - ); - } - - /** - * Target constructor for above constructors for a new instance. - * @param nomEntries Nominal number of entries. - * @param lgResizeFactor log2(resizeFactor) - * @param samplingProbability the given sampling probability - * @param summaryFactory An instance of a SummaryFactory. - * @param startingSize starting size of the sketch. - */ - private QuickSelectSketch( - final int nomEntries, - final int lgResizeFactor, - final float samplingProbability, - final SummaryFactory summaryFactory, - final int startingSize) { - super( - (long) (Long.MAX_VALUE * (double) samplingProbability), - true, - summaryFactory); - nomEntries_ = ceilingPowerOf2(nomEntries); - lgResizeFactor_ = lgResizeFactor; - samplingProbability_ = samplingProbability; - lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingSize); - retEntries_ = 0; - hashTable_ = new long[startingSize]; //must be before setRebuildThreshold - rebuildThreshold_ = setRebuildThreshold(hashTable_, nomEntries_); - summaryTable_ = null; // wait for the first summary to call Array.newInstance() - } - - /** - * Copy constructor - * @param sketch the QuickSelectSketch to be deep copied. 
- */ - QuickSelectSketch(final QuickSelectSketch sketch) { - super( - sketch.thetaLong_, - sketch.empty_, - sketch.summaryFactory_); - nomEntries_ = sketch.nomEntries_; - lgResizeFactor_ = sketch.lgResizeFactor_; - samplingProbability_ = sketch.samplingProbability_; - lgCurrentCapacity_ = sketch.lgCurrentCapacity_; - retEntries_ = sketch.retEntries_; - hashTable_ = sketch.hashTable_.clone(); - rebuildThreshold_ = sketch.rebuildThreshold_; - summaryTable_ = Util.copySummaryArray(sketch.summaryTable_); - } - - /** - * This is to create an instance of a QuickSelectSketch given a serialized form - * @param seg MemorySegment object with serialized QuickSelectSketch - * @param deserializer the SummaryDeserializer - * @param summaryFactory the SummaryFactory - * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated. - * This capability will be removed in a future release. - * Heapifying a CompactSketch is not deprecated. - */ - @Deprecated - QuickSelectSketch( - final MemorySegment seg, - final SummaryDeserializer deserializer, - final SummaryFactory summaryFactory) { - this(new Validate<>(), seg, deserializer, summaryFactory); - } - - /* - * This private constructor is used to protect against "Finalizer attacks". - * The private static inner class Validate performs validation and deserialization - * from the input MemorySegment and may throw exceptions. In order to protect against the attack, we must - * perform this validation prior to the constructor's super reaches the Object class. - * Making QuickSelectSketch final won't work here because UpdatableSketch is a subclass. - * Using an empty final finalizer() is not recommended and is deprecated as of Java9. 
- */ - private QuickSelectSketch( - final Validate val, - final MemorySegment seg, - final SummaryDeserializer deserializer, - final SummaryFactory summaryFactory) { - super(val.validate(seg, deserializer), val.myEmpty, summaryFactory); - nomEntries_ = val.myNomEntries; - lgResizeFactor_ = val.myLgResizeFactor; - samplingProbability_ = val.mySamplingProbability; - lgCurrentCapacity_ = val.myLgCurrentCapacity; - retEntries_ = val.myRetEntries; - rebuildThreshold_ = val.myRebuildThreshold; - hashTable_ = val.myHashTable; - summaryTable_ = val.mySummaryTable; - } - - private static final class Validate { - //super fields - long myThetaLong; - boolean myEmpty; - //this fields - int myNomEntries; - int myLgResizeFactor; - float mySamplingProbability; - int myLgCurrentCapacity; - int myRetEntries; - int myRebuildThreshold; - long[] myHashTable; - S[] mySummaryTable; - - @SuppressWarnings("unchecked") - long validate( - final MemorySegment seg, - final SummaryDeserializer deserializer) { - Objects.requireNonNull(seg, "Source MemorySegment must not be null."); - Objects.requireNonNull(deserializer, "Deserializer must not be null."); - checkBounds(0, 8, seg.byteSize()); - - int offset = 0; - final byte preambleLongs = seg.get(JAVA_BYTE, offset++); //byte 0 PreLongs - final byte version = seg.get(JAVA_BYTE, offset++); //byte 1 SerVer - final byte familyId = seg.get(JAVA_BYTE, offset++); //byte 2 FamID - SerializerDeserializer.validateFamily(familyId, preambleLongs); - if (version > serialVersionUID) { - throw new SketchesArgumentException( - "Unsupported serial version. 
Expected: " + serialVersionUID + " or lower, actual: " - + version); - } - SerializerDeserializer.validateType(seg.get(JAVA_BYTE, offset++), //byte 3 - SerializerDeserializer.SketchType.QuickSelectSketch); - final byte flags = seg.get(JAVA_BYTE, offset++); //byte 4 - final boolean isBigEndian = (flags & 1 << Flags.IS_BIG_ENDIAN.ordinal()) > 0; - if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { - throw new SketchesArgumentException("Endian byte order mismatch"); - } - myNomEntries = 1 << seg.get(JAVA_BYTE, offset++); //byte 5 - myLgCurrentCapacity = seg.get(JAVA_BYTE, offset++); //byte 6 - myLgResizeFactor = seg.get(JAVA_BYTE, offset++); //byte 7 - - checkBounds(0, preambleLongs * 8L, seg.byteSize()); - final boolean isInSamplingMode = (flags & 1 << Flags.IS_IN_SAMPLING_MODE.ordinal()) > 0; - mySamplingProbability = isInSamplingMode ? seg.get(JAVA_FLOAT_UNALIGNED, offset) : 1f; //bytes 8 - 11 - if (isInSamplingMode) { - offset += Float.BYTES; - } - - final boolean isThetaIncluded = (flags & 1 << Flags.IS_THETA_INCLUDED.ordinal()) > 0; - if (isThetaIncluded) { - myThetaLong = seg.get(JAVA_LONG_UNALIGNED, offset); - offset += Long.BYTES; - } else { - myThetaLong = (long) (Long.MAX_VALUE * (double) mySamplingProbability); - } - - int count = 0; - final boolean hasEntries = (flags & (1 << Flags.HAS_ENTRIES.ordinal())) > 0; - if (hasEntries) { - count = seg.get(JAVA_INT_UNALIGNED, offset); - offset += Integer.BYTES; - } - final int currentCapacity = 1 << myLgCurrentCapacity; - myHashTable = new long[currentCapacity]; - for (int i = 0; i < count; i++) { - final long hash = seg.get(JAVA_LONG_UNALIGNED, offset); - offset += Long.BYTES; - final MemorySegment segRegion = seg.asSlice(offset, seg.byteSize() - offset); - final DeserializeResult summaryResult = deserializer.heapifySummary(segRegion); - final S summary = (S) summaryResult.getObject(); - offset += summaryResult.getSize(); - //in-place equivalent to insert(hash, summary): - final int index = 
HashOperations.hashInsertOnly(myHashTable, myLgCurrentCapacity, hash); - if (mySummaryTable == null) { - mySummaryTable = (S[]) Array.newInstance(summary.getClass(), myHashTable.length); - } - mySummaryTable[index] = summary; - myRetEntries++; - myEmpty = false; - } - myEmpty = (flags & 1 << Flags.IS_EMPTY.ordinal()) > 0; - myRebuildThreshold = setRebuildThreshold(myHashTable, myNomEntries); - return myThetaLong; - } - - } //end class Validate - - /** - * @return a deep copy of this sketch - */ - QuickSelectSketch copy() { - return new QuickSelectSketch<>(this); - } - - long[] getHashTable() { - return hashTable_; - } - - @Override - public int getRetainedEntries() { - return retEntries_; - } - - @Override - public int getCountLessThanThetaLong(final long thetaLong) { - return count(hashTable_, thetaLong); - } - - S[] getSummaryTable() { - return summaryTable_; - } - - /** - * Get configured nominal number of entries - * @return nominal number of entries - */ - public int getNominalEntries() { - return nomEntries_; - } - - /** - * Get log_base2 of Nominal Entries - * @return log_base2 of Nominal Entries - */ - public int getLgK() { - return exactLog2OfLong(nomEntries_); - } - - /** - * Get configured sampling probability - * @return sampling probability - */ - public float getSamplingProbability() { - return samplingProbability_; - } - - /** - * Get current capacity - * @return current capacity - */ - public int getCurrentCapacity() { - return 1 << lgCurrentCapacity_; - } - - /** - * Get configured resize factor - * @return resize factor - */ - public ResizeFactor getResizeFactor() { - return ResizeFactor.getRF(lgResizeFactor_); - } - - /** - * Rebuilds reducing the actual number of entries to the nominal number of entries if needed - */ - public void trim() { - if (retEntries_ > nomEntries_) { - updateTheta(); - resize(hashTable_.length); - } - } - - /** - * Resets this sketch an empty state. 
- */ - public void reset() { - empty_ = true; - retEntries_ = 0; - thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability_); - final int startingCapacity = Util.getStartingCapacity(nomEntries_, lgResizeFactor_); - lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingCapacity); - hashTable_ = new long[startingCapacity]; - summaryTable_ = null; // wait for the first summary to call Array.newInstance() - rebuildThreshold_ = setRebuildThreshold(hashTable_, nomEntries_); - } - - /** - * Converts the current state of the sketch into a compact sketch - * @return compact sketch - */ - @Override - @SuppressWarnings("unchecked") - public CompactSketch compact() { - if (getRetainedEntries() == 0) { - if (empty_) { return new CompactSketch<>(null, null, Long.MAX_VALUE, true); } - return new CompactSketch<>(null, null, thetaLong_, false); - } - final long[] hashArr = new long[getRetainedEntries()]; - final S[] summaryArr = Util.newSummaryArray(summaryTable_, getRetainedEntries()); - int i = 0; - for (int j = 0; j < hashTable_.length; j++) { - if (summaryTable_[j] != null) { - hashArr[i] = hashTable_[j]; - summaryArr[i] = (S)summaryTable_[j].copy(); - i++; - } - } - return new CompactSketch<>(hashArr, summaryArr, thetaLong_, empty_); - } - - // Layout of first 8 bytes: - // Long || Start Byte Adr: - // Adr: - // || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | - // 0 || RF | lgArr | lgNom | Flags | SkType | FamID | SerVer | Preamble_Longs | - /** - * This serializes an UpdatableSketch (QuickSelectSketch). - * @return serialized representation of an UpdatableSketch (QuickSelectSketch). - * @deprecated As of 3.0.0, serializing an UpdatableSketch is deprecated. - * This capability will be removed in a future release. - * Serializing a CompactSketch is not deprecated. 
- */ - @Deprecated - @Override - public byte[] toByteArray() { - byte[][] summariesBytes = null; - int summariesBytesLength = 0; - if (retEntries_ > 0) { - summariesBytes = new byte[retEntries_][]; - int i = 0; - for (int j = 0; j < summaryTable_.length; j++) { - if (summaryTable_[j] != null) { - summariesBytes[i] = summaryTable_[j].toByteArray(); - summariesBytesLength += summariesBytes[i].length; - i++; - } - } - } - int sizeBytes = - Byte.BYTES // preamble longs - + Byte.BYTES // serial version - + Byte.BYTES // family - + Byte.BYTES // sketch type - + Byte.BYTES // flags - + Byte.BYTES // log2(nomEntries) - + Byte.BYTES // log2(currentCapacity) - + Byte.BYTES; // log2(resizeFactor) - if (isInSamplingMode()) { - sizeBytes += Float.BYTES; // samplingProbability - } - final boolean isThetaIncluded = isInSamplingMode() - ? thetaLong_ < samplingProbability_ : thetaLong_ < Long.MAX_VALUE; - if (isThetaIncluded) { - sizeBytes += Long.BYTES; - } - if (retEntries_ > 0) { - sizeBytes += Integer.BYTES; // count - } - sizeBytes += Long.BYTES * retEntries_ + summariesBytesLength; - final byte[] bytes = new byte[sizeBytes]; - int offset = 0; - bytes[offset++] = PREAMBLE_LONGS; - bytes[offset++] = serialVersionUID; - bytes[offset++] = (byte) Family.TUPLE.getID(); - bytes[offset++] = (byte) SerializerDeserializer.SketchType.QuickSelectSketch.ordinal(); - final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN); - bytes[offset++] = (byte) ( - (isBigEndian ? 1 << Flags.IS_BIG_ENDIAN.ordinal() : 0) - | (isInSamplingMode() ? 1 << Flags.IS_IN_SAMPLING_MODE.ordinal() : 0) - | (empty_ ? 1 << Flags.IS_EMPTY.ordinal() : 0) - | (retEntries_ > 0 ? 1 << Flags.HAS_ENTRIES.ordinal() : 0) - | (isThetaIncluded ? 
1 << Flags.IS_THETA_INCLUDED.ordinal() : 0) - ); - bytes[offset++] = (byte) Integer.numberOfTrailingZeros(nomEntries_); - bytes[offset++] = (byte) lgCurrentCapacity_; - bytes[offset++] = (byte) lgResizeFactor_; - if (samplingProbability_ < 1f) { - ByteArrayUtil.putFloatLE(bytes, offset, samplingProbability_); - offset += Float.BYTES; - } - if (isThetaIncluded) { - ByteArrayUtil.putLongLE(bytes, offset, thetaLong_); - offset += Long.BYTES; - } - if (retEntries_ > 0) { - ByteArrayUtil.putIntLE(bytes, offset, retEntries_); - offset += Integer.BYTES; - } - if (retEntries_ > 0) { - int i = 0; - for (int j = 0; j < hashTable_.length; j++) { - if (summaryTable_[j] != null) { - ByteArrayUtil.putLongLE(bytes, offset, hashTable_[j]); - offset += Long.BYTES; - System.arraycopy(summariesBytes[i], 0, bytes, offset, summariesBytes[i].length); - offset += summariesBytes[i].length; - i++; - } - } - } - return bytes; - } - - // non-public methods below - - // this is a special back door insert for merging - // not sufficient by itself without keeping track of theta of another sketch - @SuppressWarnings("unchecked") - void merge(final long hash, final S summary, final SummarySetOperations summarySetOps) { - empty_ = false; - if (hash > 0 && hash < thetaLong_) { - final int index = findOrInsert(hash); - if (index < 0) { - insertSummary(~index, (S)summary.copy()); //did not find, so insert - } else { - insertSummary(index, summarySetOps.union(summaryTable_[index], (S) summary.copy())); - } - rebuildIfNeeded(); - } - } - - boolean isInSamplingMode() { - return samplingProbability_ < 1f; - } - - void setThetaLong(final long theta) { - thetaLong_ = theta; - } - - void setEmpty(final boolean value) { - empty_ = value; - } - - int findOrInsert(final long hash) { - final int index = HashOperations.hashSearchOrInsert(hashTable_, lgCurrentCapacity_, hash); - if (index < 0) { - retEntries_++; - } - return index; - } - - boolean rebuildIfNeeded() { - if (retEntries_ <= rebuildThreshold_) { - 
return false; - } - if (hashTable_.length > nomEntries_) { - updateTheta(); - rebuild(); - } else { - resize(hashTable_.length * (1 << lgResizeFactor_)); - } - return true; - } - - void rebuild() { - resize(hashTable_.length); - } - - void insert(final long hash, final S summary) { - final int index = HashOperations.hashInsertOnly(hashTable_, lgCurrentCapacity_, hash); - insertSummary(index, summary); - retEntries_++; - empty_ = false; - } - - private void updateTheta() { - final long[] hashArr = new long[retEntries_]; - int i = 0; - //Because of the association of the hashTable with the summaryTable we cannot destroy the - // hashTable structure. So we must copy. May as well compact at the same time. - // Might consider a whole table clone and use the selectExcludingZeros method instead. - // Not sure if there would be any speed advantage. - for (int j = 0; j < hashTable_.length; j++) { - if (summaryTable_[j] != null) { - hashArr[i++] = hashTable_[j]; - } - } - thetaLong_ = QuickSelect.select(hashArr, 0, retEntries_ - 1, nomEntries_); - } - - private void resize(final int newSize) { - final long[] oldHashTable = hashTable_; - final S[] oldSummaryTable = summaryTable_; - hashTable_ = new long[newSize]; - summaryTable_ = Util.newSummaryArray(summaryTable_, newSize); - lgCurrentCapacity_ = Integer.numberOfTrailingZeros(newSize); - retEntries_ = 0; - for (int i = 0; i < oldHashTable.length; i++) { - if (oldSummaryTable[i] != null && oldHashTable[i] < thetaLong_) { - insert(oldHashTable[i], oldSummaryTable[i]); - } - } - rebuildThreshold_ = setRebuildThreshold(hashTable_, nomEntries_); - } - - private static int setRebuildThreshold(final long[] hashTable, final int nomEntries) { - if (hashTable.length > nomEntries) { - return (int) (hashTable.length * ThetaUtil.REBUILD_THRESHOLD); - } else { - return (int) (hashTable.length * ThetaUtil.RESIZE_THRESHOLD); - } - } - - @SuppressWarnings("unchecked") - protected void insertSummary(final int index, final S summary) { - if 
(summaryTable_ == null) { - summaryTable_ = (S[]) Array.newInstance(summary.getClass(), hashTable_.length); - } - summaryTable_[index] = summary; - } - - @Override - public TupleSketchIterator iterator() { - return new TupleSketchIterator<>(hashTable_, summaryTable_); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/SerializerDeserializer.java b/src/main/java/org/apache/datasketches/tuple2/SerializerDeserializer.java deleted file mode 100644 index cbe40ae4c..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/SerializerDeserializer.java +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.SketchesArgumentException; - -/** - * Multipurpose serializer-deserializer for a collection of sketches defined by the enum. - */ -public final class SerializerDeserializer { - - /** - * Defines the sketch classes that this SerializerDeserializer can handle. 
- */ - public static enum SketchType { - /** QuickSelectSketch */ - QuickSelectSketch, - /** CompactSketch */ - CompactSketch, - /** ArrayOfDoublesQuickSelectSketch */ - ArrayOfDoublesQuickSelectSketch, - /** ArrayOfDoublesCompactSketch */ - ArrayOfDoublesCompactSketch, - /** ArrayOfDoublesUnion */ - ArrayOfDoublesUnion } - - static final int TYPE_BYTE_OFFSET = 3; - - /** - * Validates the preamble-Longs value given the family ID - * @param familyId the given family ID - * @param preambleLongs the given preambleLongs value - */ - public static void validateFamily(final byte familyId, final byte preambleLongs) { - final Family family = Family.idToFamily(familyId); - if (family.equals(Family.TUPLE)) { - if (preambleLongs < Family.TUPLE.getMinPreLongs() || preambleLongs > Family.TUPLE.getMaxPreLongs()) { - throw new SketchesArgumentException( - "Possible corruption: Invalid PreambleLongs value for family TUPLE: " + preambleLongs); - } - } else { - throw new SketchesArgumentException( - "Possible corruption: Invalid Family: " + family.toString()); - } - } - - /** - * Validates the sketch type byte versus the expected value - * @param sketchTypeByte the given sketch type byte - * @param expectedType the expected value - */ - public static void validateType(final byte sketchTypeByte, final SketchType expectedType) { - final SketchType sketchType = getSketchType(sketchTypeByte); - if (!sketchType.equals(expectedType)) { - throw new SketchesArgumentException("Sketch Type mismatch. 
Expected " + expectedType.name() - + ", got " + sketchType.name()); - } - } - - /** - * Gets the sketch type byte from the given MemorySegment image - * @param seg the given MemorySegment image - * @return the SketchType - */ - public static SketchType getSketchType(final MemorySegment seg) { - final byte sketchTypeByte = seg.get(JAVA_BYTE, TYPE_BYTE_OFFSET); - return getSketchType(sketchTypeByte); - } - - private static SketchType getSketchType(final byte sketchTypeByte) { - if ((sketchTypeByte < 0) || (sketchTypeByte >= SketchType.values().length)) { - throw new SketchesArgumentException("Invalid Sketch Type " + sketchTypeByte); - } - return SketchType.values()[sketchTypeByte]; - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/Sketch.java b/src/main/java/org/apache/datasketches/tuple2/Sketch.java deleted file mode 100644 index fc85afb4c..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/Sketch.java +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2; - -import static org.apache.datasketches.common.Util.LS; - -import org.apache.datasketches.thetacommon2.BinomialBoundsN; - -/** - * This is an equivalent to org.apache.datasketches.theta2.Sketch with - * addition of a user-defined Summary object associated with every unique entry - * in the sketch. - * @param Type of Summary - */ -public abstract class Sketch { - - protected static final byte PREAMBLE_LONGS = 1; - - long thetaLong_; - boolean empty_ = true; - protected SummaryFactory summaryFactory_ = null; - - Sketch(final long thetaLong, final boolean empty, final SummaryFactory summaryFactory) { - this.thetaLong_ = thetaLong; - this.empty_ = empty; - this.summaryFactory_ = summaryFactory; - } - - /** - * Converts this sketch to a CompactSketch on the Java heap. - * - *

              If this sketch is already in compact form this operation returns this. - * - * @return this sketch as a CompactSketch on the Java heap. - */ - public abstract CompactSketch compact(); - - /** - * Estimates the cardinality of the set (number of unique values presented to the sketch) - * @return best estimate of the number of unique values - */ - public double getEstimate() { - if (!isEstimationMode()) { return getRetainedEntries(); } - return getRetainedEntries() / getTheta(); - } - - /** - * Gets the approximate upper error bound given the specified number of Standard Deviations. - * This will return getEstimate() if isEmpty() is true. - * - * @param numStdDev - * See Number of Standard Deviations - * @return the upper bound. - */ - public double getUpperBound(final int numStdDev) { - if (!isEstimationMode()) { return getRetainedEntries(); } - return BinomialBoundsN.getUpperBound(getRetainedEntries(), getTheta(), numStdDev, empty_); - } - - /** - * Gets the approximate lower error bound given the specified number of Standard Deviations. - * This will return getEstimate() if isEmpty() is true. - * - * @param numStdDev - * See Number of Standard Deviations - * @return the lower bound. - */ - public double getLowerBound(final int numStdDev) { - if (!isEstimationMode()) { return getRetainedEntries(); } - return BinomialBoundsN.getLowerBound(getRetainedEntries(), getTheta(), numStdDev, empty_); - } - - /** - * Gets the estimate of the true distinct population of subset tuples represented by the count - * of entries in a subset of the total retained entries of the sketch. - * @param numSubsetEntries number of entries for a chosen subset of the sketch. - * @return the estimate of the true distinct population of subset tuples represented by the count - * of entries in a subset of the total retained entries of the sketch. 
- */ - public double getEstimate(final int numSubsetEntries) { - if (!isEstimationMode()) { return numSubsetEntries; } - return numSubsetEntries / getTheta(); - } - - /** - * Gets the estimate of the lower bound of the true distinct population represented by the count - * of entries in a subset of the total retained entries of the sketch. - * @param numStdDev - * See Number of Standard Deviations - * @param numSubsetEntries number of entries for a chosen subset of the sketch. - * @return the estimate of the lower bound of the true distinct population represented by the count - * of entries in a subset of the total retained entries of the sketch. - */ - public double getLowerBound(final int numStdDev, final int numSubsetEntries) { - if (!isEstimationMode()) { return numSubsetEntries; } - return BinomialBoundsN.getLowerBound(numSubsetEntries, getTheta(), numStdDev, isEmpty()); - } - - /** - * Gets the estimate of the upper bound of the true distinct population represented by the count - * of entries in a subset of the total retained entries of the sketch. - * @param numStdDev - * See Number of Standard Deviations - * @param numSubsetEntries number of entries for a chosen subset of the sketch. - * @return the estimate of the upper bound of the true distinct population represented by the count - * of entries in a subset of the total retained entries of the sketch. - */ - public double getUpperBound(final int numStdDev, final int numSubsetEntries) { - if (!isEstimationMode()) { return numSubsetEntries; } - return BinomialBoundsN.getUpperBound(numSubsetEntries, getTheta(), numStdDev, isEmpty()); - } - - /** - * See Empty - * @return true if empty. - */ - public boolean isEmpty() { - return empty_; - } - - /** - * Returns true if the sketch is Estimation Mode (as opposed to Exact Mode). - * This is true if theta < 1.0 AND isEmpty() is false. - * @return true if the sketch is in estimation mode. 
- */ - public boolean isEstimationMode() { - return thetaLong_ < Long.MAX_VALUE && !isEmpty(); - } - - /** - * Returns number of retained entries - * @return number of retained entries - */ - public abstract int getRetainedEntries(); - - /** - * Gets the number of hash values less than the given theta expressed as a long. - * @param thetaLong the given theta as a long between zero and Long.MAX_VALUE. - * @return the number of hash values less than the given thetaLong. - */ - public abstract int getCountLessThanThetaLong(final long thetaLong); - - /** - * Gets the Summary Factory class of type S - * @return the Summary Factory class of type S - */ - public SummaryFactory getSummaryFactory() { - return summaryFactory_; - } - - /** - * Gets the value of theta as a double between zero and one - * @return the value of theta as a double - */ - public double getTheta() { - return getThetaLong() / (double) Long.MAX_VALUE; - } - - /** - * This is to serialize a sketch instance to a byte array. - * - *

              As of 3.0.0, serializing an UpdatableSketch is deprecated. - * This capability will be removed in a future release. - * Serializing a CompactSketch is not deprecated.

              - * @return serialized representation of the sketch - */ - public abstract byte[] toByteArray(); - - /** - * Returns a SketchIterator - * @return a SketchIterator - */ - public abstract TupleSketchIterator iterator(); - - /** - * Returns Theta as a long - * @return Theta as a long - */ - public long getThetaLong() { - return isEmpty() ? Long.MAX_VALUE : thetaLong_; - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder(); - sb.append("### ").append(this.getClass().getSimpleName()).append(" SUMMARY: ").append(LS); - sb.append(" Estimate : ").append(getEstimate()).append(LS); - sb.append(" Upper Bound, 95% conf : ").append(getUpperBound(2)).append(LS); - sb.append(" Lower Bound, 95% conf : ").append(getLowerBound(2)).append(LS); - sb.append(" Theta (double) : ").append(this.getTheta()).append(LS); - sb.append(" Theta (long) : ").append(this.getThetaLong()).append(LS); - sb.append(" EstMode? : ").append(isEstimationMode()).append(LS); - sb.append(" Empty? 
: ").append(isEmpty()).append(LS); - sb.append(" Retained Entries : ").append(this.getRetainedEntries()).append(LS); - if (this instanceof UpdatableSketch) { - @SuppressWarnings("rawtypes") - final UpdatableSketch updatable = (UpdatableSketch) this; - sb.append(" Nominal Entries (k) : ").append(updatable.getNominalEntries()).append(LS); - sb.append(" Current Capacity : ").append(updatable.getCurrentCapacity()).append(LS); - sb.append(" Resize Factor : ").append(updatable.getResizeFactor().getValue()).append(LS); - sb.append(" Sampling Probability (p): ").append(updatable.getSamplingProbability()).append(LS); - } - sb.append("### END SKETCH SUMMARY").append(LS); - return sb.toString(); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/Sketches.java b/src/main/java/org/apache/datasketches/tuple2/Sketches.java deleted file mode 100644 index 1fdc545dc..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/Sketches.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2; - -import java.lang.foreign.MemorySegment; - -/** - * Convenient static methods to instantiate generic tuple sketches. 
- */ -@SuppressWarnings("deprecation") -public final class Sketches { - - /** - * Creates an empty sketch. - * @param Type of Summary - * @return an empty instance of Sketch - */ - public static Sketch createEmptySketch() { - return new CompactSketch<>(null, null, Long.MAX_VALUE, true); - } - - /** - * Instantiate a Sketch from a given MemorySegment. - * @param Type of Summary - * @param seg MemorySegment object representing a Sketch - * @param deserializer instance of SummaryDeserializer - * @return Sketch created from its MemorySegment representation - */ - public static Sketch heapifySketch( - final MemorySegment seg, - final SummaryDeserializer deserializer) { - final SerializerDeserializer.SketchType sketchType = SerializerDeserializer.getSketchType(seg); - if (sketchType == SerializerDeserializer.SketchType.QuickSelectSketch) { - return new QuickSelectSketch<>(seg, deserializer, null); - } - return new CompactSketch<>(seg, deserializer); - } - - /** - * Instantiate UpdatableSketch from a given MemorySegment - * @param Type of update value - * @param Type of Summary - * @param seg MemorySegment object representing a Sketch - * @param deserializer instance of SummaryDeserializer - * @param summaryFactory instance of SummaryFactory - * @return Sketch created from its MemorySegment representation - */ - public static > UpdatableSketch heapifyUpdatableSketch( - final MemorySegment seg, - final SummaryDeserializer deserializer, - final SummaryFactory summaryFactory) { - return new UpdatableSketch<>(seg, deserializer, summaryFactory); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/Summary.java b/src/main/java/org/apache/datasketches/tuple2/Summary.java deleted file mode 100644 index 8fb028401..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/Summary.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2; - -/** - * Interface for user-defined Summary, which is associated with every hash in a tuple sketch - */ -public interface Summary { - - /** - * Deep copy. - * - *

              Caution: This must implement a deep copy. - * - * @return deep copy of the Summary - */ - public Summary copy(); - - /** - * This is to serialize a Summary instance to a byte array. - * - *

              The user should encode in the byte array its total size, which is used during - * deserialization, especially if the Summary has variable sized elements. - * - * @return serialized representation of the Summary - */ - public byte[] toByteArray(); - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/SummaryDeserializer.java b/src/main/java/org/apache/datasketches/tuple2/SummaryDeserializer.java deleted file mode 100644 index 93d9f54d9..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/SummaryDeserializer.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2; - -import java.lang.foreign.MemorySegment; - -/** - * Interface for deserializing user-defined Summary - * @param type of Summary - */ -public interface SummaryDeserializer { - - /** - * This is to create an instance of a Summary given a serialized representation. - * The user may assume that the start of the given MemorySegment is the correct place to start - * deserializing. 
However, the user must be able to determine the number of bytes required to - * deserialize the summary as the capacity of the given MemorySegment may - * include multiple such summaries and may be much larger than required for a single summary. - * @param seg MemorySegment object with serialized representation of a Summary - * @return DeserializedResult object, which contains a Summary object and number of bytes read - * from the MemorySegment - */ - public DeserializeResult heapifySummary(MemorySegment seg); - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/SummaryFactory.java b/src/main/java/org/apache/datasketches/tuple2/SummaryFactory.java deleted file mode 100644 index ea229a26c..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/SummaryFactory.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2; - -/** - * Interface for user-defined SummaryFactory - * @param type of Summary - */ -public interface SummaryFactory { - - /** - * Returns new instance of Summary - * @return new instance of Summary - */ - public S newSummary(); - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/SummarySetOperations.java b/src/main/java/org/apache/datasketches/tuple2/SummarySetOperations.java deleted file mode 100644 index b0df75dae..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/SummarySetOperations.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2; - -/** - * This is to provide methods of producing unions and intersections of two Summary objects. - * @param type of Summary - */ -public interface SummarySetOperations { - - /** - * This is called by the union operator when both sketches have the same hash value. - * - *

              Caution: Do not modify the input Summary objects. Also do not return them directly, - * unless they are immutable (most Summary objects are not). For mutable Summary objects, it is - * important to create a new Summary object with the correct contents to be returned. Do not - * return null summaries. - * - * @param a Summary from sketch A - * @param b Summary from sketch B - * @return union of Summary A and Summary B - */ - public S union(S a, S b); - - /** - * This is called by the intersection operator when both sketches have the same hash value. - * - *

              Caution: Do not modify the input Summary objects. Also do not return them directly, - * unless they are immutable (most Summary objects are not). For mutable Summary objects, it is - * important to create a new Summary object with the correct contents to be returned. Do not - * return null summaries. - * - * @param a Summary from sketch A - * @param b Summary from sketch B - * @return intersection of Summary A and Summary B - */ - public S intersection(S a, S b); - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/TupleSketchIterator.java b/src/main/java/org/apache/datasketches/tuple2/TupleSketchIterator.java deleted file mode 100644 index a4faa6018..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/TupleSketchIterator.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2; - -/** - * Iterator over a generic tuple sketch - * @param Type of Summary - */ -public class TupleSketchIterator { - - private final long[] hashArrTbl_; //could be either hashArr or hashTable - private final S[] summaryArrTbl_; //could be either summaryArr or summaryTable - private int i_; - - TupleSketchIterator(final long[] hashes, final S[] summaries) { - hashArrTbl_ = hashes; - summaryArrTbl_ = summaries; - i_ = -1; - } - - /** - * Advancing the iterator and checking existence of the next entry - * is combined here for efficiency. This results in an undefined - * state of the iterator before the first call of this method. - * @return true if the next element exists - */ - public boolean next() { - if (hashArrTbl_ == null) { return false; } - i_++; - while (i_ < hashArrTbl_.length) { - if (hashArrTbl_[i_] > 0) { return true; } - i_++; - } - return false; - } - - /** - * Gets the hash from the current entry in the sketch, which is a hash - * of the original key passed to update(). The original keys are not - * retained. Don't call this before calling next() for the first time - * or after getting false from next(). - * @return hash from the current entry - */ - public long getHash() { - return hashArrTbl_[i_]; - } - - /** - * Gets a Summary object from the current entry in the sketch. - * Don't call this before calling next() for the first time - * or after getting false from next(). - * @return Summary object for the current entry (this is not a copy!) - */ - public S getSummary() { - return summaryArrTbl_[i_]; - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/Union.java b/src/main/java/org/apache/datasketches/tuple2/Union.java deleted file mode 100644 index 0beb8a29e..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/Union.java +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2; - -import static java.lang.Math.min; - -import org.apache.datasketches.common.QuickSelect; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon2.ThetaUtil; - -/** - * Compute the union of two or more generic tuple sketches or generic tuple sketches combined with - * theta sketches. A new instance represents an empty set. - * @param Type of Summary - */ -public class Union { - private final SummarySetOperations summarySetOps_; - private QuickSelectSketch qsk_; - private long unionThetaLong_; // need to maintain outside of the sketch - private boolean empty_; - - /** - * Creates new Union instance with instructions on how to process two summaries that - * overlap. This will have the default nominal entries (K). - * @param summarySetOps instance of SummarySetOperations - */ - public Union(final SummarySetOperations summarySetOps) { - this(ThetaUtil.DEFAULT_NOMINAL_ENTRIES, summarySetOps); - } - - /** - * Creates new Union instance. - * @param nomEntries nominal entries (K). Forced to the nearest power of 2 greater than - * given value. 
- * @param summarySetOps instance of SummarySetOperations - */ - public Union(final int nomEntries, final SummarySetOperations summarySetOps) { - summarySetOps_ = summarySetOps; - qsk_ = new QuickSelectSketch<>(nomEntries, null); - unionThetaLong_ = qsk_.getThetaLong(); - empty_ = true; - } - - /** - * Perform a stateless, pair-wise union operation between two tuple sketches. - * The returned sketch will be cut back to the smaller of the two k values if required. - * - *

              Nulls and empty sketches are ignored.

              - * - * @param tupleSketchA The first argument - * @param tupleSketchB The second argument - * @return the result ordered CompactSketch on the heap. - */ - public CompactSketch union(final Sketch tupleSketchA, final Sketch tupleSketchB) { - reset(); - union(tupleSketchA); - union(tupleSketchB); - final CompactSketch csk = getResult(true); - return csk; - } - - /** - * Perform a stateless, pair-wise union operation between a tupleSketch and a thetaSketch. - * The returned sketch will be cut back to the smaller of the two k values if required. - * - *

              Nulls and empty sketches are ignored.

              - * - * @param tupleSketch The first argument - * @param thetaSketch The second argument - * @param summary the given proxy summary for the theta sketch, which doesn't have one. - * This may not be null. - * @return the result ordered CompactSketch on the heap. - */ - public CompactSketch union(final Sketch tupleSketch, - final org.apache.datasketches.theta2.Sketch thetaSketch, final S summary) { - reset(); - union(tupleSketch); - union(thetaSketch, summary); - final CompactSketch csk = getResult(true); - return csk; - } - - /** - * Performs a stateful union of the internal set with the given tupleSketch. - * @param tupleSketch input tuple sketch to merge with the internal set. - * Nulls and empty sketches are ignored. - */ - public void union(final Sketch tupleSketch) { - if (tupleSketch == null || tupleSketch.isEmpty()) { return; } - empty_ = false; - unionThetaLong_ = min(tupleSketch.thetaLong_, unionThetaLong_); - final TupleSketchIterator it = tupleSketch.iterator(); - while (it.next()) { - qsk_.merge(it.getHash(), it.getSummary(), summarySetOps_); - } - unionThetaLong_ = min(unionThetaLong_, qsk_.thetaLong_); - } - - /** - * Performs a stateful union of the internal set with the given thetaSketch by combining entries - * using the hashes from the theta sketch and summary values from the given summary. - * @param thetaSketch the given theta sketch input. If null or empty, it is ignored. - * @param summary the given proxy summary for the theta sketch, which doesn't have one. This may - * not be null. 
- */ - public void union(final org.apache.datasketches.theta2.Sketch thetaSketch, final S summary) { - if (summary == null) { - throw new SketchesArgumentException("Summary cannot be null."); } - if (thetaSketch == null || thetaSketch.isEmpty()) { return; } - empty_ = false; - final long thetaIn = thetaSketch.getThetaLong(); - unionThetaLong_ = min(thetaIn, unionThetaLong_); - final org.apache.datasketches.theta2.HashIterator it = thetaSketch.iterator(); - while (it.next()) { - qsk_.merge(it.get(), summary, summarySetOps_); //copies summary - } - unionThetaLong_ = min(unionThetaLong_, qsk_.thetaLong_); - } - - /** - * Gets the result of a sequence of stateful union operations as an unordered CompactSketch - * @return result of the stateful unions so far. The state of this operation is not reset after the - * result is returned. - */ - public CompactSketch getResult() { - return getResult(false); - } - - /** - * Gets the result of a sequence of stateful union operations as an unordered CompactSketch. - * @param reset If true, clears this operator to the empty state after this result is - * returned. Set this to false if you wish to obtain an intermediate result. - * @return result of the stateful union - */ - @SuppressWarnings("unchecked") - public CompactSketch getResult(final boolean reset) { - final CompactSketch result; - if (empty_) { - result = qsk_.compact(); - } else if (unionThetaLong_ >= qsk_.thetaLong_ && qsk_.getRetainedEntries() <= qsk_.getNominalEntries()) { - //unionThetaLong_ >= qsk_.thetaLong_ means we can ignore unionThetaLong_. We don't need to rebuild. - //qsk_.getRetainedEntries() <= qsk_.getNominalEntries() means we don't need to pull back to k. 
- result = qsk_.compact(); - } else { - final long tmpThetaLong = min(unionThetaLong_, qsk_.thetaLong_); - - //count the number of valid hashes in because Alpha can have dirty values - int numHashesIn = 0; - TupleSketchIterator it = qsk_.iterator(); - while (it.next()) { //counts valid hashes - if (it.getHash() < tmpThetaLong) { numHashesIn++; } - } - - if (numHashesIn == 0) { - //numHashes == 0 && empty == false means Theta < 1.0 - //Therefore, this is a degenerate sketch: theta < 1.0, count = 0, empty = false - result = new CompactSketch<>(null, null, tmpThetaLong, empty_); - } - - else { - //we know: empty == false, count > 0 - final int numHashesOut; - final long thetaLongOut; - if (numHashesIn > qsk_.getNominalEntries()) { - //we need to trim hashes and need a new thetaLong - final long[] tmpHashArr = new long[numHashesIn]; // temporary, order will be destroyed by quick select - it = qsk_.iterator(); - int i = 0; - while (it.next()) { - final long hash = it.getHash(); - if (hash < tmpThetaLong) { tmpHashArr[i++] = hash; } - } - numHashesOut = qsk_.getNominalEntries(); - thetaLongOut = QuickSelect.select(tmpHashArr, 0, numHashesIn - 1, numHashesOut); - } else { - numHashesOut = numHashesIn; - thetaLongOut = tmpThetaLong; - } - //now prepare the output arrays - final long[] hashArr = new long[numHashesOut]; - final S[] summaries = Util.newSummaryArray(qsk_.getSummaryTable(), numHashesOut); - it = qsk_.iterator(); - int i = 0; - while (it.next()) { //select the qualifying hashes from the gadget synchronized with the summaries - final long hash = it.getHash(); - if (hash < thetaLongOut) { - hashArr[i] = hash; - summaries[i] = (S) it.getSummary().copy(); - i++; - } - } - result = new CompactSketch<>(hashArr, summaries, thetaLongOut, empty_); - } - } - if (reset) { reset(); } - return result; - } - - /** - * Resets the internal set to the initial state, which represents an empty set. This is only useful - * after sequences of stateful union operations. 
- */ - public void reset() { - qsk_.reset(); - unionThetaLong_ = qsk_.getThetaLong(); - empty_ = true; - } -} diff --git a/src/main/java/org/apache/datasketches/tuple2/UpdatableSketch.java b/src/main/java/org/apache/datasketches/tuple2/UpdatableSketch.java deleted file mode 100644 index b9a01c084..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/UpdatableSketch.java +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2; - -import static org.apache.datasketches.common.Util.DEFAULT_UPDATE_SEED; - -import java.lang.foreign.MemorySegment; -import java.nio.ByteBuffer; - -import org.apache.datasketches.hash.MurmurHash3; - -/** - * An extension of QuickSelectSketch<S>, which can be updated with many types of keys. - * Summary objects are created using a user-defined SummaryFactory class, - * which should allow very flexible parameterization if needed. - * Keys are presented to a sketch along with values of a user-defined - * update type U. When an entry is inserted into a sketch or a duplicate key is - * presented to a sketch then summary.update(U value) method will be called. So - * any kind of user-defined accumulation is possible. 
Summaries also must know - * how to copy themselves. Also union and intersection of summaries can be - * implemented in a sub-class of SummarySetOperations, which will be used in - * case Union or Intersection of two instances of Tuple Sketch is needed - * @param Type of the value, which is passed to update method of a Summary - * @param Type of the UpdatableSummary<U> - */ -public class UpdatableSketch> extends QuickSelectSketch { - - /** - * This is to create a new instance of an UpdatableQuickSelectSketch. - * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than - * or equal to the given value. - * @param lgResizeFactor log2(resizeFactor) - value from 0 to 3: - *
              -   * 0 - no resizing (max size allocated),
              -   * 1 - double internal hash table each time it reaches a threshold
              -   * 2 - grow four times
              -   * 3 - grow eight times (default)
              -   * 
              - * @param samplingProbability - * See Sampling Probability - * @param summaryFactory An instance of a SummaryFactory. - */ - public UpdatableSketch(final int nomEntries, final int lgResizeFactor, - final float samplingProbability, final SummaryFactory summaryFactory) { - super(nomEntries, lgResizeFactor, samplingProbability, summaryFactory); - } - - /** - * This is to create an instance of a sketch given a serialized form - * @param srcSeg MemorySegment object with data of a serialized UpdatableSketch - * @param deserializer instance of SummaryDeserializer - * @param summaryFactory instance of SummaryFactory - * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated. - * This capability will be removed in a future release. - * Heapifying a CompactSketch is not deprecated. - */ - @Deprecated - public UpdatableSketch( - final MemorySegment srcSeg, - final SummaryDeserializer deserializer, - final SummaryFactory summaryFactory) { - super(srcSeg, deserializer, summaryFactory); - } - - /** - * Copy Constructor - * @param sketch the sketch to copy - */ - public UpdatableSketch(final UpdatableSketch sketch) { - super(sketch); - } - - /** - * @return a deep copy of this sketch - */ - @Override - public UpdatableSketch copy() { - return new UpdatableSketch<>(this); - } - - /** - * Updates this sketch with a long key and U value. - * The value is passed to update() method of the Summary object associated with the key - * - * @param key The given long key - * @param value The given U value - */ - public void update(final long key, final U value) { - update(new long[] {key}, value); - } - - /** - * Updates this sketch with a double key and U value. 
- * The value is passed to update() method of the Summary object associated with the key - * - * @param key The given double key - * @param value The given U value - */ - public void update(final double key, final U value) { - update(Util.doubleToLongArray(key), value); - } - - /** - * Updates this sketch with a String key and U value. - * The value is passed to update() method of the Summary object associated with the key - * - * @param key The given String key - * @param value The given U value - */ - public void update(final String key, final U value) { - update(Util.stringToByteArray(key), value); - } - - /** - * Updates this sketch with a byte[] key and U value. - * The value is passed to update() method of the Summary object associated with the key - * - * @param key The given byte[] key - * @param value The given U value - */ - public void update(final byte[] key, final U value) { - if ((key == null) || (key.length == 0)) { return; } - insertOrIgnore(MurmurHash3.hash(key, DEFAULT_UPDATE_SEED)[0] >>> 1, value); - } - - /** - * Updates this sketch with a ByteBuffer and U value - * The value is passed to the update() method of the Summary object associated with the key - * - * @param buffer The given ByteBuffer key - * @param value The given U value - */ - public void update(final ByteBuffer buffer, final U value) { - if (buffer == null || buffer.hasRemaining() == false) { return; } - insertOrIgnore(MurmurHash3.hash(buffer, DEFAULT_UPDATE_SEED)[0] >>> 1, value); - } - - /** - * Updates this sketch with a int[] key and U value. - * The value is passed to update() method of the Summary object associated with the key - * - * @param key The given int[] key - * @param value The given U value - */ - public void update(final int[] key, final U value) { - if ((key == null) || (key.length == 0)) { return; } - insertOrIgnore(MurmurHash3.hash(key, DEFAULT_UPDATE_SEED)[0] >>> 1, value); - } - - /** - * Updates this sketch with a long[] key and U value. 
- * The value is passed to update() method of the Summary object associated with the key - * - * @param key The given long[] key - * @param value The given U value - */ - public void update(final long[] key, final U value) { - if ((key == null) || (key.length == 0)) { return; } - insertOrIgnore(MurmurHash3.hash(key, DEFAULT_UPDATE_SEED)[0] >>> 1, value); - } - - void insertOrIgnore(final long hash, final U value) { - setEmpty(false); - if (hash >= getThetaLong()) { return; } - int index = findOrInsert(hash); - if (index < 0) { - index = ~index; - insertSummary(index, getSummaryFactory().newSummary()); - } - summaryTable_[index].update(value); - rebuildIfNeeded(); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/UpdatableSketchBuilder.java b/src/main/java/org/apache/datasketches/tuple2/UpdatableSketchBuilder.java deleted file mode 100644 index de3dae88a..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/UpdatableSketchBuilder.java +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2; - -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon2.ThetaUtil; - -/** - * For building a new generic tuple UpdatableSketch - * @param Type of update value - * @param Type of Summary - */ -public class UpdatableSketchBuilder> { - - private int nomEntries_; - private ResizeFactor resizeFactor_; - private float samplingProbability_; - private final SummaryFactory summaryFactory_; - - private static final float DEFAULT_SAMPLING_PROBABILITY = 1; - private static final ResizeFactor DEFAULT_RESIZE_FACTOR = ResizeFactor.X8; - - /** - * Creates an instance of UpdatableSketchBuilder with default parameters - * @param summaryFactory An instance of SummaryFactory. - */ - public UpdatableSketchBuilder(final SummaryFactory summaryFactory) { - nomEntries_ = ThetaUtil.DEFAULT_NOMINAL_ENTRIES; - resizeFactor_ = DEFAULT_RESIZE_FACTOR; - samplingProbability_ = DEFAULT_SAMPLING_PROBABILITY; - summaryFactory_ = summaryFactory; - } - - /** - * This is to set the nominal number of entries. - * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than - * or equal to the given value. - * @return this UpdatableSketchBuilder - */ - public UpdatableSketchBuilder setNominalEntries(final int nomEntries) { - nomEntries_ = 1 << ThetaUtil.checkNomLongs(nomEntries); - return this; - } - - /** - * This is to set the resize factor. - * Value of X1 means that the maximum capacity is allocated from the start. - * Default resize factor is X8. - * @param resizeFactor value of X1, X2, X4 or X8 - * @return this UpdatableSketchBuilder - */ - public UpdatableSketchBuilder setResizeFactor(final ResizeFactor resizeFactor) { - resizeFactor_ = resizeFactor; - return this; - } - - /** - * This is to set sampling probability. - * Default probability is 1. 
- * @param samplingProbability sampling probability from 0 to 1 - * @return this UpdatableSketchBuilder - */ - public UpdatableSketchBuilder setSamplingProbability(final float samplingProbability) { - if ((samplingProbability < 0) || (samplingProbability > 1f)) { - throw new SketchesArgumentException("sampling probability must be between 0 and 1"); - } - samplingProbability_ = samplingProbability; - return this; - } - - /** - * Returns an UpdatableSketch with the current configuration of this Builder. - * @return an UpdatableSketch - */ - public UpdatableSketch build() { - return new UpdatableSketch<>(nomEntries_, resizeFactor_.lg(), samplingProbability_, - summaryFactory_); - } - - /** - * Resets the Nominal Entries, Resize Factor and Sampling Probability to their default values. - * The assignment of U and S remain the same. - */ - public void reset() { - nomEntries_ = ThetaUtil.DEFAULT_NOMINAL_ENTRIES; - resizeFactor_ = DEFAULT_RESIZE_FACTOR; - samplingProbability_ = DEFAULT_SAMPLING_PROBABILITY; - } -} diff --git a/src/main/java/org/apache/datasketches/tuple2/UpdatableSummary.java b/src/main/java/org/apache/datasketches/tuple2/UpdatableSummary.java deleted file mode 100644 index 2ec5df695..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/UpdatableSummary.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2; - -/** - * Interface for updating user-defined Summary - * @param type of update value - */ -public interface UpdatableSummary extends Summary { - - /** - * This is to provide a method of updating summaries. - * This is primarily used internally. - * @param value update value - * @return this - */ - UpdatableSummary update(U value); - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/Util.java b/src/main/java/org/apache/datasketches/tuple2/Util.java deleted file mode 100644 index 24f326b01..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/Util.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.apache.datasketches.common.Util.ceilingPowerOf2; -import static org.apache.datasketches.hash.XxHash.hashCharArr; -import static org.apache.datasketches.hash.XxHash.hashString; - -import java.lang.reflect.Array; - -import org.apache.datasketches.thetacommon2.ThetaUtil; - -/** - * Common utility functions for Tuples - */ -public final class Util { - private static final int PRIME = 0x7A3C_CA71; - - /** - * Converts a double to a long[]. - * @param value the given double value - * @return the long array - */ - public static final long[] doubleToLongArray(final double value) { - final double d = (value == 0.0) ? 0.0 : value; // canonicalize -0.0, 0.0 - final long[] array = { Double.doubleToLongBits(d) }; // canonicalize all NaN & +/- infinity forms - return array; - } - - /** - * Converts a String to a UTF_8 byte array. If the given value is either null or empty this - * method returns null. - * @param value the given String value - * @return the UTF_8 byte array - */ - public static final byte[] stringToByteArray(final String value) { - if ((value == null) || value.isEmpty()) { return null; } - return value.getBytes(UTF_8); - } - - /** - * Gets the starting capacity of a new sketch given the Nominal Entries and the log Resize Factor. - * @param nomEntries the given Nominal Entries - * @param lgResizeFactor the given log Resize Factor - * @return the starting capacity - */ - public static int getStartingCapacity(final int nomEntries, final int lgResizeFactor) { - return 1 << ThetaUtil.startingSubMultiple( - // target table size is twice the number of nominal entries - Integer.numberOfTrailingZeros(ceilingPowerOf2(nomEntries) * 2), - lgResizeFactor, - ThetaUtil.MIN_LG_ARR_LONGS - ); - } - - /** - * Concatenate array of Strings to a single String. 
- * @param strArr the given String array - * @return the concatenated String - */ - public static String stringConcat(final String[] strArr) { - final int len = strArr.length; - final StringBuilder sb = new StringBuilder(); - for (int i = 0; i < len; i++) { - sb.append(strArr[i]); - if ((i + 1) < len) { sb.append(','); } - } - return sb.toString(); - } - - /** - * Returns the hash of the given string - * @param s the string to hash - * @return the hash of the given string - */ - public static long stringHash(final String s) { - return hashString(s, 0, s.length(), PRIME); - } - - /** - * Returns the hash of the concatenated strings - * @param strArray array of Strings - * @return the hash of concatenated strings. - */ - public static long stringArrHash(final String[] strArray) { - final String s = stringConcat(strArray); - return hashCharArr(s.toCharArray(), 0, s.length(), PRIME); - } - - /** - * Will copy compact summary arrays as well as hashed summary tables (with nulls). - * @param type of summary - * @param summaryArr the given summary array or table - * @return the copy - */ - @SuppressWarnings("unchecked") - public static S[] copySummaryArray(final S[] summaryArr) { - final int len = summaryArr.length; - final S[] tmpSummaryArr = newSummaryArray(summaryArr, len); - for (int i = 0; i < len; i++) { - final S summary = summaryArr[i]; - if (summary == null) { continue; } - tmpSummaryArr[i] = (S) summary.copy(); - } - return tmpSummaryArr; - } - - /** - * Creates a new Summary Array with the specified length - * @param summaryArr example array, only used to obtain the component type. It has no data. - * @param length the desired length of the returned array. 
- * @param the summary class type - * @return a new Summary Array with the specified length - */ - @SuppressWarnings("unchecked") - public static S[] newSummaryArray(final S[] summaryArr, final int length) { - final Class summaryType = (Class) summaryArr.getClass().getComponentType(); - final S[] tmpSummaryArr = (S[]) Array.newInstance(summaryType, length); - return tmpSummaryArr; - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSketch.java b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSketch.java deleted file mode 100644 index ee17bdd7b..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSketch.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.adouble; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.tuple2.UpdatableSketch; - -/** - * Extends UpdatableSketch<Double, DoubleSummary> - * @author Lee Rhodes - */ -public class DoubleSketch extends UpdatableSketch { - - /** - * Constructs this sketch with given lgK. - * @param lgK Log_base2 of Nominal Entries. 
- * See Nominal Entries - * @param mode The DoubleSummary mode to be used - */ - public DoubleSketch(final int lgK, final DoubleSummary.Mode mode) { - this(lgK, ResizeFactor.X8.ordinal(), 1.0F, mode); - } - - /** - * Creates this sketch with the following parameters: - * @param lgK Log_base2 of Nominal Entries. - * @param lgResizeFactor log2(resizeFactor) - value from 0 to 3: - *
              -   * 0 - no resizing (max size allocated),
              -   * 1 - double internal hash table each time it reaches a threshold
              -   * 2 - grow four times
              -   * 3 - grow eight times (default)
              -   * 
              - * @param samplingProbability - * See Sampling Probability - * @param mode The DoubleSummary mode to be used - */ - public DoubleSketch(final int lgK, final int lgResizeFactor, final float samplingProbability, - final DoubleSummary.Mode mode) { - super(1 << lgK, lgResizeFactor, samplingProbability, new DoubleSummaryFactory(mode)); - } - - /** - * Constructs this sketch from a MemorySegment image, which must be from an DoubleSketch, and - * usually with data. - * @param seg the given MemorySegment - * @param mode The DoubleSummary mode to be used - * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated. - * This capability will be removed in a future release. - * Heapifying a CompactSketch is not deprecated. - */ - @Deprecated - public DoubleSketch(final MemorySegment seg, final DoubleSummary.Mode mode) { - super(seg, new DoubleSummaryDeserializer(), new DoubleSummaryFactory(mode)); - } - - @Override - public void update(final String key, final Double value) { - super.update(key, value); - } - - @Override - public void update(final long key, final Double value) { - super.update(key, value); - } -} diff --git a/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummary.java b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummary.java deleted file mode 100644 index 80e5fd2cd..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummary.java +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.adouble; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.ByteArrayUtil; -import org.apache.datasketches.tuple2.DeserializeResult; -import org.apache.datasketches.tuple2.UpdatableSummary; - -/** - * Summary for generic tuple sketches of type Double. - * This summary keeps a double value. On update a predefined operation is performed depending on - * the mode. - * Supported modes: Sum, Min, Max, AlwaysOne, Increment. The default mode is Sum. - */ -public final class DoubleSummary implements UpdatableSummary { - private double value_; - private final Mode mode_; - - /** - * The aggregation modes for this Summary - */ - public enum Mode { - - /** - * The aggregation mode is the summation function. - * - *

              New retained value = previous retained value + incoming value

              - */ - Sum, - - /** - * The aggregation mode is the minimum function. - * - *

              New retained value = min(previous retained value, incoming value)

              - */ - Min, - - /** - * The aggregation mode is the maximum function. - * - *

              New retained value = max(previous retained value, incoming value)

              - */ - Max, - - /** - * The aggregation mode is always one. - * - *

              New retained value = 1.0

              - */ - AlwaysOne - } - - /** - * Creates an instance of DoubleSummary with a given starting value and mode - * @param value starting value - * @param mode update mode - */ - private DoubleSummary(final double value, final Mode mode) { - value_ = value; - mode_ = mode; - } - - /** - * Creates an instance of DoubleSummary with a given mode. - * @param mode update mode - */ - public DoubleSummary(final Mode mode) { - mode_ = mode; - switch (mode) { - case Sum: - value_ = 0; - break; - case Min: - value_ = Double.POSITIVE_INFINITY; - break; - case Max: - value_ = Double.NEGATIVE_INFINITY; - break; - case AlwaysOne: - value_ = 1.0; - break; - } - } - - @Override - public DoubleSummary update(final Double value) { - switch (mode_) { - case Sum: - value_ += value; - break; - case Min: - if (value < value_) { value_ = value; } - break; - case Max: - if (value > value_) { value_ = value; } - break; - case AlwaysOne: - value_ = 1.0; - break; - } - return this; - } - - @Override - public DoubleSummary copy() { - return new DoubleSummary(value_, mode_); - } - - /** - * Returns current value of the DoubleSummary - * @return current value of the DoubleSummary - */ - public double getValue() { - return value_; - } - - private static final int SERIALIZED_SIZE_BYTES = 9; - private static final int VALUE_INDEX = 0; - private static final int MODE_BYTE_INDEX = 8; - - @Override - public byte[] toByteArray() { - final byte[] bytes = new byte[SERIALIZED_SIZE_BYTES]; - ByteArrayUtil.putDoubleLE(bytes, VALUE_INDEX, value_); - bytes[MODE_BYTE_INDEX] = (byte) mode_.ordinal(); - return bytes; - } - - /** - * Creates an instance of the DoubleSummary given a serialized representation - * @param seg MemorySegment object with serialized DoubleSummary - * @return DeserializedResult object, which contains a DoubleSummary object and number of bytes - * read from the MemorySegment - */ - public static DeserializeResult fromMemorySegment(final MemorySegment seg) { - return new 
DeserializeResult<>(new DoubleSummary(seg.get(JAVA_DOUBLE_UNALIGNED, VALUE_INDEX), - Mode.values()[seg.get(JAVA_BYTE, MODE_BYTE_INDEX)]), SERIALIZED_SIZE_BYTES); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryDeserializer.java b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryDeserializer.java deleted file mode 100644 index 95b86002a..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryDeserializer.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2.adouble; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.tuple2.DeserializeResult; -import org.apache.datasketches.tuple2.SummaryDeserializer; - -/** - * Implements SummaryDeserializer<DoubleSummary> - * @author Lee Rhodes - */ -public class DoubleSummaryDeserializer implements SummaryDeserializer { - - @Override - public DeserializeResult heapifySummary(final MemorySegment seg) { - return DoubleSummary.fromMemorySegment(seg); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryFactory.java b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryFactory.java deleted file mode 100644 index 18fa33fe1..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummaryFactory.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.adouble; - -import org.apache.datasketches.tuple2.SummaryFactory; - -/** - * Factory for DoubleSummary. 
- * - * @author Lee Rhodes - */ -public final class DoubleSummaryFactory implements SummaryFactory { - - private final DoubleSummary.Mode summaryMode_; - - /** - * Creates an instance of DoubleSummaryFactory with a given mode - * @param summaryMode summary mode - */ - public DoubleSummaryFactory(final DoubleSummary.Mode summaryMode) { - summaryMode_ = summaryMode; - } - - @Override - public DoubleSummary newSummary() { - return new DoubleSummary(summaryMode_); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummarySetOperations.java b/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummarySetOperations.java deleted file mode 100644 index 7bad24567..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/adouble/DoubleSummarySetOperations.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.adouble; - -import org.apache.datasketches.tuple2.SummarySetOperations; -import org.apache.datasketches.tuple2.adouble.DoubleSummary.Mode; - -/** - * Methods for defining how unions and intersections of two objects of type DoubleSummary - * are performed. 
- */ -public final class DoubleSummarySetOperations implements SummarySetOperations { - - private final Mode unionSummaryMode_; - - /** - * Intersection is not well defined or even meaningful between numeric values. - * Nevertheless, this can be defined to be a different type of aggregation for intersecting hashes. - */ - private final Mode intersectionSummaryMode_; - - /** - * Creates an instance with default mode of sum for both union and intersection. - * This exists for backward compatibility. - */ - public DoubleSummarySetOperations() { - unionSummaryMode_ = DoubleSummary.Mode.Sum; - intersectionSummaryMode_ = DoubleSummary.Mode.Sum; - } - - /** - * Creates an instance given a DoubleSummary update mode where the mode is the same for both - * union and intersection. This exists for backward compatibility. - * @param summaryMode DoubleSummary update mode. - */ - public DoubleSummarySetOperations(final Mode summaryMode) { - unionSummaryMode_ = summaryMode; - intersectionSummaryMode_ = summaryMode; - } - - /** - * Creates an instance with two modes. 
- * @param unionSummaryMode for unions - * @param intersectionSummaryMode for intersections - */ - public DoubleSummarySetOperations(final Mode unionSummaryMode, final Mode intersectionSummaryMode) { - unionSummaryMode_ = unionSummaryMode; - intersectionSummaryMode_ = intersectionSummaryMode; - } - - @Override - public DoubleSummary union(final DoubleSummary a, final DoubleSummary b) { - final DoubleSummary result = new DoubleSummary(unionSummaryMode_); - result.update(a.getValue()); - result.update(b.getValue()); - return result; - } - - @Override - public DoubleSummary intersection(final DoubleSummary a, final DoubleSummary b) { - final DoubleSummary result = new DoubleSummary(intersectionSummaryMode_); - result.update(a.getValue()); - result.update(b.getValue()); - return result; - } -} diff --git a/src/main/java/org/apache/datasketches/tuple2/adouble/package-info.java b/src/main/java/org/apache/datasketches/tuple2/adouble/package-info.java deleted file mode 100644 index c72f3df00..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/adouble/package-info.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -/** - * This package is for a generic implementation of the Tuple sketch for single Double value. - */ -package org.apache.datasketches.tuple2.adouble; diff --git a/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSketch.java b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSketch.java deleted file mode 100644 index a344f5ef2..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSketch.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.aninteger; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.tuple2.UpdatableSketch; - -/** - * Extends UpdatableSketch<Integer, IntegerSummary> - * @author Lee Rhodes - */ -public class IntegerSketch extends UpdatableSketch { - - /** - * Constructs this sketch with given lgK. - * @param lgK Log_base2 of Nominal Entries. 
- * See Nominal Entries - * @param mode The IntegerSummary mode to be used - */ - public IntegerSketch(final int lgK, final IntegerSummary.Mode mode) { - this(lgK, ResizeFactor.X8.ordinal(), 1.0F, mode); - } - - /** - * Creates this sketch with the following parameters: - * @param lgK Log_base2 of Nominal Entries. - * @param lgResizeFactor log2(resizeFactor) - value from 0 to 3: - *
              -   * 0 - no resizing (max size allocated),
              -   * 1 - double internal hash table each time it reaches a threshold
              -   * 2 - grow four times
              -   * 3 - grow eight times (default)
              -   * 
              - * @param samplingProbability - * See Sampling Probability - * @param mode The IntegerSummary mode to be used - */ - public IntegerSketch(final int lgK, final int lgResizeFactor, final float samplingProbability, - final IntegerSummary.Mode mode) { - super(1 << lgK, lgResizeFactor, samplingProbability, new IntegerSummaryFactory(mode)); - } - - /** - * Constructs this sketch from a MemorySegment image, which must be from an IntegerSketch, and - * usually with data. - * @param seg the given MemorySegment - * @param mode The IntegerSummary mode to be used - * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated. - * This capability will be removed in a future release. - * Heapifying a CompactSketch is not deprecated. - */ - @Deprecated - public IntegerSketch(final MemorySegment seg, final IntegerSummary.Mode mode) { - super(seg, new IntegerSummaryDeserializer(), new IntegerSummaryFactory(mode)); - } - - @Override - public void update(final String key, final Integer value) { - super.update(key, value); - } - - @Override - public void update(final long key, final Integer value) { - super.update(key, value); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummary.java b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummary.java deleted file mode 100644 index 047a8fc58..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummary.java +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.aninteger; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.ByteArrayUtil; -import org.apache.datasketches.tuple2.DeserializeResult; -import org.apache.datasketches.tuple2.UpdatableSummary; - -/** - * Summary for generic tuple sketches of type Integer. - * This summary keeps an Integer value. On update a predefined operation is performed depending on - * the mode. - * Supported modes: Sum, Min, Max, AlwaysOne, Increment. The default mode is Sum. - */ -public class IntegerSummary implements UpdatableSummary { - private int value_; - private final Mode mode_; - - /** - * The aggregation modes for this Summary - */ - public enum Mode { - - /** - * The aggregation mode is the summation function. - * - *

              New retained value = previous retained value + incoming value

              - */ - Sum, - - /** - * The aggregation mode is the minimum function. - * - *

              New retained value = min(previous retained value, incoming value)

              - */ - Min, - - /** - * The aggregation mode is the maximum function. - * - *

              New retained value = max(previous retained value, incoming value)

              - */ - Max, - - /** - * The aggregation mode is always one. - * - *

              New retained value = 1

              - */ - AlwaysOne - } - - /** - * Creates an instance of IntegerSummary with a given starting value and mode. - * @param value starting value - * @param mode update mode - */ - private IntegerSummary(final int value, final Mode mode) { - value_ = value; - mode_ = mode; - } - - /** - * Creates an instance of IntegerSummary with a given mode. - * @param mode update mode. This should not be called by a user. - */ - public IntegerSummary(final Mode mode) { - mode_ = mode; - switch (mode) { - case Sum: - value_ = 0; - break; - case Min: - value_ = Integer.MAX_VALUE; - break; - case Max: - value_ = Integer.MIN_VALUE; - break; - case AlwaysOne: - value_ = 1; - break; - } - } - - @Override - public IntegerSummary update(final Integer value) { - switch (mode_) { - case Sum: - value_ += value; - break; - case Min: - if (value < value_) { value_ = value; } - break; - case Max: - if (value > value_) { value_ = value; } - break; - case AlwaysOne: - value_ = 1; - break; - } - return this; - } - - @Override - public IntegerSummary copy() { - return new IntegerSummary(value_, mode_); - } - - /** - * Returns the current value of the IntegerSummary - * @return current value of the IntegerSummary - */ - public int getValue() { - return value_; - } - - private static final int SERIALIZED_SIZE_BYTES = 5; - private static final int VALUE_INDEX = 0; - private static final int MODE_BYTE_INDEX = 4; - - @Override - public byte[] toByteArray() { - final byte[] bytes = new byte[SERIALIZED_SIZE_BYTES]; - ByteArrayUtil.putIntLE(bytes, VALUE_INDEX, value_); - bytes[MODE_BYTE_INDEX] = (byte) mode_.ordinal(); - return bytes; - } - - /** - * Creates an instance of the IntegerSummary given a serialized representation - * @param seg MemorySegment object with serialized IntegerSummary - * @return DeserializedResult object, which contains a IntegerSummary object and number of bytes - * read from the MemorySegment - */ - public static DeserializeResult fromMemorySegment(final MemorySegment 
seg) { - return new DeserializeResult<>(new IntegerSummary(seg.get(JAVA_INT_UNALIGNED, VALUE_INDEX), - Mode.values()[seg.get(JAVA_BYTE, MODE_BYTE_INDEX)]), SERIALIZED_SIZE_BYTES); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryDeserializer.java b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryDeserializer.java deleted file mode 100644 index 0b7387e60..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryDeserializer.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2.aninteger; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.tuple2.DeserializeResult; -import org.apache.datasketches.tuple2.SummaryDeserializer; - -/** - * Implements SummaryDeserializer<IntegerSummary> - * @author Lee Rhodes - */ -public class IntegerSummaryDeserializer implements SummaryDeserializer { - - @Override - public DeserializeResult heapifySummary(final MemorySegment seg) { - return IntegerSummary.fromMemorySegment(seg); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryFactory.java b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryFactory.java deleted file mode 100644 index 97bf9df24..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummaryFactory.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.aninteger; - -import org.apache.datasketches.tuple2.SummaryFactory; - -/** - * Factory for IntegerSummary. 
- * - * @author Lee Rhodes - */ -public class IntegerSummaryFactory implements SummaryFactory { - - private final IntegerSummary.Mode summaryMode_; - - /** - * Creates an instance of IntegerSummaryFactory with a given mode - * @param summaryMode summary mode - */ - public IntegerSummaryFactory(final IntegerSummary.Mode summaryMode) { - summaryMode_ = summaryMode; - } - - @Override - public IntegerSummary newSummary() { - return new IntegerSummary(summaryMode_); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummarySetOperations.java b/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummarySetOperations.java deleted file mode 100644 index 5e5555d22..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/aninteger/IntegerSummarySetOperations.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.aninteger; - -import org.apache.datasketches.tuple2.SummarySetOperations; -import org.apache.datasketches.tuple2.aninteger.IntegerSummary.Mode; - -/** - * Methods for defining how unions and intersections of two objects of type IntegerSummary - * are performed. 
- * - * @author Lee Rhodes - */ -public class IntegerSummarySetOperations implements SummarySetOperations { - - private final Mode unionSummaryMode_; - - /** - * Intersection is not well defined or even meaningful between numeric values. - * Nevertheless, this can be defined to be a different type of aggregation for intersecting hashes. - */ - private final Mode intersectionSummaryMode_; - - /** - * Creates a new instance with two modes - * @param unionSummaryMode for unions - * @param intersectionSummaryMode for intersections - */ - public IntegerSummarySetOperations(final Mode unionSummaryMode, final Mode intersectionSummaryMode) { - unionSummaryMode_ = unionSummaryMode; - intersectionSummaryMode_ = intersectionSummaryMode; - } - - @Override - public IntegerSummary union(final IntegerSummary a, final IntegerSummary b) { - final IntegerSummary result = new IntegerSummary(unionSummaryMode_); - result.update(a.getValue()); - result.update(b.getValue()); - return result; - } - - @Override - public IntegerSummary intersection(final IntegerSummary a, final IntegerSummary b) { - final IntegerSummary result = new IntegerSummary(intersectionSummaryMode_); - result.update(a.getValue()); - result.update(b.getValue()); - return result; - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/aninteger/package-info.java b/src/main/java/org/apache/datasketches/tuple2/aninteger/package-info.java deleted file mode 100644 index a80924a62..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/aninteger/package-info.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/** - * This package is for a generic implementation of the Tuple sketch for single Integer value. - */ -package org.apache.datasketches.tuple2.aninteger; diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotB.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotB.java deleted file mode 100644 index 55e96be42..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotB.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import java.lang.foreign.MemorySegment; - -/** - * Computes a set difference of two tuple sketches of type ArrayOfDoubles - */ -public abstract class ArrayOfDoublesAnotB { - - ArrayOfDoublesAnotB() {} - - /** - * Perform A-and-not-B set operation on the two given sketches. - * A null sketch is interpreted as an empty sketch. - * This is not an accumulating update. Calling update() more than once - * without calling getResult() will discard the result of previous update(). - * Both input sketches must have the same numValues. - * - * @param a The incoming sketch for the first argument - * @param b The incoming sketch for the second argument - */ - public abstract void update(ArrayOfDoublesSketch a, ArrayOfDoublesSketch b); - - /** - * Gets the result of this operation in the form of a ArrayOfDoublesCompactSketch - * @return compact sketch representing the result of the operation - */ - public abstract ArrayOfDoublesCompactSketch getResult(); - - /** - * Gets the result of this operation in the form of a ArrayOfDoublesCompactSketch - * @param seg MemorySegment for the result (can be null) - * @return compact sketch representing the result of the operation (off-heap if MemorySegment is - * provided) - */ - public abstract ArrayOfDoublesCompactSketch getResult(MemorySegment seg); - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBImpl.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBImpl.java deleted file mode 100644 index f2a940528..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBImpl.java +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import static java.lang.Math.min; -import static org.apache.datasketches.common.Util.exactLog2OfLong; -import static org.apache.datasketches.thetacommon2.HashOperations.continueCondition; -import static org.apache.datasketches.thetacommon2.HashOperations.convertToHashTable; -import static org.apache.datasketches.thetacommon2.HashOperations.count; -import static org.apache.datasketches.thetacommon2.HashOperations.hashSearch; - -import java.lang.foreign.MemorySegment; -import java.util.Arrays; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.SuppressFBWarnings; -import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.SetOperationCornerCases; -import org.apache.datasketches.thetacommon2.SetOperationCornerCases.AnotbAction; -import org.apache.datasketches.thetacommon2.SetOperationCornerCases.CornerCase; -import org.apache.datasketches.thetacommon2.ThetaUtil; - -/** - * Computes a set difference, A-AND-NOT-B, of two ArrayOfDoublesSketches. - * - *

              This class includes a stateless operation as follows:

              - * - *
              
              - * CompactSketch csk = anotb.aNotB(ArrayOfDoublesSketch skA, ArrayOfDoublesSketch skB);
              - * 
              - * - * @author Lee Rhodes - */ -public class ArrayOfDoublesAnotBImpl extends ArrayOfDoublesAnotB { - private int numValues_; - private short seedHash_; - - private long thetaLong_ = Long.MAX_VALUE; - private boolean empty_ = true; - private long[] keys_; - private double[] values_; - private int count_; - - ArrayOfDoublesAnotBImpl(final int numValues, final long seed) { - numValues_ = numValues; - seedHash_ = Util.computeSeedHash(seed); - } - - @Override - @SuppressFBWarnings(value = "EI_EXPOSE_REP2", justification = "This is OK here") - public void update(final ArrayOfDoublesSketch skA, final ArrayOfDoublesSketch skB) { - if (skA == null || skB == null) { - throw new SketchesArgumentException("Neither argument may be null."); - } - numValues_ = skA.getNumValues(); - seedHash_ = skA.getSeedHash(); - if (numValues_ != skB.getNumValues()) { - throw new SketchesArgumentException("Inputs cannot have different numValues"); - } - if (seedHash_ != skB.getSeedHash()) { - throw new SketchesArgumentException("Inputs cannot have different seedHashes"); - } - - final long thetaLongA = skA.getThetaLong(); - final int countA = skA.getRetainedEntries(); - final boolean emptyA = skA.isEmpty(); - - final long thetaLongB = skB.getThetaLong(); - final int countB = skB.getRetainedEntries(); - final boolean emptyB = skB.isEmpty(); - - final int id = - SetOperationCornerCases.createCornerCaseId(thetaLongA, countA, emptyA, thetaLongB, countB, emptyB); - final CornerCase cCase = CornerCase.caseIdToCornerCase(id); - final AnotbAction anotbAction = cCase.getAnotbAction(); - - final long minThetaLong = min(thetaLongA, thetaLongB); - - switch (anotbAction) { - case EMPTY_1_0_T: { - reset(); - break; - } - case DEGEN_MIN_0_F: { - keys_ = null; - values_ = null; - thetaLong_ = minThetaLong; - empty_ = false; - count_ = 0; - break; - } - case DEGEN_THA_0_F: { - keys_ = null; - values_ = null; - thetaLong_ = thetaLongA; - empty_ = false; - count_ = 0; - break; - } - case TRIM_A: { 
- final DataArrays daA = new DataArrays(skA.getKeys(), skA.getValuesAsOneDimension(), countA); - final DataArrays da = trimDataArrays(daA, minThetaLong, numValues_); - keys_ = da.hashArr; - values_ = da.valuesArr; - thetaLong_ = minThetaLong; - empty_ = skA.isEmpty(); - count_ = da.count; - break; - } - case SKETCH_A: { - final ArrayOfDoublesCompactSketch csk = skA.compact(); - keys_ = csk.getKeys(); - values_ = csk.getValuesAsOneDimension(); - thetaLong_ = csk.thetaLong_; - empty_ = csk.isEmpty(); - count_ = csk.getRetainedEntries(); - break; - } - case FULL_ANOTB: { //both A and B should have valid entries. - final long[] keysA = skA.getKeys(); - final double[] valuesA = skA.getValuesAsOneDimension(); - final DataArrays daR = getResultArrays(minThetaLong, countA, keysA, valuesA, skB); - count_ = daR.count; - keys_ = (count_ == 0) ? null : daR.hashArr; - values_ = (count_ == 0) ? null : daR.valuesArr; - thetaLong_ = minThetaLong; - empty_ = (minThetaLong == Long.MAX_VALUE) && (count_ == 0); - break; - } - //default: not possible - } - } - - @Override - public ArrayOfDoublesCompactSketch getResult() { - return new HeapArrayOfDoublesCompactSketch(keys_, values_, thetaLong_, empty_, numValues_, seedHash_); - } - - @Override - public ArrayOfDoublesCompactSketch getResult(final MemorySegment dstSeg) { - return new DirectArrayOfDoublesCompactSketch(keys_, values_, thetaLong_, empty_, numValues_, seedHash_, dstSeg); - } - - private static DataArrays getResultArrays( - final long minThetaLong, - final int countA, - final long[] hashArrA, - final double[] valuesArrA, - final ArrayOfDoublesSketch skB) { - final int numValues = skB.numValues_; - - //create hashtable of skB - final long[] hashTableB = convertToHashTable(skB.getKeys(), skB.getRetainedEntries(), minThetaLong, - ThetaUtil.REBUILD_THRESHOLD); - - //build temporary arrays of skA - long[] tmpHashArrA = new long[countA]; - double[] tmpValuesArrA = new double[countA * numValues]; - - //search for non matches and 
build temp arrays - final int lgHTBLen = exactLog2OfLong(hashTableB.length); - int nonMatches = 0; - for (int i = 0; i < countA; i++) { - final long hash = hashArrA[i]; - if (continueCondition(minThetaLong, hash)) { continue; } - final int index = hashSearch(hashTableB, lgHTBLen, hash); - if (index == -1) { - tmpHashArrA[nonMatches] = hash; - System.arraycopy(valuesArrA, i * numValues, tmpValuesArrA, nonMatches * numValues, numValues); - nonMatches++; - } - } - tmpHashArrA = Arrays.copyOf(tmpHashArrA, nonMatches); - tmpValuesArrA = Arrays.copyOf(tmpValuesArrA, nonMatches * numValues); - final DataArrays daR = new DataArrays(tmpHashArrA, tmpValuesArrA, nonMatches); - return daR; - } - - private static class DataArrays { - long[] hashArr; - double[] valuesArr; - int count; - - DataArrays(final long[] hashArr, final double[] valuesArr, final int count) { - this.hashArr = hashArr; - this.valuesArr = valuesArr; - this.count = count; - } - } - - private static DataArrays trimDataArrays(final DataArrays da, final long thetaLong, final int numValues) { - final long[] hashArrIn = da.hashArr; - final double[] valuesArrIn = da.valuesArr; - final int count = count(hashArrIn, thetaLong); - final long[] hashArrOut = new long[count]; - final double[] valuesArrOut = new double[count * numValues]; - int haInIdx; - int vaInIdx = 0; - int haOutIdx = 0; - int vaOutIdx = 0; - for (haInIdx = 0; haInIdx < count; haInIdx++, vaInIdx += numValues) { - final long hash = hashArrIn[haInIdx]; - if (continueCondition(thetaLong, hash)) { continue; } - hashArrOut[haOutIdx] = hashArrIn[haInIdx]; - System.arraycopy(valuesArrIn, vaInIdx, valuesArrOut, vaOutIdx, numValues); - haOutIdx++; - vaOutIdx += numValues; - } - return new DataArrays(hashArrOut, valuesArrOut, count); - } - - private void reset() { - empty_ = true; - thetaLong_ = Long.MAX_VALUE; - keys_ = null; - values_ = null; - count_ = 0; - } -} - diff --git 
a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCombiner.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCombiner.java deleted file mode 100644 index 2679debea..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCombiner.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -/** - * Combines two arrays of double values for use with ArrayOfDoubles tuple sketches - */ -public interface ArrayOfDoublesCombiner { - - /** - * Method of combining two arrays of double values - * @param a Array A. - * @param b Array B. 
- * @return Result of combining A and B - */ - public double[] combine(double[] a, double[] b); - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketch.java deleted file mode 100644 index 35e8cb15d..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketch.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -/** - * Top level compact tuple sketch of type ArrayOfDoubles. Compact sketches are never created - * directly. They are created as a result of the compact() method on a QuickSelectSketch - * or the getResult() method of a set operation like Union, Intersection or AnotB. - * Compact sketch consists of a compact list (i.e. no intervening spaces) of hash values, - * corresponding list of double values, and a value for theta. The lists may or may - * not be ordered. A compact sketch is read-only. 
- */ -public abstract class ArrayOfDoublesCompactSketch extends ArrayOfDoublesSketch { - - static final byte serialVersionUID = 1; - - // Layout of retained entries: - // Long || Start Byte Adr: - // Adr: - // || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 | - // 3 ||-----------------------------------|----------Retained Entries------------| - - static final int EMPTY_SIZE = 16; - static final int RETAINED_ENTRIES_INT = 16; - // 4 bytes of padding for 8 byte alignment - static final int ENTRIES_START = 24; - - ArrayOfDoublesCompactSketch(final int numValues) { - super(numValues); - } - - @Override - public int getCurrentBytes() { - final int count = getRetainedEntries(); - int sizeBytes = EMPTY_SIZE; - if (count > 0) { - sizeBytes = ENTRIES_START + (SIZE_OF_KEY_BYTES * count) - + (SIZE_OF_VALUE_BYTES * numValues_ * count); - } - return sizeBytes; - } - - @Override - public int getMaxBytes() { - return getCurrentBytes(); - } -} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersection.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersection.java deleted file mode 100644 index d1bbbf810..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersection.java +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import static java.lang.Math.min; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.SketchesStateException; -import org.apache.datasketches.common.Util; - -/** - * Computes the intersection of two or more tuple sketches of type ArrayOfDoubles. - * A new instance represents the Universal Set. - * Every update() computes an intersection with the internal set - * and can only reduce the internal set. - */ -public abstract class ArrayOfDoublesIntersection { - //not changed by resetToEmpty() or hardReset() - private final short seedHash_; - private final int numValues_; - //nulled or reset by resetToEmpty - private HashTables hashTables_; - private boolean empty_; - private boolean firstCall_; - private long thetaLong_; - - /** - * Internal constructor, called by HeapArrayOfDoublesIntersection and DirectArrayOfDoublesIntersection - * @param numValues the number of double values in the summary array - * @param seed the hash function update seed. - */ - ArrayOfDoublesIntersection(final int numValues, final long seed) { - seedHash_ = Util.computeSeedHash(seed); - numValues_ = numValues; - hashTables_ = null; - empty_ = false; - thetaLong_ = Long.MAX_VALUE; - firstCall_ = true; - } - - /** - * Performs a stateful intersection of the internal set with the given tupleSketch. - * The given tupleSketch and the internal state must have the same numValues. 
- * @param tupleSketch Input sketch to intersect with the internal set. - * @param combiner Method of combining two arrays of double values - */ - public void intersect(final ArrayOfDoublesSketch tupleSketch, final ArrayOfDoublesCombiner combiner) { - if (tupleSketch == null) { throw new SketchesArgumentException("Sketch must not be null"); } - Util.checkSeedHashes(seedHash_, tupleSketch.getSeedHash()); - if (tupleSketch.numValues_ != numValues_) { - throw new SketchesArgumentException( - "Input tupleSketch cannot have different numValues from the internal numValues."); - } - - final boolean isFirstCall = firstCall_; - firstCall_ = false; - - //could be first or next call - - final boolean emptyIn = tupleSketch.isEmpty(); - if (empty_ || emptyIn) { //empty rule - //Whatever the current internal state, we make our local empty. - resetToEmpty(); // - return; - } - - final long thetaLongIn = tupleSketch.getThetaLong(); - thetaLong_ = min(thetaLong_, thetaLongIn); //Theta rule - - if (tupleSketch.getRetainedEntries() == 0) { - if (hashTables_ != null) { - hashTables_.clear(); - } - } - // input sketch will have valid entries > 0 - - if (isFirstCall) { - //Copy first sketch data into local instance hashTables_ - hashTables_ = new HashTables(tupleSketch); - } - - //Next Call - else { - assert hashTables_ != null; - if (hashTables_.getNumKeys() == 0) { return; } - //process intersect with current hashTables - hashTables_ = hashTables_.getIntersectHashTables(tupleSketch, thetaLong_, combiner); - } - } - - /** - * Gets the internal set as an on-heap compact sketch. - * @return Result of the intersections so far as a compact sketch. - */ - public ArrayOfDoublesCompactSketch getResult() { - return getResult(null); - } - - /** - * Gets the result of stateful intersections so far. - * @param dstSeg MemorySegment for the compact sketch (can be null). - * @return Result of the intersections so far as a compact sketch. 
- */ - public ArrayOfDoublesCompactSketch getResult(final MemorySegment dstSeg) { - if (firstCall_) { - throw new SketchesStateException( - "getResult() with no intervening intersections is not a legal result."); - } - long[] hashArrOut = new long[0]; - double[] valuesArrOut = new double[0]; - if (hashTables_ != null && hashTables_.getHashTable() != null) { - final int numKeys = hashTables_.getNumKeys(); - - if (numKeys > 0) { - final int tableSize = hashTables_.getHashTable().length; - - hashArrOut = new long[numKeys]; - valuesArrOut = new double[numKeys * numValues_]; - - // & flatten the hash tables - int cnt = 0; - final long[] hashTable = hashTables_.getHashTable(); - final double[][] valueTable = hashTables_.getValueTable(); - for (int i = 0; i < tableSize; i++) { - final long hash = hashTable[i]; - if (hash == 0 || hash > thetaLong_) { continue; } - hashArrOut[cnt] = hash; - System.arraycopy(valueTable[i], 0, valuesArrOut, cnt * numValues_, numValues_); - cnt++; - } - assert cnt == numKeys; - } - } - - return (dstSeg == null) - ? 
new HeapArrayOfDoublesCompactSketch(hashArrOut, valuesArrOut, - thetaLong_, empty_, numValues_, seedHash_) - : new DirectArrayOfDoublesCompactSketch(hashArrOut, valuesArrOut, - thetaLong_, empty_, numValues_, seedHash_, dstSeg); - } - - /** - * Resets the internal set to the initial state, which represents the Universal Set - */ - public void reset() { - hardReset(); - } - - private void hardReset() { - empty_ = false; - firstCall_ = true; - thetaLong_ = Long.MAX_VALUE; - if (hashTables_ != null) { hashTables_.clear(); } - } - - private void resetToEmpty() { - empty_ = true; - firstCall_ = false; - thetaLong_ = Long.MAX_VALUE; - if (hashTables_ != null) { hashTables_.clear(); } - } - - protected abstract ArrayOfDoublesQuickSelectSketch createSketch(int nomEntries, int numValues, long seed); - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java deleted file mode 100644 index f691f153b..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import static org.apache.datasketches.common.Util.ceilingPowerOf2; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.QuickSelect; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon2.ThetaUtil; - -/** - * Top level class for hash table based implementations of tuple sketch of type - * ArrayOfDoubles that uses the QuickSelect algorithm. - */ -abstract class ArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesUpdatableSketch { - - static final byte serialVersionUID = 1; - - // Layout of next 16 bytes: - // Long || Start Byte Adr: - // Adr: - // || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 | - // 3 ||-----------P (float)---------------|--------|--lgRF--|--lgArr-|---lgNom---| - // || 31 | 30 | 29 | 28 | 27 | 26 | 25 | 24 | - // 4 ||-----------------------------------|----------Retained Entries------------| - - static final int LG_NOM_ENTRIES_BYTE = 16; - static final int LG_CUR_CAPACITY_BYTE = 17; - static final int LG_RESIZE_FACTOR_BYTE = 18; - // 1 byte of padding for alignment - static final int SAMPLING_P_FLOAT = 20; - static final int RETAINED_ENTRIES_INT = 24; - // 4 bytes of padding for alignment - static final int ENTRIES_START = 32; - - static final int DEFAULT_LG_RESIZE_FACTOR = 3; - - // these can be derived from other things, but are kept here for performance - int rebuildThreshold_; //absolute value relative to current capacity - int lgCurrentCapacity_; - - ArrayOfDoublesQuickSelectSketch(final int numValues, final long seed) { - super(numValues, seed); - } - - abstract void updateValues(int index, double[] values); - - abstract void setNotEmpty(); - - abstract boolean isInSamplingMode(); - - abstract void rebuild(int newCapacity); - - abstract long getKey(int index); - - abstract void 
setValues(int index, double[] values); - - abstract void incrementCount(); - - abstract void setThetaLong(long thetaLong); - - abstract int insertKey(long key); - - abstract int findOrInsertKey(long key); - - abstract double[] find(long key); - - abstract int getSerializedSizeBytes(); - - abstract void serializeInto(MemorySegment seg); - - @Override - public void trim() { - if (getRetainedEntries() > getNominalEntries()) { - setThetaLong(getNewThetaLong()); - rebuild(); - } - } - - @Override - public int getMaxBytes() { - final int nomEntries = getNominalEntries(); - final int numValues = getNumValues(); - return getMaxBytes(nomEntries, numValues); - } - - @Override - public int getCurrentBytes() { - return getSerializedSizeBytes(); - } - - /** - * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than or equal to - * given value. - * @param numValues Number of double values to keep for each key - * @return maximum required storage bytes given nomEntries and numValues - */ - static int getMaxBytes(final int nomEntries, final int numValues) { - return ENTRIES_START - + (SIZE_OF_KEY_BYTES + SIZE_OF_VALUE_BYTES * numValues) * ceilingPowerOf2(nomEntries) * 2; - } - - // non-public methods below - - // this is a special back door insert for merging - // not sufficient by itself without keeping track of theta of another sketch - void merge(final long key, final double[] values) { - setNotEmpty(); - if (key < thetaLong_) { - final int index = findOrInsertKey(key); - if (index < 0) { - incrementCount(); - setValues(~index, values); - } else { - updateValues(index, values); - } - rebuildIfNeeded(); - } - } - - void rebuildIfNeeded() { - if (getRetainedEntries() <= rebuildThreshold_) { return; } - if (getCurrentCapacity() > getNominalEntries()) { - setThetaLong(getNewThetaLong()); - rebuild(); - } else { - rebuild(getCurrentCapacity() * getResizeFactor().getValue()); - } - } - - void rebuild() { - rebuild(getCurrentCapacity()); - } - - 
void insert(final long key, final double[] values) { - final int index = insertKey(key); - setValues(index, values); - incrementCount(); - } - - final void setRebuildThreshold() { - if (getCurrentCapacity() > getNominalEntries()) { - rebuildThreshold_ = (int) (getCurrentCapacity() * ThetaUtil.REBUILD_THRESHOLD); - } else { - rebuildThreshold_ = (int) (getCurrentCapacity() * ThetaUtil.RESIZE_THRESHOLD); - } - } - - @Override - void insertOrIgnore(final long key, final double[] values) { - if (values.length != getNumValues()) { - throw new SketchesArgumentException("input array of values must have " + getNumValues() - + " elements, but has " + values.length); - } - setNotEmpty(); - if ((key == 0) || (key >= thetaLong_)) { return; } - final int index = findOrInsertKey(key); - if (index < 0) { - incrementCount(); - setValues(~index, values); - } else { - updateValues(index, values); - } - rebuildIfNeeded(); - } - - long getNewThetaLong() { - final long[] keys = new long[getRetainedEntries()]; - int i = 0; - for (int j = 0; j < getCurrentCapacity(); j++) { - final long key = getKey(j); - if (key != 0) { keys[i++] = key; } - } - return QuickSelect.select(keys, 0, getRetainedEntries() - 1, getNominalEntries()); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java deleted file mode 100644 index dd19792c2..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSetOperationBuilder.java +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.Util; - -/** - * Builds set operations object for tuple sketches of type ArrayOfDoubles. - */ -public class ArrayOfDoublesSetOperationBuilder { - - private int nomEntries_; - private int numValues_; - private long seed_; - - /** - * Default Nominal Entries (a.k.a. K) - */ - public static final int DEFAULT_NOMINAL_ENTRIES = 4096; - - /** - * Default number of values - */ - public static final int DEFAULT_NUMBER_OF_VALUES = 1; - - /** - * Creates an instance of the builder with default parameters - */ - public ArrayOfDoublesSetOperationBuilder() { - nomEntries_ = DEFAULT_NOMINAL_ENTRIES; - numValues_ = DEFAULT_NUMBER_OF_VALUES; - seed_ = Util.DEFAULT_UPDATE_SEED; - } - - /** - * This is to set the nominal number of entries. - * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than - * given value. 
- * @return this builder - */ - public ArrayOfDoublesSetOperationBuilder setNominalEntries(final int nomEntries) { - nomEntries_ = nomEntries; - return this; - } - - /** - * This is to set the number of double values associated with each key - * @param numValues number of double values - * @return this builder - */ - public ArrayOfDoublesSetOperationBuilder setNumberOfValues(final int numValues) { - numValues_ = numValues; - return this; - } - - /** - * Sets the long seed value that is required by the hashing function. - * @param seed See seed - * @return this builder - */ - public ArrayOfDoublesSetOperationBuilder setSeed(final long seed) { - seed_ = seed; - return this; - } - - /** - * Creates an instance of ArrayOfDoublesUnion based on the current configuration of the builder. - * The new instance is allocated on the heap. - * @return an instance of ArrayOfDoublesUnion - */ - public ArrayOfDoublesUnion buildUnion() { - return new HeapArrayOfDoublesUnion(nomEntries_, numValues_, seed_); - } - - /** - * Creates an instance of ArrayOfDoublesUnion based on the current configuration of the builder - * and the given destination MemorySegment. - * @param dstSeg destination MemorySegment to be used by the sketch - * @return an instance of ArrayOfDoublesUnion - */ - public ArrayOfDoublesUnion buildUnion(final MemorySegment dstSeg) { - return new DirectArrayOfDoublesUnion(nomEntries_, numValues_, seed_, dstSeg); - } - - /** - * Creates an instance of ArrayOfDoublesIntersection based on the current configuration of the - * builder. - * The new instance is allocated on the heap. - * The number of nominal entries is not relevant to this, so it is ignored. 
- * @return an instance of ArrayOfDoublesIntersection - */ - public ArrayOfDoublesIntersection buildIntersection() { - return new HeapArrayOfDoublesIntersection(numValues_, seed_); - } - - /** - * Creates an instance of ArrayOfDoublesIntersection in the given MemorySegment and based on the - * current configuration of the builder. - * The number of nominal entries is not relevant to this, so it is ignored. - * @param dstSeg destination MemorySegment to be used by the sketch - * @return an instance of ArrayOfDoublesIntersection - */ - public ArrayOfDoublesIntersection buildIntersection(final MemorySegment dstSeg) { - return new DirectArrayOfDoublesIntersection(numValues_, seed_, dstSeg); - } - - /** - * Creates an instance of ArrayOfDoublesAnotB based on the current configuration of the builder. - * The number of nominal entries is not relevant to this, so it is ignored. - * @return an instance of ArrayOfDoublesAnotB - */ - public ArrayOfDoublesAnotB buildAnotB() { - return new ArrayOfDoublesAnotBImpl(numValues_, seed_); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketch.java deleted file mode 100644 index 145458419..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketch.java +++ /dev/null @@ -1,292 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import static org.apache.datasketches.common.Util.LS; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.BinomialBoundsN; -import org.apache.datasketches.tuple2.SerializerDeserializer; - -/** - * The base class for the tuple sketch of type ArrayOfDoubles, where an array of double values - * is associated with each key. - * A primitive array of doubles is used here, as opposed to a generic Summary object, - * for improved performance. - */ -public abstract class ArrayOfDoublesSketch { - - // The concept of being empty is about representing an empty set. - // So a sketch can be non-empty, and have no entries. - // For example, as a result of a sampling, when some data was presented to the sketch, but no - // entries were retained. 
- static enum Flags { IS_BIG_ENDIAN, IS_IN_SAMPLING_MODE, IS_EMPTY, HAS_ENTRIES } - - static final int SIZE_OF_KEY_BYTES = Long.BYTES; - static final int SIZE_OF_VALUE_BYTES = Double.BYTES; - - // Common Layout of first 16 bytes and Empty AoDCompactSketch: - // Long || Start Byte Adr: - // Adr: - // || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | - // 0 || Seed Hash | #Dbls | Flags | SkType | FamID | SerVer | Preamble_Longs | - // || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 | - // 1 ||-------------------------Theta Long------------------------------------------------| - - static final int PREAMBLE_LONGS_BYTE = 0; // not used, always 1 - static final int SERIAL_VERSION_BYTE = 1; - static final int FAMILY_ID_BYTE = 2; - static final int SKETCH_TYPE_BYTE = 3; - static final int FLAGS_BYTE = 4; - static final int NUM_VALUES_BYTE = 5; - static final int SEED_HASH_SHORT = 6; - static final int THETA_LONG = 8; - - final int numValues_; - - long thetaLong_; - boolean isEmpty_ = true; - - ArrayOfDoublesSketch(final int numValues) { - numValues_ = numValues; - } - - /** - * Heapify the given MemorySegment as an ArrayOfDoublesSketch - * @param seg the given MemorySegment - * @return an ArrayOfDoublesSketch - */ - public static ArrayOfDoublesSketch heapify(final MemorySegment seg) { - return heapify(seg, Util.DEFAULT_UPDATE_SEED); - } - - /** - * Heapify the given MemorySegment and seed as a ArrayOfDoublesSketch - * @param seg the given MemorySegment - * @param seed the given seed - * @return an ArrayOfDoublesSketch - */ - public static ArrayOfDoublesSketch heapify(final MemorySegment seg, final long seed) { - final SerializerDeserializer.SketchType sketchType = SerializerDeserializer.getSketchType(seg); - if (sketchType == SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch) { - return new HeapArrayOfDoublesQuickSelectSketch(seg, seed); - } - return new HeapArrayOfDoublesCompactSketch(seg, seed); - } - - /** - * Wrap the given MemorySegment as an ArrayOfDoublesSketch. 
- * If the given source MemorySegment is read-only, the returned Union object will also be read-only. - * @param seg the given MemorySegment - * @return an ArrayOfDoublesSketch - */ - public static ArrayOfDoublesSketch wrap(final MemorySegment seg) { - return wrap(seg, Util.DEFAULT_UPDATE_SEED); - } - - /** - * Wrap the given MemorySegment and seed as a ArrayOfDoublesSketch. - * If the given source MemorySegment is read-only, the returned Union object will also be read-only. - * @param seg the given MemorySegment - * @param seed the given seed - * @return an ArrayOfDoublesSketch - */ - public static ArrayOfDoublesSketch wrap(final MemorySegment seg, final long seed) { - final SerializerDeserializer.SketchType sketchType = SerializerDeserializer.getSketchType(seg); - if (sketchType == SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch) { - return new DirectArrayOfDoublesQuickSelectSketchR(seg, seed); - } - return new DirectArrayOfDoublesCompactSketch(seg, seed); - } - - /** - * Estimates the cardinality of the set (number of unique values presented to the sketch) - * @return best estimate of the number of unique values - */ - public double getEstimate() { - if (!isEstimationMode()) { return getRetainedEntries(); } - return getRetainedEntries() / getTheta(); - } - - /** - * Gets the approximate upper error bound given the specified number of Standard Deviations. - * This will return getEstimate() if isEmpty() is true. - * - * @param numStdDev - * See Number of Standard Deviations - * @return the upper bound. - */ - public double getUpperBound(final int numStdDev) { - if (!isEstimationMode()) { return getRetainedEntries(); } - return BinomialBoundsN.getUpperBound(getRetainedEntries(), getTheta(), numStdDev, isEmpty_); - } - - /** - * Gets the approximate lower error bound given the specified number of Standard Deviations. - * This will return getEstimate() if isEmpty() is true. 
- * - * @param numStdDev - * See Number of Standard Deviations - * @return the lower bound. - */ - public double getLowerBound(final int numStdDev) { - if (!isEstimationMode()) { return getRetainedEntries(); } - return BinomialBoundsN.getLowerBound(getRetainedEntries(), getTheta(), numStdDev, isEmpty_); - } - - /** - * Returns true if this sketch's data structure is backed by MemorySegment. - * @return true if this sketch's data structure is backed by MemorySegment. - */ - public abstract boolean hasMemorySegment(); - - /** - * Returns the MemorySegment object if it exists, otherwise null. - * @return the MemorySegment object if it exists, otherwise null. - */ - abstract MemorySegment getMemorySegment(); - - /** - * See Empty - * @return true if empty. - */ - public boolean isEmpty() { - return isEmpty_; - } - - /** - * Returns number of double values associated with each key - * @return number of double values associated with each key - */ - public int getNumValues() { - return numValues_; - } - - /** - * Returns true if the sketch is Estimation Mode (as opposed to Exact Mode). - * This is true if theta < 1.0 AND isEmpty() is false. - * @return true if the sketch is in estimation mode. - */ - public boolean isEstimationMode() { - return ((thetaLong_ < Long.MAX_VALUE) && !isEmpty()); - } - - /** - * Gets the value of theta as a double between zero and one - * @return the value of theta as a double - */ - public double getTheta() { - return getThetaLong() / (double) Long.MAX_VALUE; - } - - /** - * Returns number of retained entries - * @return number of retained entries - */ - public abstract int getRetainedEntries(); - - /** - * Returns the maximum number of bytes for this sketch when serialized. - * @return the maximum number of bytes for this sketch when serialized. - */ - public abstract int getMaxBytes(); - - /** - * For compact sketches this is the same as getMaxBytes(). - * @return the current number of bytes for this sketch when serialized. 
- */ - public abstract int getCurrentBytes(); - - /** - * Returns serialized representation of the sketch - * @return serialized representation of the sketch - */ - public abstract byte[] toByteArray(); - - /** - * Returns array of arrays of double values in the sketch - * @return array of arrays of double values in the sketch - */ - public abstract double[][] getValues(); - - abstract double[] getValuesAsOneDimension(); - - abstract long[] getKeys(); - - /** - * Returns the value of theta as a long - * @return the value of theta as a long - */ - long getThetaLong() { - return isEmpty() ? Long.MAX_VALUE : thetaLong_; - } - - abstract short getSeedHash(); - - /** - * Returns an iterator over the sketch - * @return an iterator over the sketch - */ - public abstract ArrayOfDoublesSketchIterator iterator(); - - /** - * Returns this sketch in compact form, which is immutable. - * @return this sketch in compact form, which is immutable. - */ - public ArrayOfDoublesCompactSketch compact() { - return compact(null); - } - - /** - * Returns this sketch in compact form, which is immutable. - * @param dstSeg the destination MemorySegment - * @return this sketch in compact form, which is immutable. - */ - public abstract ArrayOfDoublesCompactSketch compact(MemorySegment dstSeg); - - @Override - public String toString() { - final int seedHash = Short.toUnsignedInt(getSeedHash()); - final StringBuilder sb = new StringBuilder(); - sb.append("### ").append(this.getClass().getSimpleName()).append(" SUMMARY: ").append(LS); - sb.append(" Estimate : ").append(getEstimate()).append(LS); - sb.append(" Upper Bound, 95% conf : ").append(getUpperBound(2)).append(LS); - sb.append(" Lower Bound, 95% conf : ").append(getLowerBound(2)).append(LS); - sb.append(" Theta (double) : ").append(getTheta()).append(LS); - sb.append(" Theta (long) : ").append(getThetaLong()).append(LS); - sb.append(" EstMode? : ").append(isEstimationMode()).append(LS); - sb.append(" Empty? 
: ").append(isEmpty()).append(LS); - sb.append(" Retained Entries : ").append(getRetainedEntries()).append(LS); - if (this instanceof ArrayOfDoublesUpdatableSketch) { - final ArrayOfDoublesUpdatableSketch updatable = (ArrayOfDoublesUpdatableSketch) this; - sb.append(" Nominal Entries (k) : ").append(updatable.getNominalEntries()).append(LS); - sb.append(" Current Capacity : ").append(updatable.getCurrentCapacity()).append(LS); - sb.append(" Resize Factor : ").append(updatable.getResizeFactor().getValue()).append(LS); - sb.append(" Sampling Probability (p): ").append(updatable.getSamplingProbability()).append(LS); - } - sb.append(" Seed Hash : ") - .append(Integer.toHexString(seedHash)).append(" | ").append(seedHash).append(LS); - sb.append("### END SKETCH SUMMARY").append(LS); - return sb.toString(); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketchIterator.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketchIterator.java deleted file mode 100644 index 71ed63216..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketchIterator.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -/** - * Interface for iterating over tuple sketches of type ArrayOfDoubles - */ -public interface ArrayOfDoublesSketchIterator { - /** - * Advancing the iterator and checking existence of the next entry - * is combined here for efficiency. This results in an undefined - * state of the iterator before the first call of this method. - * @return true if the next element exists - */ - public boolean next(); - - /** - * Gets a key from the current entry in the sketch, which is a hash - * of the original key passed to update(). The original keys are not - * retained. Don't call this before calling next() for the first time - * or after getting false from next(). - * @return hash key from the current entry - */ - public long getKey(); - - /** - * Gets an array of values from the current entry in the sketch. - * Don't call this before calling next() for the first time - * or after getting false from next(). - * @return array of double values for the current entry (may or may not be a copy) - */ - public double[] getValues(); -} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketches.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketches.java deleted file mode 100644 index b75c72f89..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesSketches.java +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.Util; - -/** - * Convenient static methods to instantiate tuple sketches of type ArrayOfDoubles. - */ -public final class ArrayOfDoublesSketches { - - /** - * Heapify the given MemorySegment as an ArrayOfDoublesSketch - * @param srcSeg the given source MemorySegment - * @return an ArrayOfDoublesSketch - */ - public static ArrayOfDoublesSketch heapifySketch(final MemorySegment srcSeg) { - return heapifySketch(srcSeg, Util.DEFAULT_UPDATE_SEED); - } - - /** - * Heapify the given MemorySegment and seed as a ArrayOfDoublesSketch - * @param srcSeg the given source MemorySegment - * @param seed the given seed - * @return an ArrayOfDoublesSketch - */ - public static ArrayOfDoublesSketch heapifySketch(final MemorySegment srcSeg, final long seed) { - return ArrayOfDoublesSketch.heapify(srcSeg, seed); - } - - /** - * Heapify the given MemorySegment as an ArrayOfDoublesUpdatableSketch - * @param srcSeg the given source MemorySegment - * @return an ArrayOfDoublesUpdatableSketch - */ - public static ArrayOfDoublesUpdatableSketch heapifyUpdatableSketch(final MemorySegment srcSeg) { - return heapifyUpdatableSketch(srcSeg, Util.DEFAULT_UPDATE_SEED); - } - - /** - * Heapify the given MemorySegment and seed as a ArrayOfDoublesUpdatableSketch - * @param srcSeg the given source MemorySegment - * @param seed the given seed - * @return an ArrayOfDoublesUpdatableSketch - */ - public static ArrayOfDoublesUpdatableSketch 
heapifyUpdatableSketch(final MemorySegment srcSeg, final long seed) { - return ArrayOfDoublesUpdatableSketch.heapify(srcSeg, seed); - } - - /** - * Wrap the given MemorySegment as an ArrayOfDoublesSketch. - * If the given source MemorySegment is read-only, the returned Union object will also be read-only. - * @param srcSeg the given source MemorySegment - * @return an ArrayOfDoublesSketch - */ - public static ArrayOfDoublesSketch wrapSketch(final MemorySegment srcSeg) { - return wrapSketch(srcSeg, Util.DEFAULT_UPDATE_SEED); - } - - /** - * Wrap the given MemorySegment and seed as a ArrayOfDoublesSketch. - * If the given source MemorySegment is read-only, the returned Union object will also be read-only. - * @param srcSeg the given source MemorySegment - * @param seed the given seed - * @return an ArrayOfDoublesSketch - */ - public static ArrayOfDoublesSketch wrapSketch(final MemorySegment srcSeg, final long seed) { - return ArrayOfDoublesSketch.wrap(srcSeg, seed); - } - - /** - * Wrap the given MemorySegment as an ArrayOfDoublesUpdatableSketch. - * If the given source MemorySegment is read-only, the returned Union object will also be read-only. - * @param srcSeg the given source MemorySegment - * @return an ArrayOfDoublesUpdatableSketch - */ - public static ArrayOfDoublesUpdatableSketch wrapUpdatableSketch(final MemorySegment srcSeg) { - return wrapUpdatableSketch(srcSeg, Util.DEFAULT_UPDATE_SEED); - } - - /** - * Wrap the given MemorySegment and seed as a ArrayOfDoublesUpdatableSketch. - * If the given source MemorySegment is read-only, the returned Union object will also be read-only. 
- * @param srcSeg the given source MemorySegment - * @param seed the given seed - * @return an ArrayOfDoublesUpdatableSketch - */ - public static ArrayOfDoublesUpdatableSketch wrapUpdatableSketch(final MemorySegment srcSeg, final long seed) { - return ArrayOfDoublesUpdatableSketch.wrap(srcSeg, seed); - } - - /** - * Heapify the given MemorySegment as an ArrayOfDoublesUnion - * @param srcSeg the given source MemorySegment - * @return an ArrayOfDoublesUnion - */ - public static ArrayOfDoublesUnion heapifyUnion(final MemorySegment srcSeg) { - return heapifyUnion(srcSeg, Util.DEFAULT_UPDATE_SEED); - } - - /** - * Heapify the given MemorySegment and seed as an ArrayOfDoublesUnion - * @param srcSeg the given source MemorySegment - * @param seed the given seed - * @return an ArrayOfDoublesUnion - */ - public static ArrayOfDoublesUnion heapifyUnion(final MemorySegment srcSeg, final long seed) { - return ArrayOfDoublesUnion.heapify(srcSeg, seed); - } - - /** - * Wrap the given MemorySegment as an ArrayOfDoublesUnion - * If the given source MemorySegment is read-only, the returned Union object will also be read-only. - * @param srcSeg the given source MemorySegment - * @return an ArrayOfDoublesUnion - */ - public static ArrayOfDoublesUnion wrapUnion(final MemorySegment srcSeg) { - return wrapUnion(srcSeg, Util.DEFAULT_UPDATE_SEED); - } - - /** - * Wrap the given MemorySegment and seed as an ArrayOfDoublesUnion - * If the given source MemorySegment is read-only, the returned Union object will also be read-only. 
- * @param srcSeg the given source MemorySegment - * @param seed the given seed - * @return an ArrayOfDoublesUnion - */ - public static ArrayOfDoublesUnion wrapUnion(final MemorySegment srcSeg, final long seed) { - return ArrayOfDoublesUnion.wrap(srcSeg, seed); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnion.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnion.java deleted file mode 100644 index aea9204b2..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnion.java +++ /dev/null @@ -1,208 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static java.lang.Math.min; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; -import org.apache.datasketches.tuple2.SerializerDeserializer; - -/** - * The base class for unions of tuple sketches of type ArrayOfDoubles. - */ -public abstract class ArrayOfDoublesUnion { - - static final byte serialVersionUID = 1; - //For layout see toByteArray() - static final int PREAMBLE_SIZE_BYTES = 16; - static final int PREAMBLE_LONGS_BYTE = 0; // not used, always 1 - static final int SERIAL_VERSION_BYTE = 1; - static final int FAMILY_ID_BYTE = 2; - static final int SKETCH_TYPE_BYTE = 3; - static final int FLAGS_BYTE = 4; - static final int NUM_VALUES_BYTE = 5; - static final int SEED_HASH_SHORT = 6; - static final int THETA_LONG = 8; - - ArrayOfDoublesQuickSelectSketch gadget_; - long unionThetaLong_; - - /** - * Constructs this Union initializing it with the given sketch, which can be on-heap or off-heap. - * @param sketch the given sketch. - */ - ArrayOfDoublesUnion(final ArrayOfDoublesQuickSelectSketch sketch) { - gadget_ = sketch; - unionThetaLong_ = sketch.getThetaLong(); - } - - /** - * Heapify the given MemorySegment as an ArrayOfDoublesUnion. - * @param srcSeg the given source MemorySegment - * @return an ArrayOfDoublesUnion - */ - public static ArrayOfDoublesUnion heapify(final MemorySegment srcSeg) { - return heapify(srcSeg, Util.DEFAULT_UPDATE_SEED); - } - - /** - * Heapify the given MemorySegment and seed as an ArrayOfDoublesUnion. 
- * @param srcSeg the given source MemorySegment - * @param seed the given seed - * @return an ArrayOfDoublesUnion - */ - public static ArrayOfDoublesUnion heapify(final MemorySegment srcSeg, final long seed) { - return HeapArrayOfDoublesUnion.heapifyUnion(srcSeg, seed); - } - - /** - * Wrap the given MemorySegment as an ArrayOfDoublesUnion. - * If the given source MemorySegment is read-only, the returned Union object will also be read-only. - * @param srcSeg the given source MemorySegment - * @return an ArrayOfDoublesUnion - */ - public static ArrayOfDoublesUnion wrap(final MemorySegment srcSeg) { - return wrap(srcSeg, Util.DEFAULT_UPDATE_SEED); - } - - /** - * Wrap the given MemorySegment and seed as an ArrayOfDoublesUnion. - * If the given source MemorySegment is read-only, the returned Union object will also be read-only. - * @param srcSeg the given source MemorySegment - * @param seed the given seed - * @return an ArrayOfDoublesUnion - */ - public static ArrayOfDoublesUnion wrap(final MemorySegment srcSeg, final long seed) { - return DirectArrayOfDoublesUnion.wrapUnion(srcSeg, seed, !srcSeg.isReadOnly()); - } - - /** - * Updates the union by adding a set of entries from a given sketch, which can be on-heap or off-heap. - * Both the given tupleSketch and the internal state of the Union must have the same numValues. - * - *

              Nulls and empty sketches are ignored.

              - * - * @param tupleSketch sketch to add to the union. - */ - public void union(final ArrayOfDoublesSketch tupleSketch) { - if (tupleSketch == null) { return; } - Util.checkSeedHashes(gadget_.getSeedHash(), tupleSketch.getSeedHash()); - if (gadget_.getNumValues() != tupleSketch.getNumValues()) { - throw new SketchesArgumentException("Incompatible sketches: number of values mismatch " - + gadget_.getNumValues() + " and " + tupleSketch.getNumValues()); - } - - if (tupleSketch.isEmpty()) { return; } - else { gadget_.setNotEmpty(); } - - setUnionThetaLong(min(min(unionThetaLong_, tupleSketch.getThetaLong()), gadget_.getThetaLong())); - - if (tupleSketch.getRetainedEntries() == 0) { return; } - final ArrayOfDoublesSketchIterator it = tupleSketch.iterator(); - while (it.next()) { - if (it.getKey() < unionThetaLong_) { - gadget_.merge(it.getKey(), it.getValues()); - } - } - // keep the union theta as low as possible for performance - if (gadget_.getThetaLong() < unionThetaLong_) { - setUnionThetaLong(gadget_.getThetaLong()); - } - } - - /** - * Returns the resulting union in the form of a compact sketch - * @param dstSeg MemorySegment for the result (can be null) - * @return compact sketch representing the union (off-heap if MemorySegment is provided) - */ - public ArrayOfDoublesCompactSketch getResult(final MemorySegment dstSeg) { - long unionThetaLong = unionThetaLong_; - if (gadget_.getRetainedEntries() > gadget_.getNominalEntries()) { - unionThetaLong = Math.min(unionThetaLong, gadget_.getNewThetaLong()); - } - if (dstSeg == null) { - return new HeapArrayOfDoublesCompactSketch(gadget_, unionThetaLong); - } - return new DirectArrayOfDoublesCompactSketch(gadget_, unionThetaLong, dstSeg); - } - - /** - * Returns the resulting union in the form of a compact sketch - * @return on-heap compact sketch representing the union - */ - public ArrayOfDoublesCompactSketch getResult() { - return getResult(null); - } - - /** - * Resets the union to an empty state - */ - 
public void reset() { - gadget_.reset(); - setUnionThetaLong(gadget_.getThetaLong()); - } - - // Layout of first 16 bytes: - // Long || Start Byte Adr: - // Adr: - // || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | - // 0 || Seed Hash=0 | #Dbls=0|Flags=0 | SkType | FamID | SerVer | Preamble_Longs | - // || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 | - // 1 ||---------------------------Union Theta Long-----------------------------------------| - /** - * Returns a byte array representation of this object - * @return a byte array representation of this object - */ - public byte[] toByteArray() { - final int sizeBytes = PREAMBLE_SIZE_BYTES + gadget_.getSerializedSizeBytes(); - final byte[] byteArray = new byte[sizeBytes]; - final MemorySegment seg = MemorySegment.ofArray(byteArray); - seg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1); // unused, always 1 - seg.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID); - seg.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); - seg.set(JAVA_BYTE, SKETCH_TYPE_BYTE, (byte) SerializerDeserializer.SketchType.ArrayOfDoublesUnion.ordinal()); - //byte 4-7 automatically zero - seg.set(JAVA_LONG_UNALIGNED, THETA_LONG, unionThetaLong_); - gadget_.serializeInto(seg.asSlice(PREAMBLE_SIZE_BYTES, seg.byteSize() - PREAMBLE_SIZE_BYTES)); - return byteArray; - } - - /** - * Returns maximum required storage bytes given nomEntries and numValues - * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than or equal to - * given value. 
- * @param numValues Number of double values to keep for each key - * @return maximum required storage bytes given nomEntries and numValues - */ - public static int getMaxBytes(final int nomEntries, final int numValues) { - return ArrayOfDoublesQuickSelectSketch.getMaxBytes(nomEntries, numValues) + PREAMBLE_SIZE_BYTES; - } - - void setUnionThetaLong(final long thetaLong) { - unionThetaLong_ = thetaLong; - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java deleted file mode 100644 index d1384e9d2..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketch.java +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import static org.apache.datasketches.common.Util.computeSeedHash; -import static org.apache.datasketches.common.Util.DEFAULT_UPDATE_SEED; - -import java.lang.foreign.MemorySegment; -import java.nio.ByteBuffer; - -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.hash.MurmurHash3; -import org.apache.datasketches.tuple2.Util; - -/** - * The top level for updatable tuple sketches of type ArrayOfDoubles. - */ -public abstract class ArrayOfDoublesUpdatableSketch extends ArrayOfDoublesSketch { - - final long seed_; - - ArrayOfDoublesUpdatableSketch(final int numValues, final long seed) { - super(numValues); - seed_ = seed; - } - - /** - * Heapify the given MemorySegment as an ArrayOfDoublesUpdatableSketch - * @param seg the given MemorySegment - * @return an ArrayOfDoublesUpdatableSketch - */ - public static ArrayOfDoublesUpdatableSketch heapify(final MemorySegment seg) { - return heapify(seg, DEFAULT_UPDATE_SEED); - } - - /** - * Heapify the given MemorySegment and seed as a ArrayOfDoublesUpdatableSketch - * @param seg the given MemorySegment - * @param seed the given seed - * @return an ArrayOfDoublesUpdatableSketch - */ - public static ArrayOfDoublesUpdatableSketch heapify(final MemorySegment seg, final long seed) { - return new HeapArrayOfDoublesQuickSelectSketch(seg, seed); - } - - /** - * Wrap the given MemorySegment as an ArrayOfDoublesUpdatableSketch. - * If the given source MemorySegment is read-only, the returned Union object will also be read-only. - * @param seg the given MemorySegment - * @return an ArrayOfDoublesUpdatableSketch - */ - public static ArrayOfDoublesUpdatableSketch wrap(final MemorySegment seg) { - return wrap(seg, DEFAULT_UPDATE_SEED); - } - - /** - * Wrap the given MemorySegment and seed as a ArrayOfDoublesUpdatableSketch. - * If the given source MemorySegment is read-only, the returned Union object will also be read-only. 
- * @param seg the given MemorySegment - * @param seed the given seed - * @return an ArrayOfDoublesUpdatableSketch - */ - public static ArrayOfDoublesUpdatableSketch wrap(final MemorySegment seg, final long seed) { - return new DirectArrayOfDoublesQuickSelectSketch(seg, seed); - } - - /** - * Updates this sketch with a long key and double values. - * The values will be stored or added to the ones associated with the key - * - * @param key The given long key - * @param values The given values - */ - public void update(final long key, final double[] values) { - update(new long[] {key}, values); - } - - /** - * Updates this sketch with a double key and double values. - * The values will be stored or added to the ones associated with the key - * - * @param key The given double key - * @param values The given values - */ - public void update(final double key, final double[] values) { - update(Util.doubleToLongArray(key), values); - } - - /** - * Updates this sketch with a String key and double values. - * The values will be stored or added to the ones associated with the key - * - * @param key The given String key - * @param values The given values - */ - public void update(final String key, final double[] values) { - update(Util.stringToByteArray(key), values); - } - - /** - * Updates this sketch with a byte[] key and double values. - * The values will be stored or added to the ones associated with the key - * - * @param key The given byte[] key - * @param values The given values - */ - public void update(final byte[] key, final double[] values) { - if (key == null || key.length == 0) { return; } - insertOrIgnore(MurmurHash3.hash(key, seed_)[0] >>> 1, values); - } - - /** - * Updates this sketch with a ByteBuffer key and double values. 
- * The values will be stored or added to the ones associated with the key - * - * @param key The given ByteBuffer key - * @param values The given values - */ - public void update(final ByteBuffer key, final double[] values) { - if (key == null || key.hasRemaining() == false) { return; } - insertOrIgnore(MurmurHash3.hash(key, seed_)[0] >>> 1, values); - } - - /** - * Updates this sketch with a int[] key and double values. - * The values will be stored or added to the ones associated with the key - * - * @param key The given int[] key - * @param values The given values - */ - public void update(final int[] key, final double[] values) { - if (key == null || key.length == 0) { return; } - insertOrIgnore(MurmurHash3.hash(key, seed_)[0] >>> 1, values); - } - - /** - * Updates this sketch with a long[] key and double values. - * The values will be stored or added to the ones associated with the key - * - * @param key The given long[] key - * @param values The given values - */ - public void update(final long[] key, final double[] values) { - if (key == null || key.length == 0) { return; } - insertOrIgnore(MurmurHash3.hash(key, seed_)[0] >>> 1, values); - } - - /** - * Gets the configured nominal number of entries - * @return nominal number of entries - */ - public abstract int getNominalEntries(); - - /** - * Gets the configured resize factor - * @return resize factor - */ - public abstract ResizeFactor getResizeFactor(); - - /** - * Gets the configured sampling probability - * @return sampling probability - */ - public abstract float getSamplingProbability(); - - /** - * Rebuilds reducing the actual number of entries to the nominal number of entries if needed - */ - public abstract void trim(); - - /** - * Resets this sketch an empty state. 
- */ - public abstract void reset(); - - /** - * Gets an on-heap compact representation of the sketch - * @return compact sketch - */ - @Override - public ArrayOfDoublesCompactSketch compact() { - return compact(null); - } - - /** - * Gets an off-heap compact representation of the sketch using the given MemorySegment - * @param dstSeg MemorySegment for the compact sketch (can be null) - * @return compact sketch (off-heap if MemorySegment is provided) - */ - @Override - public ArrayOfDoublesCompactSketch compact(final MemorySegment dstSeg) { - if (dstSeg == null) { - return new HeapArrayOfDoublesCompactSketch(this); - } - return new DirectArrayOfDoublesCompactSketch(this, dstSeg); - } - - abstract int getCurrentCapacity(); - - long getSeed() { - return seed_; - } - - @Override - short getSeedHash() { - return computeSeedHash(seed_); - } - - /** - * Insert if key is less than thetaLong and not a duplicate, otherwise ignore. - * @param key the hash value of the input value - * @param values array of values to update the summary - */ - abstract void insertOrIgnore(long key, double[] values); - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java deleted file mode 100644 index 99723ec6f..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUpdatableSketchBuilder.java +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.ThetaUtil; - -/** - * For building a new ArrayOfDoublesUpdatableSketch - */ -public class ArrayOfDoublesUpdatableSketchBuilder { - - private int nomEntries_; - private ResizeFactor resizeFactor_; - private int numValues_; - private float samplingProbability_; - private long seed_; - - private static final int DEFAULT_NUMBER_OF_VALUES = 1; - private static final float DEFAULT_SAMPLING_PROBABILITY = 1; - private static final ResizeFactor DEFAULT_RESIZE_FACTOR = ResizeFactor.X8; - - /** - * Creates an instance of builder with default parameters - */ - public ArrayOfDoublesUpdatableSketchBuilder() { - nomEntries_ = ThetaUtil.DEFAULT_NOMINAL_ENTRIES; - resizeFactor_ = DEFAULT_RESIZE_FACTOR; - numValues_ = DEFAULT_NUMBER_OF_VALUES; - samplingProbability_ = DEFAULT_SAMPLING_PROBABILITY; - seed_ = Util.DEFAULT_UPDATE_SEED; - } - - /** - * This is to set the nominal number of entries. - * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than - * or equal to given value. - * @return this builder - */ - public ArrayOfDoublesUpdatableSketchBuilder setNominalEntries(final int nomEntries) { - nomEntries_ = 1 << ThetaUtil.checkNomLongs(nomEntries); - return this; - } - - /** - * This is to set the resize factor. 
- * Value of X1 means that the maximum capacity is allocated from the start. - * Default resize factor is X8. - * @param resizeFactor value of X1, X2, X4 or X8 - * @return this UpdatableSketchBuilder - */ - public ArrayOfDoublesUpdatableSketchBuilder setResizeFactor(final ResizeFactor resizeFactor) { - resizeFactor_ = resizeFactor; - return this; - } - - /** - * This is to set sampling probability. - * Default probability is 1. - * @param samplingProbability sampling probability from 0 to 1 - * @return this builder - */ - public ArrayOfDoublesUpdatableSketchBuilder - setSamplingProbability(final float samplingProbability) { - if ((samplingProbability < 0) || (samplingProbability > 1f)) { - throw new SketchesArgumentException("sampling probability must be between 0 and 1"); - } - samplingProbability_ = samplingProbability; - return this; - } - - /** - * This is to set the number of double values associated with each key - * @param numValues number of double values - * @return this builder - */ - public ArrayOfDoublesUpdatableSketchBuilder setNumberOfValues(final int numValues) { - numValues_ = numValues; - return this; - } - - /** - * Sets the long seed value that is required by the hashing function. - * @param seed See seed - * @return this builder - */ - public ArrayOfDoublesUpdatableSketchBuilder setSeed(final long seed) { - seed_ = seed; - return this; - } - - /** - * Returns an ArrayOfDoublesUpdatableSketch with the current configuration of this Builder. - * @return an ArrayOfDoublesUpdatableSketch - */ - public ArrayOfDoublesUpdatableSketch build() { - return new HeapArrayOfDoublesQuickSelectSketch(nomEntries_, resizeFactor_.lg(), - samplingProbability_, numValues_, seed_); - } - - /** - * Returns an ArrayOfDoublesUpdatableSketch with the current configuration of this Builder. 
- * @param dstSeg instance of MemorySegment to be used by the sketch - * @return an ArrayOfDoublesUpdatableSketch - */ - public ArrayOfDoublesUpdatableSketch build(final MemorySegment dstSeg) { - return new DirectArrayOfDoublesQuickSelectSketch(nomEntries_, resizeFactor_.lg(), - samplingProbability_, numValues_, seed_, dstSeg); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java deleted file mode 100644 index f41f35991..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java +++ /dev/null @@ -1,288 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; - -import java.lang.foreign.MemorySegment; -import java.nio.ByteOrder; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; -import org.apache.datasketches.tuple2.SerializerDeserializer; - -/** - * Direct Compact Sketch of type ArrayOfDoubles. - * - *

              This implementation uses data in a given MemorySegment that is owned and managed by the caller. - * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for - * the JVM to perform garbage collection.

              - */ -final class DirectArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketch { - - // this value exists only on heap, never serialized - private MemorySegment seg_; - - /** - * Converts the given UpdatableArrayOfDoublesSketch to this compact form. - * @param sketch the given UpdatableArrayOfDoublesSketch - * @param dstSeg the given destination MemorySegment. - */ - DirectArrayOfDoublesCompactSketch(final ArrayOfDoublesUpdatableSketch sketch, - final MemorySegment dstSeg) { - this(sketch, sketch.getThetaLong(), dstSeg); - } - - /** - * Converts the given UpdatableArrayOfDoublesSketch to this compact form - * trimming if necessary according to given theta - * @param sketch the given UpdatableArrayOfDoublesSketch - * @param thetaLong new value of thetaLong - * @param dstSeg the given destination MemorySegment. - */ - DirectArrayOfDoublesCompactSketch(final ArrayOfDoublesUpdatableSketch sketch, - final long thetaLong, final MemorySegment dstSeg) { - super(sketch.getNumValues()); - checkMemorySegmentSize(dstSeg, sketch.getRetainedEntries(), sketch.getNumValues()); - seg_ = dstSeg; - dstSeg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1); - dstSeg.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID); - dstSeg.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); - dstSeg.set(JAVA_BYTE, SKETCH_TYPE_BYTE, (byte) - SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch.ordinal()); - final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN); - isEmpty_ = sketch.isEmpty(); - final int count = sketch.getRetainedEntries(); - dstSeg.set(JAVA_BYTE, FLAGS_BYTE, (byte) ( - (isBigEndian ? 1 << Flags.IS_BIG_ENDIAN.ordinal() : 0) - | (isEmpty_ ? 1 << Flags.IS_EMPTY.ordinal() : 0) - | (count > 0 ? 
1 << Flags.HAS_ENTRIES.ordinal() : 0) - )); - dstSeg.set(JAVA_BYTE, NUM_VALUES_BYTE, (byte) numValues_); - dstSeg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, Util.computeSeedHash(sketch.getSeed())); - thetaLong_ = Math.min(sketch.getThetaLong(), thetaLong); - dstSeg.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); - if (count > 0) { - int keyOffset = ENTRIES_START; - int valuesOffset = keyOffset + (SIZE_OF_KEY_BYTES * sketch.getRetainedEntries()); - final ArrayOfDoublesSketchIterator it = sketch.iterator(); - int actualCount = 0; - while (it.next()) { - if (it.getKey() < thetaLong_) { - dstSeg.set(JAVA_LONG_UNALIGNED, keyOffset, it.getKey()); - MemorySegment.copy(it.getValues(), 0, dstSeg, JAVA_DOUBLE_UNALIGNED, valuesOffset, numValues_); - keyOffset += SIZE_OF_KEY_BYTES; - valuesOffset += SIZE_OF_VALUE_BYTES * numValues_; - actualCount++; - } - } - dstSeg.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, actualCount); - } - } - - /* - * Creates an instance from components - */ - DirectArrayOfDoublesCompactSketch(final long[] keys, final double[] values, final long thetaLong, - final boolean isEmpty, final int numValues, final short seedHash, final MemorySegment dstSeg) { - super(numValues); - checkMemorySegmentSize(dstSeg, values.length, numValues); - seg_ = dstSeg; - dstSeg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1); - dstSeg.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID); - dstSeg.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); - dstSeg.set(JAVA_BYTE, SKETCH_TYPE_BYTE, (byte) - SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch.ordinal()); - final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN); - isEmpty_ = isEmpty; - final int count = keys.length; - dstSeg.set(JAVA_BYTE, FLAGS_BYTE, (byte) ( - (isBigEndian ? 1 << Flags.IS_BIG_ENDIAN.ordinal() : 0) - | (isEmpty_ ? 1 << Flags.IS_EMPTY.ordinal() : 0) - | (count > 0 ? 
1 << Flags.HAS_ENTRIES.ordinal() : 0) - )); - dstSeg.set(JAVA_BYTE, NUM_VALUES_BYTE, (byte) numValues_); - dstSeg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, seedHash); - thetaLong_ = thetaLong; - dstSeg.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); - if (count > 0) { - dstSeg.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, count); - MemorySegment.copy(keys, 0, dstSeg, JAVA_LONG_UNALIGNED, ENTRIES_START, count); - MemorySegment.copy(values, 0, dstSeg, JAVA_DOUBLE_UNALIGNED, ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * count), values.length); - } - } - - /** - * Wraps the given MemorySegment. - * @param seg the given MemorySegment - */ - DirectArrayOfDoublesCompactSketch(final MemorySegment seg) { - super(seg.get(JAVA_BYTE, NUM_VALUES_BYTE)); - seg_ = seg; - SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE), - seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE)); - SerializerDeserializer.validateType(seg_.get(JAVA_BYTE, SKETCH_TYPE_BYTE), - SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch); - final byte version = seg_.get(JAVA_BYTE, SERIAL_VERSION_BYTE); - if (version != serialVersionUID) { - throw new SketchesArgumentException("Serial version mismatch. Expected: " + serialVersionUID - + ", actual: " + version); - } - final boolean isBigEndian = - (seg.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_BIG_ENDIAN.ordinal())) != 0; - if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { - throw new SketchesArgumentException("Byte order mismatch"); - } - - isEmpty_ = (seg_.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; - thetaLong_ = seg_.get(JAVA_LONG_UNALIGNED, THETA_LONG); - } - - /** - * Wraps the given MemorySegment. - * @param seg the given MemorySegment. 
- * @param seed See seed - */ - DirectArrayOfDoublesCompactSketch(final MemorySegment seg, final long seed) { - super(seg.get(JAVA_BYTE, NUM_VALUES_BYTE)); - seg_ = seg; - SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE), - seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE)); - SerializerDeserializer.validateType(seg_.get(JAVA_BYTE, SKETCH_TYPE_BYTE), - SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch); - final byte version = seg_.get(JAVA_BYTE, SERIAL_VERSION_BYTE); - if (version != serialVersionUID) { - throw new SketchesArgumentException("Serial version mismatch. Expected: " + serialVersionUID - + ", actual: " + version); - } - final boolean isBigEndian = - (seg.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_BIG_ENDIAN.ordinal())) != 0; - if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { - throw new SketchesArgumentException("Byte order mismatch"); - } - Util.checkSeedHashes(seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT), Util.computeSeedHash(seed)); - isEmpty_ = (seg_.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; - thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, THETA_LONG); - } - - @Override - public ArrayOfDoublesCompactSketch compact(final MemorySegment dstSeg) { - if (dstSeg == null) { - return new - HeapArrayOfDoublesCompactSketch(getKeys(), getValuesAsOneDimension(), thetaLong_, isEmpty_, numValues_, - getSeedHash()); - } else { - MemorySegment.copy(seg_, 0, dstSeg, 0, seg_.byteSize()); - return new DirectArrayOfDoublesCompactSketch(dstSeg); - } - } - - @Override - public int getRetainedEntries() { - final boolean hasEntries = - (seg_.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.HAS_ENTRIES.ordinal())) != 0; - return (hasEntries ? 
seg_.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT) : 0); - } - - @Override - //converts compact MemorySegment array of double[] to compact double[][] - public double[][] getValues() { - final int count = getRetainedEntries(); - final double[][] values = new double[count][]; - if (count > 0) { - int valuesOffset = ENTRIES_START + (SIZE_OF_KEY_BYTES * count); - for (int i = 0; i < count; i++) { - final double[] array = new double[numValues_]; - MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, valuesOffset, array, 0, numValues_); - values[i] = array; - valuesOffset += SIZE_OF_VALUE_BYTES * numValues_; - } - } - return values; - } - - @Override - //converts compact MemorySegment array of double[] to compact double[] - double[] getValuesAsOneDimension() { - final int count = getRetainedEntries(); - final int numDoubles = count * numValues_; - final double[] values = new double[numDoubles]; - if (count > 0) { - final int valuesOffset = ENTRIES_START + (SIZE_OF_KEY_BYTES * count); - MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, valuesOffset, values, 0, numDoubles); - } - return values; - } - - @Override - //converts compact MemorySegment array of long[] to compact long[] - long[] getKeys() { - final int count = getRetainedEntries(); - final long[] keys = new long[count]; - if (count > 0) { - for (int i = 0; i < count; i++) { - MemorySegment.copy(seg_, JAVA_LONG_UNALIGNED, ENTRIES_START, keys, 0, count); - } - } - return keys; - } - - @Override - public byte[] toByteArray() { - final int sizeBytes = getCurrentBytes(); - final byte[] byteArray = new byte[sizeBytes]; - final MemorySegment seg = MemorySegment.ofArray(byteArray); - MemorySegment.copy(seg_, 0, seg, 0, sizeBytes); - return byteArray; - } - - @Override - public ArrayOfDoublesSketchIterator iterator() { - return new DirectArrayOfDoublesSketchIterator( - seg_, ENTRIES_START, getRetainedEntries(), numValues_); - } - - @Override - short getSeedHash() { - return seg_.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT); - } - 
- @Override - public boolean hasMemorySegment() { return true; } - - @Override - MemorySegment getMemorySegment() { return seg_; } - - private static void checkMemorySegmentSize(final MemorySegment seg, final int numEntries, - final int numValues) { - final int sizeNeeded = - ENTRIES_START + ((SIZE_OF_KEY_BYTES + (SIZE_OF_VALUE_BYTES * numValues)) * numEntries); - if (sizeNeeded > seg.byteSize()) { - throw new SketchesArgumentException("Not enough space: need " + sizeNeeded - + " bytes, got " + seg.byteSize() + " bytes"); - } - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesIntersection.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesIntersection.java deleted file mode 100644 index 52e8c24e3..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesIntersection.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import java.lang.foreign.MemorySegment; - -/** - * Direct Intersection operation for tuple sketches of type ArrayOfDoubles. - * - *

              This implementation uses data in a given MemorySegment that is owned and managed by the caller. - * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for - * the JVM to perform garbage collection.

              - */ -final class DirectArrayOfDoublesIntersection extends ArrayOfDoublesIntersection { - - private MemorySegment seg_; - - /** - * Creates an instance of a DirectArrayOfDoublesIntersection with a custom update seed - * @param numValues number of double values associated with each key - * @param seed See seed - * @param dstSeg the destination MemorySegment - */ - DirectArrayOfDoublesIntersection(final int numValues, final long seed, final MemorySegment dstSeg) { - super(numValues, seed); - seg_ = dstSeg; - } - - @Override - protected ArrayOfDoublesQuickSelectSketch createSketch(final int nomEntries, final int numValues, - final long seed) { - return new DirectArrayOfDoublesQuickSelectSketch(nomEntries, 0, 1f, numValues, seed, seg_); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java deleted file mode 100644 index 0b8de6a71..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java +++ /dev/null @@ -1,435 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; -import static org.apache.datasketches.common.Util.checkSeedHashes; -import static org.apache.datasketches.common.Util.clear; -import static org.apache.datasketches.common.Util.clearBits; -import static org.apache.datasketches.common.Util.computeSeedHash; -import static org.apache.datasketches.common.Util.setBits; - -import java.lang.foreign.MemorySegment; -import java.nio.ByteOrder; -import java.util.Arrays; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon2.HashOperations; -import org.apache.datasketches.tuple2.SerializerDeserializer; -import org.apache.datasketches.tuple2.Util; - -/** - * Direct QuickSelect tuple sketch of type ArrayOfDoubles. - * - *

              This implementation uses data in a given MemorySegment that is owned and managed by the caller. - * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for - * the JVM to perform garbage collection.

              - */ -class DirectArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelectSketch { - - // these values exist only on heap, never serialized - private MemorySegment seg_; - // these can be derived from the seg_ contents, but are kept here for performance - private int keysOffset_; - private int valuesOffset_; - - /** - * Construct a new sketch using the given MemorySegment as its backing store. - * - * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than - * given value. - * @param lgResizeFactor log2(resize factor) - value from 0 to 3: - * 0 - no resizing (max size allocated), - * 1 - double internal hash table each time it reaches a threshold - * 2 - grow four times - * 3 - grow eight times (default) - * @param samplingProbability - * See Sampling Probability - * @param numValues Number of double values to keep for each key. - * @param seed See seed - * @param dstSeg the destination MemorySegment. - */ - DirectArrayOfDoublesQuickSelectSketch( - final int nomEntries, - final int lgResizeFactor, - final float samplingProbability, - final int numValues, - final long seed, - final MemorySegment dstSeg) { - this(checkMemorySegment(nomEntries, lgResizeFactor, numValues, dstSeg), - //SpotBugs CT_CONSTRUCTOR_THROW is false positive. 
- //this construction scheme is compliant with SEI CERT Oracle Coding Standard for Java / OBJ11-J - nomEntries, - lgResizeFactor, - samplingProbability, - numValues, - seed, - dstSeg); - } - - private DirectArrayOfDoublesQuickSelectSketch( - final boolean secure, //required part of Finalizer Attack prevention - final int nomEntries, - final int lgResizeFactor, - final float samplingProbability, - final int numValues, - final long seed, - final MemorySegment dstSeg) { - super(numValues, seed); - seg_ = dstSeg; - final int startingCapacity = Util.getStartingCapacity(nomEntries, lgResizeFactor); - seg_.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1); - seg_.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID); - seg_.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); - seg_.set(JAVA_BYTE, SKETCH_TYPE_BYTE, (byte) - SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch.ordinal()); - final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN); - seg_.set(JAVA_BYTE, FLAGS_BYTE, (byte) ( - (isBigEndian ? 1 << Flags.IS_BIG_ENDIAN.ordinal() : 0) - | (samplingProbability < 1f ? 
1 << Flags.IS_IN_SAMPLING_MODE.ordinal() : 0) - | (1 << Flags.IS_EMPTY.ordinal()) - )); - seg_.set(JAVA_BYTE, NUM_VALUES_BYTE, (byte) numValues); - seg_.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, computeSeedHash(seed)); - thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability); - seg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); - seg_.set(JAVA_BYTE, LG_NOM_ENTRIES_BYTE, (byte) Integer.numberOfTrailingZeros(nomEntries)); - seg_.set(JAVA_BYTE, LG_CUR_CAPACITY_BYTE, (byte) Integer.numberOfTrailingZeros(startingCapacity)); - seg_.set(JAVA_BYTE, LG_RESIZE_FACTOR_BYTE, (byte) lgResizeFactor); - seg_.set(JAVA_FLOAT_UNALIGNED, SAMPLING_P_FLOAT, samplingProbability); - seg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, 0); - keysOffset_ = ENTRIES_START; - valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * startingCapacity); - clear(seg_, keysOffset_, (long) SIZE_OF_KEY_BYTES * startingCapacity); //clear keys only - lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingCapacity); - setRebuildThreshold(); - } - - private static final boolean checkMemorySegment( - final int nomEntries, - final int lgResizeFactor, - final int numValues, - final MemorySegment dstSeg) { - final int startingCapacity = Util.getStartingCapacity(nomEntries, lgResizeFactor); - checkMemorySegmentSize(dstSeg, startingCapacity, numValues); - return true; - } - - /** - * Wraps the given MemorySegment. - * @param seg the given MemorySegment - * @param seed update seed - */ - DirectArrayOfDoublesQuickSelectSketch( - final MemorySegment seg, - final long seed) { - this(checkSerVer_Endianness(seg), seg, seed); - //SpotBugs CT_CONSTRUCTOR_THROW is false positive. 
- //this construction scheme is compliant with SEI CERT Oracle Coding Standard for Java / OBJ11-J - } - - private DirectArrayOfDoublesQuickSelectSketch( - final boolean secure, //required part of Finalizer Attack prevention - final MemorySegment seg, - final long seed) { - super(seg.get(JAVA_BYTE, NUM_VALUES_BYTE), seed); - seg_ = seg; - SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE), - seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE)); - SerializerDeserializer.validateType(seg_.get(JAVA_BYTE, SKETCH_TYPE_BYTE), - SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch); - - checkSeedHashes(seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT), computeSeedHash(seed)); - keysOffset_ = ENTRIES_START; - valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * getCurrentCapacity()); - // to do: make parent take care of its own parts - lgCurrentCapacity_ = Integer.numberOfTrailingZeros(getCurrentCapacity()); - thetaLong_ = seg_.get(JAVA_LONG_UNALIGNED, THETA_LONG); - isEmpty_ = (seg_.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; - setRebuildThreshold(); - } - - private static final boolean checkSerVer_Endianness(final MemorySegment seg) { - final byte version = seg.get(JAVA_BYTE, SERIAL_VERSION_BYTE); - if (version != serialVersionUID) { - throw new SketchesArgumentException("Serial version mismatch. 
Expected: " + serialVersionUID - + ", actual: " + version); - } - final boolean isBigEndian = - (seg.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_BIG_ENDIAN.ordinal())) != 0; - if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { - throw new SketchesArgumentException("Byte order mismatch"); - } - return true; - } - - @Override - //converts MemorySegment hashTable of double[] to compacted double[][] - public double[][] getValues() { - final int count = getRetainedEntries(); - final double[][] values = new double[count][]; - if (count > 0) { - long keyOffset = keysOffset_; - long valuesOffset = valuesOffset_; - int cnt = 0; - for (int j = 0; j < getCurrentCapacity(); j++) { - if (seg_.get(JAVA_LONG_UNALIGNED, keyOffset) != 0) { - final double[] array = new double[numValues_]; - MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, valuesOffset, array, 0, numValues_); - values[cnt++] = array; - } - keyOffset += SIZE_OF_KEY_BYTES; - valuesOffset += (long)SIZE_OF_VALUE_BYTES * numValues_; - } - } - return values; - } - - @Override - //converts heap hashTable of double[] to compacted double[] - double[] getValuesAsOneDimension() { - final int count = getRetainedEntries(); - final double[] values = new double[count * numValues_]; - final int cap = getCurrentCapacity(); - if (count > 0) { - long keyOffsetBytes = keysOffset_; - long valuesOffsetBytes = valuesOffset_; - int cnt = 0; - for (int j = 0; j < cap; j++) { - if (seg_.get(JAVA_LONG_UNALIGNED, keyOffsetBytes) != 0) { - MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, valuesOffsetBytes, values, cnt++ * numValues_, numValues_); - } - keyOffsetBytes += SIZE_OF_KEY_BYTES; - valuesOffsetBytes += (long)SIZE_OF_VALUE_BYTES * numValues_; - } - assert cnt == count; - } - return values; - } - - @Override - //converts heap hashTable of long[] to compacted long[] - long[] getKeys() { - final int count = getRetainedEntries(); - final long[] keys = new long[count]; - final int cap = getCurrentCapacity(); - if (count > 
0) { - long keyOffsetBytes = keysOffset_; - int cnt = 0; - for (int j = 0; j < cap; j++) { - final long key; - if ((key = seg_.get(JAVA_LONG_UNALIGNED, keyOffsetBytes)) != 0) { - keys[cnt++] = key; - } - keyOffsetBytes += SIZE_OF_KEY_BYTES; - } - assert cnt == count; - } - return keys; - } - - @Override - public int getRetainedEntries() { - return seg_.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT); - } - - @Override - public int getNominalEntries() { - return 1 << seg_.get(JAVA_BYTE, LG_NOM_ENTRIES_BYTE); - } - - @Override - public ResizeFactor getResizeFactor() { - return ResizeFactor.getRF(seg_.get(JAVA_BYTE, LG_RESIZE_FACTOR_BYTE)); - } - - @Override - public float getSamplingProbability() { - return seg_.get(JAVA_FLOAT_UNALIGNED, SAMPLING_P_FLOAT); - } - - @Override - public byte[] toByteArray() { - final int sizeBytes = getSerializedSizeBytes(); - final byte[] byteArray = new byte[sizeBytes]; - final MemorySegment seg = MemorySegment.ofArray(byteArray); - serializeInto(seg); - return byteArray; - } - - @Override - public ArrayOfDoublesSketchIterator iterator() { - return new DirectArrayOfDoublesSketchIterator(seg_, keysOffset_, getCurrentCapacity(), numValues_); - } - - @Override - public boolean hasMemorySegment() { return true; } - - @Override - MemorySegment getMemorySegment() { return seg_; } - - @Override - int getSerializedSizeBytes() { - return valuesOffset_ + (SIZE_OF_VALUE_BYTES * numValues_ * getCurrentCapacity()); - } - - @Override - void serializeInto(final MemorySegment seg) { - MemorySegment.copy(seg_, 0, seg, 0, seg.byteSize()); - } - - @Override - public void reset() { - if (!isEmpty_) { - isEmpty_ = true; - setBits(seg_, FLAGS_BYTE, (byte) (1 << Flags.IS_EMPTY.ordinal())); - } - final int lgResizeFactor = seg_.get(JAVA_BYTE, LG_RESIZE_FACTOR_BYTE); - final float samplingProbability = seg_.get(JAVA_FLOAT_UNALIGNED, SAMPLING_P_FLOAT); - final int startingCapacity = Util.getStartingCapacity(getNominalEntries(), lgResizeFactor); - thetaLong_ = 
(long) (Long.MAX_VALUE * (double) samplingProbability); - seg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); - seg_.set(JAVA_BYTE, LG_CUR_CAPACITY_BYTE, (byte) Integer.numberOfTrailingZeros(startingCapacity)); - seg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, 0); - keysOffset_ = ENTRIES_START; - valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * startingCapacity); - clear(seg_, keysOffset_, (long) SIZE_OF_KEY_BYTES * startingCapacity); //clear keys only - lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingCapacity); - setRebuildThreshold(); - } - - @Override - protected long getKey(final int index) { - return seg_.get(JAVA_LONG_UNALIGNED, keysOffset_ + ((long) SIZE_OF_KEY_BYTES * index)); - } - - @Override - protected void incrementCount() { - final int count = seg_.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT); - if (count == 0) { - setBits(seg_, FLAGS_BYTE, (byte) (1 << Flags.HAS_ENTRIES.ordinal())); - } - seg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, count + 1); - } - - @Override - protected final int getCurrentCapacity() { - return 1 << seg_.get(JAVA_BYTE, LG_CUR_CAPACITY_BYTE); - } - - @Override - protected void setThetaLong(final long thetaLong) { - thetaLong_ = thetaLong; - seg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); - } - - @Override - protected void setValues(final int index, final double[] values) { - long offset = valuesOffset_ + ((long) SIZE_OF_VALUE_BYTES * numValues_ * index); - for (int i = 0; i < numValues_; i++) { - seg_.set(JAVA_DOUBLE_UNALIGNED, offset, values[i]); - offset += SIZE_OF_VALUE_BYTES; - } - } - - @Override - protected void updateValues(final int index, final double[] values) { - long offset = valuesOffset_ + ((long) SIZE_OF_VALUE_BYTES * numValues_ * index); - for (int i = 0; i < numValues_; i++) { - seg_.set(JAVA_DOUBLE_UNALIGNED, offset, seg_.get(JAVA_DOUBLE_UNALIGNED, offset) + values[i]); - offset += SIZE_OF_VALUE_BYTES; - } - } - - @Override - protected void setNotEmpty() { - if (isEmpty_) { - 
isEmpty_ = false; - clearBits(seg_, FLAGS_BYTE, (byte) (1 << Flags.IS_EMPTY.ordinal())); - - } - } - - @Override - protected boolean isInSamplingMode() { - return (seg_.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_IN_SAMPLING_MODE.ordinal())) != 0; - } - - // rebuild in the same MemorySegment - @Override - protected void rebuild(final int newCapacity) { - final int numValues = getNumValues(); - checkMemorySegmentSize(seg_, newCapacity, numValues); - final int currCapacity = getCurrentCapacity(); - final long[] keys = new long[currCapacity]; - final double[] values = new double[currCapacity * numValues]; - MemorySegment.copy(seg_, JAVA_LONG_UNALIGNED, keysOffset_, keys, 0, currCapacity); - MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, valuesOffset_, values, 0, currCapacity * numValues); - - clear(seg_, keysOffset_, ((long) SIZE_OF_KEY_BYTES * newCapacity) + ((long) SIZE_OF_VALUE_BYTES * newCapacity * numValues)); - seg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, 0); - seg_.set(JAVA_BYTE, LG_CUR_CAPACITY_BYTE, (byte)Integer.numberOfTrailingZeros(newCapacity)); - valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * newCapacity); - lgCurrentCapacity_ = Integer.numberOfTrailingZeros(newCapacity); - for (int i = 0; i < keys.length; i++) { - if ((keys[i] != 0) && (keys[i] < thetaLong_)) { - insert(keys[i], Arrays.copyOfRange(values, i * numValues, (i + 1) * numValues)); - } - } - setRebuildThreshold(); - } - - @Override - protected int insertKey(final long key) { - return HashOperations.hashInsertOnlyMemorySegment(seg_, lgCurrentCapacity_, key, ENTRIES_START); - } - - @Override - protected int findOrInsertKey(final long key) { - return HashOperations.hashSearchOrInsertMemorySegment(seg_, lgCurrentCapacity_, key, ENTRIES_START); - } - - @Override - protected double[] find(final long key) { - final int index = HashOperations.hashSearchMemorySegment(seg_, lgCurrentCapacity_, key, ENTRIES_START); - if (index == -1) { return null; } - final double[] array = new 
double[numValues_]; - MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, valuesOffset_ - + ((long) SIZE_OF_VALUE_BYTES * numValues_ * index), array, 0, numValues_); - return array; - } - - private static void checkMemorySegmentSize(final MemorySegment seg, final int numEntries, final int numValues) { - final int sizeNeeded = - ENTRIES_START + ((SIZE_OF_KEY_BYTES + (SIZE_OF_VALUE_BYTES * numValues)) * numEntries); - if (sizeNeeded > seg.byteSize()) { - throw new SketchesArgumentException("Not enough space: need " - + sizeNeeded + " bytes, got " + seg.byteSize() + " bytes"); - } - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchR.java deleted file mode 100644 index 7d2af2ba9..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchR.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.SketchesReadOnlyException; - -final class DirectArrayOfDoublesQuickSelectSketchR extends DirectArrayOfDoublesQuickSelectSketch { - - DirectArrayOfDoublesQuickSelectSketchR(final MemorySegment seg, final long seed) { - super(seg, seed); - } - - @Override - void insertOrIgnore(final long key, final double[] values) { - throw new SketchesReadOnlyException(); - } - - @Override - public void trim() { - throw new SketchesReadOnlyException(); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java deleted file mode 100644 index 953853b91..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; - -import java.lang.foreign.MemorySegment; - -/** - * Iterator over the off-heap, Direct tuple sketch of type ArrayOfDoubles (compact or hash table). - * - *

              This implementation uses data in a given MemorySegment that is owned and managed by the caller. - * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for - * the JVM to perform garbage collection.

              - */ -final class DirectArrayOfDoublesSketchIterator implements ArrayOfDoublesSketchIterator { - - private MemorySegment seg_; - private int offset_; - private int numEntries_; - private int numValues_; - private int i_; - private static final int SIZE_OF_KEY_BYTES = 8; - private static final int SIZE_OF_VALUE_BYTES = 8; - - DirectArrayOfDoublesSketchIterator(final MemorySegment seg, final int offset, final int numEntries, - final int numValues) { - seg_ = seg; - offset_ = offset; - numEntries_ = numEntries; - numValues_ = numValues; - i_ = -1; - } - - @Override - public boolean next() { - i_++; - while (i_ < numEntries_) { - final long off = offset_ + ((long) SIZE_OF_KEY_BYTES * i_); - if (seg_.get(JAVA_LONG_UNALIGNED, off) != 0) { return true; } - i_++; - } - return false; - } - - @Override - public long getKey() { - final long off = offset_ + ((long) SIZE_OF_KEY_BYTES * i_); - return seg_.get(JAVA_LONG_UNALIGNED, off); - } - - @Override - public double[] getValues() { - long off; - if (numValues_ == 1) { - off = offset_ + ((long) SIZE_OF_KEY_BYTES * numEntries_) + ((long) SIZE_OF_VALUE_BYTES * i_); - return new double[] { seg_.get(JAVA_DOUBLE_UNALIGNED, off) }; - } - final double[] array = new double[numValues_]; - off = offset_ + ((long) SIZE_OF_KEY_BYTES * numEntries_) + ((long) SIZE_OF_VALUE_BYTES * i_ * numValues_); - MemorySegment.copy(seg_, JAVA_DOUBLE_UNALIGNED, off, array, 0, numValues_); - return array; - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnion.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnion.java deleted file mode 100644 index 954a1d916..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnion.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.tuple2.SerializerDeserializer; - -/** - * Direct Union operation for tuple sketches of type ArrayOfDoubles. - * - *

              This implementation uses data in a given MemorySegment that is owned and managed by the caller. - * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for - * the JVM to perform garbage collection.

              - */ -class DirectArrayOfDoublesUnion extends ArrayOfDoublesUnion { - - final MemorySegment seg_; - - /** - * Creates an instance of DirectArrayOfDoublesUnion - * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than given value. - * @param numValues Number of double values to keep for each key. - * @param seed See seed - * @param dstSeg the destination MemorySegment - */ - DirectArrayOfDoublesUnion(final int nomEntries, final int numValues, final long seed, - final MemorySegment dstSeg) { - super(new DirectArrayOfDoublesQuickSelectSketch(nomEntries, 3, 1f, numValues, seed, - dstSeg.asSlice(PREAMBLE_SIZE_BYTES, dstSeg.byteSize() - PREAMBLE_SIZE_BYTES))); - seg_ = dstSeg; - seg_.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1); // unused, always 1 - seg_.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID); - seg_.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); - seg_.set(JAVA_BYTE, SKETCH_TYPE_BYTE, (byte) SerializerDeserializer.SketchType.ArrayOfDoublesUnion.ordinal()); - seg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, gadget_.getThetaLong()); - } - - //Called from wrapUnion below and extended by DirectArrayOfDoublesUnionR - DirectArrayOfDoublesUnion(final ArrayOfDoublesQuickSelectSketch gadget, final MemorySegment seg) { - super(gadget); - seg_ = seg; - unionThetaLong_ = seg.get(JAVA_LONG_UNALIGNED, THETA_LONG); - } - - @Override - void setUnionThetaLong(final long thetaLong) { - super.setUnionThetaLong(thetaLong); - seg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong); - } - - static ArrayOfDoublesUnion wrapUnion(final MemorySegment seg, final long seed, final boolean isWritable) { - final byte version = seg.get(JAVA_BYTE, ArrayOfDoublesUnion.SERIAL_VERSION_BYTE); - if (version != ArrayOfDoublesUnion.serialVersionUID) { - throw new SketchesArgumentException("Serial version mismatch. 
Expected: " - + serialVersionUID + ", actual: " + version); - } - SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE), seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE)); - SerializerDeserializer.validateType(seg.get(JAVA_BYTE, SKETCH_TYPE_BYTE), - SerializerDeserializer.SketchType.ArrayOfDoublesUnion); - - if (isWritable) { - final MemorySegment sketchSeg = seg.asSlice(PREAMBLE_SIZE_BYTES, seg.byteSize() - PREAMBLE_SIZE_BYTES); - return new DirectArrayOfDoublesUnion(new DirectArrayOfDoublesQuickSelectSketch(sketchSeg, seed), seg); - } - final MemorySegment sketchSeg = seg.asSlice(PREAMBLE_SIZE_BYTES, seg.byteSize() - PREAMBLE_SIZE_BYTES); - return new DirectArrayOfDoublesUnionR(new DirectArrayOfDoublesQuickSelectSketchR(sketchSeg, seed), seg); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnionR.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnionR.java deleted file mode 100644 index 13d3add4d..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesUnionR.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.SketchesReadOnlyException; - -final class DirectArrayOfDoublesUnionR extends DirectArrayOfDoublesUnion { - - /** - * Wraps the given MemorySegment. - * @param gadget the ArrayOfDoublesQuickSelectSketch - * @param seg the destination MemorySegment - */ - DirectArrayOfDoublesUnionR(final ArrayOfDoublesQuickSelectSketch gadget, final MemorySegment seg) { - super(gadget, seg); - } - - @Override - public void union(final ArrayOfDoublesSketch tupleSketch) { - throw new SketchesReadOnlyException(); - } - - @Override - public void reset() { - throw new SketchesReadOnlyException(); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HashTables.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HashTables.java deleted file mode 100644 index 0da06c5fc..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HashTables.java +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import static java.lang.Math.ceil; -import static java.lang.Math.max; -import static java.lang.Math.min; -import static org.apache.datasketches.common.Util.ceilingPowerOf2; -import static org.apache.datasketches.thetacommon2.HashOperations.hashInsertOnly; -import static org.apache.datasketches.thetacommon2.HashOperations.hashSearch; - -import org.apache.datasketches.thetacommon2.ThetaUtil; - -class HashTables { - private long[] hashTable = null; - private double[][] valueTable = null; - private int numValues = 0; - private int lgTableSize = 0; - private int numKeys = 0; - - //Construct from sketch - HashTables(final ArrayOfDoublesSketch sketchIn) { - numKeys = sketchIn.getRetainedEntries(); - numValues = sketchIn.getNumValues(); - - lgTableSize = getLgTableSize(numKeys); - final int tableSize = 1 << lgTableSize; - hashTable = new long[tableSize]; - valueTable = new double[tableSize][]; - final ArrayOfDoublesSketchIterator it = sketchIn.iterator(); - - while (it.next()) { - final long hash = it.getKey(); - final int index = hashInsertOnly(hashTable, lgTableSize, hash); - valueTable[index] = new double[numValues]; - System.arraycopy(it.getValues(), 0, valueTable[index], 0, numValues); - } - } - - //Construct: Load the hash and value tables from packed hash and value arrays - private HashTables(final long[] hashArr, final double[][] valuesArr, final int numKeys, final int numValues) { - this.numValues = numValues; - this.numKeys = numKeys; - lgTableSize = getLgTableSize(numKeys); - - final int tableSize = 1 << lgTableSize; - hashTable = new long[tableSize]; - valueTable = new double[tableSize][]; - - for (int i = 0; i < numKeys; i++) { - final long hash = hashArr[i]; - final int index = hashInsertOnly(hashTable, lgTableSize, hash); - valueTable[index] = new double[numValues]; - System.arraycopy(valuesArr[i], 0, valueTable[index], 0, numValues); - } - } - - HashTables getIntersectHashTables( - final 
ArrayOfDoublesSketch nextTupleSketch, - final long thetaLong, - final ArrayOfDoublesCombiner combiner) { - //Match nextSketch data with local instance data, filtering by theta - final int maxMatchSize = min(numKeys, nextTupleSketch.getRetainedEntries()); - assert numValues == nextTupleSketch.numValues_; - final long[] matchHashArr = new long[maxMatchSize]; - final double[][] matchValuesArr = new double[maxMatchSize][]; - - //Copy the intersecting items from local hashTables_ - // sequentially into local packed matchHashArr_ and matchValuesArr - int matchCount = 0; - final ArrayOfDoublesSketchIterator it = nextTupleSketch.iterator(); - while (it.next()) { - final long hash = it.getKey(); - if (hash >= thetaLong) { continue; } - final int index = hashSearch(hashTable, lgTableSize, hash); - if (index < 0) { continue; } - matchHashArr[matchCount] = hash; - matchValuesArr[matchCount] = combiner.combine(valueTable[index], it.getValues()); - matchCount++; - } - return new HashTables(matchHashArr, matchValuesArr, matchCount, numValues); - } - - int getNumKeys() { - return numKeys; - } - - int getNumValues() { - return numValues; - } - - long[] getHashTable() { - return hashTable; - } - - double[][] getValueTable() { - return valueTable; - } - - void clear() { - hashTable = null; - valueTable = null; - numValues = 0; - lgTableSize = 0; - numKeys = 0; - } - - static int getLgTableSize(final int numKeys) { - final int tableSize = max(ceilingPowerOf2((int) ceil(numKeys / 0.75)), 1 << ThetaUtil.MIN_LG_NOM_LONGS); - return Integer.numberOfTrailingZeros(tableSize); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java deleted file mode 100644 index 8741fc639..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketch.java +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Licensed 
to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; - -import java.lang.foreign.MemorySegment; -import java.nio.ByteOrder; -import java.util.Arrays; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; -import org.apache.datasketches.tuple2.SerializerDeserializer; - -/** - * The on-heap implementation of tuple Compact Sketch of type ArrayOfDoubles. - */ -final class HeapArrayOfDoublesCompactSketch extends ArrayOfDoublesCompactSketch { - - private final short seedHash_; - private long[] keys_; - private double[] values_; - - /** - * Converts the given UpdatableArrayOfDoublesSketch to this compact form. 
- * @param sketch the given UpdatableArrayOfDoublesSketch - */ - HeapArrayOfDoublesCompactSketch(final ArrayOfDoublesUpdatableSketch sketch) { - this(sketch, sketch.getThetaLong()); - } - - /** - * Converts the given UpdatableArrayOfDoublesSketch to this compact form - * trimming if necessary according to given thetaLong - * @param sketch the given UpdatableArrayOfDoublesSketch - * @param thetaLong new value of thetaLong - */ - HeapArrayOfDoublesCompactSketch(final ArrayOfDoublesUpdatableSketch sketch, final long thetaLong) { - super(sketch.getNumValues()); - isEmpty_ = sketch.isEmpty(); - thetaLong_ = Math.min(sketch.getThetaLong(), thetaLong); - seedHash_ = Util.computeSeedHash(sketch.getSeed()); - final int count = sketch.getRetainedEntries(); - if (count > 0) { - keys_ = new long[count]; - values_ = new double[count * numValues_]; - final ArrayOfDoublesSketchIterator it = sketch.iterator(); - int i = 0; - while (it.next()) { - final long key = it.getKey(); - if (key < thetaLong_) { - keys_[i] = key; - System.arraycopy(it.getValues(), 0, values_, i * numValues_, numValues_); - i++; - } - } - // trim if necessary - if (i < count) { - if (i == 0) { - keys_ = null; - values_ = null; - } else { - keys_ = Arrays.copyOf(keys_, i); - values_ = Arrays.copyOf(values_, i * numValues_); - } - } - } - } - - /* - * Creates an instance from components - */ - HeapArrayOfDoublesCompactSketch(final long[] keys, final double[] values, final long thetaLong, - final boolean isEmpty, final int numValues, final short seedHash) { - super(numValues); - keys_ = keys; - values_ = values; - thetaLong_ = thetaLong; - isEmpty_ = isEmpty; - seedHash_ = seedHash; - } - - /** - * This is to create an instance given a serialized form - * @param seg the destination segment - */ - HeapArrayOfDoublesCompactSketch(final MemorySegment seg) { - this(seg, Util.DEFAULT_UPDATE_SEED); - } - - /** - * This is to create an instance given a serialized form - * @param seg the source MemorySegment - * @param 
seed See seed - */ - HeapArrayOfDoublesCompactSketch(final MemorySegment seg, final long seed) { - super(seg.get(JAVA_BYTE, NUM_VALUES_BYTE)); - seedHash_ = seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT); - SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE), - seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE)); - SerializerDeserializer.validateType(seg.get(JAVA_BYTE, SKETCH_TYPE_BYTE), - SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch); - final byte version = seg.get(JAVA_BYTE, SERIAL_VERSION_BYTE); - if (version != serialVersionUID) { - throw new SketchesArgumentException( - "Serial version mismatch. Expected: " + serialVersionUID + ", actual: " + version); - } - final boolean isBigEndian = - (seg.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_BIG_ENDIAN.ordinal())) != 0; - if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { - throw new SketchesArgumentException("Byte order mismatch"); - } - Util.checkSeedHashes(seedHash_, Util.computeSeedHash(seed)); - isEmpty_ = (seg.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; - thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, THETA_LONG); - final boolean hasEntries = - (seg.get(JAVA_BYTE, FLAGS_BYTE) & (1 << Flags.HAS_ENTRIES.ordinal())) != 0; - if (hasEntries) { - final int count = seg.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT); - keys_ = new long[count]; - values_ = new double[count * numValues_]; - MemorySegment.copy(seg, JAVA_LONG_UNALIGNED, ENTRIES_START, keys_, 0, count); - MemorySegment.copy(seg, JAVA_DOUBLE_UNALIGNED, ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * count), values_, 0, values_.length); - } - } - - @Override - public ArrayOfDoublesCompactSketch compact(final MemorySegment dstSeg) { - if (dstSeg == null) { - return new - HeapArrayOfDoublesCompactSketch(keys_.clone(), values_.clone(), thetaLong_, isEmpty_, numValues_, seedHash_); - } else { - final byte[] byteArr = this.toByteArray(); - MemorySegment.copy(byteArr, 0, dstSeg, JAVA_BYTE, 0, 
byteArr.length); - return new DirectArrayOfDoublesCompactSketch(dstSeg); - } - } - - @Override - public int getRetainedEntries() { - return keys_ == null ? 0 : keys_.length; - } - - @Override - public byte[] toByteArray() { - final int count = getRetainedEntries(); - final int sizeBytes = getCurrentBytes(); - final byte[] bytes = new byte[sizeBytes]; - final MemorySegment seg = MemorySegment.ofArray(bytes); - seg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1); - seg.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID); - seg.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); - seg.set(JAVA_BYTE, SKETCH_TYPE_BYTE, (byte) SerializerDeserializer.SketchType.ArrayOfDoublesCompactSketch.ordinal()); - final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN); - seg.set(JAVA_BYTE, FLAGS_BYTE, (byte) ( - ((isBigEndian ? 1 : 0) << Flags.IS_BIG_ENDIAN.ordinal()) - | ((isEmpty() ? 1 : 0) << Flags.IS_EMPTY.ordinal()) - | ((count > 0 ? 1 : 0) << Flags.HAS_ENTRIES.ordinal()) - )); - seg.set(JAVA_BYTE, NUM_VALUES_BYTE, (byte) numValues_); - seg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, seedHash_); - seg.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); - if (count > 0) { - seg.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, count); - MemorySegment.copy(keys_, 0, seg, JAVA_LONG_UNALIGNED, ENTRIES_START, count); - MemorySegment.copy(values_, 0, seg, JAVA_DOUBLE_UNALIGNED, ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * count), values_.length); - } - return bytes; - } - - @Override - //converts compact heap array of double[] to compact double[][] - public double[][] getValues() { - final int count = getRetainedEntries(); - final double[][] values = new double[count][]; - if (count > 0) { - int i = 0; - for (int j = 0; j < count; j++) { - values[i++] = Arrays.copyOfRange(values_, j * numValues_, (j + 1) * numValues_); - } - } - return values; - } - - @Override - double[] getValuesAsOneDimension() { - return values_.clone(); - } - - @Override - long[] 
getKeys() { - return keys_.clone(); - } - - @Override - public ArrayOfDoublesSketchIterator iterator() { - return new HeapArrayOfDoublesSketchIterator(keys_, values_, numValues_); - } - - @Override - short getSeedHash() { - return seedHash_; - } - - @Override - public boolean hasMemorySegment() { return false; } - - @Override - MemorySegment getMemorySegment() { return null; } -} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesIntersection.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesIntersection.java deleted file mode 100644 index dc0383567..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesIntersection.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -/** - * On-heap implementation of intersection set operation for tuple sketches of type - * ArrayOfDoubles. 
- */ -final class HeapArrayOfDoublesIntersection extends ArrayOfDoublesIntersection { - - /** - * Creates an instance of a HeapArrayOfDoublesIntersection with a custom update seed - * @param numValues number of double values associated with each key - * @param seed See seed - */ - HeapArrayOfDoublesIntersection(final int numValues, final long seed) { - super(numValues, seed); - } - - @Override - protected ArrayOfDoublesQuickSelectSketch createSketch(final int nomEntries, final int numValues, final long seed) { - return new HeapArrayOfDoublesQuickSelectSketch(nomEntries, 0, 1f, numValues, seed); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java deleted file mode 100644 index 5d8744754..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketch.java +++ /dev/null @@ -1,365 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; -import static org.apache.datasketches.common.Util.ceilingPowerOf2; -import static org.apache.datasketches.common.Util.checkSeedHashes; -import static org.apache.datasketches.common.Util.computeSeedHash; -import static org.apache.datasketches.common.Util.exactLog2OfLong; - -import java.lang.foreign.MemorySegment; -import java.nio.ByteOrder; -import java.util.Arrays; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.thetacommon2.HashOperations; -import org.apache.datasketches.tuple2.SerializerDeserializer; -import org.apache.datasketches.tuple2.Util; - -/** - * The on-heap implementation of the tuple QuickSelect sketch of type ArrayOfDoubles. - */ - -final class HeapArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelectSketch { - - private final int lgNomEntries_; - private final int lgResizeFactor_; - private final float samplingProbability_; - - private int count_; - private long[] keys_; - private double[] values_; - - /** - * This is to create an instance of a QuickSelectSketch with custom resize factor and sampling - * probability - * @param nomEntries Nominal number of entries. Forced to the smallest power of 2 greater than - * or equal to the given value. 
- * @param lgResizeFactor log2(resize factor) - value from 0 to 3: - * 0 - no resizing (max size allocated), - * 1 - double internal hash table each time it reaches a threshold - * 2 - grow four times - * 3 - grow eight times (default) - * @param samplingProbability - * See Sampling Probability - * @param numValues number of double values to keep for each key - * @param seed See seed - */ - HeapArrayOfDoublesQuickSelectSketch(final int nomEntries, final int lgResizeFactor, - final float samplingProbability, final int numValues, final long seed) { - super(numValues, seed); - lgNomEntries_ = exactLog2OfLong(ceilingPowerOf2(nomEntries)); - lgResizeFactor_ = lgResizeFactor; - samplingProbability_ = samplingProbability; - thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability); - final int startingCapacity = Util.getStartingCapacity(nomEntries, lgResizeFactor); - keys_ = new long[startingCapacity]; - values_ = new double[startingCapacity * numValues]; - lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingCapacity); - setRebuildThreshold(); - } - - /** - * This is to create an instance given a serialized form - * @param seg the source MemorySegment - * @param seed See seed - */ - HeapArrayOfDoublesQuickSelectSketch(final MemorySegment seg, final long seed) { - super(seg.get(JAVA_BYTE, NUM_VALUES_BYTE), seed); - SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE), - seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE)); - SerializerDeserializer.validateType(seg.get(JAVA_BYTE, SKETCH_TYPE_BYTE), - SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch); - final byte version = seg.get(JAVA_BYTE, SERIAL_VERSION_BYTE); - if (version != serialVersionUID) { - throw new SketchesArgumentException("Serial version mismatch. 
Expected: " - + serialVersionUID + ", actual: " + version); - } - final byte flags = seg.get(JAVA_BYTE, FLAGS_BYTE); - final boolean isBigEndian = (flags & (1 << Flags.IS_BIG_ENDIAN.ordinal())) > 0; - if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { - throw new SketchesArgumentException("Byte order mismatch"); - } - checkSeedHashes(seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT), computeSeedHash(seed)); - isEmpty_ = (flags & (1 << Flags.IS_EMPTY.ordinal())) > 0; - lgNomEntries_ = seg.get(JAVA_BYTE, LG_NOM_ENTRIES_BYTE); - thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, THETA_LONG); - final int currentCapacity = 1 << seg.get(JAVA_BYTE, LG_CUR_CAPACITY_BYTE); - lgResizeFactor_ = seg.get(JAVA_BYTE, LG_RESIZE_FACTOR_BYTE); - samplingProbability_ = seg.get(JAVA_FLOAT_UNALIGNED, SAMPLING_P_FLOAT); - keys_ = new long[currentCapacity]; - values_ = new double[currentCapacity * numValues_]; - final boolean hasEntries = (flags & (1 << Flags.HAS_ENTRIES.ordinal())) > 0; - count_ = hasEntries ? 
seg.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT) : 0; - if (count_ > 0) { - MemorySegment.copy(seg, JAVA_LONG_UNALIGNED, ENTRIES_START, keys_, 0, currentCapacity); - final long off = ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * currentCapacity); - MemorySegment.copy(seg, JAVA_DOUBLE_UNALIGNED, off, values_, 0, currentCapacity * numValues_); - - } - setRebuildThreshold(); - lgCurrentCapacity_ = Integer.numberOfTrailingZeros(currentCapacity); - } - - @Override - //converts heap hashTable of double[] to compacted double[][] - public double[][] getValues() { - final int numVal = numValues_; - final int count = getRetainedEntries(); - final double[][] values = new double[count][]; - if (count > 0) { - int cnt = 0; - for (int j = 0; j < keys_.length; j++) { - if (keys_[j] == 0) { continue; } - values[cnt++] = Arrays.copyOfRange(values_, j * numVal, (j + 1) * numVal); - } - assert cnt == count; - } - return values; - } - - @Override - //converts heap hashTable of double[] to compacted double[] - double[] getValuesAsOneDimension() { - final int numVal = numValues_; - final int count = getRetainedEntries(); - final double[] values = new double[count * numVal]; - if (count > 0) { - int cnt = 0; - for (int j = 0; j < keys_.length; j++) { - if (keys_[j] == 0) { continue; } - System.arraycopy(values_, j * numVal, values, cnt++ * numVal, numVal); - } - assert cnt == count; - } - return values; - } - - @Override - //converts heap hashTable of long[] to compacted long[] - long[] getKeys() { - final int count = getRetainedEntries(); - final long[] keysArr = new long[count]; - if (count > 0) { - int cnt = 0; - for (int j = 0; j < keys_.length; j++) { - if (keys_[j] == 0) { continue; } - keysArr[cnt++] = keys_[j]; - } - assert cnt == count; - } - return keysArr; - } - - @Override - public int getRetainedEntries() { - return count_; - } - - @Override - public int getNominalEntries() { - return 1 << lgNomEntries_; - } - - @Override - public float getSamplingProbability() { - return 
samplingProbability_; - } - - @Override - public ResizeFactor getResizeFactor() { - return ResizeFactor.getRF(lgResizeFactor_); - } - - @Override - public byte[] toByteArray() { - final byte[] byteArray = new byte[getSerializedSizeBytes()]; - final MemorySegment seg = MemorySegment.ofArray(byteArray); - serializeInto(seg); - return byteArray; - } - - @Override - public ArrayOfDoublesSketchIterator iterator() { - return new HeapArrayOfDoublesSketchIterator(keys_, values_, numValues_); - } - - @Override - int getSerializedSizeBytes() { - return ENTRIES_START + ((SIZE_OF_KEY_BYTES + (SIZE_OF_VALUE_BYTES * numValues_)) * getCurrentCapacity()); - } - - // X/Y: X = Byte index for just AoDQuickSelectSketch - // Y = Byte index when combined with Union Preamble - // Long || Start Byte Adr: - // Adr: - // First 16 bytes are preamble from AoDUnion - // || 7/23 | 6/22 | 5/21 | 4/20 | 3/19 | 2/18 | 1/17 | 0/16 | - // 0/2 || Seed Hash | #Dbls | Flags | SkType2 | FamID | SerVer | Preamble_Longs | - // || 15/31 | 14/30 | 13/29 | 12/28 | 11/27 | 10/26 | 9/25 | 8/24 | - // 1/3 ||------------------------------Theta Long----------------------------------------------| - // || 23/39 | 22/38 | 21/37 | 20/36 | 19/35 | 18/34 | 17/33 | 16/32 | - // 2/4 || Sampling P Float | | LgRF |lgCapLongs| LgNomEntries | - // || 31/47 | 30/46 | 29/45 | 28/44 | 27/43 | 26/42 | 25/41 | 24/40 | - // 3/5 || | Retained Entries Int | - // || | 32/48 | - // 4/6 || Keys Array longs * keys[] Length | - // || Values Array doubles * values[] Length | - - @Override - void serializeInto(final MemorySegment seg) { - seg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) 1); - seg.set(JAVA_BYTE, SERIAL_VERSION_BYTE, serialVersionUID); - seg.set(JAVA_BYTE, FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); - seg.set(JAVA_BYTE, SKETCH_TYPE_BYTE, - (byte) SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch.ordinal()); - final boolean isBigEndian = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN); - 
seg.set(JAVA_BYTE, FLAGS_BYTE, (byte)( - (isBigEndian ? 1 << Flags.IS_BIG_ENDIAN.ordinal() : 0) - | (isInSamplingMode() ? 1 << Flags.IS_IN_SAMPLING_MODE.ordinal() : 0) - | (isEmpty_ ? 1 << Flags.IS_EMPTY.ordinal() : 0) - | (count_ > 0 ? 1 << Flags.HAS_ENTRIES.ordinal() : 0) - )); - seg.set(JAVA_BYTE, NUM_VALUES_BYTE, (byte) numValues_); - seg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, computeSeedHash(seed_)); - seg.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); - seg.set(JAVA_BYTE, LG_NOM_ENTRIES_BYTE, (byte) lgNomEntries_); - seg.set(JAVA_BYTE, LG_CUR_CAPACITY_BYTE, (byte) Integer.numberOfTrailingZeros(keys_.length)); - seg.set(JAVA_BYTE, LG_RESIZE_FACTOR_BYTE, (byte) lgResizeFactor_); - seg.set(JAVA_FLOAT_UNALIGNED, SAMPLING_P_FLOAT, samplingProbability_); - seg.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, count_); - if (count_ > 0) { - MemorySegment.copy(keys_, 0, seg, JAVA_LONG_UNALIGNED, ENTRIES_START, keys_.length); - final long off = ENTRIES_START + ((long) SIZE_OF_KEY_BYTES * keys_.length); - MemorySegment.copy(values_, 0, seg, JAVA_DOUBLE_UNALIGNED, off, values_.length); - } - } - - @Override - public boolean hasMemorySegment() { return false; } - - @Override - MemorySegment getMemorySegment() { return null; } - - @Override - public void reset() { - isEmpty_ = true; - count_ = 0; - thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability_); - final int startingCapacity = Util.getStartingCapacity(1 << lgNomEntries_, lgResizeFactor_); - keys_ = new long[startingCapacity]; - values_ = new double[startingCapacity * numValues_]; - lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingCapacity); - setRebuildThreshold(); - } - - @Override - protected long getKey(final int index) { - return keys_[index]; - } - - @Override - protected void incrementCount() { - count_++; - } - - @Override - protected void setValues(final int index, final double[] values) { - if (numValues_ == 1) { - values_[index] = values[0]; - } else { - System.arraycopy(values, 
0, values_, index * numValues_, numValues_); - } - } - - @Override - protected void updateValues(final int index, final double[] values) { - if (numValues_ == 1) { - values_[index] += values[0]; - } else { - final int offset = index * numValues_; - for (int i = 0; i < numValues_; i++) { - values_[offset + i] += values[i]; - } - } - } - - @Override - protected void setNotEmpty() { - isEmpty_ = false; - } - - @Override - protected boolean isInSamplingMode() { - return samplingProbability_ < 1f; - } - - @Override - protected void setThetaLong(final long thetaLong) { - thetaLong_ = thetaLong; - } - - @Override - protected int getCurrentCapacity() { - return keys_.length; - } - - @Override - protected void rebuild(final int newCapacity) { - final long[] oldKeys = keys_; - final double[] oldValues = values_; - keys_ = new long[newCapacity]; - values_ = new double[newCapacity * numValues_]; - count_ = 0; - lgCurrentCapacity_ = Integer.numberOfTrailingZeros(newCapacity); - for (int i = 0; i < oldKeys.length; i++) { - if ((oldKeys[i] != 0) && (oldKeys[i] < thetaLong_)) { - insert(oldKeys[i], Arrays.copyOfRange(oldValues, i * numValues_, (i + 1) * numValues_)); - } - } - setRebuildThreshold(); - } - - @Override - protected int insertKey(final long key) { - return HashOperations.hashInsertOnly(keys_, lgCurrentCapacity_, key); - } - - @Override - protected int findOrInsertKey(final long key) { - return HashOperations.hashSearchOrInsert(keys_, lgCurrentCapacity_, key); - } - - @Override - protected double[] find(final long key) { - final int index = HashOperations.hashSearch(keys_, lgCurrentCapacity_, key); - if (index == -1) { return null; } - return Arrays.copyOfRange(values_, index * numValues_, (index + 1) * numValues_); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesSketchIterator.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesSketchIterator.java deleted file mode 100644 index 
7d77978e8..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesSketchIterator.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import java.util.Arrays; - -/** - * Iterator over the on-heap ArrayOfDoublesSketch (compact or hash table) - */ -final class HeapArrayOfDoublesSketchIterator implements ArrayOfDoublesSketchIterator { - - private long[] keys_; - private double[] values_; - private int numValues_; - private int i_; - - HeapArrayOfDoublesSketchIterator(final long[] keys, final double[] values, final int numValues) { - keys_ = keys; - values_ = values; - numValues_ = numValues; - i_ = -1; - } - - @Override - public boolean next() { - if (keys_ == null) { return false; } - i_++; - while (i_ < keys_.length) { - if (keys_[i_] != 0) { return true; } - i_++; - } - return false; - } - - @Override - public long getKey() { - return keys_[i_]; - } - - @Override - public double[] getValues() { - if (numValues_ == 1) { - return new double[] { values_[i_] }; - } - return Arrays.copyOfRange(values_, i_ * numValues_, (i_ + 1) * numValues_); - } - -} diff --git 
a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesUnion.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesUnion.java deleted file mode 100644 index 6603aad95..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesUnion.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.tuple2.SerializerDeserializer; - -/** - * The on-heap implementation of the Union set operation for tuple sketches of type - * ArrayOfDoubles. - */ -final class HeapArrayOfDoublesUnion extends ArrayOfDoublesUnion { - - /** - * Creates an instance of HeapArrayOfDoublesUnion with a custom seed - * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than - * given value. - * @param numValues Number of double values to keep for each key. 
- * @param seed See seed - */ - HeapArrayOfDoublesUnion(final int nomEntries, final int numValues, final long seed) { - super(new HeapArrayOfDoublesQuickSelectSketch(nomEntries, 3, 1f, numValues, seed)); - } - - HeapArrayOfDoublesUnion(final ArrayOfDoublesQuickSelectSketch gadget, final long unionThetaLong) { - super(gadget); - unionThetaLong_ = unionThetaLong; - } - - /** - * This is to create an instance given a serialized form and a custom seed - * @param seg the source MemorySegment - * @param seed See seed - * @return a ArrayOfDoublesUnion on the Java heap - */ - static ArrayOfDoublesUnion heapifyUnion(final MemorySegment seg, final long seed) { - final byte version = seg.get(JAVA_BYTE, SERIAL_VERSION_BYTE); - if (version != serialVersionUID) { - throw new SketchesArgumentException("Serial version mismatch. Expected: " - + serialVersionUID + ", actual: " + version); - } - SerializerDeserializer.validateFamily(seg.get(JAVA_BYTE, FAMILY_ID_BYTE), seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE)); - SerializerDeserializer.validateType(seg.get(JAVA_BYTE, SKETCH_TYPE_BYTE), - SerializerDeserializer.SketchType.ArrayOfDoublesUnion); - - final MemorySegment sketchSeg = seg.asSlice(PREAMBLE_SIZE_BYTES, seg.byteSize() - PREAMBLE_SIZE_BYTES); - final ArrayOfDoublesQuickSelectSketch sketch = new HeapArrayOfDoublesQuickSelectSketch(sketchSeg, seed); - return new HeapArrayOfDoublesUnion(sketch, seg.get(JAVA_LONG_UNALIGNED, THETA_LONG)); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/package-info.java b/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/package-info.java deleted file mode 100644 index 5044b0e3e..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/arrayofdoubles/package-info.java +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/** - * This package is for a concrete implementation of the Tuple sketch for an array of double values. - */ - -package org.apache.datasketches.tuple2.arrayofdoubles; diff --git a/src/main/java/org/apache/datasketches/tuple2/package-info.java b/src/main/java/org/apache/datasketches/tuple2/package-info.java deleted file mode 100644 index 1cb15c83e..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/package-info.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -/** - * The tuple package contains a number of sketches based on the same - * fundamental algorithms of the Theta Sketch Framework and extend these - * concepts for whole new families of sketches. - */ -package org.apache.datasketches.tuple2; diff --git a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSketch.java b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSketch.java deleted file mode 100644 index 3c8129bc0..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSketch.java +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.strings; - -import static org.apache.datasketches.tuple2.Util.stringArrHash; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.tuple2.UpdatableSketch; - -/** - * Extends UpdatableSketch<String[], ArrayOfStringsSummary> - * @author Lee Rhodes - */ -public class ArrayOfStringsSketch extends UpdatableSketch { - - /** - * Constructs new sketch with default K = 4096 (lgK = 12), default ResizeFactor=X8, - * and default p = 1.0. 
- */ - public ArrayOfStringsSketch() { - this(12); - } - - /** - * Constructs new sketch with default ResizeFactor=X8, default p = 1.0 and given lgK. - * @param lgK Log_base2 of Nominal Entries. - * See Nominal Entries - */ - public ArrayOfStringsSketch(final int lgK) { - this(lgK, ResizeFactor.X8, 1.0F); - } - - /** - * Constructs new sketch with given ResizeFactor, p and lgK. - * @param lgK Log_base2 of Nominal Entries. - * See Nominal Entries - * @param rf ResizeFactor - * See Resize Factor - * @param p sampling probability - * See Sampling Probability - */ - public ArrayOfStringsSketch(final int lgK, final ResizeFactor rf, final float p) { - super(1 << lgK, rf.lg(), p, new ArrayOfStringsSummaryFactory()); - } - - /** - * Constructs this sketch from a MemorySegment image, which must be from an ArrayOfStringsSketch, and - * usually with data. - * @param seg the given MemorySegment - * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated. - * This capability will be removed in a future release. - * Heapifying a CompactSketch is not deprecated. - */ - @Deprecated - public ArrayOfStringsSketch(final MemorySegment seg) { - super(seg, new ArrayOfStringsSummaryDeserializer(), new ArrayOfStringsSummaryFactory()); - } - - /** - * Copy Constructor - * @param sketch the sketch to copy - */ - public ArrayOfStringsSketch(final ArrayOfStringsSketch sketch) { - super(sketch); - } - - /** - * @return a deep copy of this sketch - */ - @Override - public ArrayOfStringsSketch copy() { - return new ArrayOfStringsSketch(this); - } - - /** - * Updates the sketch with String arrays for both key and value. 
- * @param strArrKey the given String array key - * @param strArr the given String array value - */ - public void update(final String[] strArrKey, final String[] strArr) { - super.update(stringArrHash(strArrKey), strArr); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummary.java b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummary.java deleted file mode 100644 index 1eed22fe1..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummary.java +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2.strings; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; -import static java.nio.charset.StandardCharsets.UTF_8; -import static org.apache.datasketches.tuple2.Util.stringArrHash; -import static org.apache.datasketches.tuple2.Util.stringConcat; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.tuple2.UpdatableSummary; - -/** - * Implements UpdatableSummary<String[]> - * @author Lee Rhodes - */ -public final class ArrayOfStringsSummary implements UpdatableSummary { - - private String[] stringArr = null; - - ArrayOfStringsSummary() { //required for ArrayOfStringsSummaryFactory - stringArr = null; - } - - //Used by copy() and in test - ArrayOfStringsSummary(final String[] stringArr) { - this.stringArr = stringArr.clone(); - checkNumNodes(stringArr.length); - } - - //used by fromMemorySegment and in test - /** - * This reads a MemorySegment that has a layout similar to the C struct: - * {@snippet : - * typedef struct { - * int totBytes; - * byte nodes; //number of Nodes. - * Node[nodes] = { Node[0], Node[1], ... } - * } - * } - * Where a Node has a layout similar to the C struct: - * {@snippet : - * typedef struct { - * int numBytes; - * byte[] byteArray; //UTF-8 byte array. Not null terminated. 
- * } - * } - * @param seg the MemorySegment containing the Summary data - */ - ArrayOfStringsSummary(final MemorySegment seg) { - int pos = 0; - final int totBytes = seg.get(JAVA_INT_UNALIGNED, pos); pos += Integer.BYTES; - checkInBytes(seg, totBytes); - final int nodes = seg.get(JAVA_BYTE, pos); pos += Byte.BYTES; - checkNumNodes(nodes); - final String[] stringArr = new String[nodes]; - for (int i = 0; i < nodes; i++) { - final int len = seg.get(JAVA_INT_UNALIGNED, pos); pos += Integer.BYTES; - final byte[] byteArr = new byte[len]; - MemorySegment.copy(seg, JAVA_BYTE, pos, byteArr, 0, len); pos += len; - stringArr[i] = new String(byteArr, UTF_8); - } - assert pos == totBytes; - this.stringArr = stringArr; - } - - @Override - public ArrayOfStringsSummary copy() { - final ArrayOfStringsSummary nodes = new ArrayOfStringsSummary(stringArr); - return nodes; - } - - @Override - public byte[] toByteArray() { - final ComputeBytes cb = new ComputeBytes(stringArr); - final int totBytes = cb.totBytes_; - final byte[] out = new byte[totBytes]; - final MemorySegment wseg = MemorySegment.ofArray(out); - int pos = 0; - wseg.set(JAVA_INT_UNALIGNED, pos, totBytes); pos += Integer.BYTES; - final int numNodes = cb.numNodes_; - wseg.set(JAVA_BYTE, pos, (byte)numNodes); pos += Byte.BYTES; - for (int i = 0; i < numNodes; i++) { - final int nodeLen = cb.nodeLengthsArr_[i]; - wseg.set(JAVA_INT_UNALIGNED, pos, nodeLen); pos += Integer.BYTES; - MemorySegment.copy(cb.nodeBytesArr_[i], 0, wseg, JAVA_BYTE, pos, nodeLen); pos += nodeLen; - } - assert pos == totBytes; - return out; - } - - //From UpdatableSummary - - @Override - public ArrayOfStringsSummary update(final String[] value) { - if (stringArr == null) { - stringArr = value.clone(); - } - return this; - } - - //From Object - - @Override - public int hashCode() { - return (int) stringArrHash(stringArr); - } - - @Override - public boolean equals(final Object summary) { - if (summary == null || !(summary instanceof 
ArrayOfStringsSummary)) { - return false; - } - final String thatStr = stringConcat(((ArrayOfStringsSummary) summary).stringArr); - final String thisStr = stringConcat(stringArr); - return thisStr.equals(thatStr); - } - - /** - * Returns the nodes array for this summary. - * @return the nodes array for this summary. - */ - public String[] getValue() { - return stringArr.clone(); - } - - //also used in test - static void checkNumNodes(final int numNodes) { - if (numNodes > 127 || numNodes < 0) { - throw new SketchesArgumentException("Number of nodes cannot exceed 127 or be negative."); - } - } - - //also used in test - static void checkInBytes(final MemorySegment seg, final int totBytes) { - if (seg.byteSize() < totBytes) { - throw new SketchesArgumentException("Incoming MemorySegment has insufficient capacity."); - } - } - - /** - * Computes total bytes and number of nodes from the given string array. - */ - private static class ComputeBytes { - final byte numNodes_; - final int[] nodeLengthsArr_; - final byte[][] nodeBytesArr_; - final int totBytes_; - - ComputeBytes(final String[] stringArr) { - numNodes_ = (byte) stringArr.length; - checkNumNodes(numNodes_); - nodeLengthsArr_ = new int[numNodes_]; - nodeBytesArr_ = new byte[numNodes_][]; - int sumNodeBytes = 0; - for (int i = 0; i < numNodes_; i++) { - nodeBytesArr_[i] = stringArr[i].getBytes(UTF_8); - nodeLengthsArr_[i] = nodeBytesArr_[i].length; - sumNodeBytes += nodeLengthsArr_[i]; - } - totBytes_ = sumNodeBytes + (numNodes_ + 1) * Integer.BYTES + 1; - } - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryDeserializer.java b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryDeserializer.java deleted file mode 100644 index 3a82da6df..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryDeserializer.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or 
more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.strings; - -import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.tuple2.DeserializeResult; -import org.apache.datasketches.tuple2.SummaryDeserializer; - -/** - * Implements SummaryDeserializer<ArrayOfStringsSummary> - * @author Lee Rhodes - */ -public class ArrayOfStringsSummaryDeserializer implements SummaryDeserializer { - - @Override - public DeserializeResult heapifySummary(final MemorySegment seg) { - return ArrayOfStringsSummaryDeserializer.fromMemorySegment(seg); - } - - /** - * Also used in test. 
- * @param seg the given MemorySegment - * @return the DeserializeResult - */ - static DeserializeResult fromMemorySegment(final MemorySegment seg) { - final ArrayOfStringsSummary nsum = new ArrayOfStringsSummary(seg); - final int totBytes = seg.get(JAVA_INT_UNALIGNED, 0); - return new DeserializeResult<>(nsum, totBytes); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryFactory.java b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryFactory.java deleted file mode 100644 index b0fb5a539..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryFactory.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2.strings; - -import org.apache.datasketches.tuple2.SummaryFactory; - -/** - * Implements SummaryFactory<ArrayOfStringsSummary> - * @author Lee Rhodes - */ -public class ArrayOfStringsSummaryFactory implements SummaryFactory { - - @Override - public ArrayOfStringsSummary newSummary() { - return new ArrayOfStringsSummary(); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummarySetOperations.java b/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummarySetOperations.java deleted file mode 100644 index 07225f45c..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummarySetOperations.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2.strings; - -import org.apache.datasketches.tuple2.SummarySetOperations; - -/** - * Implements SummarySetOperations<ArrayOfStringsSummary> - * @author Lee Rhodes - */ -public class ArrayOfStringsSummarySetOperations implements SummarySetOperations { - - @Override - public ArrayOfStringsSummary union(final ArrayOfStringsSummary a, final ArrayOfStringsSummary b) { - return a.copy(); - } - - @Override - public ArrayOfStringsSummary intersection(final ArrayOfStringsSummary a, final ArrayOfStringsSummary b) { - return a.copy(); - } - -} diff --git a/src/main/java/org/apache/datasketches/tuple2/strings/package-info.java b/src/main/java/org/apache/datasketches/tuple2/strings/package-info.java deleted file mode 100644 index 25a2be3e6..000000000 --- a/src/main/java/org/apache/datasketches/tuple2/strings/package-info.java +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/** - * This package is for a generic implementation of the Tuple sketch for single String value. 
- */ - -package org.apache.datasketches.tuple2.strings; diff --git a/src/test/java/org/apache/datasketches/fdt/FdtSketchTest.java b/src/test/java/org/apache/datasketches/fdt/FdtSketchTest.java index 14945bac0..4c1527b7a 100644 --- a/src/test/java/org/apache/datasketches/fdt/FdtSketchTest.java +++ b/src/test/java/org/apache/datasketches/fdt/FdtSketchTest.java @@ -23,10 +23,13 @@ import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; +import java.lang.foreign.MemorySegment; import java.util.List; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.fdt.FdtSketch; +import org.apache.datasketches.fdt.Group; +import org.apache.datasketches.fdt.PostProcessor; import org.apache.datasketches.tuple.TupleSketchIterator; import org.apache.datasketches.tuple.strings.ArrayOfStringsSummary; import org.testng.annotations.Test; @@ -59,8 +62,8 @@ public void checkFdtSketch() { //serialize final byte[] byteArr = sketch.toByteArray(); //deserialize - Memory mem = Memory.wrap(byteArr); - FdtSketch sketch2 = new FdtSketch(mem); + MemorySegment seg = MemorySegment.ofArray(byteArr); + FdtSketch sketch2 = new FdtSketch(seg); //check output final TupleSketchIterator it2 = sketch2.iterator(); diff --git a/src/test/java/org/apache/datasketches/fdt/GroupTest.java b/src/test/java/org/apache/datasketches/fdt/GroupTest.java index 21427fb0b..2d84a0d1d 100644 --- a/src/test/java/org/apache/datasketches/fdt/GroupTest.java +++ b/src/test/java/org/apache/datasketches/fdt/GroupTest.java @@ -21,6 +21,7 @@ import static org.testng.Assert.assertEquals; +import org.apache.datasketches.fdt.Group; import org.testng.annotations.Test; /** diff --git a/src/test/java/org/apache/datasketches/fdt2/FdtSketchTest.java b/src/test/java/org/apache/datasketches/fdt2/FdtSketchTest.java deleted file mode 100644 index 59f7e6565..000000000 --- a/src/test/java/org/apache/datasketches/fdt2/FdtSketchTest.java 
+++ /dev/null @@ -1,184 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.fdt2; - -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertTrue; -import static org.testng.Assert.fail; - -import java.lang.foreign.MemorySegment; -import java.util.List; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.tuple2.TupleSketchIterator; -import org.apache.datasketches.tuple2.strings.ArrayOfStringsSummary; -import org.testng.annotations.Test; - -/** - * @author Lee Rhodes - */ -public class FdtSketchTest { - private static final String LS = System.getProperty("line.separator"); - private static final char sep = '|'; //string separator - - @SuppressWarnings("deprecation") - @Test - public void checkFdtSketch() { - final int lgK = 14; - final FdtSketch sketch = new FdtSketch(lgK); - - final String[] nodesArr = {"abc", "def" }; - sketch.update(nodesArr); - - final TupleSketchIterator it = sketch.iterator(); - int count = 0; - while (it.next()) { - final String[] nodesArr2 = it.getSummary().getValue(); - assertEquals(nodesArr2, nodesArr); - count++; - } - assertEquals(count, 1); - - //serialize - final 
byte[] byteArr = sketch.toByteArray(); - //deserialize - MemorySegment seg = MemorySegment.ofArray(byteArr); - FdtSketch sketch2 = new FdtSketch(seg); - - //check output - final TupleSketchIterator it2 = sketch2.iterator(); - int count2 = 0; - while (it2.next()) { - final String[] nodesArr2 = it2.getSummary().getValue(); - assertEquals(nodesArr2, nodesArr); - count2++; - } - assertEquals(count, count2); - assertEquals(sketch2.getEstimate(), sketch.getEstimate()); - assertEquals(sketch2.getLowerBound(2), sketch.getLowerBound(2)); - assertEquals(sketch2.getUpperBound(2), sketch.getUpperBound(2)); - } - - @Test - public void checkAlternateLgK() { - int lgK = FdtSketch.computeLgK(.01, .01); - assertEquals(lgK, 20); - lgK = FdtSketch.computeLgK(.02, .05); - assertEquals(lgK, 15); - try { - lgK = FdtSketch.computeLgK(.01, .001); - fail(); - } catch (SketchesArgumentException e) { - //ok - } - } - - @Test - public void checkFdtSketchWithThreshold() { - FdtSketch sk = new FdtSketch(.02, .05); //thresh, RSE - assertEquals(sk.getLgK(), 15); - println("LgK: " + sk.getLgK()); - } - - @Test - public void simpleCheckPostProcessing() { - FdtSketch sk = new FdtSketch(8); - int[] priKeyIndices = {0,2}; - String[] arr1 = {"a", "1", "c"}; - String[] arr2 = {"a", "2", "c"}; - String[] arr3 = {"a", "3", "c"}; - String[] arr4 = {"a", "4", "c"}; - String[] arr5 = {"a", "1", "d"}; - String[] arr6 = {"a", "2", "d"}; - sk.update(arr1); - sk.update(arr2); - sk.update(arr3); - sk.update(arr4); - sk.update(arr5); - sk.update(arr6); - //get results from PostProcessor directly - Group gp = new Group(); //uninitialized - PostProcessor post = new PostProcessor(sk, gp, sep); - post = sk.getPostProcessor(gp, sep); - post = sk.getPostProcessor(); //equivalent - List list = post.getGroupList(priKeyIndices, 2, 0); - assertEquals(list.size(), 2); - assertEquals(post.getGroupCount(), 2); - println(gp.getHeader()); - for (int i = 0; i < list.size(); i++) { - println(list.get(i).toString()); - } - list = 
post.getGroupList(priKeyIndices, 2, 1); - assertEquals(list.size(), 1); - - //get results from sketch directly - list = sk.getResult(priKeyIndices, 0, 2, sep); - assertEquals(list.size(), 2); - } - - @Test - public void checkEstimatingPostProcessing() { - FdtSketch sk = new FdtSketch(4); - int[] priKeyIndices = {0}; - for (int i = 0; i < 32; i++) { - String[] arr = {"a", Integer.toHexString(i)}; - sk.update(arr); - } - assertTrue(sk.isEstimationMode()); - List list = sk.getResult(priKeyIndices, 0, 2, sep); - assertEquals(list.size(), 1); - println(new Group().getHeader()); - for (int i = 0; i < list.size(); i++) { - println(list.get(i).toString()); - } - } - - @Test - public void checkCopyCtor() { - final int lgK = 14; - final FdtSketch sk = new FdtSketch(lgK); - - final String[] nodesArr = {"abc", "def" }; - sk.update(nodesArr); - assertEquals(sk.getRetainedEntries(), 1); - final FdtSketch sk2 = sk.copy(); - assertEquals(sk2.getRetainedEntries(), 1); - } - - @Test - public void printlnTest() { - println("PRINTING: "+this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(String s) { - print(s + LS); - } - - /** - * @param s value to print - */ - static void print(String s) { - //System.out.print(s); //disable here - } - -} diff --git a/src/test/java/org/apache/datasketches/fdt2/GroupTest.java b/src/test/java/org/apache/datasketches/fdt2/GroupTest.java deleted file mode 100644 index 929e6b4c6..000000000 --- a/src/test/java/org/apache/datasketches/fdt2/GroupTest.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.fdt2; - -import static org.testng.Assert.assertEquals; - -import org.testng.annotations.Test; - -/** - * @author Lee Rhodes - */ -public class GroupTest { - private static final String LS = System.getProperty("line.separator"); - - @Test - public void checkToString() { //check visually - Group gp = new Group(); - gp.init("AAAAAAAA,BBBBBBBBBB", 100_000_000, 1E8, 1.2E8, 8E7, 0.1, 0.01); - assertEquals(gp.getPrimaryKey(), "AAAAAAAA,BBBBBBBBBB"); - assertEquals(gp.getCount(), 100_000_000); - assertEquals(gp.getEstimate(), 1E8); - assertEquals(gp.getUpperBound(), 1.2E8); - assertEquals(gp.getLowerBound(), 8E7); - assertEquals(gp.getFraction(), 0.1); - assertEquals(gp.getRse(), 0.01); - - println(gp.getHeader()); - println(gp.toString()); - } - - @Test - public void printlnTest() { - println("PRINTING: "+this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(String s) { - print(s + LS); - } - - /** - * @param s value to print - */ - static void print(String s) { - //System.out.print(s); //disable here - } - -} diff --git a/src/test/java/org/apache/datasketches/theta/AnotBimplTest.java b/src/test/java/org/apache/datasketches/theta/AnotBimplTest.java index 48c9fc2fb..e400dd1fa 100644 --- a/src/test/java/org/apache/datasketches/theta/AnotBimplTest.java +++ b/src/test/java/org/apache/datasketches/theta/AnotBimplTest.java @@ -24,10 +24,17 @@ import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; +import java.lang.foreign.MemorySegment; import 
org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.theta.AnotB; +import org.apache.datasketches.theta.AnotBimpl; +import org.apache.datasketches.theta.CompactSketch; +import org.apache.datasketches.theta.SetOperation; +import org.apache.datasketches.theta.Sketch; +import org.apache.datasketches.theta.Sketches; +import org.apache.datasketches.theta.UpdateSketch; import org.testng.annotations.Test; /** @@ -70,16 +77,16 @@ public void checkExactAnotB_AvalidNoOverlap() { assertEquals(rsk1.getEstimate(), k/2.0); final int bytes = rsk1.getCurrentBytes(); - final WritableMemory wmem = WritableMemory.allocate(bytes); + final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); aNb.setA(usk1); aNb.notB(usk2); - rsk1 = aNb.getResult(false, wmem, true); //unordered, reset + rsk1 = aNb.getResult(false, wseg, true); //unordered, reset assertEquals(rsk1.getEstimate(), k/2.0); aNb.setA(usk1); aNb.notB(usk2); - rsk1 = aNb.getResult(true, wmem, true); //ordered, reset + rsk1 = aNb.getResult(true, wseg, true); //ordered, reset assertEquals(rsk1.getEstimate(), k/2.0); } @@ -214,12 +221,12 @@ public void checkAnotBnotC() { final UpdateSketch cU = UpdateSketch.builder().setNominalEntries(k).build(); for (int i=k/2; i<3*k/4; i++) { cU.update(i); } //third 256 - final int memBytes = Sketch.getMaxUpdateSketchBytes(k); + final int segBytes = Sketch.getMaxUpdateSketchBytes(k); CompactSketch result1, result2, result3; - final WritableMemory wmem1 = WritableMemory.allocate(memBytes); - final WritableMemory wmem2 = WritableMemory.allocate(memBytes); - final WritableMemory wmem3 = WritableMemory.allocate(memBytes); + final MemorySegment wseg1 = MemorySegment.ofArray(new byte[segBytes]); + final MemorySegment wseg2 = MemorySegment.ofArray(new byte[segBytes]); + final MemorySegment wseg3 = 
MemorySegment.ofArray(new byte[segBytes]); final AnotB aNb = SetOperation.builder().buildANotB(); @@ -227,11 +234,11 @@ public void checkAnotBnotC() { aNb.setA(aU); //stateful - result1 = aNb.aNotB(aU, bU, ordered, wmem1); //stateless + result1 = aNb.aNotB(aU, bU, ordered, wseg1); //stateless aNb.notB(bU); //stateful - result2 = aNb.aNotB(result1, cU, ordered, wmem2); //stateless + result2 = aNb.aNotB(result1, cU, ordered, wseg2); //stateless aNb.notB(cU); //stateful @@ -239,13 +246,13 @@ public void checkAnotBnotC() { println("est: "+est2); assertEquals(est2, k/4.0, 0.0); - result3 = aNb.getResult(ordered, wmem3, true); //stateful result, then reset + result3 = aNb.getResult(ordered, wseg3, true); //stateful result, then reset final double est3 = result3.getEstimate(); assertEquals(est3, k/4.0, 0.0); } @Test - public void checkAnotBnotC_sameMemory() { + public void checkAnotBnotC_sameMemorySegment() { final int k = 1024; final boolean ordered = true; @@ -258,8 +265,8 @@ public void checkAnotBnotC_sameMemory() { final UpdateSketch c = UpdateSketch.builder().setNominalEntries(k).build(); for (int i=k/2; i<3*k/4; i++) { c.update(i); } //third 256 - final int memBytes = Sketch.getMaxCompactSketchBytes(a.getRetainedEntries(true)); - final WritableMemory mem = WritableMemory.allocate(memBytes); + final int segBytes = Sketch.getMaxCompactSketchBytes(a.getRetainedEntries(true)); + final MemorySegment seg = MemorySegment.ofArray(new byte[segBytes]); CompactSketch result1, result2; final AnotB aNb = SetOperation.builder().buildANotB(); @@ -268,15 +275,15 @@ public void checkAnotBnotC_sameMemory() { aNb.setA(a); //stateful - result1 = aNb.aNotB(a, b, ordered, mem); //stateless + result1 = aNb.aNotB(a, b, ordered, seg); //stateless aNb.notB(b); //stateful - result1 = aNb.aNotB(result1, c, ordered, mem); //stateless + result1 = aNb.aNotB(result1, c, ordered, seg); //stateless aNb.notB(c); //stateful - result2 = aNb.getResult(ordered, mem, true); //stateful result, then reset + 
result2 = aNb.getResult(ordered, seg, true); //stateful result, then reset final double est1 = result1.getEstimate(); //check stateless result println("est: "+est1); diff --git a/src/test/java/org/apache/datasketches/theta/BackwardConversions.java b/src/test/java/org/apache/datasketches/theta/BackwardConversions.java index edd327021..a0688cbba 100644 --- a/src/test/java/org/apache/datasketches/theta/BackwardConversions.java +++ b/src/test/java/org/apache/datasketches/theta/BackwardConversions.java @@ -19,10 +19,17 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; + +import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.theta.CompactSketch; +import org.apache.datasketches.theta.EmptyCompactSketch; +import org.apache.datasketches.theta.SingleItemSketch; /** * This class converts current compact sketches into prior SerVer 1 and SerVer 2 format for testing. @@ -32,7 +39,7 @@ public class BackwardConversions { /** - * Converts a SerVer3 ordered, heap CompactSketch to a SerVer1 ordered, SetSketch in Memory. + * Converts a SerVer3 ordered, heap CompactSketch to a SerVer1 ordered, SetSketch in MemorySegment. * This is exclusively for testing purposes. * *

              V1 dates from roughly Aug 2014 to about May 2015. @@ -75,36 +82,37 @@ public class BackwardConversions { *

            * * @param skV3 a SerVer3, ordered CompactSketch - * @return a SerVer1 SetSketch as Memory object. + * @return a SerVer1 SetSketch as MemorySegment object. */ - public static Memory convertSerVer3toSerVer1(final CompactSketch skV3) { + public static MemorySegment convertSerVer3toSerVer1(final CompactSketch skV3) { //Check input sketch - final boolean validIn = skV3.isCompact() && skV3.isOrdered() && !skV3.hasMemory(); + final boolean validIn = skV3.isCompact() && skV3.isOrdered() && !skV3.hasMemorySegment(); if (!validIn) { throw new SketchesArgumentException("Invalid input sketch."); } - //Build V1 SetSketch in memory + //Build V1 SetSketch in MemorySegment final int curCount = skV3.getRetainedEntries(true); - final WritableMemory wmem = WritableMemory.allocate((3 + curCount) << 3); + final int bytes = (3 + curCount) << 3; + final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]);//Util.newHeapSegment(bytes); //Pre0 - wmem.putByte(0, (byte) 3); //preLongs - wmem.putByte(1, (byte) 1); //SerVer - wmem.putByte(2, (byte) 3); //Compact (SetSketch) - wmem.putByte(6, (byte) 2); //Flags ReadOnly, LittleEndian + wseg.set(JAVA_BYTE, 0, (byte) 3); //preLongs + wseg.set(JAVA_BYTE, 1, (byte) 1); //SerVer + wseg.set(JAVA_BYTE, 2, (byte) 3); //Compact (SetSketch) + wseg.set(JAVA_BYTE, 6, (byte) 2); //Flags ReadOnly, LittleEndian //Pre1 - wmem.putInt(8, curCount); + wseg.set(JAVA_INT_UNALIGNED, 8, curCount); //Pre2 - wmem.putLong(16, skV3.getThetaLong()); + wseg.set(JAVA_LONG_UNALIGNED, 16, skV3.getThetaLong()); //Data if (curCount > 0) { - wmem.putLongArray(24, skV3.getCache(), 0, curCount); + MemorySegment.copy(skV3.getCache(), 0, wseg, JAVA_LONG_UNALIGNED, 24, curCount); } - return wmem; + return wseg; } /** - * Converts a SerVer3 ordered, heap CompactSketch to a SerVer2 ordered, SetSketch in Memory. + * Converts a SerVer3 ordered, heap CompactSketch to a SerVer2 ordered, SetSketch in MemorySegment. * This is exclusively for testing purposes. * *

            V2 is short-lived and dates from roughly Mid May 2015 to about June 1st, 2015. @@ -179,54 +187,54 @@ public static Memory convertSerVer3toSerVer1(final CompactSketch skV3) { * * @param skV3 a SerVer3, ordered CompactSketch * @param seed used for checking the seed hash (if one exists). - * @return a SerVer2 SetSketch as Memory object. + * @return a SerVer2 SetSketch as MemorySegment object. */ - public static Memory convertSerVer3toSerVer2(final CompactSketch skV3, final long seed) { + public static MemorySegment convertSerVer3toSerVer2(final CompactSketch skV3, final long seed) { final short seedHash = Util.computeSeedHash(seed); - WritableMemory wmem = null; + MemorySegment wseg = null; if (skV3 instanceof EmptyCompactSketch) { - wmem = WritableMemory.allocate(8); - wmem.putByte(0, (byte) 1); //preLongs - wmem.putByte(1, (byte) 2); //SerVer - wmem.putByte(2, (byte) 3); //SetSketch + wseg = MemorySegment.ofArray(new long[1]); + wseg.set(JAVA_BYTE, 0, (byte) 1); //preLongs + wseg.set(JAVA_BYTE, 1, (byte) 2); //SerVer + wseg.set(JAVA_BYTE, 2, (byte) 3); //SetSketch final byte flags = (byte) 0xE; //NoRebuild, Empty, ReadOnly, LE - wmem.putByte(5, flags); - wmem.putShort(6, seedHash); - return wmem; + wseg.set(JAVA_BYTE, 5, flags); + wseg.set(JAVA_SHORT_UNALIGNED, 6, seedHash); + return wseg; } if (skV3 instanceof SingleItemSketch) { final SingleItemSketch sis = (SingleItemSketch) skV3; - wmem = WritableMemory.allocate(24); - wmem.putByte(0, (byte) 2); //preLongs - wmem.putByte(1, (byte) 2); //SerVer - wmem.putByte(2, (byte) 3); //SetSketch + wseg = MemorySegment.ofArray(new long[3]); + wseg.set(JAVA_BYTE, 0, (byte) 2); //preLongs + wseg.set(JAVA_BYTE, 1, (byte) 2); //SerVer + wseg.set(JAVA_BYTE, 2, (byte) 3); //SetSketch final byte flags = (byte) 0xA; //NoRebuild, notEmpty, ReadOnly, LE - wmem.putByte(5, flags); - wmem.putShort(6, seedHash); - wmem.putInt(8, 1); + wseg.set(JAVA_BYTE, 5, flags); + wseg.set(JAVA_SHORT_UNALIGNED, 6, seedHash); + 
wseg.set(JAVA_INT_UNALIGNED, 8, 1); final long[] arr = sis.getCache(); - wmem.putLong(16, arr[0]); - return wmem; + wseg.set(JAVA_LONG_UNALIGNED, 16, arr[0]); + return wseg; } //General CompactSketch final int preLongs = skV3.getCompactPreambleLongs(); final int entries = skV3.getRetainedEntries(true); final boolean unordered = !(skV3.isOrdered()); final byte flags = (byte) (0xA | (unordered ? 16 : 0)); //Unordered, NoRebuild, notEmpty, ReadOnly, LE - wmem = WritableMemory.allocate((preLongs + entries) << 3); - wmem.putByte(0, (byte) preLongs); //preLongs - wmem.putByte(1, (byte) 2); //SerVer - wmem.putByte(2, (byte) 3); //SetSketch + wseg = MemorySegment.ofArray(new byte[(preLongs + entries) << 3]); + wseg.set(JAVA_BYTE, 0, (byte) preLongs); //preLongs + wseg.set(JAVA_BYTE, 1, (byte) 2); //SerVer + wseg.set(JAVA_BYTE, 2, (byte) 3); //SetSketch - wmem.putByte(5, flags); - wmem.putShort(6, seedHash); - wmem.putInt(8, entries); + wseg.set(JAVA_BYTE, 5, flags); + wseg.set(JAVA_SHORT_UNALIGNED, 6, seedHash); + wseg.set(JAVA_INT_UNALIGNED, 8, entries); if (preLongs == 3) { - wmem.putLong(16, skV3.getThetaLong()); + wseg.set(JAVA_LONG_UNALIGNED, 16, skV3.getThetaLong()); } final long[] arr = skV3.getCache(); - wmem.putLongArray(preLongs * 8L, arr, 0, entries); - return wmem; + MemorySegment.copy(arr, 0, wseg, JAVA_LONG_UNALIGNED, preLongs << 3, entries); + return wseg; } } diff --git a/src/test/java/org/apache/datasketches/theta/BitPackingTest.java b/src/test/java/org/apache/datasketches/theta/BitPackingTest.java index c155bef77..35de3e342 100644 --- a/src/test/java/org/apache/datasketches/theta/BitPackingTest.java +++ b/src/test/java/org/apache/datasketches/theta/BitPackingTest.java @@ -22,6 +22,7 @@ import static org.testng.Assert.assertEquals; import org.apache.datasketches.common.Util; +import org.apache.datasketches.theta.BitPacking; import org.testng.annotations.Test; public class BitPackingTest { diff --git 
a/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java b/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java index 906c1914b..8541ed6bc 100644 --- a/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java @@ -26,10 +26,21 @@ import static org.testng.Assert.assertNull; import static org.testng.Assert.assertTrue; +import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.theta.CompactSketch; +import org.apache.datasketches.theta.DirectCompactSketch; +import org.apache.datasketches.theta.EmptyCompactSketch; +import org.apache.datasketches.theta.HashIterator; +import org.apache.datasketches.theta.HeapCompactSketch; +import org.apache.datasketches.theta.Intersection; +import org.apache.datasketches.theta.SingleItemSketch; +import org.apache.datasketches.theta.Sketch; +import org.apache.datasketches.theta.Sketches; +import org.apache.datasketches.theta.UpdateSketch; +import org.apache.datasketches.theta.WrappedCompactCompressedSketch; +import org.apache.datasketches.theta.WrappedCompactSketch; import org.testng.annotations.Test; import java.lang.foreign.Arena; @@ -60,36 +71,36 @@ public void checkHeapifyWrap(int k, int u, boolean ordered) { usk.update(i); } - /****ON HEAP MEMORY -- HEAPIFY****/ + /****ON HEAP MemorySegment -- HEAPIFY****/ CompactSketch refSk = usk.compact(ordered, null); byte[] barr = refSk.toByteArray(); - Memory srcMem = Memory.wrap(barr); - CompactSketch testSk = (CompactSketch) Sketch.heapify(srcMem); + MemorySegment srcSeg = MemorySegment.ofArray(barr); + CompactSketch testSk = (CompactSketch) Sketch.heapify(srcSeg); checkByRange(refSk, testSk, u, ordered); /**Via byte[]**/ byte[] byteArray = refSk.toByteArray(); - 
Memory heapROMem = Memory.wrap(byteArray); - testSk = (CompactSketch)Sketch.heapify(heapROMem); + MemorySegment heapROSeg = MemorySegment.ofArray(byteArray).asReadOnly(); + testSk = (CompactSketch)Sketch.heapify(heapROSeg); checkByRange(refSk, testSk, u, ordered); - /****OFF HEAP MEMORY -- WRAP****/ - //Prepare Memory for direct + /****OFF HEAP MemorySegment -- WRAP****/ + //Prepare MemorySegment for direct int bytes = usk.getCompactBytes(); //for Compact try (Arena arena = Arena.ofConfined()) { - WritableMemory directMem = WritableMemory.allocateDirect(bytes, arena); + MemorySegment directSeg = arena.allocate(bytes); /**Via CompactSketch.compact**/ - refSk = usk.compact(ordered, directMem); - testSk = (CompactSketch)Sketch.wrap(directMem); + refSk = usk.compact(ordered, directSeg); + testSk = (CompactSketch)Sketch.wrap(directSeg); checkByRange(refSk, testSk, u, ordered); /**Via CompactSketch.compact**/ - testSk = (CompactSketch)Sketch.wrap(directMem); + testSk = (CompactSketch)Sketch.wrap(directSeg); checkByRange(refSk, testSk, u, ordered); } catch (final Exception e) { throw new RuntimeException(e); @@ -111,9 +122,9 @@ private static void checkEmptySketch(Sketch testSk) { assertTrue(testSk instanceof EmptyCompactSketch); assertTrue(testSk.isEmpty()); assertTrue(testSk.isOrdered()); - assertNull(testSk.getMemory()); + assertNull(testSk.getMemorySegment()); assertFalse(testSk.isDirect()); - assertFalse(testSk.hasMemory()); + assertFalse(testSk.hasMemorySegment()); assertEquals(testSk.getSeedHash(), 0); assertEquals(testSk.getRetainedEntries(true), 0); assertEquals(testSk.getEstimate(), 0.0, 0.0); @@ -129,9 +140,9 @@ private static void checkSingleItemSketch(Sketch testSk, Sketch refSk) { assertTrue(testSk instanceof SingleItemSketch); assertFalse(testSk.isEmpty()); assertTrue(testSk.isOrdered()); - assertNull(testSk.getMemory()); + assertNull(testSk.getMemorySegment()); assertFalse(testSk.isDirect()); - assertFalse(testSk.hasMemory()); + 
assertFalse(testSk.hasMemorySegment()); assertEquals(testSk.getSeedHash(), refSk.getSeedHash()); assertEquals(testSk.getRetainedEntries(true), 1); assertEquals(testSk.getEstimate(), 1.0, 0.0); @@ -147,9 +158,9 @@ private static void checkOtherCompactSketch(Sketch testSk, Sketch refSk, boolean assertFalse(testSk.isEmpty()); assertNotNull(testSk.iterator()); assertEquals(testSk.isOrdered(), ordered); - if (refSk.hasMemory()) { - assertTrue(testSk.hasMemory()); - assertNotNull(testSk.getMemory()); + if (refSk.hasMemorySegment()) { + assertTrue(testSk.hasMemorySegment()); + assertNotNull(testSk.getMemorySegment()); if (ordered) { assertTrue(testSk.isOrdered()); } else { @@ -161,7 +172,7 @@ private static void checkOtherCompactSketch(Sketch testSk, Sketch refSk, boolean assertFalse(testSk.isDirect()); } } else { - assertFalse(testSk.hasMemory()); + assertFalse(testSk.hasMemorySegment()); assertTrue(testSk instanceof HeapCompactSketch); } assertEquals(testSk.getSeedHash(), refSk.getSeedHash()); @@ -178,14 +189,14 @@ public void checkDirectSingleItemSketch() { UpdateSketch sk = Sketches.updateSketchBuilder().build(); sk.update(1); int bytes = sk.getCompactBytes(); - WritableMemory wmem = WritableMemory.allocate(bytes); - sk.compact(true, wmem); - Sketch csk2 = Sketch.heapify(wmem); + MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); + sk.compact(true, wseg); + Sketch csk2 = Sketch.heapify(wseg); assertTrue(csk2 instanceof SingleItemSketch); } @Test(expectedExceptions = SketchesArgumentException.class) - public void checkMemTooSmall() { + public void checkSegTooSmall() { int k = 512; int u = k; boolean ordered = false; @@ -196,12 +207,12 @@ public void checkMemTooSmall() { int bytes = usk.getCompactBytes(); byte[] byteArray = new byte[bytes -8]; //too small - WritableMemory mem = WritableMemory.writableWrap(byteArray); - usk.compact(ordered, mem); + MemorySegment seg = MemorySegment.ofArray(byteArray); + usk.compact(ordered, seg); } @Test(expectedExceptions = 
SketchesArgumentException.class) - public void checkMemTooSmallOrdered() { + public void checkSegTooSmallOrdered() { int k = 512; int u = k; boolean ordered = true; @@ -212,8 +223,8 @@ public void checkMemTooSmallOrdered() { int bytes = usk.getCompactBytes(); byte[] byteArray = new byte[bytes -8]; //too small - WritableMemory mem = WritableMemory.writableWrap(byteArray); - usk.compact(ordered, mem); + MemorySegment seg = MemorySegment.ofArray(byteArray); + usk.compact(ordered, seg); } @Test @@ -230,38 +241,38 @@ public void checkCompactCachePart() { private static final boolean COMPACT = true; private static final boolean EMPTY = true; private static final boolean DIRECT = true; - private static final boolean MEMORY = true; + private static final boolean SEGMENT = true; private static final boolean ORDERED = true; private static final boolean ESTIMATION = true; @Test /** - * Empty, memory-based Compact sketches are always ordered + * Empty, segment-based Compact sketches are always ordered */ - public void checkEmptyMemoryCompactSketch() { + public void checkEmptyMemorySegmentCompactSketch() { UpdateSketch sk = Sketches.updateSketchBuilder().build(); - WritableMemory wmem1 = WritableMemory.allocate(16); - CompactSketch csk1 = sk.compact(false, wmem1); //the first parameter is ignored when empty - State state1 = new State("DirectCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, MEMORY, ORDERED, !ESTIMATION); + MemorySegment wseg1 = MemorySegment.ofArray(new byte[16]); + CompactSketch csk1 = sk.compact(false, wseg1); //the first parameter is ignored when empty + State state1 = new State("DirectCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, SEGMENT, ORDERED, !ESTIMATION); state1.check(csk1); - WritableMemory wmem2 = WritableMemory.allocate(16); - CompactSketch csk2 = sk.compact(false, wmem2); + MemorySegment wseg2 = MemorySegment.ofArray(new byte[16]); + CompactSketch csk2 = sk.compact(false, wseg2); state1.check(csk2); - assertNotEquals(csk1, csk2); //different object 
because memory is valid + assertNotEquals(csk1, csk2); //different object because MemorySegment is valid assertFalse(csk1 == csk2); - WritableMemory wmem3 = WritableMemory.allocate(16); - CompactSketch csk3 = csk1.compact(false, wmem3); + MemorySegment wseg3 = MemorySegment.ofArray(new byte[16]); + CompactSketch csk3 = csk1.compact(false, wseg3); state1.check(csk3); - assertNotEquals(csk1, csk3); //different object because memory is valid + assertNotEquals(csk1, csk3); //different object because MemorySegment is valid assertFalse(csk1 == csk3); CompactSketch csk4 = csk1.compact(false, null); - State state4 = new State("EmptyCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION); + State state4 = new State("EmptyCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, !SEGMENT, ORDERED, !ESTIMATION); state4.check(csk4); assertNotEquals(csk1, csk4); //different object because on heap @@ -276,29 +287,29 @@ public void checkEmptyMemoryCompactSketch() { @Test /** - * Single-Item, memory-based Compact sketches are always ordered: + * Single-Item, segment-based Compact sketches are always ordered: */ - public void checkSingleItemMemoryCompactSketch() { + public void checkSingleItemMemorySegmentCompactSketch() { UpdateSketch sk = Sketches.updateSketchBuilder().build(); sk.update(1); - WritableMemory wmem1 = WritableMemory.allocate(16); - CompactSketch csk1 = sk.compact(false, wmem1); //the first parameter is ignored when single item - State state1 = new State("DirectCompactSketch", 1, 16, COMPACT, !EMPTY, !DIRECT, MEMORY, ORDERED, !ESTIMATION); + MemorySegment wseg1 = MemorySegment.ofArray(new byte[16]); + CompactSketch csk1 = sk.compact(false, wseg1); //the first parameter is ignored when single item + State state1 = new State("DirectCompactSketch", 1, 16, COMPACT, !EMPTY, !DIRECT, SEGMENT, ORDERED, !ESTIMATION); state1.check(csk1); - WritableMemory wmem2 = WritableMemory.allocate(16); - CompactSketch csk2 = sk.compact(false, wmem2); //the first parameter is 
ignored when single item + MemorySegment wseg2 = MemorySegment.ofArray(new byte[16]); + CompactSketch csk2 = sk.compact(false, wseg2); //the first parameter is ignored when single item state1.check(csk2); - assertNotEquals(csk1, csk2); //different object because memory is valid + assertNotEquals(csk1, csk2); //different object because segment is valid assertFalse(csk1 == csk2); - WritableMemory wmem3 = WritableMemory.allocate(16); - CompactSketch csk3 = csk1.compact(false, wmem3); + MemorySegment wseg3 = MemorySegment.ofArray(new byte[16]); + CompactSketch csk3 = csk1.compact(false, wseg3); state1.check(csk3); - assertNotEquals(csk1, csk3); //different object because memory is valid + assertNotEquals(csk1, csk3); //different object because segment is valid assertFalse(csk1 == csk3); CompactSketch cskc = csk1.compact(); @@ -309,31 +320,31 @@ public void checkSingleItemMemoryCompactSketch() { } @Test - public void checkMultipleItemMemoryCompactSketch() { + public void checkMultipleItemMemorySegmentCompactSketch() { UpdateSketch sk = Sketches.updateSketchBuilder().build(); //This sequence is naturally out-of-order by the hash values. 
sk.update(1); sk.update(2); sk.update(3); - WritableMemory wmem1 = WritableMemory.allocate(50); - CompactSketch csk1 = sk.compact(true, wmem1); - State state1 = new State("DirectCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, MEMORY, ORDERED, !ESTIMATION); + MemorySegment wseg1 = MemorySegment.ofArray(new byte[50]); + CompactSketch csk1 = sk.compact(true, wseg1); + State state1 = new State("DirectCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, SEGMENT, ORDERED, !ESTIMATION); state1.check(csk1); - WritableMemory wmem2 = WritableMemory.allocate(50); - CompactSketch csk2 = sk.compact(false, wmem2); - State state2 = new State("DirectCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, MEMORY, !ORDERED, !ESTIMATION); + MemorySegment wseg2 = MemorySegment.ofArray(new byte[50]); + CompactSketch csk2 = sk.compact(false, wseg2); + State state2 = new State("DirectCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, SEGMENT, !ORDERED, !ESTIMATION); state2.check(csk2); - assertNotEquals(csk1, csk2); //different object because memory is valid + assertNotEquals(csk1, csk2); //different object because segment is valid assertFalse(csk1 == csk2); - WritableMemory wmem3 = WritableMemory.allocate(50); - CompactSketch csk3 = csk1.compact(false, wmem3); + MemorySegment wseg3 = MemorySegment.ofArray(new byte[50]); + CompactSketch csk3 = csk1.compact(false, wseg3); state2.check(csk3); - assertNotEquals(csk1, csk3); //different object because memory is valid + assertNotEquals(csk1, csk3); //different object because segment is valid assertFalse(csk1 == csk3); CompactSketch cskc = csk1.compact(); @@ -352,7 +363,7 @@ public void checkEmptyHeapCompactSketch() { UpdateSketch sk = Sketches.updateSketchBuilder().build(); CompactSketch csk1 = sk.compact(false, null); //the first parameter is ignored when empty - State state1 = new State("EmptyCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION); + State state1 = new State("EmptyCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, 
!SEGMENT, ORDERED, !ESTIMATION); state1.check(csk1); CompactSketch csk2 = sk.compact(false, null); //the first parameter is ignored when empty @@ -383,7 +394,7 @@ public void checkSingleItemHeapCompactSketch() { sk.update(1); CompactSketch csk1 = sk.compact(false, null); //the first parameter is ignored when single item - State state1 = new State("SingleItemSketch", 1, 16, COMPACT, !EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION); + State state1 = new State("SingleItemSketch", 1, 16, COMPACT, !EMPTY, !DIRECT, !SEGMENT, ORDERED, !ESTIMATION); state1.check(csk1); CompactSketch csk2 = sk.compact(false, null); //the first parameter is ignored when single item @@ -414,11 +425,11 @@ public void checkMultipleItemHeapCompactSketch() { sk.update(3); CompactSketch csk1 = sk.compact(true, null); //creates a new object - State state1 = new State("HeapCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION); + State state1 = new State("HeapCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, !SEGMENT, ORDERED, !ESTIMATION); state1.check(csk1); CompactSketch csk2 = sk.compact(false, null); //creates a new object, unordered - State state2 = new State("HeapCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, !MEMORY, !ORDERED, !ESTIMATION); + State state2 = new State("HeapCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, !SEGMENT, !ORDERED, !ESTIMATION); state2.check(csk2); assertNotEquals(csk1, csk2); //order is different and different objects @@ -427,10 +438,10 @@ public void checkMultipleItemHeapCompactSketch() { CompactSketch csk3 = csk1.compact(true, null); state1.check(csk3); - assertEquals(csk1, csk3); //the same object because wmem = null and csk1.ordered = dstOrdered + assertEquals(csk1, csk3); //the same object because wseg = null and csk1.ordered = dstOrdered assertTrue(csk1 == csk3); - assertNotEquals(csk2, csk3); //different object because wmem = null and csk2.ordered = false && dstOrdered = true + assertNotEquals(csk2, csk3); //different object because 
wseg = null and csk2.ordered = false && dstOrdered = true assertFalse(csk2 == csk3); CompactSketch cskc = csk1.compact(); @@ -445,19 +456,19 @@ public void checkHeapifySingleItemSketch() { UpdateSketch sk = Sketches.updateSketchBuilder().build(); sk.update(1); int bytes = Sketches.getMaxCompactSketchBytes(2); //1 more than needed - WritableMemory wmem = WritableMemory.allocate(bytes); - sk.compact(false, wmem); - Sketch csk = Sketch.heapify(wmem); + MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); + sk.compact(false, wseg); + Sketch csk = Sketch.heapify(wseg); assertTrue(csk instanceof SingleItemSketch); } @Test public void checkHeapifyEmptySketch() { UpdateSketch sk = Sketches.updateSketchBuilder().build(); - WritableMemory wmem = WritableMemory.allocate(16); //empty, but extra bytes - CompactSketch csk = sk.compact(false, wmem); //ignores order because it is empty + MemorySegment wseg = MemorySegment.ofArray(new byte[16]); //empty, but extra bytes + CompactSketch csk = sk.compact(false, wseg); //ignores order because it is empty assertTrue(csk instanceof DirectCompactSketch); - Sketch csk2 = Sketch.heapify(wmem); + Sketch csk2 = Sketch.heapify(wseg); assertTrue(csk2 instanceof EmptyCompactSketch); } @@ -466,7 +477,7 @@ public void checkGetCache() { UpdateSketch sk = Sketches.updateSketchBuilder().setP((float).5).build(); sk.update(7); int bytes = sk.getCompactBytes(); - CompactSketch csk = sk.compact(true, WritableMemory.allocate(bytes)); + CompactSketch csk = sk.compact(true, MemorySegment.ofArray(new byte[bytes])); long[] cache = csk.getCache(); assertTrue(cache.length == 0); } @@ -484,12 +495,12 @@ public void checkHeapCompactSketchCompact() { /** * This is checking the empty, single, exact and estimating cases of an off-heap * sketch to make sure they are being stored properly and to check the new capability - * of calling compact(boolean, Memory) on an already compact sketch. 
This allows the + * of calling compact(boolean, MemorySegment) on an already compact sketch. This allows the * user to be able to change the order and heap status of an already compact sketch. */ @Test public void checkDirectCompactSketchCompact() { - WritableMemory wmem1, wmem2; + MemorySegment wseg1, wseg2; CompactSketch csk1, csk2; int bytes; int lgK = 6; @@ -497,12 +508,12 @@ public void checkDirectCompactSketchCompact() { //empty UpdateSketch sk = Sketches.updateSketchBuilder().setLogNominalEntries(lgK).build(); bytes = sk.getCompactBytes(); //empty, 8 bytes - wmem1 = WritableMemory.allocate(bytes); - wmem2 = WritableMemory.allocate(bytes); - csk1 = sk.compact(false, wmem1); //place into memory as unordered + wseg1 = MemorySegment.ofArray(new byte[bytes]); + wseg2 = MemorySegment.ofArray(new byte[bytes]); + csk1 = sk.compact(false, wseg1); //place into MemorySegment as unordered assertTrue(csk1 instanceof DirectCompactSketch); assertTrue(csk1.isOrdered()); //empty is always ordered - csk2 = csk1.compact(false, wmem2); //set to unordered again + csk2 = csk1.compact(false, wseg2); //set to unordered again assertTrue(csk2 instanceof DirectCompactSketch); assertTrue(csk2.isOrdered()); //empty is always ordered assertTrue(csk2.getSeedHash() == 0); //empty has no seed hash @@ -511,12 +522,12 @@ public void checkDirectCompactSketchCompact() { //single sk.update(1); bytes = sk.getCompactBytes(); //single, 16 bytes - wmem1 = WritableMemory.allocate(bytes); - wmem2 = WritableMemory.allocate(bytes); - csk1 = sk.compact(false, wmem1); //place into memory as unordered + wseg1 = MemorySegment.ofArray(new byte[bytes]); + wseg2 = MemorySegment.ofArray(new byte[bytes]); + csk1 = sk.compact(false, wseg1); //place into MemorySegment as unordered assertTrue(csk1 instanceof DirectCompactSketch); assertTrue(csk1.isOrdered()); //single is always ordered - csk2 = csk1.compact(false, wmem2); //set to unordered again + csk2 = csk1.compact(false, wseg2); //set to unordered again 
assertTrue(csk2 instanceof DirectCompactSketch); assertTrue(csk2.isOrdered()); //single is always ordered assertTrue(csk2.getSeedHash() != 0); //has a seed hash @@ -525,12 +536,12 @@ public void checkDirectCompactSketchCompact() { //exact sk.update(2); bytes = sk.getCompactBytes(); //exact, 16 bytes preamble, 16 bytes data - wmem1 = WritableMemory.allocate(bytes); - wmem2 = WritableMemory.allocate(bytes); - csk1 = sk.compact(false, wmem1); //place into memory as unordered + wseg1 = MemorySegment.ofArray(new byte[bytes]); + wseg2 = MemorySegment.ofArray(new byte[bytes]); + csk1 = sk.compact(false, wseg1); //place into MemorySegment as unordered assertTrue(csk1 instanceof DirectCompactSketch); assertFalse(csk1.isOrdered()); //should be unordered - csk2 = csk1.compact(true, wmem2); //set to ordered + csk2 = csk1.compact(true, wseg2); //set to ordered assertTrue(csk2 instanceof DirectCompactSketch); assertTrue(csk2.isOrdered()); //should be ordered assertTrue(csk2.getSeedHash() != 0); //has a seed hash @@ -540,12 +551,12 @@ public void checkDirectCompactSketchCompact() { int n = 1 << (lgK + 1); for (int i = 2; i < n; i++) { sk.update(i); } bytes = sk.getCompactBytes(); //24 bytes preamble + curCount * 8, - wmem1 = WritableMemory.allocate(bytes); - wmem2 = WritableMemory.allocate(bytes); - csk1 = sk.compact(false, wmem1); //place into memory as unordered + wseg1 = MemorySegment.ofArray(new byte[bytes]); + wseg2 = MemorySegment.ofArray(new byte[bytes]); + csk1 = sk.compact(false, wseg1); //place into MemorySegment as unordered assertTrue(csk1 instanceof DirectCompactSketch); assertFalse(csk1.isOrdered()); //should be unordered - csk2 = csk1.compact(true, wmem2); //set to ordered + csk2 = csk1.compact(true, wseg2); //set to ordered assertTrue(csk2 instanceof DirectCompactSketch); assertTrue(csk2.isOrdered()); //should be ordered assertTrue(csk2.getSeedHash() != 0); //has a seed hash @@ -561,7 +572,7 @@ public void serializeDeserializeHeapV4() { } CompactSketch cs1 = 
sk.compact(); byte[] bytes = cs1.toByteArrayCompressed(); - CompactSketch cs2 = CompactSketch.heapify(Memory.wrap(bytes)); + CompactSketch cs2 = CompactSketch.heapify(MemorySegment.ofArray(bytes)); assertEquals(cs1.getRetainedEntries(), cs2.getRetainedEntries()); HashIterator it1 = cs1.iterator(); HashIterator it2 = cs2.iterator(); @@ -576,9 +587,9 @@ public void serializeDeserializeDirectV4() { for (int i = 0; i < 10000; i++) { sk.update(i); } - CompactSketch cs1 = sk.compact(true, WritableMemory.allocate(sk.getCompactBytes())); + CompactSketch cs1 = sk.compact(true, MemorySegment.ofArray(new byte[sk.getCompactBytes()])); byte[] bytes = cs1.toByteArrayCompressed(); - CompactSketch cs2 = CompactSketch.wrap(Memory.wrap(bytes)); + CompactSketch cs2 = CompactSketch.wrap(MemorySegment.ofArray(bytes)); assertEquals(cs1.getRetainedEntries(), cs2.getRetainedEntries()); HashIterator it1 = cs1.iterator(); HashIterator it2 = cs2.iterator(); @@ -630,19 +641,19 @@ private static class State { boolean compact = false; boolean empty = false; boolean direct = false; - boolean memory = false; + boolean hasSeg = false; boolean ordered = false; boolean estimation = false; State(String classType, int count, int bytes, boolean compact, boolean empty, boolean direct, - boolean memory, boolean ordered, boolean estimation) { + boolean hasSeg, boolean ordered, boolean estimation) { this.classType = classType; this.count = count; this.bytes = bytes; this.compact = compact; this.empty = empty; this.direct = direct; - this.memory = memory; + this.hasSeg = hasSeg; this.ordered = ordered; this.estimation = estimation; } @@ -654,7 +665,7 @@ void check(CompactSketch csk) { assertEquals(csk.isCompact(), compact, "Compact"); assertEquals(csk.isEmpty(), empty, "Empty"); assertEquals(csk.isDirect(), direct, "Direct"); - assertEquals(csk.hasMemory(), memory, "Memory"); + assertEquals(csk.hasMemorySegment(), hasSeg, "MemorySegment"); assertEquals(csk.isOrdered(), ordered, "Ordered"); 
assertEquals(csk.isEstimationMode(), estimation, "Estimation"); } diff --git a/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java index 5b9e28afe..ac08fd9e6 100644 --- a/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java @@ -19,6 +19,7 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; import static org.apache.datasketches.theta.ConcurrentHeapQuickSelectSketchTest.waitForBgPropagationToComplete; import static org.apache.datasketches.theta.PreambleUtil.FAMILY_BYTE; import static org.apache.datasketches.theta.PreambleUtil.LG_NOM_LONGS_BYTE; @@ -27,11 +28,20 @@ import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.theta.CompactSketch; +import org.apache.datasketches.theta.ConcurrentDirectQuickSelectSketch; +import org.apache.datasketches.theta.ConcurrentHeapThetaBuffer; +import org.apache.datasketches.theta.ConcurrentSharedThetaSketch; +import org.apache.datasketches.theta.DirectQuickSelectSketch; +import org.apache.datasketches.theta.Sketch; +import org.apache.datasketches.theta.Sketches; +import org.apache.datasketches.theta.UpdateSketch; +import org.apache.datasketches.theta.UpdateSketchBuilder; import org.apache.datasketches.theta.ConcurrentHeapQuickSelectSketchTest.SharedLocal; import org.apache.datasketches.thetacommon.HashOperations; import org.testng.annotations.Test; @@ -45,19 +55,19 @@ public class 
ConcurrentDirectQuickSelectSketchTest { @Test public void checkDirectCompactConversion() { int lgK = 9; - boolean useMem = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useMem); + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); assertTrue(sl.shared instanceof ConcurrentDirectQuickSelectSketch); assertTrue(sl.shared.compact().isCompact()); } @Test - public void checkHeapifyMemoryEstimating() { + public void checkHeapifyMemorySegmentEstimating() { int lgK = 9; int k = 1 << lgK; int u = 2*k; - boolean useMem = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useMem); + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); UpdateSketch shared = sl.shared; //off-heap UpdateSketch local = sl.local; @@ -69,10 +79,10 @@ public void checkHeapifyMemoryEstimating() { assertEquals(local.getClass().getSimpleName(), "ConcurrentHeapThetaBuffer"); //This sharedHeap is not linked to the concurrent local buffer - UpdateSketch sharedHeap = Sketches.heapifyUpdateSketch(sl.wmem); + UpdateSketch sharedHeap = Sketches.heapifyUpdateSketch(sl.wseg); assertEquals(sharedHeap.getClass().getSimpleName(), "HeapQuickSelectSketch"); - checkMemoryDirectProxyMethods(local, shared); + checkMemorySegmentDirectProxyMethods(local, shared); checkOtherProxyMethods(local, shared); checkOtherProxyMethods(local, sharedHeap); @@ -90,8 +100,8 @@ public void checkHeapifyMemoryEstimating() { public void checkHeapifyByteArrayExact() { int lgK = 9; int k = 1 << lgK; - boolean useMem = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useMem); + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); UpdateSketch shared = sl.shared; UpdateSketch local = sl.local; @@ -99,13 +109,13 @@ public void checkHeapifyByteArrayExact() { waitForBgPropagationToComplete(shared); byte[] serArr = shared.toByteArray(); - Memory srcMem = Memory.wrap(serArr); - Sketch recoveredShared = Sketch.heapify(srcMem); + MemorySegment srcSeg = 
MemorySegment.ofArray(serArr).asReadOnly(); + Sketch recoveredShared = Sketch.heapify(srcSeg); //reconstruct to Native/Direct final int bytes = Sketch.getMaxUpdateSketchBytes(k); - final WritableMemory wmem = WritableMemory.allocate(bytes); - shared = sl.bldr.buildSharedFromSketch((UpdateSketch)recoveredShared, wmem); + final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); + shared = sl.bldr.buildSharedFromSketch((UpdateSketch)recoveredShared, wseg); UpdateSketch local2 = sl.bldr.buildLocal(shared); assertEquals(local2.getEstimate(), k, 0.0); @@ -127,8 +137,8 @@ public void checkHeapifyByteArrayEstimating() { int k = 1 << lgK; int u = 2*k; - boolean useMem = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useMem); + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); UpdateSketch shared = sl.shared; UpdateSketch local = sl.local; @@ -141,13 +151,13 @@ public void checkHeapifyByteArrayEstimating() { assertEquals(local.isEstimationMode(), true); byte[] serArr = shared.toByteArray(); - Memory srcMem = Memory.wrap(serArr); - Sketch recoveredShared = Sketch.heapify(srcMem); + MemorySegment srcSeg = MemorySegment.ofArray(serArr).asReadOnly(); + Sketch recoveredShared = Sketch.heapify(srcSeg); //reconstruct to Native/Direct final int bytes = Sketch.getMaxUpdateSketchBytes(k); - final WritableMemory wmem = WritableMemory.allocate(bytes); - shared = sl.bldr.buildSharedFromSketch((UpdateSketch)recoveredShared, wmem); + final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); + shared = sl.bldr.buildSharedFromSketch((UpdateSketch)recoveredShared, wseg); UpdateSketch local2 = sl.bldr.buildLocal(shared); assertEquals(local2.getEstimate(), uskEst); @@ -159,14 +169,14 @@ public void checkHeapifyByteArrayEstimating() { } @Test - public void checkWrapMemoryEst() { + public void checkWrapMemorySegmentEst() { int lgK = 9; int k = 1 << lgK; int u = 2*k; //boolean estimating = (u > k); - boolean useMem = true; - SharedLocal sl = new 
SharedLocal(lgK, lgK, useMem); + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); UpdateSketch shared = sl.shared; UpdateSketch local = sl.local; @@ -178,7 +188,7 @@ public void checkWrapMemoryEst() { double sk1ub = local.getUpperBound(2); assertTrue(local.isEstimationMode()); - Sketch local2 = Sketch.wrap(sl.wmem); + Sketch local2 = Sketch.wrap(sl.wseg); assertEquals(local2.getEstimate(), sk1est); assertEquals(local2.getLowerBound(2), sk1lb); @@ -193,14 +203,14 @@ public void checkDQStoCompactForms() { int k = 1 << lgK; int u = 4*k; //boolean estimating = (u > k); - boolean useMem = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useMem); + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); UpdateSketch shared = sl.shared; UpdateSketch local = sl.local; assertEquals(local.getClass().getSimpleName(), "ConcurrentHeapThetaBuffer"); assertFalse(local.isDirect()); - assertTrue(local.hasMemory()); + assertTrue(local.hasMemorySegment()); for (int i=0; i lgNomLongs +1. 
@Test - public void checkResizeInBigMem() { + public void checkResizeInBigSeg() { int lgK = 14; int u = 1 << 20; - boolean useMem = true; - SharedLocal sl = new SharedLocal(lgK, lgK, SEED, useMem, true, 8); //mem is 8X larger than needed + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, SEED, useSeg, true, 8); //seg is 8X larger than needed UpdateSketch local = sl.local; for (int i = 0; i < u; i++) { local.update(i); } @@ -536,37 +546,37 @@ public void checkResizeInBigMem() { @Test(expectedExceptions = SketchesArgumentException.class) public void checkConstructorKtooSmall() { int lgK = 3; - boolean useMem = true; - new SharedLocal(lgK, lgK, useMem); + boolean useSeg = true; + new SharedLocal(lgK, lgK, useSeg); } @Test(expectedExceptions = SketchesArgumentException.class) - public void checkConstructorMemTooSmall() { + public void checkConstructorSegTooSmall() { int lgK = 4; int k = 1 << lgK; - WritableMemory wmem = WritableMemory.allocate(k/2); + MemorySegment wseg = MemorySegment.ofArray(new byte[k/2]); UpdateSketchBuilder bldr = new UpdateSketchBuilder(); bldr.setLogNominalEntries(lgK); - bldr.buildShared(wmem); + bldr.buildShared(wseg); } @Test(expectedExceptions = SketchesArgumentException.class) public void checkHeapifyIllegalFamilyID_heapify() { int lgK = 9; - boolean useMem = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useMem); - sl.wmem.putByte(FAMILY_BYTE, (byte) 0); //corrupt the Family ID byte - //try to heapify the corrupted mem - Sketch.heapify(sl.wmem); //catch in Sketch.constructHeapSketch + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); + sl.wseg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Family ID byte + //try to heapify the corrupted seg + Sketch.heapify(sl.wseg); //catch in Sketch.constructHeapSketch } @Test(expectedExceptions = SketchesArgumentException.class) public void checkBadLgNomLongs() { int lgK = 4; - boolean useMem = true; - SharedLocal sl = new SharedLocal(lgK, lgK, 
useMem); - sl.wmem.putByte(LG_NOM_LONGS_BYTE, (byte) 3); //Corrupt LgNomLongs byte - DirectQuickSelectSketch.writableWrap(sl.wmem, Util.DEFAULT_UPDATE_SEED); + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); + sl.wseg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte) 3); //Corrupt LgNomLongs byte + DirectQuickSelectSketch.writableWrap(sl.wseg, Util.DEFAULT_UPDATE_SEED); } @Test @@ -574,8 +584,8 @@ public void checkBackgroundPropagation() { int lgK = 4; int k = 1 << lgK; int u = 10*k; - boolean useMem = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useMem); + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); UpdateSketch shared = sl.shared; UpdateSketch local = sl.local; assertTrue(local.isEmpty()); @@ -612,8 +622,8 @@ public void checkBackgroundPropagation() { public void checkBadSerVer() { int lgK = 9; int k = 1 << lgK; - boolean useMem = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useMem); + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); UpdateSketch shared = sl.shared; UpdateSketch local = sl.local; assertTrue(local.isEmpty()); @@ -625,30 +635,30 @@ public void checkBadSerVer() { assertEquals(local.getEstimate(), k, 0.0); assertEquals(shared.getRetainedEntries(false), k); - sl.wmem.putByte(SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte - Sketch.wrap(sl.wmem); + sl.wseg.set(JAVA_BYTE, SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte + Sketch.wrap(sl.wseg); } @Test(expectedExceptions = SketchesArgumentException.class) public void checkWrapIllegalFamilyID_wrap() { int lgK = 9; - boolean useMem = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useMem); + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); - sl.wmem.putByte(FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte - //try to wrap the corrupted mem - Sketch.wrap(sl.wmem); //catch in Sketch.constructDirectSketch + sl.wseg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID 
byte + //try to wrap the corrupted seg + Sketch.wrap(sl.wseg); //catch in Sketch.constructDirectSketch } @Test(expectedExceptions = SketchesArgumentException.class) public void checkWrapIllegalFamilyID_direct() { int lgK = 9; - boolean useMem = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useMem); + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); - sl.wmem.putByte(FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte - //try to wrap the corrupted mem - DirectQuickSelectSketch.writableWrap(sl.wmem, Util.DEFAULT_UPDATE_SEED); + sl.wseg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte + //try to wrap the corrupted seg + DirectQuickSelectSketch.writableWrap(sl.wseg, Util.DEFAULT_UPDATE_SEED); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -656,29 +666,29 @@ public void checkHeapifySeedConflict() { int lgK = 9; long seed1 = 1021; long seed2 = Util.DEFAULT_UPDATE_SEED; - boolean useMem = true; - SharedLocal sl = new SharedLocal(lgK, lgK, seed1, useMem, true, 1); + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, seed1, useSeg, true, 1); UpdateSketch shared = sl.shared; - Memory srcMem = Memory.wrap(shared.toByteArray()); - Sketch.heapify(srcMem, seed2); + MemorySegment srcSeg = MemorySegment.ofArray(shared.toByteArray()).asReadOnly(); + Sketch.heapify(srcSeg, seed2); } @Test(expectedExceptions = SketchesArgumentException.class) public void checkCorruptLgNomLongs() { int lgK = 4; - boolean useMem = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useMem); + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); - sl.wmem.putByte(LG_NOM_LONGS_BYTE, (byte)2); //corrupt - Sketch.heapify(sl.wmem, Util.DEFAULT_UPDATE_SEED); + sl.wseg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte)2); //corrupt + Sketch.heapify(sl.wseg, Util.DEFAULT_UPDATE_SEED); } @Test(expectedExceptions = UnsupportedOperationException.class) public void checkIllegalHashUpdate() { int lgK = 4; - boolean 
useMem = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useMem); + boolean useSeg = true; + SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); UpdateSketch shared = sl.shared; shared.hashUpdate(1); } @@ -695,14 +705,14 @@ static void println(String s) { //System.out.println(s); //disable here } - private static void checkMemoryDirectProxyMethods(Sketch local, Sketch shared) { + private static void checkMemorySegmentDirectProxyMethods(Sketch local, Sketch shared) { assertEquals( - local.hasMemory(), - shared.hasMemory()); + local.hasMemorySegment(), + shared.hasMemorySegment()); assertEquals(local.isDirect(), shared.isDirect()); } - //Does not check hasMemory(), isDirect() + //Does not check hasMemorySegment(), isDirect() private static void checkOtherProxyMethods(Sketch local, Sketch shared) { assertEquals(local.getCompactBytes(), shared.getCompactBytes()); assertEquals(local.getCurrentBytes(), shared.getCurrentBytes()); diff --git a/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java index f642bcae5..e8c517afd 100644 --- a/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java @@ -19,6 +19,7 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; import static org.apache.datasketches.theta.PreambleUtil.FAMILY_BYTE; import static org.apache.datasketches.theta.PreambleUtil.LG_NOM_LONGS_BYTE; import static org.apache.datasketches.theta.PreambleUtil.SER_VER_BYTE; @@ -27,13 +28,23 @@ import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; +import java.lang.foreign.MemorySegment; import java.util.Arrays; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; 
-import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.theta.CompactSketch; +import org.apache.datasketches.theta.ConcurrentHeapQuickSelectSketch; +import org.apache.datasketches.theta.ConcurrentHeapThetaBuffer; +import org.apache.datasketches.theta.ConcurrentPropagationService; +import org.apache.datasketches.theta.ConcurrentSharedThetaSketch; +import org.apache.datasketches.theta.HeapQuickSelectSketch; +import org.apache.datasketches.theta.PreambleUtil; +import org.apache.datasketches.theta.Sketch; +import org.apache.datasketches.theta.Sketches; +import org.apache.datasketches.theta.UpdateSketch; +import org.apache.datasketches.theta.UpdateSketchBuilder; import org.testng.annotations.Test; /** @@ -63,12 +74,12 @@ public void checkBadSerVer() { assertEquals(shared.getRetainedEntries(false), u); byte[] serArr = shared.toByteArray(); - WritableMemory mem = WritableMemory.writableWrap(serArr); - Sketch sk = Sketch.heapify(mem, sl.seed); + MemorySegment seg = MemorySegment.ofArray(serArr); + Sketch sk = Sketch.heapify(seg, sl.seed); assertTrue(sk instanceof HeapQuickSelectSketch); //Intentional promotion to Parent - mem.putByte(SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte - Sketch.heapify(mem, sl.seed); + seg.set(JAVA_BYTE, SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte + Sketch.heapify(seg, sl.seed); } @Test @@ -111,11 +122,11 @@ public void checkIllegalSketchID_UpdateSketch() { assertEquals(local.getEstimate(), u, 0.0); assertEquals(shared.getRetainedEntries(false), u); byte[] byteArray = shared.toByteArray(); - WritableMemory mem = WritableMemory.writableWrap(byteArray); - mem.putByte(FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte + MemorySegment seg = MemorySegment.ofArray(byteArray); + seg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte - //try to heapify the corrupted mem - Sketch.heapify(mem, sl.seed); + //try to heapify the corrupted seg + 
Sketch.heapify(seg, sl.seed); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -125,8 +136,8 @@ public void checkHeapifySeedConflict() { long seed2 = Util.DEFAULT_UPDATE_SEED; SharedLocal sl = new SharedLocal(lgK, lgK, seed); byte[] byteArray = sl.shared.toByteArray(); - Memory srcMem = Memory.wrap(byteArray); - Sketch.heapify(srcMem, seed2); + MemorySegment srcSeg = MemorySegment.ofArray(byteArray); + Sketch.heapify(srcSeg, seed2); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -134,9 +145,9 @@ public void checkHeapifyCorruptLgNomLongs() { int lgK = 4; SharedLocal sl = new SharedLocal(lgK); byte[] serArr = sl.shared.toByteArray(); - WritableMemory srcMem = WritableMemory.writableWrap(serArr); - srcMem.putByte(LG_NOM_LONGS_BYTE, (byte)2); //corrupt - Sketch.heapify(srcMem, Util.DEFAULT_UPDATE_SEED); + MemorySegment srcSeg = MemorySegment.ofArray(serArr); + srcSeg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte)2); //corrupt + Sketch.heapify(srcSeg, Util.DEFAULT_UPDATE_SEED); } @Test(expectedExceptions = UnsupportedOperationException.class) @@ -161,13 +172,13 @@ public void checkHeapifyByteArrayExact() { waitForBgPropagationToComplete(shared); byte[] serArr = shared.toByteArray(); - Memory srcMem = Memory.wrap(serArr); - Sketch recoveredShared = Sketches.heapifyUpdateSketch(srcMem); + MemorySegment srcSeg = MemorySegment.ofArray(serArr).asReadOnly(); + Sketch recoveredShared = Sketches.heapifyUpdateSketch(srcSeg); //reconstruct to Native/Direct final int bytes = Sketch.getMaxUpdateSketchBytes(k); - final WritableMemory wmem = WritableMemory.allocate(bytes); - shared = sl.bldr.buildSharedFromSketch((UpdateSketch)recoveredShared, wmem); + final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); + shared = sl.bldr.buildSharedFromSketch((UpdateSketch)recoveredShared, wseg); UpdateSketch local2 = sl.bldr.buildLocal(shared); assertEquals(local2.getEstimate(), u, 0.0); @@ -200,12 +211,12 @@ public void checkHeapifyByteArrayEstimating() { 
assertTrue(local.isEstimationMode()); byte[] serArr = shared.toByteArray(); - Memory srcMem = Memory.wrap(serArr); - UpdateSketch recoveredShared = UpdateSketch.heapify(srcMem, sl.seed); + MemorySegment srcSeg = MemorySegment.ofArray(serArr).asReadOnly(); + UpdateSketch recoveredShared = UpdateSketch.heapify(srcSeg, sl.seed); final int bytes = Sketch.getMaxUpdateSketchBytes(k); - final WritableMemory wmem = WritableMemory.allocate(bytes); - shared = sl.bldr.buildSharedFromSketch(recoveredShared, wmem); + final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); + shared = sl.bldr.buildSharedFromSketch(recoveredShared, wseg); UpdateSketch local2 = sl.bldr.buildLocal(shared); assertEquals(local2.getEstimate(), localEst); assertEquals(local2.getLowerBound(2), localLB); @@ -216,7 +227,7 @@ public void checkHeapifyByteArrayEstimating() { } @Test - public void checkHeapifyMemoryEstimating() { + public void checkHeapifyMemorySegmentEstimating() { int lgK = 9; int k = 1 << lgK; int u = 2*k; //thus estimating @@ -235,16 +246,16 @@ public void checkHeapifyMemoryEstimating() { double localUB = local.getUpperBound(2); assertTrue(local.isEstimationMode()); assertFalse(local.isDirect()); - assertFalse(local.hasMemory()); + assertFalse(local.hasMemorySegment()); byte[] serArr = shared.toByteArray(); - Memory srcMem = Memory.wrap(serArr); - UpdateSketch recoveredShared = UpdateSketch.heapify(srcMem, Util.DEFAULT_UPDATE_SEED); + MemorySegment srcSeg = MemorySegment.ofArray(serArr).asReadOnly(); + UpdateSketch recoveredShared = UpdateSketch.heapify(srcSeg, Util.DEFAULT_UPDATE_SEED); final int bytes = Sketch.getMaxUpdateSketchBytes(k); - final WritableMemory wmem = WritableMemory.allocate(bytes); - shared = sl.bldr.buildSharedFromSketch(recoveredShared, wmem); + final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); + shared = sl.bldr.buildSharedFromSketch(recoveredShared, wseg); UpdateSketch local2 = sl.bldr.buildLocal(shared); assertEquals(local2.getEstimate(), 
localEst); @@ -268,7 +279,7 @@ public void checkHQStoCompactForms() { assertEquals(local.getClass().getSimpleName(), "ConcurrentHeapThetaBuffer"); assertFalse(local.isDirect()); - assertFalse(local.hasMemory()); + assertFalse(local.hasMemorySegment()); for (int i=0; i k); println("Est: "+est); - final byte[] memArr3 = inter2.toByteArray(); - final WritableMemory srcMem2 = WritableMemory.writableWrap(memArr3); - inter3 = Sketches.wrapIntersection(srcMem2); + final byte[] segArr3 = inter2.toByteArray(); + final MemorySegment srcSeg2 = MemorySegment.ofArray(segArr3); + inter3 = Sketches.wrapIntersection(srcSeg2); resultComp2 = inter3.getResult(false, null); est2 = resultComp2.getEstimate(); println("Est2: "+est2); @@ -660,15 +669,15 @@ public void checkWrap() { @Test public void checkDefaultMinSize() { final int k = 32; - final WritableMemory mem = WritableMemory.writableWrap(new byte[k*8 + PREBYTES]); - IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, mem); + final MemorySegment seg = MemorySegment.ofArray(new byte[k*8 + PREBYTES]); + IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, seg); } @Test(expectedExceptions = SketchesArgumentException.class) public void checkExceptionMinSize() { final int k = 16; - final WritableMemory mem = WritableMemory.writableWrap(new byte[k*8 + PREBYTES]); - IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, mem); + final MemorySegment seg = MemorySegment.ofArray(new byte[k*8 + PREBYTES]); + IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, seg); } @Test @@ -676,11 +685,11 @@ public void checkGetResult() { final int k = 1024; final UpdateSketch sk = Sketches.updateSketchBuilder().build(); - final int memBytes = getMaxIntersectionBytes(k); - final byte[] memArr = new byte[memBytes]; - final WritableMemory iMem = WritableMemory.writableWrap(memArr); + final int segBytes = getMaxIntersectionBytes(k); + final byte[] segArr = new byte[segBytes]; + final MemorySegment iSeg = 
MemorySegment.ofArray(segArr); - final Intersection inter = Sketches.setOperationBuilder().buildIntersection(iMem); + final Intersection inter = Sketches.setOperationBuilder().buildIntersection(iSeg); inter.intersect(sk); final CompactSketch csk = inter.getResult(); assertEquals(csk.getCompactBytes(), 8); @@ -690,31 +699,31 @@ public void checkGetResult() { public void checkFamily() { //cheap trick final int k = 16; - final WritableMemory mem = WritableMemory.writableWrap(new byte[k*16 + PREBYTES]); - final IntersectionImpl impl = IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, mem); + final MemorySegment seg = MemorySegment.ofArray(new byte[k*16 + PREBYTES]); + final IntersectionImpl impl = IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, seg); assertEquals(impl.getFamily(), Family.INTERSECTION); } @Test(expectedExceptions = SketchesArgumentException.class) public void checkExceptions1() { final int k = 16; - final WritableMemory mem = WritableMemory.writableWrap(new byte[k*16 + PREBYTES]); - IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, mem); + final MemorySegment seg = MemorySegment.ofArray(new byte[k*16 + PREBYTES]); + IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, seg); //corrupt SerVer - mem.putByte(PreambleUtil.SER_VER_BYTE, (byte) 2); - IntersectionImpl.wrapInstance(mem, Util.DEFAULT_UPDATE_SEED, false); + seg.set(JAVA_BYTE, PreambleUtil.SER_VER_BYTE, (byte) 2); + IntersectionImpl.wrapInstance(seg, Util.DEFAULT_UPDATE_SEED, false); } @Test(expectedExceptions = SketchesArgumentException.class) public void checkExceptions2() { final int k = 16; - final WritableMemory mem = WritableMemory.writableWrap(new byte[k*16 + PREBYTES]); - IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, mem); - //mem now has non-empty intersection + final MemorySegment seg = MemorySegment.ofArray(new byte[k*16 + PREBYTES]); + IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, seg); + 
//seg now has non-empty intersection //corrupt empty and CurCount - mem.setBits(PreambleUtil.FLAGS_BYTE, (byte) PreambleUtil.EMPTY_FLAG_MASK); - mem.putInt(PreambleUtil.RETAINED_ENTRIES_INT, 2); - IntersectionImpl.wrapInstance(mem, Util.DEFAULT_UPDATE_SEED, false); + Util.setBits(seg, PreambleUtil.FLAGS_BYTE, (byte) PreambleUtil.EMPTY_FLAG_MASK); + seg.set(JAVA_INT_UNALIGNED, PreambleUtil.RETAINED_ENTRIES_INT, 2); + IntersectionImpl.wrapInstance(seg, Util.DEFAULT_UPDATE_SEED, false); } //Check Alex's bug intersecting 2 direct full sketches with only overlap of 2 @@ -722,26 +731,26 @@ public void checkExceptions2() { @Test public void checkOverlappedDirect() { final int k = 1 << 4; - final int memBytes = 2*k*16 +PREBYTES; //plenty of room + final int segBytes = 2*k*16 +PREBYTES; //plenty of room final UpdateSketch sk1 = Sketches.updateSketchBuilder().setNominalEntries(k).build(); final UpdateSketch sk2 = Sketches.updateSketchBuilder().setNominalEntries(k).build(); for (int i=0; i>> 1); //corrupt theta and - mem1.putByte(LG_ARR_LONGS_BYTE, (byte) 10); //corrupt lgArrLongs + seg1.set(JAVA_LONG_UNALIGNED, THETA_LONG, Long.MAX_VALUE >>> 1); //corrupt theta and + seg1.set(JAVA_BYTE, LG_ARR_LONGS_BYTE, (byte) 10); //corrupt lgArrLongs try { - usk2 = DirectQuickSelectSketch.writableWrap(mem1, Util.DEFAULT_UPDATE_SEED); + usk2 = DirectQuickSelectSketch.writableWrap(seg1, Util.DEFAULT_UPDATE_SEED); fail("Expected SketchesArgumentException"); } catch (SketchesArgumentException e) { //pass } - mem1.putLong(THETA_LONG, Long.MAX_VALUE); //fix theta and - mem1.putByte(LG_ARR_LONGS_BYTE, (byte) 11); //fix lgArrLongs + seg1.set(JAVA_LONG_UNALIGNED, THETA_LONG, Long.MAX_VALUE); //fix theta and + seg1.set(JAVA_BYTE, LG_ARR_LONGS_BYTE, (byte) 11); //fix lgArrLongs byte badFlags = (byte) (BIG_ENDIAN_FLAG_MASK | COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | ORDERED_FLAG_MASK); - mem1.putByte(FLAGS_BYTE, badFlags); + seg1.set(JAVA_BYTE, FLAGS_BYTE, badFlags); try { - usk2 = 
DirectQuickSelectSketch.writableWrap(mem1, Util.DEFAULT_UPDATE_SEED); + usk2 = DirectQuickSelectSketch.writableWrap(seg1, Util.DEFAULT_UPDATE_SEED); fail("Expected SketchesArgumentException"); } catch (SketchesArgumentException e) { //pass } byte[] arr2 = Arrays.copyOfRange(arr1, 0, bytes-1); //corrupt length - WritableMemory mem2 = WritableMemory.writableWrap(arr2); + MemorySegment seg2 = MemorySegment.ofArray(arr2); try { - usk2 = DirectQuickSelectSketch.writableWrap(mem2, Util.DEFAULT_UPDATE_SEED); + usk2 = DirectQuickSelectSketch.writableWrap(seg2, Util.DEFAULT_UPDATE_SEED); fail("Expected SketchesArgumentException"); } catch (SketchesArgumentException e) { //pass @@ -826,42 +832,42 @@ public void checkCorruptRFWithInsufficientArray() { int bytes = Sketches.getMaxUpdateSketchBytes(k); byte[] arr = new byte[bytes]; - WritableMemory mem = WritableMemory.writableWrap(arr); + MemorySegment seg = MemorySegment.ofArray(arr); ResizeFactor rf = ResizeFactor.X8; // 3 - UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).setResizeFactor(rf).build(mem); + UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).setResizeFactor(rf).build(seg); usk.update(0); - insertLgResizeFactor(mem, 0); // corrupt RF: X1 - UpdateSketch dqss = DirectQuickSelectSketch.writableWrap(mem, Util.DEFAULT_UPDATE_SEED); + insertLgResizeFactor(seg, 0); // corrupt RF: X1 + UpdateSketch dqss = DirectQuickSelectSketch.writableWrap(seg, Util.DEFAULT_UPDATE_SEED); assertEquals(dqss.getResizeFactor(), ResizeFactor.X2); // force-promote to X2 } @Test public void checkFamilyAndRF() { int k = 16; - WritableMemory mem = WritableMemory.writableWrap(new byte[(k*16) +24]); - UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(mem); + MemorySegment seg = MemorySegment.ofArray(new byte[(k*16) + 24]); + UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(seg); assertEquals(sketch.getFamily(), Family.QUICKSELECT); 
assertEquals(sketch.getResizeFactor(), ResizeFactor.X8); } //checks Alex's bug where lgArrLongs > lgNomLongs +1. @Test - public void checkResizeInBigMem() { + public void checkResizeInBigSeg() { int k = 1 << 14; int u = 1 << 20; - WritableMemory mem = WritableMemory.writableWrap(new byte[(8*k*16) +24]); - UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(mem); + MemorySegment seg = MemorySegment.ofArray(new byte[(8*k*16) +24]); + UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(seg); for (int i=0; i k); println(quick1.toString()); - println(PreambleUtil.preambleToString(mem)); + println(PreambleUtil.preambleToString(seg)); - final WritableMemory uMem = WritableMemory.writableWrap(new byte[getMaxUnionBytes(k)]); - final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uMem); + final MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); + final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); union.union(quick1); - println(PreambleUtil.preambleToString(uMem)); + println(PreambleUtil.preambleToString(uSeg)); } @Test @@ -120,7 +126,7 @@ public void checkPreambleToStringExceptions() { byteArr = new byte[8]; byteArr[0] = (byte) 2; //needs min capacity of 16 try { //check preLongs == 2 fails - Sketch.toString(Memory.wrap(byteArr)); + Sketch.toString(MemorySegment.ofArray(byteArr).asReadOnly()); fail("Did not throw SketchesArgumentException."); } catch (final SketchesArgumentException e) { //expected @@ -150,70 +156,70 @@ public void checkPreLongs() { } comp = sketch.compact(false, null); byteArr = comp.toByteArray(); - println(Sketch.toString(Memory.wrap(byteArr))); //PreLongs = 3 + println(Sketch.toString(MemorySegment.ofArray(byteArr).asReadOnly())); //PreLongs = 3 } @Test public void checkInsertsAndExtracts() { final byte[] arr = new byte[32]; - final WritableMemory wmem = WritableMemory.writableWrap(arr); + final MemorySegment wseg = 
MemorySegment.ofArray(arr); int v = 0; - insertPreLongs(wmem, ++v); - assertEquals(extractPreLongs(wmem), v); - insertPreLongs(wmem, 0); + insertPreLongs(wseg, ++v); + assertEquals(extractPreLongs(wseg), v); + insertPreLongs(wseg, 0); - insertLgResizeFactor(wmem, 3); //limited to 2 bits - assertEquals(extractLgResizeFactor(wmem), 3); - insertLgResizeFactor(wmem, 0); + insertLgResizeFactor(wseg, 3); //limited to 2 bits + assertEquals(extractLgResizeFactor(wseg), 3); + insertLgResizeFactor(wseg, 0); - insertSerVer(wmem, ++v); - assertEquals(extractSerVer(wmem), v); - insertSerVer(wmem, 0); + insertSerVer(wseg, ++v); + assertEquals(extractSerVer(wseg), v); + insertSerVer(wseg, 0); - insertFamilyID(wmem, ++v); - assertEquals(extractFamilyID(wmem), v); - insertFamilyID(wmem, 0); + insertFamilyID(wseg, ++v); + assertEquals(extractFamilyID(wseg), v); + insertFamilyID(wseg, 0); - insertLgNomLongs(wmem, ++v); - assertEquals(extractLgNomLongs(wmem), v); - insertLgNomLongs(wmem, 0); + insertLgNomLongs(wseg, ++v); + assertEquals(extractLgNomLongs(wseg), v); + insertLgNomLongs(wseg, 0); - insertLgArrLongs(wmem, ++v); - assertEquals(extractLgArrLongs(wmem), v); - insertLgArrLongs(wmem, 0); + insertLgArrLongs(wseg, ++v); + assertEquals(extractLgArrLongs(wseg), v); + insertLgArrLongs(wseg, 0); - insertFlags(wmem, 3); - assertEquals(extractFlags(wmem), 3); - assertEquals(extractLgResizeRatioV1(wmem), 3); //also at byte 5, limited to 2 bits - insertFlags(wmem, 0); + insertFlags(wseg, 3); + assertEquals(extractFlags(wseg), 3); + assertEquals(extractLgResizeRatioV1(wseg), 3); //also at byte 5, limited to 2 bits + insertFlags(wseg, 0); - insertSeedHash(wmem, ++v); - assertEquals(extractSeedHash(wmem), v); - assertEquals(extractFlagsV1(wmem), v); //also at byte 6 - insertSeedHash(wmem, 0); + insertSeedHash(wseg, ++v); + assertEquals(extractSeedHash(wseg), v); + assertEquals(extractFlagsV1(wseg), v); //also at byte 6 + insertSeedHash(wseg, 0); - insertCurCount(wmem, ++v); - 
assertEquals(extractCurCount(wmem), v); - insertCurCount(wmem, 0); + insertCurCount(wseg, ++v); + assertEquals(extractCurCount(wseg), v); + insertCurCount(wseg, 0); - insertP(wmem, (float) 1.0); - assertEquals(extractP(wmem), (float) 1.0); - insertP(wmem, (float) 0.0); + insertP(wseg, (float) 1.0); + assertEquals(extractP(wseg), (float) 1.0); + insertP(wseg, (float) 0.0); - insertThetaLong(wmem, ++v); - assertEquals(extractThetaLong(wmem), v); - insertThetaLong(wmem, 0L); + insertThetaLong(wseg, ++v); + assertEquals(extractThetaLong(wseg), v); + insertThetaLong(wseg, 0L); - insertUnionThetaLong(wmem, ++v); - assertEquals(extractUnionThetaLong(wmem), v); - insertUnionThetaLong(wmem, 0L); + insertUnionThetaLong(wseg, ++v); + assertEquals(extractUnionThetaLong(wseg), v); + insertUnionThetaLong(wseg, 0L); - setEmpty(wmem); - assertTrue(isEmptyFlag(wmem)); + setEmpty(wseg); + assertTrue(isEmptyFlag(wseg)); - clearEmpty(wmem); - assertFalse(isEmptyFlag(wmem)); + clearEmpty(wseg); + assertFalse(isEmptyFlag(wseg)); } @Test diff --git a/src/test/java/org/apache/datasketches/theta2/ReadOnlyMemorySegmentTest.java b/src/test/java/org/apache/datasketches/theta/ReadOnlyMemorySegmentTest.java similarity index 95% rename from src/test/java/org/apache/datasketches/theta2/ReadOnlyMemorySegmentTest.java rename to src/test/java/org/apache/datasketches/theta/ReadOnlyMemorySegmentTest.java index 969643618..cc9672a4e 100644 --- a/src/test/java/org/apache/datasketches/theta2/ReadOnlyMemorySegmentTest.java +++ b/src/test/java/org/apache/datasketches/theta/ReadOnlyMemorySegmentTest.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.datasketches.theta2; +package org.apache.datasketches.theta; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; @@ -28,6 +28,12 @@ import java.nio.ByteOrder; import org.apache.datasketches.common.SketchesReadOnlyException; +import org.apache.datasketches.theta.Intersection; +import org.apache.datasketches.theta.SetOperation; +import org.apache.datasketches.theta.Sketch; +import org.apache.datasketches.theta.Sketches; +import org.apache.datasketches.theta.Union; +import org.apache.datasketches.theta.UpdateSketch; import org.testng.Assert; import org.testng.annotations.Test; diff --git a/src/test/java/org/apache/datasketches/theta/ReadOnlyMemoryTest.java b/src/test/java/org/apache/datasketches/theta/ReadOnlyMemoryTest.java deleted file mode 100644 index b77f3d660..000000000 --- a/src/test/java/org/apache/datasketches/theta/ReadOnlyMemoryTest.java +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta; - -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.fail; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -import org.apache.datasketches.common.SketchesReadOnlyException; -import org.apache.datasketches.memory.Memory; -import org.testng.Assert; -import org.testng.annotations.Test; - -public class ReadOnlyMemoryTest { - - @Test - public void wrapAndTryUpdatingUpdateSketch() { - UpdateSketch updateSketch = UpdateSketch.builder().build(); - updateSketch.update(1); - Memory mem = Memory.wrap(ByteBuffer.wrap(updateSketch.toByteArray()) - .asReadOnlyBuffer().order(ByteOrder.nativeOrder())); - UpdateSketch sketch = (UpdateSketch) Sketch.wrap(mem); - assertEquals(sketch.getEstimate(), 1.0); - - boolean thrown = false; - try { - sketch.update(2); - } catch (SketchesReadOnlyException e) { - thrown = true; - } - Assert.assertTrue(thrown); - } - - @Test - public void wrapCompactUnorderedSketch() { - UpdateSketch updateSketch = UpdateSketch.builder().build(); - updateSketch.update(1); - Memory mem = Memory.wrap(ByteBuffer.wrap(updateSketch.compact(false, null).toByteArray()) - .asReadOnlyBuffer().order(ByteOrder.nativeOrder())); - Sketch sketch = Sketch.wrap(mem); - assertEquals(sketch.getEstimate(), 1.0); - } - - @Test - public void wrapCompactOrderedSketch() { - UpdateSketch updateSketch = UpdateSketch.builder().build(); - updateSketch.update(1); - Memory mem = Memory.wrap(ByteBuffer.wrap(updateSketch.compact().toByteArray()) - .asReadOnlyBuffer().order(ByteOrder.nativeOrder())); - Sketch sketch = Sketch.wrap(mem); - assertEquals(sketch.getEstimate(), 1.0); - } - - @Test - public void heapifyUpdateSketch() { - UpdateSketch us1 = UpdateSketch.builder().build(); - us1.update(1); - Memory mem = Memory.wrap(ByteBuffer.wrap(us1.toByteArray()) - .asReadOnlyBuffer().order(ByteOrder.nativeOrder())); - // downcasting is not recommended, for testing only - UpdateSketch us2 = (UpdateSketch) 
Sketch.heapify(mem); - us2.update(2); - assertEquals(us2.getEstimate(), 2.0); - } - - @Test - public void heapifyCompactUnorderedSketch() { - UpdateSketch updateSketch = UpdateSketch.builder().build(); - updateSketch.update(1); - Memory mem = Memory.wrap(ByteBuffer.wrap(updateSketch.compact(false, null).toByteArray()) - .asReadOnlyBuffer().order(ByteOrder.nativeOrder())); - Sketch sketch = Sketch.heapify(mem); - assertEquals(sketch.getEstimate(), 1.0); - } - - @Test - public void heapifyCompactOrderedSketch() { - UpdateSketch updateSketch = UpdateSketch.builder().build(); - updateSketch.update(1); - Memory mem = Memory.wrap(ByteBuffer.wrap(updateSketch.compact().toByteArray()) - .asReadOnlyBuffer().order(ByteOrder.nativeOrder())); - Sketch sketch = Sketch.heapify(mem); - assertEquals(sketch.getEstimate(), 1.0); - } - - @Test - public void heapifyUnion() { - Union u1 = SetOperation.builder().buildUnion(); - u1.update(1); - Memory mem = Memory.wrap(ByteBuffer.wrap(u1.toByteArray()) - .asReadOnlyBuffer().order(ByteOrder.nativeOrder())); - Union u2 = (Union) SetOperation.heapify(mem); - u2.update(2); - Assert.assertEquals(u2.getResult().getEstimate(), 2.0); - } - - @Test - public void wrapAndTryUpdatingUnion() { - Union u1 = SetOperation.builder().buildUnion(); - u1.update(1); - Memory mem = Memory.wrap(ByteBuffer.wrap(u1.toByteArray()) - .asReadOnlyBuffer().order(ByteOrder.nativeOrder())); - - Union u2 = (Union) Sketches.wrapSetOperation(mem); - Union u3 = Sketches.wrapUnion(mem); - Assert.assertEquals(u2.getResult().getEstimate(), 1.0); - Assert.assertEquals(u3.getResult().getEstimate(), 1.0); - - try { - u2.update(2); - fail(); - } catch (SketchesReadOnlyException e) { - //expected - } - - try { - u3.update(2); - fail(); - } catch (SketchesReadOnlyException e) { - //expected - } - } - - @Test - public void heapifyIntersection() { - UpdateSketch us1 = UpdateSketch.builder().build(); - us1.update(1); - us1.update(2); - UpdateSketch us2 = 
UpdateSketch.builder().build(); - us2.update(2); - us2.update(3); - - Intersection i1 = SetOperation.builder().buildIntersection(); - i1.intersect(us1); - i1.intersect(us2); - Memory mem = Memory.wrap(ByteBuffer.wrap(i1.toByteArray()) - .asReadOnlyBuffer().order(ByteOrder.nativeOrder())); - Intersection i2 = (Intersection) SetOperation.heapify(mem); - i2.intersect(us1); - Assert.assertEquals(i2.getResult().getEstimate(), 1.0); - } - - @Test - public void wrapIntersection() { - UpdateSketch us1 = UpdateSketch.builder().build(); - us1.update(1); - us1.update(2); - UpdateSketch us2 = UpdateSketch.builder().build(); - us2.update(2); - us2.update(3); - - Intersection i1 = SetOperation.builder().buildIntersection(); - i1.intersect(us1); - i1.intersect(us2); - Memory mem = Memory.wrap(ByteBuffer.wrap(i1.toByteArray()) - .asReadOnlyBuffer().order(ByteOrder.nativeOrder())); - Intersection i2 = (Intersection) SetOperation.wrap(mem); - Assert.assertEquals(i2.getResult().getEstimate(), 1.0); - - boolean thrown = false; - try { - i2.intersect(us1); - } catch (SketchesReadOnlyException e) { - thrown = true; - } - Assert.assertTrue(thrown); - } - - @Test - public void printlnTest() { - println("PRINTING: "+this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(String s) { - //System.out.println(s); //disable here - } - -} diff --git a/src/test/java/org/apache/datasketches/theta/SetOperationTest.java b/src/test/java/org/apache/datasketches/theta/SetOperationTest.java index ee4350744..7ba06bb73 100644 --- a/src/test/java/org/apache/datasketches/theta/SetOperationTest.java +++ b/src/test/java/org/apache/datasketches/theta/SetOperationTest.java @@ -23,20 +23,27 @@ import static org.apache.datasketches.theta.Sketch.getMaxUpdateSketchBytes; import static org.apache.datasketches.thetacommon.HashOperations.minLgHashTableSize; import static org.testng.Assert.assertEquals; -//import static org.testng.Assert.assertTrue; import static 
org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; +import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; import java.nio.ByteOrder; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.DefaultMemoryRequestServer; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.MemoryRequestServer; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.common.Util; +import org.apache.datasketches.theta.AnotB; +import org.apache.datasketches.theta.CompactSketch; +import org.apache.datasketches.theta.Intersection; +import org.apache.datasketches.theta.PreambleUtil; +import org.apache.datasketches.theta.SetOperation; +import org.apache.datasketches.theta.SetOperationBuilder; +import org.apache.datasketches.theta.Sketch; +import org.apache.datasketches.theta.Sketches; +import org.apache.datasketches.theta.Union; +import org.apache.datasketches.theta.UpdateSketch; import org.apache.datasketches.thetacommon.ThetaUtil; import org.testng.annotations.Test; @@ -95,10 +102,6 @@ public void checkBuilder2() { bldr.setNominalEntries(k); assertEquals(bldr.getLgNominalEntries(), lgK); - final MemoryRequestServer mrs = new DefaultMemoryRequestServer(); - bldr.setMemoryRequestServer(mrs); - assertEquals(bldr.getMemoryRequestServer(), mrs); - println(bldr.toString()); } @@ -131,9 +134,9 @@ public void checkBuilderValidP() { } @Test(expectedExceptions = SketchesArgumentException.class) - public void checkBuilderAnotB_noMem() { - final WritableMemory mem = WritableMemory.writableWrap(new byte[64]); - SetOperation.builder().build(Family.A_NOT_B, mem); + public void checkBuilderAnotB_noSeg() { + final MemorySegment seg = MemorySegment.ofArray(new byte[64]); + SetOperation.builder().build(Family.A_NOT_B, seg); } @Test(expectedExceptions = 
SketchesArgumentException.class) @@ -174,8 +177,8 @@ public void checkIllegalSetOpHeapify() { usk1.update(i); //64 } final byte[] byteArray = usk1.toByteArray(); - final Memory mem = Memory.wrap(byteArray); - SetOperation.heapify(mem); + final MemorySegment seg = MemorySegment.ofArray(byteArray).asReadOnly(); + SetOperation.heapify(seg); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -186,8 +189,8 @@ public void checkIllegalSetOpWrap() { usk1.update(i); //64 } final byte[] byteArray = usk1.toByteArray(); - final Memory mem = Memory.wrap(byteArray); - Sketches.wrapIntersection(mem); + final MemorySegment seg = MemorySegment.ofArray(byteArray).asReadOnly(); + Sketches.wrapIntersection(seg); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -197,10 +200,10 @@ public void checkIllegalSetOpWrap2() { for (int i=0; i k, true); assertEquals(csk.getThetaLong() < Long.MAX_VALUE, true); assertEquals(csk.isDirect(), false); - assertEquals(csk.hasMemory(), false); + assertEquals(csk.hasMemorySegment(), false); assertEquals(csk.isOrdered(), true); csk = generate(State.THLT1_CNT0_FALSE, k); @@ -411,7 +419,7 @@ public void checkGenerator() { assertEquals(csk.getRetainedEntries(true), 0); assertEquals(csk.getThetaLong() < Long.MAX_VALUE, true); assertEquals(csk.isDirect(), false); - assertEquals(csk.hasMemory(), false); + assertEquals(csk.hasMemorySegment(), false); assertEquals(csk.isOrdered(), true); csk = generate(State.THEQ1_CNT0_TRUE, k); @@ -420,20 +428,20 @@ public void checkGenerator() { assertEquals(csk.getRetainedEntries(true), 0); assertEquals(csk.getThetaLong() < Long.MAX_VALUE, false); assertEquals(csk.isDirect(), false); - assertEquals(csk.hasMemory(), false); + assertEquals(csk.hasMemorySegment(), false); assertEquals(csk.isOrdered(), true); - csk = generate(State.EST_MEMORY_UNORDERED, k); + csk = generate(State.EST_SEGMENT_UNORDERED, k); assertEquals(csk.isEmpty(), false); assertEquals(csk.isEstimationMode(), true); 
assertEquals(csk.getRetainedEntries(true) > k, true); assertEquals(csk.getThetaLong() < Long.MAX_VALUE, true); assertEquals(csk.isDirect(), false); - assertEquals(csk.hasMemory(), true); + assertEquals(csk.hasMemorySegment(), true); assertEquals(csk.isOrdered(), false); } - enum State {NULL, EMPTY, SINGLE, EXACT, EST_HEAP, THLT1_CNT0_FALSE, THEQ1_CNT0_TRUE, EST_MEMORY_UNORDERED} + enum State {NULL, EMPTY, SINGLE, EXACT, EST_HEAP, THLT1_CNT0_FALSE, THEQ1_CNT0_TRUE, EST_SEGMENT_UNORDERED} private static CompactSketch generate(State state, int k) { UpdateSketch sk = null; @@ -483,15 +491,15 @@ private static CompactSketch generate(State state, int k) { csk = sk.compact(true, null); //compact as {Th < 1.0, 0, T} break; } - case EST_MEMORY_UNORDERED : { + case EST_SEGMENT_UNORDERED : { sk = Sketches.updateSketchBuilder().setNominalEntries(k).build(); for (int i = 0; i < (4 * k); i++) { sk.update(i); } int bytes = Sketch.getMaxCompactSketchBytes(sk.getRetainedEntries(true)); byte[] byteArr = new byte[bytes]; - WritableMemory mem = WritableMemory.writableWrap(byteArr); - csk = sk.compact(false, mem); + MemorySegment wseg = MemorySegment.ofArray(byteArr); + csk = sk.compact(false, wseg); break; } } diff --git a/src/test/java/org/apache/datasketches/theta/SingleItemSketchTest.java b/src/test/java/org/apache/datasketches/theta/SingleItemSketchTest.java index 45aaf6952..2dd4df315 100644 --- a/src/test/java/org/apache/datasketches/theta/SingleItemSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/SingleItemSketchTest.java @@ -19,6 +19,8 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static org.apache.datasketches.hash.MurmurHash3.hash; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; @@ -26,10 +28,19 @@ import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; +import 
java.lang.foreign.MemorySegment; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.theta.AnotB; +import org.apache.datasketches.theta.CompactSketch; +import org.apache.datasketches.theta.DirectCompactSketch; +import org.apache.datasketches.theta.Intersection; +import org.apache.datasketches.theta.SingleItemSketch; +import org.apache.datasketches.theta.Sketch; +import org.apache.datasketches.theta.Sketches; +import org.apache.datasketches.theta.Union; +import org.apache.datasketches.theta.UpdateSketch; +import org.apache.datasketches.theta.UpdateSketchBuilder; import org.testng.annotations.Test; /** @@ -126,7 +137,7 @@ public void checkSketchInterface() { assertEquals(sis.getRetainedEntries(true), 1); assertEquals(sis.getUpperBound(1), 1.0); assertFalse(sis.isDirect()); - assertFalse(sis.hasMemory()); + assertFalse(sis.hasMemorySegment()); assertFalse(sis.isEmpty()); assertTrue(sis.isOrdered()); } @@ -147,12 +158,12 @@ public void checkLessThanThetaLong() { public void checkSerDe() { SingleItemSketch sis = SingleItemSketch.create(1); byte[] byteArr = sis.toByteArray(); - Memory mem = Memory.wrap(byteArr); + MemorySegment seg = MemorySegment.ofArray(byteArr); final short defaultSeedHash = Util.computeSeedHash(Util.DEFAULT_UPDATE_SEED); - SingleItemSketch sis2 = SingleItemSketch.heapify(mem, defaultSeedHash); + SingleItemSketch sis2 = SingleItemSketch.heapify(seg, defaultSeedHash); assertEquals(sis2.getEstimate(), 1.0); - SingleItemSketch sis3 = SingleItemSketch.heapify(mem, defaultSeedHash); + SingleItemSketch sis3 = SingleItemSketch.heapify(seg , defaultSeedHash); assertEquals(sis3.getEstimate(), 1.0); Union union = Sketches.setOperationBuilder().buildUnion(); @@ -167,7 +178,7 @@ public void checkSerDe() { @Test public void checkRestricted() { SingleItemSketch sis = 
SingleItemSketch.create(1); - assertNull(sis.getMemory()); + assertNull(sis.getMemorySegment()); assertEquals(sis.getCompactPreambleLongs(), 1); } @@ -175,8 +186,8 @@ public void checkRestricted() { public void unionWrapped() { Sketch sketch = SingleItemSketch.create(1); Union union = Sketches.setOperationBuilder().buildUnion(); - Memory mem = Memory.wrap(sketch.toByteArray()); - union.union(mem); + MemorySegment seg = MemorySegment.ofArray(sketch.toByteArray()); + union.union(seg ); assertEquals(union.getResult().getEstimate(), 1, 0); } @@ -195,8 +206,8 @@ public void buildAndCompact() { //Off-heap bytes = Sketches.getMaxUpdateSketchBytes(32); - WritableMemory wmem = WritableMemory.writableWrap(new byte[bytes]); - sk1= Sketches.updateSketchBuilder().setNominalEntries(32).build(wmem); + MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); + sk1= Sketches.updateSketchBuilder().setNominalEntries(32).build(wseg ); sk1.update(1); csk = sk1.compact(true, null); assertTrue(csk instanceof SingleItemSketch); @@ -204,10 +215,10 @@ public void buildAndCompact() { assertTrue(csk instanceof SingleItemSketch); bytes = Sketches.getMaxCompactSketchBytes(1); - wmem = WritableMemory.writableWrap(new byte[bytes]); - csk = sk1.compact(true, wmem); + wseg = MemorySegment.ofArray(new byte[bytes]); + csk = sk1.compact(true, wseg ); assertTrue(csk.isOrdered()); - csk = sk1.compact(false, wmem); + csk = sk1.compact(false, wseg ); assertTrue(csk.isOrdered()); } @@ -230,8 +241,8 @@ public void intersection() { //Intersection off-heap bytes = Sketches.getMaxIntersectionBytes(32); - WritableMemory wmem = WritableMemory.writableWrap(new byte[bytes]); - inter = Sketches.setOperationBuilder().buildIntersection(wmem); + MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); + inter = Sketches.setOperationBuilder().buildIntersection(wseg ); inter.intersect(sk1); inter.intersect(sk2); csk = inter.getResult(true, null); @@ -258,8 +269,8 @@ public void union() { //Union off-heap bytes = 
Sketches.getMaxUnionBytes(32); - WritableMemory wmem = WritableMemory.writableWrap(new byte[bytes]); - union = Sketches.setOperationBuilder().buildUnion(wmem); + MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); + union = Sketches.setOperationBuilder().buildUnion(wseg ); union.union(sk1); union.union(sk2); csk = union.getResult(true, null); @@ -294,10 +305,10 @@ public void checkHeapifyInstance() { Intersection inter = Sketches.setOperationBuilder().buildIntersection(); inter.intersect(sk1); inter.intersect(sk2); - WritableMemory wmem = WritableMemory.writableWrap(new byte[16]); - CompactSketch csk = inter.getResult(false, wmem); + MemorySegment wseg = MemorySegment.ofArray(new byte[16]); + CompactSketch csk = inter.getResult(false, wseg ); assertTrue(csk.isOrdered()); - Sketch csk2 = Sketches.heapifySketch(wmem); + Sketch csk2 = Sketches.heapifySketch(wseg ); assertTrue(csk2 instanceof SingleItemSketch); println(csk2.toString(true, true, 1, true)); } @@ -307,11 +318,11 @@ public void checkSingleItemBadFlags() { final short defaultSeedHash = Util.computeSeedHash(Util.DEFAULT_UPDATE_SEED); UpdateSketch sk1 = new UpdateSketchBuilder().build(); sk1.update(1); - WritableMemory wmem = WritableMemory.allocate(16); - sk1.compact(true, wmem); - wmem.putByte(5, (byte) 0); //corrupt flags to zero + MemorySegment wseg = MemorySegment.ofArray(new byte[16]); + sk1.compact(true, wseg ); + wseg .set(JAVA_BYTE, 5, (byte) 0); //corrupt flags to zero try { - SingleItemSketch.heapify(wmem, defaultSeedHash); //fails due to corrupted flags bytes + SingleItemSketch.heapify(wseg , defaultSeedHash); //fails due to corrupted flags bytes fail(); } catch (SketchesArgumentException e) { } } @@ -334,7 +345,7 @@ public void checkSingleItemCompact() { assertTrue(csk instanceof SingleItemSketch); CompactSketch csk2 = csk.compact(); assertEquals(csk, csk2); - CompactSketch csk3 = csk.compact(true, WritableMemory.allocate(16)); + CompactSketch csk3 = csk.compact(true, 
MemorySegment.ofArray(new byte[16])); assertTrue(csk3 instanceof DirectCompactSketch); assertEquals(csk2.getCurrentPreambleLongs(), 1); assertEquals(csk3.getCurrentPreambleLongs(), 1); @@ -345,20 +356,20 @@ public void checkSingleItemCompact() { static final long SiSkPre0WoutSiFlag = 0x93cc1a0000030301L; static final long Hash = 0x05a186bdcb7df915L; - static Memory siSkWithSiFlag24Bytes() { + static MemorySegment siSkWithSiFlag24Bytes() { int cap = 24; //8 extra bytes - WritableMemory wmem = WritableMemory.allocate(cap); - wmem.putLong(0, SiSkPre0WithSiFlag); - wmem.putLong(8, Hash); - return wmem; + MemorySegment wseg = MemorySegment.ofArray(new byte[cap]); + wseg .set(JAVA_LONG_UNALIGNED, 0, SiSkPre0WithSiFlag); + wseg .set(JAVA_LONG_UNALIGNED, 8, Hash); + return wseg ; } - static Memory siSkWoutSiFlag24Bytes() { + static MemorySegment siSkWoutSiFlag24Bytes() { int cap = 24; //8 extra bytes - WritableMemory wmem = WritableMemory.allocate(cap); - wmem.putLong(0, SiSkPre0WoutSiFlag); - wmem.putLong(8, Hash); - return wmem; + MemorySegment wseg = MemorySegment.ofArray(new byte[cap]); + wseg .set(JAVA_LONG_UNALIGNED, 0, SiSkPre0WoutSiFlag); + wseg .set(JAVA_LONG_UNALIGNED, 8, Hash); + return wseg; } @Test diff --git a/src/test/java/org/apache/datasketches/theta/SketchTest.java b/src/test/java/org/apache/datasketches/theta/SketchTest.java index 8d3d72c75..592453749 100644 --- a/src/test/java/org/apache/datasketches/theta/SketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/SketchTest.java @@ -26,7 +26,7 @@ import static org.apache.datasketches.common.ResizeFactor.X2; import static org.apache.datasketches.common.ResizeFactor.X4; import static org.apache.datasketches.common.ResizeFactor.X8; -import static org.apache.datasketches.common.Util.*; +import static org.apache.datasketches.common.Util.isSameResource; import static org.apache.datasketches.theta.BackwardConversions.convertSerVer3toSerVer1; import static 
org.apache.datasketches.theta.BackwardConversions.convertSerVer3toSerVer2; import static org.apache.datasketches.theta.CompactOperations.computeCompactPreLongs; @@ -34,17 +34,25 @@ import static org.apache.datasketches.theta.PreambleUtil.FLAGS_BYTE; import static org.apache.datasketches.theta.PreambleUtil.READ_ONLY_FLAG_MASK; import static org.apache.datasketches.theta.Sketch.getMaxCompactSketchBytes; +import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; +import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.theta.CompactSketch; +import org.apache.datasketches.theta.DirectCompactSketch; +import org.apache.datasketches.theta.PreambleUtil; +import org.apache.datasketches.theta.SetOperation; +import org.apache.datasketches.theta.Sketch; +import org.apache.datasketches.theta.Sketches; +import org.apache.datasketches.theta.Union; +import org.apache.datasketches.theta.UpdateSketch; import org.apache.datasketches.thetacommon.ThetaUtil; import org.testng.annotations.Test; @@ -182,8 +190,8 @@ public void checkBuilderResizeFactor() { public void checkWrapBadFamily() { UpdateSketch sketch = UpdateSketch.builder().setFamily(Family.ALPHA).setNominalEntries(1024).build(); byte[] byteArr = sketch.toByteArray(); - Memory srcMem = Memory.wrap(byteArr); - Sketch.wrap(srcMem); + MemorySegment srcSeg = MemorySegment.ofArray(byteArr); + Sketch.wrap(srcSeg); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -196,12 +204,12 @@ public void checkBadFamily() { 
public void checkSerVer() { UpdateSketch sketch = UpdateSketch.builder().setNominalEntries(1024).build(); byte[] sketchArray = sketch.toByteArray(); - Memory mem = Memory.wrap(sketchArray); - int serVer = Sketch.getSerializationVersion(mem); + MemorySegment seg = MemorySegment.ofArray(sketchArray); + int serVer = Sketch.getSerializationVersion(seg); assertEquals(serVer, 3); - WritableMemory wmem = WritableMemory.writableWrap(sketchArray); - UpdateSketch sk2 = UpdateSketch.wrap(wmem); - serVer = sk2.getSerializationVersion(wmem); + MemorySegment wseg = MemorySegment.ofArray(sketchArray); + UpdateSketch sk2 = UpdateSketch.wrap(wseg); + serVer = sk2.getSerializationVersion(wseg); assertEquals(serVer, 3); } @@ -210,10 +218,10 @@ public void checkHeapifyAlphaCompactExcep() { int k = 512; Sketch sketch1 = UpdateSketch.builder().setFamily(ALPHA).setNominalEntries(k).build(); byte[] byteArray = sketch1.toByteArray(); - WritableMemory mem = WritableMemory.writableWrap(byteArray); + MemorySegment seg = MemorySegment.ofArray(byteArray); //corrupt: - mem.setBits(FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); - Sketch.heapify(mem); + Util.setBits(seg, FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); + Sketch.heapify(seg); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -221,10 +229,10 @@ public void checkHeapifyQSCompactExcep() { int k = 512; Sketch sketch1 = UpdateSketch.builder().setFamily(QUICKSELECT).setNominalEntries(k).build(); byte[] byteArray = sketch1.toByteArray(); - WritableMemory mem = WritableMemory.writableWrap(byteArray); + MemorySegment seg = MemorySegment.ofArray(byteArray); //corrupt: - mem.setBits(FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); - Sketch.heapify(mem); + Util.setBits(seg, FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); + Sketch.heapify(seg); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -233,11 +241,11 @@ public void checkHeapifyNotCompactExcep() { UpdateSketch sketch1 = 
UpdateSketch.builder().setFamily(QUICKSELECT).setNominalEntries(k).build(); int bytes = Sketch.getMaxCompactSketchBytes(0); byte[] byteArray = new byte[bytes]; - WritableMemory mem = WritableMemory.writableWrap(byteArray); - sketch1.compact(false, mem); + MemorySegment seg = MemorySegment.ofArray(byteArray); + sketch1.compact(false, seg); //corrupt: - mem.clearBits(FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); - Sketch.heapify(mem); + Util.clearBits(seg, FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); + Sketch.heapify(seg); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -245,9 +253,9 @@ public void checkHeapifyFamilyExcep() { int k = 512; Union union = SetOperation.builder().setNominalEntries(k).buildUnion(); byte[] byteArray = union.toByteArray(); - Memory mem = Memory.wrap(byteArray); + MemorySegment seg = MemorySegment.ofArray(byteArray); //Improper use - Sketch.heapify(mem); + Sketch.heapify(seg); } @Test(expectedExceptions = SketchesArgumentException.class) @@ -255,10 +263,10 @@ public void checkWrapAlphaCompactExcep() { int k = 512; Sketch sketch1 = UpdateSketch.builder().setFamily(ALPHA).setNominalEntries(k).build(); byte[] byteArray = sketch1.toByteArray(); - WritableMemory mem = WritableMemory.writableWrap(byteArray); + MemorySegment seg = MemorySegment.ofArray(byteArray); //corrupt: - mem.setBits(FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); - Sketch.wrap(mem); + Util.setBits(seg, FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); + Sketch.wrap(seg); } @@ -267,10 +275,10 @@ public void checkWrapQSCompactExcep() { int k = 512; Sketch sketch1 = UpdateSketch.builder().setFamily(QUICKSELECT).setNominalEntries(k).build(); byte[] byteArray = sketch1.toByteArray(); - WritableMemory mem = WritableMemory.writableWrap(byteArray); + MemorySegment seg = MemorySegment.ofArray(byteArray); //corrupt: - mem.setBits(FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); - Sketch.wrap(mem); + Util.setBits(seg, FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); + Sketch.wrap(seg); } @Test(expectedExceptions = 
SketchesArgumentException.class) @@ -279,11 +287,11 @@ public void checkWrapNotCompactExcep() { UpdateSketch sketch1 = UpdateSketch.builder().setFamily(QUICKSELECT).setNominalEntries(k).build(); int bytes = Sketch.getMaxCompactSketchBytes(0); byte[] byteArray = new byte[bytes]; - WritableMemory mem = WritableMemory.writableWrap(byteArray); - sketch1.compact(false, mem); + MemorySegment seg = MemorySegment.ofArray(byteArray); + sketch1.compact(false, seg); //corrupt: - mem.clearBits(FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); - Sketch.wrap(mem); + Util.clearBits(seg, FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); + Sketch.wrap(seg); } @Test @@ -298,45 +306,46 @@ public void checkValidSketchID() { public void checkWrapToHeapifyConversion1() { int k = 512; UpdateSketch sketch1 = UpdateSketch.builder().setNominalEntries(k).build(); - for (int i=0; i 0) && !(((lgT - lgA) % lgR) == 0); boolean rf0 = (lgR == 0) && (lgA != lgT); assertTrue((lgRbad == rf0) || (lgRbad == rf123)); @@ -207,8 +208,8 @@ public void checkIsResizeFactorIncorrect() { @SuppressWarnings("unused") @Test - public void checkCompactOpsMemoryToCompact() { - WritableMemory skwmem, cskwmem1, cskwmem2, cskwmem3; + public void checkCompactOpsMemorySegmentToCompact() { + MemorySegment skwseg, cskwseg1, cskwseg2, cskwseg3; CompactSketch csk1, csk2, csk3; int lgK = 6; UpdateSketch sk = Sketches.updateSketchBuilder().setLogNominalEntries(lgK).build(); @@ -216,15 +217,15 @@ public void checkCompactOpsMemoryToCompact() { for (int i = 2; i < n; i++) { sk.update(i); } int cbytes = sk.getCompactBytes(); byte[] byteArr = sk.toByteArray(); - skwmem = WritableMemory.writableWrap(byteArr); - cskwmem1 = WritableMemory.allocate(cbytes); - cskwmem2 = WritableMemory.allocate(cbytes); - cskwmem3 = WritableMemory.allocate(cbytes); - csk1 = sk.compact(true, cskwmem1); - csk2 = CompactOperations.memoryToCompact(skwmem, true, cskwmem2); - csk3 = CompactOperations.memoryToCompact(cskwmem1, true, cskwmem3); - 
assertTrue(cskwmem1.equalTo(cskwmem2)); - assertTrue(cskwmem1.equalTo(cskwmem3)); + skwseg = MemorySegment.ofArray(byteArr); + cskwseg1 = MemorySegment.ofArray(new byte[cbytes]); + cskwseg2 = MemorySegment.ofArray(new byte[cbytes]); + cskwseg3 = MemorySegment.ofArray(new byte[cbytes]); + csk1 = sk.compact(true, cskwseg1); + csk2 = CompactOperations.segmentToCompact(skwseg, true, cskwseg2); + csk3 = CompactOperations.segmentToCompact(cskwseg1, true, cskwseg3); + assertTrue(equalContents(cskwseg1,cskwseg2)); + assertTrue(equalContents(cskwseg1, cskwseg3)); } @Test diff --git a/src/test/java/org/apache/datasketches/theta2/AnotBimplTest.java b/src/test/java/org/apache/datasketches/theta2/AnotBimplTest.java deleted file mode 100644 index b78acc4ca..000000000 --- a/src/test/java/org/apache/datasketches/theta2/AnotBimplTest.java +++ /dev/null @@ -1,333 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertFalse; -import static org.testng.Assert.assertTrue; -import static org.testng.Assert.fail; - -import java.lang.foreign.MemorySegment; -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; -import org.testng.annotations.Test; - -/** - * @author Lee Rhodes - */ -public class AnotBimplTest { - - @Test - public void checkExactAnotB_AvalidNoOverlap() { - final int k = 512; - - final UpdateSketch usk1 = UpdateSketch.builder().setNominalEntries(k).build(); - final UpdateSketch usk2 = UpdateSketch.builder().setNominalEntries(k).build(); - - for (int i=0; iV1 dates from roughly Aug 2014 to about May 2015. - * The library at that time had an early Theta sketch with set operations based on ByteBuffer, - * the Alpha sketch, and an early HLL sketch. It also had an early adaptor for Pig. - * It also had code for the even earlier CountUniqueSketch (for backward compatibility), - * which was the bucket sketch based on Giroire. - * - *

            Serialization Version 1:

            - *
            -   * Long || Start Byte Adr:
            -   * Adr:
            -   *      ||  7 |   6   |     5    |   4   |   3   |    2   |    1   |     0    |
            -   *  0   ||    | Flags | LgResize | LgArr | lgNom | SkType | SerVer | MD_LONGS |
            -   *
            -   *      || 15 |  14   |    13    |  12   |  11   |   10   |    9   |     8    |
            -   *  1   ||                               | ------------CurCount-------------- |
            -   *
            -   *      || 23 |  22   |    21    |  20   |  19   |   18   |   17   |    16    |
            -   *  2   || --------------------------THETA_LONG------------------------------ |
            -   *
            -   *      ||                                                         |    24    |
            -   *  3   || ----------------------Start of Long Array------------------------  |
            -   * 
            - * - *
              - *
            • The serialization for V1 was always to a compact form (no hash table spaces).
            • - *
            • MD_LONGS (Metadata Longs, now Preamble Longs) was always 3.
            • - *
            • SerVer is always 1.
            • - *
            • The SkType had three values: 1,2,3 for Alpha, QuickSelect, and SetSketch, - * respectively.
            • - *
            • Bytes lgNom and lgArr were only used by the QS and Alpha sketches.
            • - *
            • V1 LgResize (2 bits) was only relevant to the Alpha and QS sketches.
            • - *
            • The flags byte is in byte 6 (moved to 5 in V2).
            • - *
            • The only flag bits are BE(bit0)=0, and Read-Only(bit1)=1. Read-only was only set for the - * SetSketch.
            • - *
            • There is no seedHash.
            • - *
            • There is no concept of p-sampling so bytes 12-15 of Pre1 are empty.
            • - *
            • The determination of empty is when both curCount=0 and thetaLong = Long.MAX_VALUE.
            • - *
            - * - * @param skV3 a SerVer3, ordered CompactSketch - * @return a SerVer1 SetSketch as MemorySegment object. - */ - public static MemorySegment convertSerVer3toSerVer1(final CompactSketch skV3) { - //Check input sketch - final boolean validIn = skV3.isCompact() && skV3.isOrdered() && !skV3.hasMemorySegment(); - if (!validIn) { - throw new SketchesArgumentException("Invalid input sketch."); - } - - //Build V1 SetSketch in MemorySegment - final int curCount = skV3.getRetainedEntries(true); - final int bytes = (3 + curCount) << 3; - final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]);//Util.newHeapSegment(bytes); - //Pre0 - wseg.set(JAVA_BYTE, 0, (byte) 3); //preLongs - wseg.set(JAVA_BYTE, 1, (byte) 1); //SerVer - wseg.set(JAVA_BYTE, 2, (byte) 3); //Compact (SetSketch) - wseg.set(JAVA_BYTE, 6, (byte) 2); //Flags ReadOnly, LittleEndian - //Pre1 - wseg.set(JAVA_INT_UNALIGNED, 8, curCount); - //Pre2 - wseg.set(JAVA_LONG_UNALIGNED, 16, skV3.getThetaLong()); - //Data - if (curCount > 0) { - MemorySegment.copy(skV3.getCache(), 0, wseg, JAVA_LONG_UNALIGNED, 24, curCount); - } - return wseg; - } - - /** - * Converts a SerVer3 ordered, heap CompactSketch to a SerVer2 ordered, SetSketch in MemorySegment. - * This is exclusively for testing purposes. - * - *

            V2 is short-lived and dates from roughly Mid May 2015 to about June 1st, 2015. - * (V3 was created about June 15th in preparation for OpenSource in July.) - * The Theta sketch had evolved but was still based on ByteBuffer. There was an UpdateSketch, - * the Alpha sketch, and the early HLL sketch. It also had an early adaptor for Pig. - * - *

            Serialization Version 2:

            - *
            -   * Long || Start Byte Adr:
            -   * Adr:
            -   *      ||  7 |   6   |     5    |   4   |   3   |    2   |    1   |     0         |
            -   *  0   || Seed Hash  |  Flags   | lgArr | lgNom | SkType | SerVer | MD_LONGS + RR |
            -   *
            -   *      || 15 |  14   |    13    |  12   |  11   |   10   |    9   |     8         |
            -   *  1   || --------------p-------------- | ---------Retained Entries Count-------- |
            -   *
            -   *      || 23 |  22   |    21    |  20   |  19   |   18   |   17   |    16         |
            -   *  2   || --------------------------THETA_LONG----------------------------------- |
            -   *
            -   *      ||                                                         |    24         |
            -   *  3   || ----------Start of Long Array, could be at 2 or 3 --------------------  |
            -   *  
            - * - *
              - *
            • The serialization for V2 was always to a compact form (no hash table spaces).
            • - *
            • MD_LONGS low 6 bits: 1 (Empty), 2 (Exact), 3 (Estimating).
            • - *
            • SerVer is always 2.
            • - *
            • The SkType had 4 values: 1,2,3,4; see below.
            • - *
            • Bytes lgNom and lgArr were only used by the QS and Alpha sketches.
            • - *
            • V2 LgResize top 2 bits of byte 0. Only relevant to the Alpha and QS sketches.
            • - *
            • The flags byte is in byte 5.
            • - *
            • The flag bits are specified below.
            • - *
            • There is a seedHash in bytes 6-7.
            • - *
            • p-sampling is bytes 12-15 of Pre1.
            • - *
            • The determination of empty is based on the sketch field empty_.
            • - *
            - *
            -   *   // Metadata byte Addresses
            -   *   private static final int METADATA_LONGS_BYTE        = 0; //low 6 bits
            -   *   private static final int LG_RESIZE_RATIO_BYTE       = 0; //upper 2 bits
            -   *   private static final int SER_VER_BYTE               = 1;
            -   *   private static final int SKETCH_TYPE_BYTE           = 2;
            -   *   private static final int LG_NOM_LONGS_BYTE          = 3;
            -   *   private static final int LG_ARR_LONGS_BYTE          = 4;
            -   *   private static final int FLAGS_BYTE                 = 5;
            -   *   private static final int SEED_HASH_SHORT            = 6;  //byte 6,7
            -   *   private static final int RETAINED_ENTRIES_COUNT_INT = 8;  //4 byte aligned
            -   *   private static final int P_FLOAT                    = 12; //4 byte aligned
            -   *   private static final int THETA_LONG                 = 16; //8-byte aligned
            -   *   //Backward compatibility
            -   *   private static final int FLAGS_BYTE_V1              = 6;
            -   *   private static final int LG_RESIZE_RATIO_BYTE_V1    = 5;
            -   *
            -   *   // Constant Values
            -   *   static final int SER_VER                        = 2;
            -   *   static final int ALPHA_SKETCH                   = 1; //SKETCH_TYPE_BYTE
            -   *   static final int QUICK_SELECT_SKETCH            = 2;
            -   *   static final int SET_SKETCH                     = 3;
            -   *   static final int BUFFERED_QUICK_SELECT_SKETCH   = 4;
            -   *   static final String[] SKETCH_TYPE_STR     =
            -   *       { "None", "AlphaSketch", "QuickSelectSketch", "SetSketch", "BufferedQuickSelectSketch" };
            -   *
            -   *   // flag bit masks
            -   *   static final int BIG_ENDIAN_FLAG_MASK     = 1;
            -   *   static final int READ_ONLY_FLAG_MASK      = 2;
            -   *   static final int EMPTY_FLAG_MASK          = 4;
            -   *   static final int NO_REBUILD_FLAG_MASK     = 8;
            -   *   static final int UNORDERED_FLAG_MASK     = 16;
            -   * 
            - * - * @param skV3 a SerVer3, ordered CompactSketch - * @param seed used for checking the seed hash (if one exists). - * @return a SerVer2 SetSketch as MemorySegment object. - */ - public static MemorySegment convertSerVer3toSerVer2(final CompactSketch skV3, final long seed) { - final short seedHash = Util.computeSeedHash(seed); - MemorySegment wseg = null; - - if (skV3 instanceof EmptyCompactSketch) { - wseg = MemorySegment.ofArray(new long[1]); - wseg.set(JAVA_BYTE, 0, (byte) 1); //preLongs - wseg.set(JAVA_BYTE, 1, (byte) 2); //SerVer - wseg.set(JAVA_BYTE, 2, (byte) 3); //SetSketch - final byte flags = (byte) 0xE; //NoRebuild, Empty, ReadOnly, LE - wseg.set(JAVA_BYTE, 5, flags); - wseg.set(JAVA_SHORT_UNALIGNED, 6, seedHash); - return wseg; - } - if (skV3 instanceof SingleItemSketch) { - final SingleItemSketch sis = (SingleItemSketch) skV3; - wseg = MemorySegment.ofArray(new long[3]); - wseg.set(JAVA_BYTE, 0, (byte) 2); //preLongs - wseg.set(JAVA_BYTE, 1, (byte) 2); //SerVer - wseg.set(JAVA_BYTE, 2, (byte) 3); //SetSketch - final byte flags = (byte) 0xA; //NoRebuild, notEmpty, ReadOnly, LE - wseg.set(JAVA_BYTE, 5, flags); - wseg.set(JAVA_SHORT_UNALIGNED, 6, seedHash); - wseg.set(JAVA_INT_UNALIGNED, 8, 1); - final long[] arr = sis.getCache(); - wseg.set(JAVA_LONG_UNALIGNED, 16, arr[0]); - return wseg; - } - //General CompactSketch - final int preLongs = skV3.getCompactPreambleLongs(); - final int entries = skV3.getRetainedEntries(true); - final boolean unordered = !(skV3.isOrdered()); - final byte flags = (byte) (0xA | (unordered ? 
16 : 0)); //Unordered, NoRebuild, notEmpty, ReadOnly, LE - wseg = MemorySegment.ofArray(new byte[(preLongs + entries) << 3]); - wseg.set(JAVA_BYTE, 0, (byte) preLongs); //preLongs - wseg.set(JAVA_BYTE, 1, (byte) 2); //SerVer - wseg.set(JAVA_BYTE, 2, (byte) 3); //SetSketch - - wseg.set(JAVA_BYTE, 5, flags); - wseg.set(JAVA_SHORT_UNALIGNED, 6, seedHash); - wseg.set(JAVA_INT_UNALIGNED, 8, entries); - if (preLongs == 3) { - wseg.set(JAVA_LONG_UNALIGNED, 16, skV3.getThetaLong()); - } - final long[] arr = skV3.getCache(); - MemorySegment.copy(arr, 0, wseg, JAVA_LONG_UNALIGNED, preLongs << 3, entries); - return wseg; - } -} diff --git a/src/test/java/org/apache/datasketches/theta2/BitPackingTest.java b/src/test/java/org/apache/datasketches/theta2/BitPackingTest.java deleted file mode 100644 index d6a68bbd5..000000000 --- a/src/test/java/org/apache/datasketches/theta2/BitPackingTest.java +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import static org.testng.Assert.assertEquals; - -import org.apache.datasketches.common.Util; -import org.testng.annotations.Test; - -public class BitPackingTest { - private final static boolean enablePrinting = false; -//for every number of bits from 1 to 63 -//generate pseudo-random data, pack, unpack and compare - - @Test - public void packUnpackBits() { - long value = 0xaa55aa55aa55aa55L; // arbitrary starting value - for (int n = 0; n < 10000; n++) { - for (int bits = 1; bits <= 63; bits++) { - final long mask = (1 << bits) - 1; - long[] input = new long[8]; - for (int i = 0; i < 8; ++i) { - input[i] = value & mask; - value += Util.INVERSE_GOLDEN_U64; - } - - byte[] bytes = new byte[8 * Long.BYTES]; - int bitOffset = 0; - int bufOffset = 0; - for (int i = 0; i < 8; ++i) { - BitPacking.packBits(input[i], bits, bytes, bufOffset, bitOffset); - bufOffset += (bitOffset + bits) >>> 3; - bitOffset = (bitOffset + bits) & 7; - } - - long[] output = new long[8]; - bitOffset = 0; - bufOffset = 0; - for (int i = 0; i < 8; ++i) { - BitPacking.unpackBits(output, i, bits, bytes, bufOffset, bitOffset); - bufOffset += (bitOffset + bits) >>> 3; - bitOffset = (bitOffset + bits) & 7; - } - - for (int i = 0; i < 8; ++i) { - assertEquals(output[i], input[i]); - } - } - } - } - - @Test - public void packUnpackBlocks() { - long value = 0xaa55aa55aa55aa55L; // arbitrary starting value - for (int n = 0; n < 10000; n++) { - for (int bits = 1; bits <= 63; bits++) { - if (enablePrinting) { System.out.println("bits " + bits); } - final long mask = (1L << bits) - 1; - long[] input = new long[8]; - for (int i = 0; i < 8; ++i) { - input[i] = value & mask; - value += Util.INVERSE_GOLDEN_U64; - } - - byte[] bytes = new byte[8 * Long.BYTES]; - BitPacking.packBitsBlock8(input, 0, bytes, 0, bits); - if (enablePrinting) { hexDump(bytes); } - - long[] output = new long[8]; - BitPacking.unpackBitsBlock8(output, 0, bytes, 0, bits); - - for (int i = 0; 
i < 8; ++i) { - if (enablePrinting) { System.out.println("checking value " + i); } - assertEquals(output[i], input[i]); - } - } - } - } - - @Test - public void packBitsUnpackBlocks() { - long value = 0; // arbitrary starting value - for (int n = 0; n < 10000; n++) { - for (int bits = 1; bits <= 63; bits++) { - final long mask = (1 << bits) - 1; - long[] input = new long[8]; - for (int i = 0; i < 8; ++i) { - input[i] = value & mask; - value += Util.INVERSE_GOLDEN_U64; - } - - byte[] bytes = new byte[8 * Long.BYTES]; - int bitOffset = 0; - int bufOffset = 0; - for (int i = 0; i < 8; ++i) { - BitPacking.packBits(input[i], bits, bytes, bufOffset, bitOffset); - bufOffset += (bitOffset + bits) >>> 3; - bitOffset = (bitOffset + bits) & 7; - } - - long[] output = new long[8]; - BitPacking.unpackBitsBlock8(output, 0, bytes, 0, bits); - - for (int i = 0; i < 8; ++i) { - assertEquals(output[i], input[i]); - } - } - } - } - - @Test - public void packBlocksUnpackBits() { - long value = 123L; // arbitrary starting value - for (int n = 0; n < 10000; n++) { - for (int bits = 1; bits <= 63; bits++) { - final long mask = (1 << bits) - 1; - long[] input = new long[8]; - for (int i = 0; i < 8; ++i) { - input[i] = value & mask; - value += Util.INVERSE_GOLDEN_U64; - } - - byte[] bytes = new byte[8 * Long.BYTES]; - BitPacking.packBitsBlock8(input, 0, bytes, 0, bits); - - long[] output = new long[8]; - int bitOffset = 0; - int bufOffset = 0; - for (int i = 0; i < 8; ++i) { - BitPacking.unpackBits(output, i, bits, bytes, bufOffset, bitOffset); - bufOffset += (bitOffset + bits) >>> 3; - bitOffset = (bitOffset + bits) & 7; - } - - for (int i = 0; i < 8; ++i) { - assertEquals(output[i], input[i]); - } - } - } - } - - void hexDump(byte[] bytes) { - for (int i = 0; i < bytes.length; i++) { - System.out.print(String.format("%02x ", bytes[i])); - } - System.out.println(); - } - -} diff --git a/src/test/java/org/apache/datasketches/theta2/CompactSketchTest.java 
b/src/test/java/org/apache/datasketches/theta2/CompactSketchTest.java deleted file mode 100644 index d528d6f6e..000000000 --- a/src/test/java/org/apache/datasketches/theta2/CompactSketchTest.java +++ /dev/null @@ -1,674 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertFalse; -import static org.testng.Assert.assertNotEquals; -import static org.testng.Assert.assertNotNull; -import static org.testng.Assert.assertNull; -import static org.testng.Assert.assertTrue; - -import java.lang.foreign.MemorySegment; -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.SketchesArgumentException; -import org.testng.annotations.Test; - -import java.lang.foreign.Arena; - -/** - * @author Lee Rhodes - */ -public class CompactSketchTest { - - @Test - public void checkHeapifyWrap() { - int k = 4096; - final boolean ordered = true; - checkHeapifyWrap(k, 0, ordered); - checkHeapifyWrap(k, 1, ordered); - checkHeapifyWrap(k, 1, !ordered); - checkHeapifyWrap(k, k, ordered); //exact - checkHeapifyWrap(k, k, !ordered); //exact - checkHeapifyWrap(k, 4 * k, ordered); //estimating - checkHeapifyWrap(k, 4 * k, !ordered); //estimating - } - - //test combinations of compact ordered/not ordered and heap/direct - public void checkHeapifyWrap(int k, int u, boolean ordered) { - UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(); - - for (int i=0; i k); - - boolean useSeg = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); - UpdateSketch shared = sl.shared; - UpdateSketch local = sl.local; - - for (int i=0; i k); - boolean useSeg = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); - UpdateSketch shared = sl.shared; - UpdateSketch local = sl.local; - - assertEquals(local.getClass().getSimpleName(), "ConcurrentHeapThetaBuffer"); - assertFalse(local.isDirect()); - assertTrue(local.hasMemorySegment()); - - for (int i=0; i k); - } - - @Test - public void checkErrorBounds() { - int lgK = 9; - int k = 1 << lgK; - boolean useSeg = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); - UpdateSketch shared = sl.shared; - UpdateSketch local = sl.local; - - //Exact 
mode - for (int i = 0; i < k; i++ ) { local.update(i); } - waitForBgPropagationToComplete(shared); - - double est = local.getEstimate(); - double lb = local.getLowerBound(2); - double ub = local.getUpperBound(2); - assertEquals(est, ub, 0.0); - assertEquals(est, lb, 0.0); - - //Est mode - int u = 100*k; - for (int i = k; i < u; i++ ) { - local.update(i); - local.update(i); //test duplicate rejection - } - waitForBgPropagationToComplete(shared); - est = local.getEstimate(); - lb = local.getLowerBound(2); - ub = local.getUpperBound(2); - assertTrue(est <= ub); - assertTrue(est >= lb); - } - - - @Test - public void checkUpperAndLowerBounds() { - int lgK = 9; - int k = 1 << lgK; - int u = 2*k; - boolean useSeg = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); - UpdateSketch shared = sl.shared; - UpdateSketch local = sl.local; - - for (int i = 0; i < u; i++ ) { local.update(i); } - waitForBgPropagationToComplete(shared); - - double est = local.getEstimate(); - double ub = local.getUpperBound(1); - double lb = local.getLowerBound(1); - assertTrue(ub > est); - assertTrue(lb < est); - } - - @Test - public void checkRebuild() { - int lgK = 9; - int k = 1 << lgK; - int u = 4*k; - boolean useSeg = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); - UpdateSketch shared = sl.shared; - UpdateSketch local = sl.local; - - assertTrue(local.isEmpty()); - - for (int i = 0; i< u; i++) { local.update(i); } - waitForBgPropagationToComplete(shared); - - assertFalse(local.isEmpty()); - assertTrue(local.getEstimate() > 0.0); - assertTrue(shared.getRetainedEntries(false) >= k); - - shared.rebuild(); - assertEquals(shared.getRetainedEntries(false), k); - assertEquals(shared.getRetainedEntries(true), k); - local.rebuild(); - assertEquals(shared.getRetainedEntries(false), k); - assertEquals(shared.getRetainedEntries(true), k); - } - - @Test - public void checkResetAndStartingSubMultiple() { - int lgK = 9; - int k = 1 << lgK; - boolean useSeg = true; - SharedLocal sl = new 
SharedLocal(lgK, lgK, useSeg); - UpdateSketch shared = sl.shared; - UpdateSketch local = sl.local; - - assertTrue(local.isEmpty()); - - int u = 4*k; - for (int i = 0; i< u; i++) { local.update(i); } - waitForBgPropagationToComplete(shared); - - assertFalse(local.isEmpty()); - assertTrue(shared.getRetainedEntries(false) >= k); - assertTrue(local.getThetaLong() < Long.MAX_VALUE); - - shared.reset(); - local.reset(); - assertTrue(local.isEmpty()); - assertEquals(shared.getRetainedEntries(false), 0); - assertEquals(local.getEstimate(), 0.0, 0.0); - assertEquals(local.getThetaLong(), Long.MAX_VALUE); - } - - @Test - public void checkExactModeMemorySegmentArr() { - int lgK = 12; - int k = 1 << lgK; - int u = k; - boolean useSeg = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); - UpdateSketch shared = sl.shared; - UpdateSketch local = sl.local; - assertTrue(local.isEmpty()); - - for (int i = 0; i< u; i++) { local.update(i); } - waitForBgPropagationToComplete(shared); - - assertEquals(local.getEstimate(), u, 0.0); - assertEquals(shared.getRetainedEntries(false), u); - } - - @Test - public void checkEstModeMemorySegmentArr() { - int lgK = 12; - int k = 1 << lgK; - boolean useSeg = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); - UpdateSketch shared = sl.shared; - UpdateSketch local = sl.local; - assertTrue(local.isEmpty()); - - int u = 3*k; - for (int i = 0; i< u; i++) { local.update(i); } - waitForBgPropagationToComplete(shared); - - double est = local.getEstimate(); - assertTrue((est < (u * 1.05)) && (est > (u * 0.95))); - assertTrue(shared.getRetainedEntries(false) >= k); - } - - @Test - public void checkEstModeNativeMemorySegment() { - int lgK = 12; - int k = 1 << lgK; - boolean useSeg = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); - UpdateSketch shared = sl.shared; - UpdateSketch local = sl.local; - assertTrue(local.isEmpty()); - - int u = 3*k; - for (int i = 0; i< u; i++) { local.update(i); } - 
waitForBgPropagationToComplete(shared); - double est = local.getEstimate(); - assertTrue((est < (u * 1.05)) && (est > (u * 0.95))); - assertTrue(shared.getRetainedEntries(false) >= k); - } - - @Test - public void checkConstructReconstructFromMemorySegment() { - int lgK = 12; - int k = 1 << lgK; - boolean useSeg = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); - UpdateSketch shared = sl.shared; - UpdateSketch local = sl.local; - - assertTrue(local.isEmpty()); - int u = 3*k; - - for (int i = 0; i< u; i++) { local.update(i); } //force estimation - waitForBgPropagationToComplete(shared); - - double est1 = local.getEstimate(); - int count1 = shared.getRetainedEntries(false); - assertTrue((est1 < (u * 1.05)) && (est1 > (u * 0.95))); - assertTrue(count1 >= k); - - byte[] serArr; - double est2; - - serArr = shared.toByteArray(); - MemorySegment seg = MemorySegment.ofArray(serArr); - UpdateSketch recoveredShared = Sketches.wrapUpdateSketch(seg); - - //reconstruct to Native/Direct - final int bytes = Sketch.getMaxUpdateSketchBytes(k); - final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); - shared = sl.bldr.buildSharedFromSketch(recoveredShared, wseg); - UpdateSketch local2 = sl.bldr.buildLocal(shared); - est2 = local2.getEstimate(); - - assertEquals(est2, est1, 0.0); - } - - @Test - public void checkNullMemorySegment() { - UpdateSketchBuilder bldr = new UpdateSketchBuilder(); - final UpdateSketch sk = bldr.build(); - for (int i = 0; i < 1000; i++) { sk.update(i); } - final UpdateSketch shared = bldr.buildSharedFromSketch(sk, null); - assertEquals(shared.getRetainedEntries(true), 1000); - assertFalse(shared.hasMemorySegment()); - } - - //checks Alex's bug where lgArrLongs > lgNomLongs +1. 
- @Test - public void checkResizeInBigSeg() { - int lgK = 14; - int u = 1 << 20; - boolean useSeg = true; - SharedLocal sl = new SharedLocal(lgK, lgK, SEED, useSeg, true, 8); //seg is 8X larger than needed - UpdateSketch local = sl.local; - - for (int i = 0; i < u; i++) { local.update(i); } - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkConstructorKtooSmall() { - int lgK = 3; - boolean useSeg = true; - new SharedLocal(lgK, lgK, useSeg); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkConstructorSegTooSmall() { - int lgK = 4; - int k = 1 << lgK; - MemorySegment wseg = MemorySegment.ofArray(new byte[k/2]); - UpdateSketchBuilder bldr = new UpdateSketchBuilder(); - bldr.setLogNominalEntries(lgK); - bldr.buildShared(wseg); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkHeapifyIllegalFamilyID_heapify() { - int lgK = 9; - boolean useSeg = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); - sl.wseg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Family ID byte - //try to heapify the corrupted seg - Sketch.heapify(sl.wseg); //catch in Sketch.constructHeapSketch - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkBadLgNomLongs() { - int lgK = 4; - boolean useSeg = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); - sl.wseg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte) 3); //Corrupt LgNomLongs byte - DirectQuickSelectSketch.writableWrap(sl.wseg, Util.DEFAULT_UPDATE_SEED); - } - - @Test - public void checkBackgroundPropagation() { - int lgK = 4; - int k = 1 << lgK; - int u = 10*k; - boolean useSeg = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); - UpdateSketch shared = sl.shared; - UpdateSketch local = sl.local; - assertTrue(local.isEmpty()); - ConcurrentHeapThetaBuffer sk1 = (ConcurrentHeapThetaBuffer)local; //for internal checks - - int i = 0; - for (; i< k; i++) { - local.update(i); - } - 
waitForBgPropagationToComplete(shared); - assertFalse(local.isEmpty()); - assertTrue(local.getEstimate() > 0.0); - long theta1 = ((ConcurrentSharedThetaSketch)shared).getVolatileTheta(); - - for (; i< u; i++) { - local.update(i); - } - waitForBgPropagationToComplete(shared); - - long theta2 = ((ConcurrentSharedThetaSketch)shared).getVolatileTheta(); - int entries = shared.getRetainedEntries(false); - assertTrue((entries > k) || (theta2 < theta1), - "entries="+entries+" k="+k+" theta1="+theta1+" theta2="+theta2); - - shared.rebuild(); - assertEquals(shared.getRetainedEntries(false), k); - assertEquals(shared.getRetainedEntries(true), k); - sk1.rebuild(); - assertEquals(shared.getRetainedEntries(false), k); - assertEquals(shared.getRetainedEntries(true), k); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkBadSerVer() { - int lgK = 9; - int k = 1 << lgK; - boolean useSeg = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); - UpdateSketch shared = sl.shared; - UpdateSketch local = sl.local; - assertTrue(local.isEmpty()); - - for (int i = 0; i< k; i++) { local.update(i); } - waitForBgPropagationToComplete(shared); - - assertFalse(local.isEmpty()); - assertEquals(local.getEstimate(), k, 0.0); - assertEquals(shared.getRetainedEntries(false), k); - - sl.wseg.set(JAVA_BYTE, SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte - Sketch.wrap(sl.wseg); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkWrapIllegalFamilyID_wrap() { - int lgK = 9; - boolean useSeg = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); - - sl.wseg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte - //try to wrap the corrupted seg - Sketch.wrap(sl.wseg); //catch in Sketch.constructDirectSketch - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkWrapIllegalFamilyID_direct() { - int lgK = 9; - boolean useSeg = true; - SharedLocal sl = new SharedLocal(lgK, lgK, 
useSeg); - - sl.wseg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte - //try to wrap the corrupted seg - DirectQuickSelectSketch.writableWrap(sl.wseg, Util.DEFAULT_UPDATE_SEED); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkHeapifySeedConflict() { - int lgK = 9; - long seed1 = 1021; - long seed2 = Util.DEFAULT_UPDATE_SEED; - boolean useSeg = true; - SharedLocal sl = new SharedLocal(lgK, lgK, seed1, useSeg, true, 1); - UpdateSketch shared = sl.shared; - - MemorySegment srcSeg = MemorySegment.ofArray(shared.toByteArray()).asReadOnly(); - Sketch.heapify(srcSeg, seed2); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkCorruptLgNomLongs() { - int lgK = 4; - boolean useSeg = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); - - sl.wseg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte)2); //corrupt - Sketch.heapify(sl.wseg, Util.DEFAULT_UPDATE_SEED); - } - - @Test(expectedExceptions = UnsupportedOperationException.class) - public void checkIllegalHashUpdate() { - int lgK = 4; - boolean useSeg = true; - SharedLocal sl = new SharedLocal(lgK, lgK, useSeg); - UpdateSketch shared = sl.shared; - shared.hashUpdate(1); - } - - @Test - public void printlnTest() { - println("PRINTING: "+this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(String s) { - //System.out.println(s); //disable here - } - - private static void checkMemorySegmentDirectProxyMethods(Sketch local, Sketch shared) { - assertEquals( - local.hasMemorySegment(), - shared.hasMemorySegment()); - assertEquals(local.isDirect(), shared.isDirect()); - } - - //Does not check hasMemorySegment(), isDirect() - private static void checkOtherProxyMethods(Sketch local, Sketch shared) { - assertEquals(local.getCompactBytes(), shared.getCompactBytes()); - assertEquals(local.getCurrentBytes(), shared.getCurrentBytes()); - assertEquals(local.getEstimate(), shared.getEstimate()); - 
assertEquals(local.getLowerBound(2), shared.getLowerBound(2)); - assertEquals(local.getUpperBound(2), shared.getUpperBound(2)); - assertEquals(local.isEmpty(), shared.isEmpty()); - assertEquals(local.isEstimationMode(), shared.isEstimationMode()); - } - -} diff --git a/src/test/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketchTest.java deleted file mode 100644 index 25b08e9e4..000000000 --- a/src/test/java/org/apache/datasketches/theta2/ConcurrentHeapQuickSelectSketchTest.java +++ /dev/null @@ -1,744 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.LG_NOM_LONGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertFalse; -import static org.testng.Assert.assertTrue; -import static org.testng.Assert.fail; - -import java.lang.foreign.MemorySegment; -import java.util.Arrays; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; -import org.testng.annotations.Test; - -/** - * @author eshcar - */ -public class ConcurrentHeapQuickSelectSketchTest { - - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkBadSerVer() { - int lgK = 9; - int k = 1 << lgK; - int u = k; - SharedLocal sl = new SharedLocal(lgK); - UpdateSketch shared = sl.shared; - UpdateSketch local = sl.local; - - assertTrue(local.isEmpty()); - - for (int i = 0; i< u; i++) { - local.update(i); - } - waitForBgPropagationToComplete(shared); - - assertFalse(local.isEmpty()); - assertEquals(local.getEstimate(), u, 0.0); - assertEquals(shared.getRetainedEntries(false), u); - - byte[] serArr = shared.toByteArray(); - MemorySegment seg = MemorySegment.ofArray(serArr); - Sketch sk = Sketch.heapify(seg, sl.seed); - assertTrue(sk instanceof HeapQuickSelectSketch); //Intentional promotion to Parent - - seg.set(JAVA_BYTE, SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte - Sketch.heapify(seg, sl.seed); - } - - @Test - public void checkPropagationNotOrdered() { - int lgK = 8; - int k = 1 << lgK; - int u = 200*k; - SharedLocal sl = new SharedLocal(lgK, 4, false, false); - UpdateSketch shared = sl.shared; - UpdateSketch local = sl.local; - assertEquals((sl.bldr.getLocalLgNominalEntries()), 4); - 
assertTrue(local.isEmpty()); - - for (int i = 0; i < u; i++) { - local.update(i); - } - waitForBgPropagationToComplete(shared); - - assertFalse(local.isEmpty()); - assertTrue(shared.getRetainedEntries(true) <= u); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkIllegalSketchID_UpdateSketch() { - int lgK = 9; - int k = 1 << lgK; - int u = k; - SharedLocal sl = new SharedLocal(lgK); - - UpdateSketch shared = sl.shared; - UpdateSketch local = sl.local; - assertTrue(local.isEmpty()); - assertTrue(shared instanceof ConcurrentHeapQuickSelectSketch); - for (int i = 0; i< u; i++) { - local.update(i); - } - assertTrue(shared.compact().isCompact()); - - assertFalse(local.isEmpty()); - assertEquals(local.getEstimate(), u, 0.0); - assertEquals(shared.getRetainedEntries(false), u); - byte[] byteArray = shared.toByteArray(); - MemorySegment seg = MemorySegment.ofArray(byteArray); - seg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte - - //try to heapify the corrupted seg - Sketch.heapify(seg, sl.seed); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkHeapifySeedConflict() { - int lgK = 9; - long seed = 1021; - long seed2 = Util.DEFAULT_UPDATE_SEED; - SharedLocal sl = new SharedLocal(lgK, lgK, seed); - byte[] byteArray = sl.shared.toByteArray(); - MemorySegment srcSeg = MemorySegment.ofArray(byteArray); - Sketch.heapify(srcSeg, seed2); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkHeapifyCorruptLgNomLongs() { - int lgK = 4; - SharedLocal sl = new SharedLocal(lgK); - byte[] serArr = sl.shared.toByteArray(); - MemorySegment srcSeg = MemorySegment.ofArray(serArr); - srcSeg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte)2); //corrupt - Sketch.heapify(srcSeg, Util.DEFAULT_UPDATE_SEED); - } - - @Test(expectedExceptions = UnsupportedOperationException.class) - public void checkIllegalHashUpdate() { - int lgK = 4; - SharedLocal sl = new SharedLocal(lgK); - 
sl.shared.hashUpdate(1); - } - - @Test - public void checkHeapifyByteArrayExact() { - int lgK = 9; - int k = 1 << lgK; - int u = k; - SharedLocal sl = new SharedLocal(lgK); - UpdateSketch shared = sl.shared; - UpdateSketch local = sl.local; - - for (int i=0; i k); - // it could be exactly k, but in this case must be greater - } - - @Test - public void checkErrorBounds() { - int lgK = 9; - int k = 1 << lgK; - SharedLocal sl = new SharedLocal(lgK); - UpdateSketch local = sl.local; - UpdateSketch shared = sl.shared; - - //Exact mode - //int limit = (int)ConcurrentSharedThetaSketch.computeExactLimit(lim, 0); //? ask Eshcar - for (int i = 0; i < k; i++ ) { - local.update(i); - } - - double est = local.getEstimate(); - double lb = local.getLowerBound(2); - double ub = local.getUpperBound(2); - assertEquals(est, ub, 0.0); - assertEquals(est, lb, 0.0); - - //Est mode - int u = 2 * k; - for (int i = k; i < u; i++ ) { - local.update(i); - local.update(i); //test duplicate rejection - } - waitForBgPropagationToComplete(shared); - est = local.getEstimate(); - lb = local.getLowerBound(2); - ub = local.getUpperBound(2); - assertTrue(est <= ub); - assertTrue(est >= lb); - } - - @Test - public void checkRebuild() { - int lgK = 4; - int k = 1 << lgK; - SharedLocal sl = new SharedLocal(lgK); - //must build shared first - UpdateSketch shared = sl.shared; - UpdateSketch local = sl.local; - - assertTrue(local.isEmpty()); - int t = ((ConcurrentHeapThetaBuffer)local).getHashTableThreshold(); - - for (int i = 0; i< t; i++) { - local.update(i); - } - waitForBgPropagationToComplete(shared); - - assertFalse(local.isEmpty()); - assertTrue(local.getEstimate() > 0.0); - assertTrue(shared.getRetainedEntries(false) > k); - - shared.rebuild(); - assertEquals(shared.getRetainedEntries(false), k); - assertEquals(shared.getRetainedEntries(true), k); - shared.rebuild(); - assertEquals(shared.getRetainedEntries(false), k); - assertEquals(shared.getRetainedEntries(true), k); - } - - @Test - public void 
checkBuilder() { - int lgK = 4; - SharedLocal sl = new SharedLocal(lgK); - assertEquals(sl.bldr.getLocalLgNominalEntries(), lgK); - assertEquals(sl.bldr.getLgNominalEntries(), lgK); - println(sl.bldr.toString()); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkBuilderSmallNominal() { - int lgK = 2; //too small - new SharedLocal(lgK); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkNegativeHashes() { - int lgK = 9; - SharedLocal sl = new SharedLocal(lgK); - UpdateSketch local = sl.local; - local.hashUpdate(-1L); - } - - @Test - public void checkResetAndStartingSubMultiple() { - int lgK = 9; - int k = 1 << lgK; - SharedLocal sl = new SharedLocal(lgK); - UpdateSketch shared = sl.shared; - UpdateSketch local = sl.local; - - assertTrue(local.isEmpty()); - int u = 3*k; - - for (int i = 0; i< u; i++) { local.update(i); } - waitForBgPropagationToComplete(shared); - - assertFalse(local.isEmpty()); - assertTrue(shared.getRetainedEntries(false) >= k); - assertTrue(local.getThetaLong() < Long.MAX_VALUE); - - shared.reset(); - local.reset(); - assertTrue(local.isEmpty()); - assertEquals(shared.getRetainedEntries(false), 0); - assertEquals(local.getEstimate(), 0.0, 0.0); - assertEquals(local.getThetaLong(), Long.MAX_VALUE); - } - - @Test - public void checkDQStoCompactEmptyForms() { - int lgK = 9; - SharedLocal sl = new SharedLocal(lgK); - UpdateSketch local = sl.local; - UpdateSketch shared = sl.shared; - - //empty - local.toString(false, true, 0, false); //exercise toString - assertTrue(local instanceof ConcurrentHeapThetaBuffer); - double localEst = local.getEstimate(); - double localLB = local.getLowerBound(2); - double uskUB = local.getUpperBound(2); - assertFalse(local.isEstimationMode()); - - int bytes = local.getCompactBytes(); - assertEquals(bytes, 8); - byte[] segArr2 = new byte[bytes]; - MemorySegment seg2 = MemorySegment.ofArray(segArr2); - - CompactSketch csk2 = shared.compact(false, 
seg2); - assertEquals(csk2.getEstimate(), localEst); - assertEquals(csk2.getLowerBound(2), localLB); - assertEquals(csk2.getUpperBound(2), uskUB); - assertTrue(csk2.isEmpty()); - assertFalse(csk2.isEstimationMode()); - assertTrue(csk2.isOrdered()); - - CompactSketch csk3 = shared.compact(true, seg2); - csk3.toString(false, true, 0, false); - csk3.toString(); - assertEquals(csk3.getEstimate(), localEst); - assertEquals(csk3.getLowerBound(2), localLB); - assertEquals(csk3.getUpperBound(2), uskUB); - assertTrue(csk3.isEmpty()); - assertFalse(csk3.isEstimationMode()); - assertTrue(csk2.isOrdered()); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkMinReqBytes() { - int lgK = 4; - int k = 1 << lgK; - SharedLocal sl = new SharedLocal(lgK); - for (int i = 0; i < (4 * k); i++) { sl.local.update(i); } - waitForBgPropagationToComplete(sl.shared); - byte[] byteArray = sl.shared.toByteArray(); - byte[] badBytes = Arrays.copyOfRange(byteArray, 0, 24); //corrupt no. 
bytes - MemorySegment seg = MemorySegment.ofArray(badBytes).asReadOnly(); - Sketch.heapify(seg); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkThetaAndLgArrLongs() { - int lgK = 4; - int k = 1 << lgK; - SharedLocal sl = new SharedLocal(lgK); - for (int i = 0; i < k; i++) { sl.local.update(i); } - waitForBgPropagationToComplete(sl.shared); - byte[] badArray = sl.shared.toByteArray(); - MemorySegment seg = MemorySegment.ofArray(badArray); - PreambleUtil.insertLgArrLongs(seg, 4); //corrupt - PreambleUtil.insertThetaLong(seg, Long.MAX_VALUE / 2); //corrupt - Sketch.heapify(seg); - } - - @Test - public void checkFamily() { - SharedLocal sl = new SharedLocal(); - UpdateSketch local = sl.local; - assertEquals(local.getFamily(), Family.QUICKSELECT); - } - - @Test - public void checkBackgroundPropagation() { - int lgK = 4; - int k = 1 << lgK; - int u = 5*k; - SharedLocal sl = new SharedLocal(lgK); - assertTrue(sl.local.isEmpty()); - - int i = 0; - for (; i < k; i++) { sl.local.update(i); } //exact - waitForBgPropagationToComplete(sl.shared); - - assertFalse(sl.local.isEmpty()); - assertTrue(sl.local.getEstimate() > 0.0); - long theta1 = sl.sharedIf.getVolatileTheta(); - - for (; i < u; i++) { sl.local.update(i); } //continue, make it estimating - waitForBgPropagationToComplete(sl.shared); - - long theta2 = sl.sharedIf.getVolatileTheta(); - int entries = sl.shared.getRetainedEntries(false); - assertTrue((entries > k) || (theta2 < theta1), - "entries= " + entries + " k= " + k + " theta1= " + theta1 + " theta2= " + theta2); - - sl.shared.rebuild(); - assertEquals(sl.shared.getRetainedEntries(false), k); - assertEquals(sl.shared.getRetainedEntries(true), k); - sl.local.rebuild(); - assertEquals(sl.shared.getRetainedEntries(false), k); - assertEquals(sl.shared.getRetainedEntries(true), k); - } - - @Test - public void checkBuilderExceptions() { - UpdateSketchBuilder bldr = new UpdateSketchBuilder(); - try { - bldr.setNominalEntries(8); - 
fail(); - } catch (SketchesArgumentException e) { } - try { - bldr.setLocalNominalEntries(8); - fail(); - } catch (SketchesArgumentException e) { } - try { - bldr.setLocalLogNominalEntries(3); - fail(); - } catch (SketchesArgumentException e) { } - bldr.setNumPoolThreads(4); - assertEquals(bldr.getNumPoolThreads(), 4); - bldr.setMaxConcurrencyError(0.04); - assertEquals(bldr.getMaxConcurrencyError(), 0.04); - bldr.setMaxNumLocalThreads(4); - assertEquals(bldr.getMaxNumLocalThreads(), 4); - } - - @Test(expectedExceptions = UnsupportedOperationException.class) - public void checkToByteArray() { - SharedLocal sl = new SharedLocal(); - sl.local.toByteArray(); - } - - @Test - public void printlnTest() { - println("PRINTING: "+this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(String s) { - //System.out.println(s); //disable here - } - - static class SharedLocal { - static final long DefaultSeed = Util.DEFAULT_UPDATE_SEED; - final UpdateSketch shared; - final ConcurrentSharedThetaSketch sharedIf; - final UpdateSketch local; - final int sharedLgK; - final int localLgK; - final long seed; - final MemorySegment wseg; - final UpdateSketchBuilder bldr = new UpdateSketchBuilder(); - - SharedLocal() { - this(9, 9, DefaultSeed, false, true, 1); - } - - SharedLocal(int lgK) { - this(lgK, lgK, DefaultSeed, false, true, 1); - } - - SharedLocal(int sharedLgK, int localLgK) { - this(sharedLgK, localLgK, DefaultSeed, false, true, 1); - } - - SharedLocal(int sharedLgK, int localLgK, long seed) { - this(sharedLgK, localLgK, seed, false, true, 1); - } - - SharedLocal(int sharedLgK, int localLgK, boolean useSeg) { - this(sharedLgK, localLgK, DefaultSeed, useSeg, true, 1); - } - - SharedLocal(int sharedLgK, int localLgK, boolean useSeg, boolean ordered) { - this(sharedLgK, localLgK, DefaultSeed, useSeg, ordered, 1); - } - - SharedLocal(int sharedLgK, int localLgK, long seed, boolean useSeg, boolean ordered, int segMult) { - this.sharedLgK = 
sharedLgK; - this.localLgK = localLgK; - this.seed = seed; - if (useSeg) { - int bytes = (((4 << sharedLgK) * segMult) + (Family.QUICKSELECT.getMaxPreLongs())) << 3; - wseg = MemorySegment.ofArray(new byte[bytes]); - } else { - wseg = null; - } - bldr.setLogNominalEntries(sharedLgK); - bldr.setLocalLogNominalEntries(localLgK); - bldr.setPropagateOrderedCompact(ordered); - bldr.setSeed(this.seed); - shared = bldr.buildShared(wseg); - local = bldr.buildLocal(shared); - sharedIf = (ConcurrentSharedThetaSketch) shared; - } - } - - static void waitForBgPropagationToComplete(UpdateSketch shared) { - try { - Thread.sleep(10); - } catch (InterruptedException e) { - e.printStackTrace(); - } - ConcurrentSharedThetaSketch csts = (ConcurrentSharedThetaSketch)shared; - csts.awaitBgPropagationTermination(); - ConcurrentPropagationService.resetExecutorService(Thread.currentThread().getId()); - csts.initBgPropagationService(); - } - -} diff --git a/src/test/java/org/apache/datasketches/theta2/CornerCaseThetaSetOperationsTest.java b/src/test/java/org/apache/datasketches/theta2/CornerCaseThetaSetOperationsTest.java deleted file mode 100644 index 594a0808f..000000000 --- a/src/test/java/org/apache/datasketches/theta2/CornerCaseThetaSetOperationsTest.java +++ /dev/null @@ -1,518 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import org.testng.annotations.Test; -//import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED; -//import static org.apache.datasketches.hash.MurmurHash3.hash; - -public class CornerCaseThetaSetOperationsTest { - - /* Hash Values - * 9223372036854775807 Theta = 1.0 - * - * 6730918654704304314 hash(3L)[0] >>> 1 GT_MIDP - * 4611686018427387904 Theta for p = 0.5f = MIDP - * - * 1206007004353599230 hash(6L)[0] >>> 1 GT_LOWP_V - * 922337217429372928 Theta for p = 0.1f = LOWP - * 593872385995628096 hash(4L)[0] >>> 1 LT_LOWP_V - */ - - private static final long GT_MIDP_V = 3L; - private static final float MIDP = 0.5f; - - private static final long GT_LOWP_V = 6L; - private static final float LOWP = 0.1f; - private static final long LT_LOWP_V = 4L; - - private static final double LOWP_THETA = LOWP; - - private enum SkType { - EMPTY, // { 1.0, 0, T} Bin: 101 Oct: 05 - EXACT, // { 1.0, >0, F} Bin: 110 Oct: 06, specify only value - ESTIMATION, // {<1.0, >0, F} Bin: 010 Oct: 02, specify only value - DEGENERATE // {<1.0, 0, F} Bin: 000 Oct: 0, specify p, value - } - - //================================= - - @Test - public void emptyEmpty() { - UpdateSketch thetaA = getSketch(SkType.EMPTY, 0, 0); - UpdateSketch thetaB = getSketch(SkType.EMPTY, 0, 0); - final double expectedIntersectTheta = 1.0; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = true; - final double expectedAnotbTheta = 1.0; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = true; - final double expectedUnionTheta = 1.0; - final int expectedUnionCount = 0; - final boolean expectedUnionEmpty = true; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, 
expectedUnionEmpty); - } - - @Test - public void emptyExact() { - UpdateSketch thetaA = getSketch(SkType.EMPTY, 0, 0); - UpdateSketch thetaB = getSketch(SkType.EXACT, 0, GT_MIDP_V); - final double expectedIntersectTheta = 1.0; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = true; - final double expectedAnotbTheta = 1.0; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = true; - final double expectedUnionTheta = 1.0; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void emptyDegenerate() { - UpdateSketch thetaA = getSketch(SkType.EMPTY, 0, 0); - UpdateSketch thetaB = getSketch(SkType.DEGENERATE, LOWP, GT_LOWP_V); - final double expectedIntersectTheta = 1.0; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = true; - final double expectedAnotbTheta = 1.0; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = true; - final double expectedUnionTheta = LOWP; - final int expectedUnionCount = 0; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void emptyEstimation() { - UpdateSketch thetaA = getSketch(SkType.EMPTY, 0, 0); - UpdateSketch thetaB = getSketch(SkType.ESTIMATION, LOWP, LT_LOWP_V); - final double expectedIntersectTheta = 1.0; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = true; - final double expectedAnotbTheta = 1.0; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = true; - final double 
expectedUnionTheta = LOWP; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - //================================= - - @Test - public void exactEmpty() { - UpdateSketch thetaA = getSketch(SkType.EXACT, 0, GT_MIDP_V); - UpdateSketch thetaB = getSketch(SkType.EMPTY, 0, 0); - final double expectedIntersectTheta = 1.0; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = true; - final double expectedAnotbTheta = 1.0; - final int expectedAnotbCount = 1; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = 1.0; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void exactExact() { - UpdateSketch thetaA = getSketch(SkType.EXACT, 0, GT_MIDP_V); - UpdateSketch thetaB = getSketch(SkType.EXACT, 0, GT_MIDP_V); - final double expectedIntersectTheta = 1.0; - final int expectedIntersectCount = 1; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = 1.0; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = true; - final double expectedUnionTheta = 1.0; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void exactDegenerate() { - UpdateSketch thetaA = getSketch(SkType.EXACT, 
0, LT_LOWP_V); - UpdateSketch thetaB = getSketch(SkType.DEGENERATE, LOWP, GT_LOWP_V); //entries = 0 - final double expectedIntersectTheta = LOWP_THETA; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = LOWP_THETA; - final int expectedAnotbCount = 1; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void exactEstimation() { - UpdateSketch thetaA = getSketch(SkType.EXACT, 0, LT_LOWP_V); - UpdateSketch thetaB = getSketch(SkType.ESTIMATION, LOWP, LT_LOWP_V); - final double expectedIntersectTheta = LOWP_THETA; - final int expectedIntersectCount = 1; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = LOWP_THETA; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - //================================= - - @Test - public void estimationEmpty() { - UpdateSketch thetaA = getSketch(SkType.ESTIMATION, LOWP, LT_LOWP_V); - UpdateSketch thetaB = getSketch(SkType.EMPTY, 0, 0); - final double expectedIntersectTheta = 1.0; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = true; - final double expectedAnotbTheta = LOWP_THETA; - final int expectedAnotbCount = 1; - final boolean expectedAnotbEmpty = false; - final 
double expectedUnionTheta = LOWP; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void estimationExact() { - UpdateSketch thetaA = getSketch(SkType.ESTIMATION, LOWP, LT_LOWP_V); - UpdateSketch thetaB = getSketch(SkType.EXACT, 0, LT_LOWP_V); - final double expectedIntersectTheta = LOWP_THETA; - final int expectedIntersectCount = 1; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = LOWP_THETA; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void estimationDegenerate() { - UpdateSketch thetaA = getSketch(SkType.ESTIMATION, MIDP, LT_LOWP_V); - UpdateSketch thetaB = getSketch(SkType.DEGENERATE, LOWP, GT_LOWP_V); - final double expectedIntersectTheta = LOWP_THETA; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = LOWP_THETA; - final int expectedAnotbCount = 1; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void estimationEstimation() 
{ - UpdateSketch thetaA = getSketch(SkType.ESTIMATION, MIDP, LT_LOWP_V); - UpdateSketch thetaB = getSketch(SkType.ESTIMATION, LOWP, LT_LOWP_V); - final double expectedIntersectTheta = LOWP_THETA; - final int expectedIntersectCount = 1; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = LOWP_THETA; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - //================================= - - @Test - public void degenerateEmpty() { - UpdateSketch thetaA = getSketch(SkType.DEGENERATE, LOWP, GT_LOWP_V); //entries = 0 - UpdateSketch thetaB = getSketch(SkType.EMPTY, 0, 0); - final double expectedIntersectTheta = 1.0; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = true; - final double expectedAnotbTheta = LOWP_THETA; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP; - final int expectedUnionCount = 0; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void degenerateExact() { - UpdateSketch thetaA = getSketch(SkType.DEGENERATE, LOWP, GT_LOWP_V); //entries = 0 - UpdateSketch thetaB = getSketch(SkType.EXACT, 0, LT_LOWP_V); - final double expectedIntersectTheta = LOWP_THETA; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = LOWP_THETA; - final int 
expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void degenerateDegenerate() { - UpdateSketch thetaA = getSketch(SkType.DEGENERATE, MIDP, GT_MIDP_V); //entries = 0 - UpdateSketch thetaB = getSketch(SkType.DEGENERATE, LOWP, GT_LOWP_V); - final double expectedIntersectTheta = LOWP_THETA; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = LOWP_THETA; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP; - final int expectedUnionCount = 0; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void degenerateEstimation() { - UpdateSketch thetaA = getSketch(SkType.DEGENERATE, MIDP, GT_MIDP_V); //entries = 0 - UpdateSketch thetaB = getSketch(SkType.ESTIMATION, LOWP, LT_LOWP_V); - final double expectedIntersectTheta = LOWP_THETA; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = LOWP_THETA; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, 
expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - //================================= - //================================= - - private static void checks( - UpdateSketch thetaA, - UpdateSketch thetaB, - double expectedIntersectTheta, - int expectedIntersectCount, - boolean expectedIntersectEmpty, - double expectedAnotbTheta, - int expectedAnotbCount, - boolean expectedAnotbEmpty, - double expectedUnionTheta, - int expectedUnionCount, - boolean expectedUnionEmpty) { - CompactSketch csk; - Intersection inter = SetOperation.builder().buildIntersection(); - AnotB anotb = SetOperation.builder().buildANotB(); - Union union = new SetOperationBuilder().buildUnion(); - - //Intersection Stateless Theta, Theta Updatable - csk = inter.intersect(thetaA, thetaB); - checkResult("Intersect Stateless Theta, Theta", csk, expectedIntersectTheta, expectedIntersectCount, - expectedIntersectEmpty); - //Intersection Stateless Theta, Theta Compact - csk = inter.intersect(thetaA.compact(), thetaB.compact()); - checkResult("Intersect Stateless Theta, Theta", csk, expectedIntersectTheta, expectedIntersectCount, - expectedIntersectEmpty); - - //AnotB Stateless Theta, Theta Updatable - csk = anotb.aNotB(thetaA, thetaB); - checkResult("AnotB Stateless Theta, Theta", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); - //AnotB Stateless Theta, Theta Compact - csk = anotb.aNotB(thetaA.compact(), thetaB.compact()); - checkResult("AnotB Stateless Theta, Theta", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); - - //AnotB Stateful Theta, Theta Updatable - anotb.setA(thetaA); - anotb.notB(thetaB); - csk = anotb.getResult(true); - checkResult("AnotB Stateful Theta, Theta", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); - //AnotB Stateful Theta, Theta Compact - anotb.setA(thetaA.compact()); - anotb.notB(thetaB.compact()); - csk = anotb.getResult(true); - checkResult("AnotB Stateful Theta, Theta", csk, 
expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); - - //Union Stateful Theta, Theta Updatable - union.union(thetaA); - union.union(thetaB); - csk = union.getResult(); - union.reset(); - checkResult("Union Stateless Theta, Theta", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - //Union Stateful Theta, Theta Compact - union.union(thetaA.compact()); - union.union(thetaB.compact()); - csk = union.getResult(); - union.reset(); - checkResult("Union Stateless Theta, Theta", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - - } - - private static void checkResult( - String comment, - CompactSketch csk, - double expectedTheta, - int expectedEntries, - boolean expectedEmpty) { - double actualTheta = csk.getTheta(); - int actualEntries = csk.getRetainedEntries(); - boolean actualEmpty = csk.isEmpty(); - - boolean thetaOk = actualTheta == expectedTheta; - boolean entriesOk = actualEntries == expectedEntries; - boolean emptyOk = actualEmpty == expectedEmpty; - if (!thetaOk || !entriesOk || !emptyOk) { - StringBuilder sb = new StringBuilder(); - sb.append(comment + ": "); - if (!thetaOk) { sb.append("Theta: expected " + expectedTheta + ", got " + actualTheta + "; "); } - if (!entriesOk) { sb.append("Entries: expected " + expectedEntries + ", got " + actualEntries + "; "); } - if (!emptyOk) { sb.append("Empty: expected " + expectedEmpty + ", got " + actualEmpty + "."); } - throw new IllegalArgumentException(sb.toString()); - } - } - - private static UpdateSketch getSketch(SkType skType, float p, long value) { - UpdateSketchBuilder bldr = UpdateSketch.builder(); - bldr.setLogNominalEntries(4); - UpdateSketch sk; - switch(skType) { - case EMPTY: { // { 1.0, 0, T} p and value are not used - sk = bldr.build(); - break; - } - case EXACT: { // { 1.0, >0, F} p is not used - sk = bldr.build(); - sk.update(value); - break; - } - case ESTIMATION: { // {<1.0, >0, F} - bldr.setP(p); - sk = bldr.build(); - sk.update(value); - break; - } - case 
DEGENERATE: { // {<1.0, 0, F} - bldr.setP(p); - sk = bldr.build(); - sk.update(value); - break; - } - - default: { return null; } // should not happen - } - return sk; - } - -// private static void println(Object o) { -// System.out.println(o.toString()); -// } -// -// @Test -// public void printHash() { -// long seed = DEFAULT_UPDATE_SEED; -// long v = 6; -// long hash = (hash(v, seed)[0]) >>> 1; -// println(v + ", " + hash); -// } -// -// @Test -// public void printPAsLong() { -// float p = 0.5f; -// println("p = " + p + ", " + (long)(Long.MAX_VALUE * p)); -// } - -} diff --git a/src/test/java/org/apache/datasketches/theta2/DirectIntersectionTest.java b/src/test/java/org/apache/datasketches/theta2/DirectIntersectionTest.java deleted file mode 100644 index a706efba4..000000000 --- a/src/test/java/org/apache/datasketches/theta2/DirectIntersectionTest.java +++ /dev/null @@ -1,768 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; -import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE; -import static org.apache.datasketches.theta2.SetOperation.CONST_PREAMBLE_LONGS; -import static org.apache.datasketches.theta2.SetOperation.getMaxIntersectionBytes; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertFalse; -import static org.testng.Assert.assertTrue; -import static org.testng.Assert.fail; - -import java.lang.foreign.MemorySegment; -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.Util; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.SketchesReadOnlyException; -import org.apache.datasketches.common.SketchesStateException; -import org.testng.annotations.Test; - -/** - * @author Lee Rhodes - */ -public class DirectIntersectionTest { - private static final int PREBYTES = CONST_PREAMBLE_LONGS << 3; //24 - - @Test - public void checkExactIntersectionNoOverlap() { - final int lgK = 9; - final int k = 1< k); - println("Est: "+est); - } - - @SuppressWarnings("unused") - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkOverflow() { - final int lgK = 9; //512 - final int k = 1< k); - println("Est: "+est); - - final byte[] byteArray = inter.toByteArray(); - final MemorySegment seg = MemorySegment.ofArray(byteArray); - final Intersection inter2 = (Intersection) SetOperation.heapify(seg); - comp2 = inter2.getResult(false, null); - est2 = comp2.getEstimate(); - println("Est2: "+est2); - } - - /** - * This proves that the hash of 7 is < 0.5. This fact will be used in other tests involving P. 
- */ - @Test - public void checkPreject() { - final UpdateSketch sk = UpdateSketch.builder().setP((float) .5).build(); - sk.update(7); - assertEquals(sk.getRetainedEntries(), 0); - } - - @Test - public void checkWrapVirginEmpty() { - final int lgK = 5; - final int k = 1 << lgK; - Intersection inter1, inter2; - UpdateSketch sk1; - - final int segBytes = getMaxIntersectionBytes(k); - MemorySegment iSeg = MemorySegment.ofArray(new byte[segBytes]); - - inter1 = SetOperation.builder().buildIntersection(iSeg); //virgin off-heap - inter2 = Sketches.wrapIntersection(iSeg); //virgin off-heap, identical to inter1 - //both in virgin state, empty = false - //note: both inter1 and inter2 are tied to the same MemorySegment, - // so an intersect to one also affects the other. Don't do what I do! - assertFalse(inter1.hasResult()); - assertFalse(inter2.hasResult()); - - //This constructs a sketch with 0 entries and theta < 1.0 - sk1 = UpdateSketch.builder().setP((float) .5).setNominalEntries(k).build(); - sk1.update(7); //will be rejected by P, see proof above. - - //A virgin intersection (empty = false) intersected with a not-empty zero cache sketch - //remains empty = false! 
- inter1.intersect(sk1); - assertFalse(inter1.isEmpty()); - assertTrue(inter1.hasResult()); - //note that inter2 is not independent - assertFalse(inter2.isEmpty()); - assertTrue(inter2.hasResult()); - - //test the path via toByteArray, now in a different state - iSeg = MemorySegment.ofArray(inter1.toByteArray()); - inter2 = Sketches.wrapIntersection(iSeg); - assertTrue(inter2.hasResult()); //still true - - //test the compaction path - final CompactSketch comp = inter2.getResult(true, null); - assertEquals(comp.getRetainedEntries(false), 0); - assertFalse(comp.isEmpty()); - } - - @Test - public void checkWrapNullEmpty2() { - final int lgK = 5; - final int k = 1< k); - println("Est: "+est); - - final byte[] segArr3 = inter2.toByteArray(); - final MemorySegment srcSeg2 = MemorySegment.ofArray(segArr3); - inter3 = Sketches.wrapIntersection(srcSeg2); - resultComp2 = inter3.getResult(false, null); - est2 = resultComp2.getEstimate(); - println("Est2: "+est2); - - inter.reset(); - inter2.reset(); - inter3.reset(); - } - - @Test - public void checkDefaultMinSize() { - final int k = 32; - final MemorySegment seg = MemorySegment.ofArray(new byte[k*8 + PREBYTES]); - IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, seg); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkExceptionMinSize() { - final int k = 16; - final MemorySegment seg = MemorySegment.ofArray(new byte[k*8 + PREBYTES]); - IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, seg); - } - - @Test - public void checkGetResult() { - final int k = 1024; - final UpdateSketch sk = Sketches.updateSketchBuilder().build(); - - final int segBytes = getMaxIntersectionBytes(k); - final byte[] segArr = new byte[segBytes]; - final MemorySegment iSeg = MemorySegment.ofArray(segArr); - - final Intersection inter = Sketches.setOperationBuilder().buildIntersection(iSeg); - inter.intersect(sk); - final CompactSketch csk = inter.getResult(); - 
assertEquals(csk.getCompactBytes(), 8); - } - - @Test - public void checkFamily() { - //cheap trick - final int k = 16; - final MemorySegment seg = MemorySegment.ofArray(new byte[k*16 + PREBYTES]); - final IntersectionImpl impl = IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, seg); - assertEquals(impl.getFamily(), Family.INTERSECTION); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkExceptions1() { - final int k = 16; - final MemorySegment seg = MemorySegment.ofArray(new byte[k*16 + PREBYTES]); - IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, seg); - //corrupt SerVer - seg.set(JAVA_BYTE, PreambleUtil.SER_VER_BYTE, (byte) 2); - IntersectionImpl.wrapInstance(seg, Util.DEFAULT_UPDATE_SEED, false); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkExceptions2() { - final int k = 16; - final MemorySegment seg = MemorySegment.ofArray(new byte[k*16 + PREBYTES]); - IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, seg); - //seg now has non-empty intersection - //corrupt empty and CurCount - Util.setBits(seg, PreambleUtil.FLAGS_BYTE, (byte) PreambleUtil.EMPTY_FLAG_MASK); - seg.set(JAVA_INT_UNALIGNED, PreambleUtil.RETAINED_ENTRIES_INT, 2); - IntersectionImpl.wrapInstance(seg, Util.DEFAULT_UPDATE_SEED, false); - } - - //Check Alex's bug intersecting 2 direct full sketches with only overlap of 2 - // - @Test - public void checkOverlappedDirect() { - final int k = 1 << 4; - final int segBytes = 2*k*16 +PREBYTES; //plenty of room - final UpdateSketch sk1 = Sketches.updateSketchBuilder().setNominalEntries(k).build(); - final UpdateSketch sk2 = Sketches.updateSketchBuilder().setNominalEntries(k).build(); - for (int i=0; i k); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - - @Test - public void checkSamplingMode() { - int k = 4096; - float p = (float)0.5; - - try (Arena arena = Arena.ofConfined()) { - MemorySegment wseg = 
makeNativeMemorySegment(k, arena); - - UpdateSketch usk = UpdateSketch.builder().setP(p).setNominalEntries(k).build(wseg); - DirectQuickSelectSketch sk1 = (DirectQuickSelectSketch)usk; //for internal checks - - for (int i = 0; i < k; i++ ) { usk.update(i); } - - double p2 = sk1.getP(); - double theta = sk1.getTheta(); - assertTrue(theta <= p2); - - double est = usk.getEstimate(); - assertEquals(k, est, k *.05); - double ub = usk.getUpperBound(1); - assertTrue(ub > est); - double lb = usk.getLowerBound(1); - assertTrue(lb < est); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - - @Test - public void checkErrorBounds() { - int k = 512; - try (Arena arena = Arena.ofConfined()) { - MemorySegment wseg = makeNativeMemorySegment(k, arena); - - UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wseg); - - //Exact mode - for (int i = 0; i < k; i++ ) { usk.update(i); } - - double est = usk.getEstimate(); - double lb = usk.getLowerBound(2); - double ub = usk.getUpperBound(2); - assertEquals(est, ub, 0.0); - assertEquals(est, lb, 0.0); - - //Est mode - int u = 100*k; - for (int i = k; i < u; i++ ) { - usk.update(i); - usk.update(i); //test duplicate rejection - } - est = usk.getEstimate(); - lb = usk.getLowerBound(2); - ub = usk.getUpperBound(2); - assertTrue(est <= ub); - assertTrue(est >= lb); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - - //Empty Tests - @Test - public void checkEmptyAndP() { - //virgin, p = 1.0 - int k = 1024; - float p = (float)1.0; - try (Arena arena = Arena.ofConfined()) { - MemorySegment wseg = makeNativeMemorySegment(k, arena); - - UpdateSketch usk = UpdateSketch.builder().setP(p).setNominalEntries(k).build(wseg); - DirectQuickSelectSketch sk1 = (DirectQuickSelectSketch)usk; //for internal checks - - assertTrue(usk.isEmpty()); - usk.update(1); - assertEquals(sk1.getRetainedEntries(true), 1); - assertFalse(usk.isEmpty()); - - //virgin, p = .001 - p = (float)0.001; - byte[] 
segArr2 = new byte[(int) wseg.byteSize()]; - MemorySegment seg2 = MemorySegment.ofArray(segArr2); - UpdateSketch usk2 = UpdateSketch.builder().setP(p).setNominalEntries(k).build(seg2); - sk1 = (DirectQuickSelectSketch)usk2; - - assertTrue(usk2.isEmpty()); - usk2.update(1); //will be rejected - assertEquals(sk1.getRetainedEntries(true), 0); - assertFalse(usk2.isEmpty()); - double est = usk2.getEstimate(); - //println("Est: "+est); - assertEquals(est, 0.0, 0.0); //because curCount = 0 - double ub = usk2.getUpperBound(2); //huge because theta is tiny! - //println("UB: "+ub); - assertTrue(ub > 0.0); - double lb = usk2.getLowerBound(2); - assertTrue(lb <= est); - //println("LB: "+lb); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - - @Test - public void checkUpperAndLowerBounds() { - int k = 512; - int u = 2*k; - try (Arena arena = Arena.ofConfined()) { - MemorySegment wseg = makeNativeMemorySegment(k, arena); - - UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wseg); - - for (int i = 0; i < u; i++ ) { usk.update(i); } - - double est = usk.getEstimate(); - double ub = usk.getUpperBound(1); - double lb = usk.getLowerBound(1); - assertTrue(ub > est); - assertTrue(lb < est); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - - @Test - public void checkRebuild() { - int k = 512; - int u = 4*k; - try (Arena arena = Arena.ofConfined()) { - MemorySegment wseg = makeNativeMemorySegment(k, arena); - - UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wseg); - DirectQuickSelectSketch sk1 = (DirectQuickSelectSketch)usk; //for internal checks - - assertTrue(usk.isEmpty()); - - for (int i = 0; i< u; i++) { usk.update(i); } - - assertFalse(usk.isEmpty()); - assertTrue(usk.getEstimate() > 0.0); - assertTrue(sk1.getRetainedEntries(false) > k); - - sk1.rebuild(); - assertEquals(sk1.getRetainedEntries(false), k); - assertEquals(sk1.getRetainedEntries(true), k); - sk1.rebuild(); - 
assertEquals(sk1.getRetainedEntries(false), k); - assertEquals(sk1.getRetainedEntries(true), k); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - - @Test - public void checkResetAndStartingSubMultiple() { - int k = 512; - int u = 4*k; - try (Arena arena = Arena.ofConfined()) { - MemorySegment wseg = makeNativeMemorySegment(k, arena); - - UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wseg); - DirectQuickSelectSketch sk1 = (DirectQuickSelectSketch)usk; //for internal checks - - assertTrue(usk.isEmpty()); - - for (int i = 0; i< u; i++) { usk.update(i); } - - assertFalse(usk.isEmpty()); - assertTrue(sk1.getRetainedEntries(false) > k); - assertTrue(sk1.getThetaLong() < Long.MAX_VALUE); - - sk1.reset(); - assertTrue(usk.isEmpty()); - assertEquals(sk1.getRetainedEntries(false), 0); - assertEquals(usk.getEstimate(), 0.0, 0.0); - assertEquals(sk1.getThetaLong(), Long.MAX_VALUE); - - assertNotNull(sk1.getMemorySegment()); - assertFalse(sk1.isOrdered()); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - - @Test - public void checkExactModeMemorySegmentArr() { - int k = 4096; - int u = 4096; - try (Arena arena = Arena.ofConfined()) { - MemorySegment wseg = makeNativeMemorySegment(k, arena); - - UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wseg); - DirectQuickSelectSketch sk1 = (DirectQuickSelectSketch)usk; //for internal checks - assertTrue(usk.isEmpty()); - - for (int i = 0; i< u; i++) { usk.update(i); } - - assertEquals(usk.getEstimate(), u, 0.0); - assertEquals(sk1.getRetainedEntries(false), u); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - - @Test - public void checkEstModeMemorySegmentArr() { - int k = 4096; - int u = 2*k; - - try (Arena arena = Arena.ofConfined()) { - MemorySegment wseg = makeNativeMemorySegment(k, arena); - - UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wseg); - DirectQuickSelectSketch sk1 = 
(DirectQuickSelectSketch)usk; //for internal checks - assertTrue(usk.isEmpty()); - - for (int i = 0; i< u; i++) { usk.update(i); } - - assertEquals(usk.getEstimate(), u, u*.05); - assertTrue(sk1.getRetainedEntries(false) > k); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - - @Test - public void checkEstModeNativeMemorySegment() { - int k = 4096; - int u = 2*k; - int segCapacity = (k << 4) + (Family.QUICKSELECT.getMinPreLongs() << 3); - - try (Arena arena = Arena.ofConfined()) { - MemorySegment wseg = makeNativeMemorySegment(segCapacity, arena); - - UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wseg); - DirectQuickSelectSketch sk1 = (DirectQuickSelectSketch)usk; //for internal checks - assertTrue(usk.isEmpty()); - - for (int i = 0; i< u; i++) { usk.update(i); } - double est = usk.getEstimate(); - println(""+est); - assertEquals(usk.getEstimate(), u, u*.05); - assertTrue(sk1.getRetainedEntries(false) > k); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - - @Test - public void checkConstructReconstructFromMemorySegment() { - int k = 4096; - int u = 2*k; - try (Arena arena = Arena.ofConfined()) { - MemorySegment wseg = makeNativeMemorySegment(k, arena); - - UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wseg); - assertTrue(usk.isEmpty()); - - for (int i = 0; i< u; i++) { usk.update(i); } //force estimation - - double est1 = usk.getEstimate(); - int count1 = usk.getRetainedEntries(false); - assertEquals(est1, u, u*.05); - assertTrue(count1 >= k); - - byte[] serArr; - double est2; - int count2; - - serArr = usk.toByteArray(); - - MemorySegment seg2 = MemorySegment.ofArray(serArr); - - //reconstruct to Native/Direct - UpdateSketch usk2 = Sketches.wrapUpdateSketch(seg2); - - est2 = usk2.getEstimate(); - count2 = usk2.getRetainedEntries(false); - - assertEquals(count2, count1); - assertEquals(est2, est1, 0.0); - } catch (final Exception e) { - throw new RuntimeException(e); - 
} - } - - @Test(expectedExceptions = SketchesReadOnlyException.class) - public void updateAfterReadOnlyWrap() { - UpdateSketch usk1 = UpdateSketch.builder().build(); - UpdateSketch usk2 = (UpdateSketch) Sketch.wrap(MemorySegment.ofArray(usk1.toByteArray())); - usk2.update(0); - } - - public void updateAfterWritableWrap() { - UpdateSketch usk1 = UpdateSketch.builder().build(); - UpdateSketch usk2 = UpdateSketch.wrap(MemorySegment.ofArray(usk1.toByteArray())); - usk2.update(0); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkNegativeHashes() { - int k = 512; - UpdateSketch qs = UpdateSketch.builder().setFamily(QUICKSELECT).setNominalEntries(k).build(); - qs.hashUpdate(-1L); - } - - @Test - public void checkConstructorSrcSegCorruptions() { - int k = 1024; //lgNomLongs = 10 - int u = k; //exact mode, lgArrLongs = 11 - - int bytes = Sketches.getMaxUpdateSketchBytes(k); - byte[] arr1 = new byte[bytes]; - MemorySegment seg1 = MemorySegment.ofArray(arr1); - ResizeFactor rf = ResizeFactor.X1; //0 - UpdateSketch usk1 = UpdateSketch.builder().setNominalEntries(k).setResizeFactor(rf).build(seg1); - for (int i=0; i>> 1); //corrupt theta and - seg1.set(JAVA_BYTE, LG_ARR_LONGS_BYTE, (byte) 10); //corrupt lgArrLongs - try { - usk2 = DirectQuickSelectSketch.writableWrap(seg1, Util.DEFAULT_UPDATE_SEED); - fail("Expected SketchesArgumentException"); - } catch (SketchesArgumentException e) { - //pass - } - seg1.set(JAVA_LONG_UNALIGNED, THETA_LONG, Long.MAX_VALUE); //fix theta and - seg1.set(JAVA_BYTE, LG_ARR_LONGS_BYTE, (byte) 11); //fix lgArrLongs - byte badFlags = (byte) (BIG_ENDIAN_FLAG_MASK | COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | ORDERED_FLAG_MASK); - seg1.set(JAVA_BYTE, FLAGS_BYTE, badFlags); - try { - usk2 = DirectQuickSelectSketch.writableWrap(seg1, Util.DEFAULT_UPDATE_SEED); - fail("Expected SketchesArgumentException"); - } catch (SketchesArgumentException e) { - //pass - } - - byte[] arr2 = Arrays.copyOfRange(arr1, 0, bytes-1); 
//corrupt length - MemorySegment seg2 = MemorySegment.ofArray(arr2); - try { - usk2 = DirectQuickSelectSketch.writableWrap(seg2, Util.DEFAULT_UPDATE_SEED); - fail("Expected SketchesArgumentException"); - } catch (SketchesArgumentException e) { - //pass - } - } - - @Test - public void checkCorruptRFWithInsufficientArray() { - int k = 1024; //lgNomLongs = 10 - - int bytes = Sketches.getMaxUpdateSketchBytes(k); - byte[] arr = new byte[bytes]; - MemorySegment seg = MemorySegment.ofArray(arr); - ResizeFactor rf = ResizeFactor.X8; // 3 - UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).setResizeFactor(rf).build(seg); - usk.update(0); - - insertLgResizeFactor(seg, 0); // corrupt RF: X1 - UpdateSketch dqss = DirectQuickSelectSketch.writableWrap(seg, Util.DEFAULT_UPDATE_SEED); - assertEquals(dqss.getResizeFactor(), ResizeFactor.X2); // force-promote to X2 - } - - @Test - public void checkFamilyAndRF() { - int k = 16; - MemorySegment seg = MemorySegment.ofArray(new byte[(k*16) + 24]); - UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(seg); - assertEquals(sketch.getFamily(), Family.QUICKSELECT); - assertEquals(sketch.getResizeFactor(), ResizeFactor.X8); - } - - //checks Alex's bug where lgArrLongs > lgNomLongs +1. 
- @Test - public void checkResizeInBigSeg() { - int k = 1 << 14; - int u = 1 << 20; - MemorySegment seg = MemorySegment.ofArray(new byte[(8*k*16) +24]); - UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(seg); - for (int i=0; i 98663.0); - assertTrue(est < 101530.0); - } - - @Test - public void checkForDruidBug2() { //update union with just sketch segment reference - final int k = 16384; - final UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(); - for (int i = 0; i < 100000; i++) { - usk.update(Integer.toString(i)); - } - usk.rebuild(); //optional but created the symptom - final MemorySegment segIn = MemorySegment.ofArray(new byte[usk.getCompactBytes()]); - usk.compact(true, segIn); //side effect of loading the segIn - - //create empty target union in off-heap segment - final MemorySegment seg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); - final Union union1 = SetOperation.builder().setNominalEntries(k).buildUnion(seg); - - union1.union(segIn); - - final CompactSketch csk = union1.getResult(); - - assertTrue(csk.getTheta() < 0.2); - assertEquals(csk.getRetainedEntries(true), 16384); - final double est = csk.getEstimate(); - assertTrue(est > 98663.0); - assertTrue(est < 101530.0); - } - - @Test - public void printlnTest() { - println("PRINTING: "+this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(final String s) { - //System.out.println(s); //Disable here - } - -} diff --git a/src/test/java/org/apache/datasketches/theta2/EmptyTest.java b/src/test/java/org/apache/datasketches/theta2/EmptyTest.java deleted file mode 100644 index 01e16cf25..000000000 --- a/src/test/java/org/apache/datasketches/theta2/EmptyTest.java +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static org.apache.datasketches.theta2.PreambleUtil.COMPACT_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.ORDERED_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.READ_ONLY_FLAG_MASK; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertFalse; -import static org.testng.Assert.assertTrue; - -import java.lang.foreign.MemorySegment; -import org.testng.annotations.Test; - - -/** - * Empty essentially means that the sketch has never seen data. - * - * @author Lee Rhodes - */ -public class EmptyTest { - - @Test - public void checkEmpty() { - final UpdateSketch sk1 = Sketches.updateSketchBuilder().build(); - final UpdateSketch sk2 = Sketches.updateSketchBuilder().build(); - final Intersection inter = Sketches.setOperationBuilder().buildIntersection(); - - final int u = 100; - for (int i = 0; i < u; i++) { //disjoint - sk1.update(i); - sk2.update(i + u); - } - inter.intersect(sk1); - inter.intersect(sk2); - - final CompactSketch csk = inter.getResult(); - //The intersection of two disjoint, exact-mode sketches is empty, T == 1.0. 
- println(csk.toString()); - assertTrue(csk.isEmpty()); - - final AnotB aNotB = Sketches.setOperationBuilder().buildANotB(); - final CompactSketch csk2 = aNotB.aNotB(csk, sk1); - //The AnotB of an empty, T == 1.0 sketch with another exact-mode sketch is empty, T == 1.0 - assertTrue(csk2.isEmpty()); - } - - @Test - public void checkNotEmpty() { - final UpdateSketch sk1 = Sketches.updateSketchBuilder().build(); - final UpdateSketch sk2 = Sketches.updateSketchBuilder().build(); - final Intersection inter = Sketches.setOperationBuilder().buildIntersection(); - - final int u = 10000; //estimating - for (int i = 0; i < u; i++) { //disjoint - sk1.update(i); - sk2.update(i + u); - } - inter.intersect(sk1); - inter.intersect(sk2); - - final CompactSketch csk = inter.getResult(); - println(csk.toString()); - //The intersection of two disjoint, est-mode sketches is not-empty, T < 1.0. - assertFalse(csk.isEmpty()); - - AnotB aNotB = Sketches.setOperationBuilder().buildANotB(); - final CompactSketch csk2 = aNotB.aNotB(csk, sk1); //empty, T < 1.0; with est-mode sketch - println(csk2.toString()); - //The AnotB of an empty, T < 1.0 sketch with another exact-mode sketch is not-empty. - assertFalse(csk2.isEmpty()); - - final UpdateSketch sk3 = Sketches.updateSketchBuilder().build(); - aNotB = Sketches.setOperationBuilder().buildANotB(); - final CompactSketch csk3 = aNotB.aNotB(sk3, sk1); //empty, T == 1.0; with est-mode sketch - println(csk3.toString()); - //the AnotB of an empty, T == 1.0 sketch with another est-mode sketch is empty, T < 1.0 - assertTrue(csk3.isEmpty()); - } - - @Test - public void checkPsampling() { - final UpdateSketch sk1 = Sketches.updateSketchBuilder().setP(.5F).build(); - assertTrue(sk1.isEmpty()); - //An empty P-sampling sketch where T < 1.0 and has never seen data is also empty - // and will have a full preamble of 24 bytes. But when compacted, theta returns to 1.0, so - // it will be stored as only 8 bytes. 
- assertEquals(sk1.compact().toByteArray().length, 8); - } - - //These 3 tests reproduce a failure mode where an "old" empty sketch of 8 bytes without - // its empty-flag bit set is read. - @Test - public void checkBackwardCompatibility1() { - final int k = 16; - final int bytes = Sketches.getMaxUnionBytes(k); //288 - final Union union = SetOperation.builder().buildUnion(MemorySegment.ofArray(new byte[bytes])); - final MemorySegment seg = badEmptySk(); - final Sketch wsk = Sketches.wrapSketch(seg); - union.union(wsk); //union has segment - } - - @Test - public void checkBackwardCompatibility2() { - final Union union = SetOperation.builder().setNominalEntries(16).buildUnion(); - final MemorySegment seg = badEmptySk(); - final Sketch wsk = Sketches.wrapSketch(seg); - union.union(wsk); //heap union - } - - @Test - public void checkBackwardCompatibility3() { - final MemorySegment seg = badEmptySk(); - Sketches.heapifySketch(seg); - } - - @Test - public void checkEmptyToCompact() { - final UpdateSketch sk1 = Sketches.updateSketchBuilder().build(); - final CompactSketch csk = sk1.compact(); - assertTrue(csk instanceof EmptyCompactSketch); - final CompactSketch csk2 = csk.compact(); - assertTrue(csk2 instanceof EmptyCompactSketch); - final CompactSketch csk3 = csk.compact(true, MemorySegment.ofArray(new byte[8])); - assertTrue(csk3 instanceof DirectCompactSketch); - assertEquals(csk2.getCurrentPreambleLongs(), 1); - } - - - //SerVer 2 had an empty sketch where preLongs = 1, but empty bit was not set. 
- private static MemorySegment badEmptySk() { - final long preLongs = 1; - final long serVer = 2; - final long family = 3; //compact - final long flags = ORDERED_FLAG_MASK | COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK; - final long seedHash = 0x93CC; - final long badEmptySk = seedHash << 48 | flags << 40 - | family << 16 | serVer << 8 | preLongs; - final MemorySegment wseg = MemorySegment.ofArray(new byte[8]); - wseg.set(JAVA_LONG_UNALIGNED, 0, badEmptySk); - return wseg; - } - - /** - * @param s value to print - */ - static void println(final String s) { - //System.out.println(s); //disable here - } - -} diff --git a/src/test/java/org/apache/datasketches/theta2/ExamplesTest.java b/src/test/java/org/apache/datasketches/theta2/ExamplesTest.java deleted file mode 100644 index faba72701..000000000 --- a/src/test/java/org/apache/datasketches/theta2/ExamplesTest.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import org.testng.annotations.Test; - -/** - * @author Lee Rhodes - */ -public class ExamplesTest { - - @Test - public void simpleCountingSketch() { - final int k = 4096; - final int u = 1000000; - - final UpdateSketch sketch = UpdateSketch.builder().setNominalEntries(k).build(); - for (int i = 0; i < u; i++) { - sketch.update(i); - } - - println(sketch.toString()); - } - /* -### HeapQuickSelectSketch SUMMARY: - Nominal Entries (k) : 4096 - Estimate : 1002714.745231455 - Upper Bound, 95% conf : 1027777.3354974985 - Lower Bound, 95% conf : 978261.4472857157 - p : 1.0 - Theta (double) : 0.00654223948655085 - Theta (long) : 60341508738660257 - Theta (long, hex : 00d66048519437a1 - EstMode? : true - Empty? : false - Resize Factor : 8 - Array Size Entries : 8192 - Retained Entries : 6560 - Update Seed : 9001 - Seed Hash : ffff93cc -### END SKETCH SUMMARY - */ - - @Test - public void theta2dot0Examples() { - //Load source sketches - final UpdateSketchBuilder bldr = UpdateSketch.builder(); - final UpdateSketch skA = bldr.build(); - final UpdateSketch skB = bldr.build(); - for (int i = 1; i <= 1000; i++) { - skA.update(i); - skB.update(i + 250); - } - - //Union Stateless: - Union union = SetOperation.builder().buildUnion(); - CompactSketch csk = union.union(skA, skB); - assert csk.getEstimate() == 1250; - - //Union Stateful: - union = SetOperation.builder().buildUnion(); - union.union(skA); //first call - union.union(skB); //2nd through nth calls - //... - csk = union.getResult(); - assert csk.getEstimate() == 1250; - - //Intersection Stateless: - Intersection inter = SetOperation.builder().buildIntersection(); - csk = inter.intersect(skA, skB); - assert csk.getEstimate() == 750; - - //Intersection Stateful: - inter = SetOperation.builder().buildIntersection(); - inter.intersect(skA); //first call - inter.intersect(skB); //2nd through nth calls - //... 
- csk = inter.getResult(); - assert csk.getEstimate() == 750; - - //AnotB Stateless: - AnotB diff = SetOperation.builder().buildANotB(); - csk = diff.aNotB(skA, skB); - assert csk.getEstimate() == 250; - - //AnotB Stateful: - diff = SetOperation.builder().buildANotB(); - diff.setA(skA); //first call - diff.notB(skB); //2nd through nth calls - //... - csk = diff.getResult(true); - assert csk.getEstimate() == 250; - } - - @Test - public void printlnTest() { - println("PRINTING: "+this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(final String s) { - //System.out.println(s); //enable/disable here - } - -} diff --git a/src/test/java/org/apache/datasketches/theta2/ForwardCompatibilityTest.java b/src/test/java/org/apache/datasketches/theta2/ForwardCompatibilityTest.java deleted file mode 100644 index a90703a68..000000000 --- a/src/test/java/org/apache/datasketches/theta2/ForwardCompatibilityTest.java +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static org.apache.datasketches.theta2.BackwardConversions.convertSerVer3toSerVer1; -import static org.apache.datasketches.theta2.BackwardConversions.convertSerVer3toSerVer2; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertTrue; - -import java.lang.foreign.MemorySegment; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; -import org.testng.annotations.Test; - -/** - * @author Lee Rhodes - */ -public class ForwardCompatibilityTest { - - @Test - public void checkSerVer1_Empty() { - CompactSketch csk = EmptyCompactSketch.getInstance(); - MemorySegment srcSeg = convertSerVer3toSerVer1(csk).asReadOnly(); - Sketch sketch = Sketch.heapify(srcSeg); - assertEquals(sketch.isEmpty(), true); - assertEquals(sketch.isEstimationMode(), false); - assertEquals(sketch.isDirect(), false); - assertEquals(sketch.hasMemorySegment(), false); - assertEquals(sketch.isCompact(), true); - assertEquals(sketch.isOrdered(), true); - assertTrue(sketch instanceof EmptyCompactSketch); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkSerVer1_badPrelongs() { - CompactSketch csk = EmptyCompactSketch.getInstance(); - - MemorySegment srcWseg = convertSerVer3toSerVer1(csk); - MemorySegment srcseg = srcWseg.asReadOnly(); - srcWseg.set(JAVA_BYTE, 0, (byte) 1); - Sketch.heapify(srcWseg); //throws because bad preLongs - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkSerVer1_tooSmall() { - UpdateSketch usk = Sketches.updateSketchBuilder().build(); - usk.update(1); - usk.update(2); - CompactSketch csk = usk.compact(true, null); - MemorySegment srcSeg = convertSerVer3toSerVer1(csk).asReadOnly(); - 
MemorySegment srcSeg2 = srcSeg.asSlice(0, srcSeg.byteSize() - 8); - Sketch.heapify(srcSeg2); //throws because too small - } - - - @Test - public void checkSerVer1_1Value() { - UpdateSketch usk = Sketches.updateSketchBuilder().build(); - usk.update(1); - CompactSketch csk = usk.compact(true, null); - MemorySegment srcSeg = convertSerVer3toSerVer1(csk).asReadOnly(); - Sketch sketch = Sketch.heapify(srcSeg); - assertEquals(sketch.isEmpty(), false); - assertEquals(sketch.isEstimationMode(), false); - assertEquals(sketch.isDirect(), false); - assertEquals(sketch.hasMemorySegment(), false); - assertEquals(sketch.isCompact(), true); - assertEquals(sketch.isOrdered(), true); - assertEquals(sketch.getEstimate(), 1.0); - assertTrue(sketch instanceof SingleItemSketch); - } - - @Test - public void checkSerVer2_1PreLong_Empty() { - CompactSketch csk = EmptyCompactSketch.getInstance(); - MemorySegment srcSeg = convertSerVer3toSerVer2(csk, Util.DEFAULT_UPDATE_SEED).asReadOnly(); - Sketch sketch = Sketch.heapify(srcSeg); - assertEquals(sketch.isEmpty(), true); - assertEquals(sketch.isEstimationMode(), false); - assertEquals(sketch.isDirect(), false); - assertEquals(sketch.hasMemorySegment(), false); - assertEquals(sketch.isCompact(), true); - assertEquals(sketch.isOrdered(), true); - assertTrue(sketch instanceof EmptyCompactSketch); - } - - @Test - public void checkSerVer2_2PreLongs_Empty() { - UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build(); - for (int i = 0; i < 2; i++) { usk.update(i); } //exact mode - CompactSketch csk = usk.compact(true, null); - MemorySegment srcSeg = convertSerVer3toSerVer2(csk, Util.DEFAULT_UPDATE_SEED).asReadOnly(); - - MemorySegment srcWseg = MemorySegment.ofArray(new byte[16]); - MemorySegment.copy(srcSeg, 0, srcWseg, 0, 16); - PreambleUtil.setEmpty(srcWseg); //Force - assertTrue(PreambleUtil.isEmptyFlag(srcWseg)); - srcWseg.set(JAVA_INT_UNALIGNED, 8, 0); //corrupt curCount = 0 - - Sketch sketch = 
Sketch.heapify(srcWseg); - assertTrue(sketch instanceof EmptyCompactSketch); - } - - @Test - public void checkSerVer2_3PreLongs_Empty() { - UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build(); - for (int i = 0; i < 32; i++) { usk.update(i); } //est mode - CompactSketch csk = usk.compact(true, null); - MemorySegment srcSeg = convertSerVer3toSerVer2(csk, Util.DEFAULT_UPDATE_SEED).asReadOnly(); - - MemorySegment srcWseg = MemorySegment.ofArray(new byte[24]); - MemorySegment.copy(srcSeg, 0, srcWseg, 0, 24); - PreambleUtil.setEmpty(srcWseg); //Force - assertTrue(PreambleUtil.isEmptyFlag(srcWseg)); - srcWseg.set(JAVA_INT_UNALIGNED, 8, 0); //corrupt curCount = 0 - srcWseg.set(JAVA_LONG_UNALIGNED, 16, Long.MAX_VALUE); //corrupt to make it look empty - - Sketch sketch = Sketch.heapify(srcWseg); //now serVer=3, EmptyCompactSketch - assertTrue(sketch instanceof EmptyCompactSketch); - } - - @Test - public void checkSerVer2_2PreLongs_1Value() { - UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build(); - usk.update(1); //exact mode - CompactSketch csk = usk.compact(true, null); - MemorySegment srcSeg = convertSerVer3toSerVer2(csk, Util.DEFAULT_UPDATE_SEED).asReadOnly(); - - Sketch sketch = Sketch.heapify(srcSeg); - assertEquals(sketch.isEmpty(), false); - assertEquals(sketch.isEstimationMode(), false); - assertEquals(sketch.isDirect(), false); - assertEquals(sketch.hasMemorySegment(), false); - assertEquals(sketch.isCompact(), true); - assertEquals(sketch.isOrdered(), true); - assertTrue(sketch instanceof SingleItemSketch); - } - - @Test - public void checkSerVer2_3PreLongs_1Value() { - UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build(); - for (int i = 0; i < 32; i++) { usk.update(i); } //est mode - CompactSketch csk = usk.compact(true, null); - MemorySegment srcSeg = convertSerVer3toSerVer2(csk, Util.DEFAULT_UPDATE_SEED).asReadOnly(); - - MemorySegment srcWseg = MemorySegment.ofArray(new 
byte[32]); - MemorySegment.copy(srcSeg, 0, srcWseg, 0, 32); - srcWseg.set(JAVA_INT_UNALIGNED, 8, 1); //corrupt curCount = 1 - srcWseg.set(JAVA_LONG_UNALIGNED, 16, Long.MAX_VALUE); //corrupt theta to make it look exact - long[] cache = csk.getCache(); - srcWseg.set(JAVA_LONG_UNALIGNED, 24, cache[0]); //corrupt cache with only one value - - Sketch sketch = Sketch.heapify(srcWseg); - assertEquals(sketch.isEmpty(), false); - assertEquals(sketch.isEstimationMode(), false); - assertEquals(sketch.isDirect(), false); - assertEquals(sketch.hasMemorySegment(), false); - assertEquals(sketch.isCompact(), true); - assertEquals(sketch.isOrdered(), true); - assertTrue(sketch instanceof SingleItemSketch); - } - - @Test - public void checkSerVer2_3PreLongs_1Value_ThLessthan1() { - UpdateSketch usk = Sketches.updateSketchBuilder().setLogNominalEntries(4).build(); - for (int i = 0; i < 32; i++) { usk.update(i); } //est mode - CompactSketch csk = usk.compact(true, null); - MemorySegment srcSeg = convertSerVer3toSerVer2(csk, Util.DEFAULT_UPDATE_SEED).asReadOnly(); - - MemorySegment srcWseg = MemorySegment.ofArray(new byte[32]); - MemorySegment.copy(srcSeg, 0, srcWseg, 0, 32); - srcWseg.set(JAVA_INT_UNALIGNED, 8, 1); //corrupt curCount = 1 - long[] cache = csk.getCache(); - srcWseg.set(JAVA_LONG_UNALIGNED, 24, cache[0]); //corrupt cache with only one value - - Sketch sketch = Sketch.heapify(srcWseg); - assertEquals(sketch.isEmpty(), false); - assertEquals(sketch.isEstimationMode(), true); - assertEquals(sketch.isDirect(), false); - assertEquals(sketch.hasMemorySegment(), false); - assertEquals(sketch.isCompact(), true); - assertEquals(sketch.isOrdered(), true); - assertTrue(sketch instanceof HeapCompactSketch); - } - - @Test - public void printlnTest() { - println("PRINTING: "+this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(String s) { - //System.out.println(s); //disable here - } - -} diff --git 
a/src/test/java/org/apache/datasketches/theta2/HeapAlphaSketchTest.java b/src/test/java/org/apache/datasketches/theta2/HeapAlphaSketchTest.java deleted file mode 100644 index 5a6b828f7..000000000 --- a/src/test/java/org/apache/datasketches/theta2/HeapAlphaSketchTest.java +++ /dev/null @@ -1,697 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static org.apache.datasketches.common.Family.ALPHA; -import static org.apache.datasketches.common.ResizeFactor.X1; -import static org.apache.datasketches.common.ResizeFactor.X2; -import static org.apache.datasketches.common.ResizeFactor.X8; -import static org.apache.datasketches.common.Util.clear; -import static org.apache.datasketches.theta2.PreambleUtil.FAMILY_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.FLAGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.LG_NOM_LONGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.PREAMBLE_LONGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.SER_VER_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.THETA_LONG; -import static org.apache.datasketches.theta2.PreambleUtil.insertLgResizeFactor; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertFalse; -import static org.testng.Assert.assertNotEquals; -import static org.testng.Assert.assertNull; -import static org.testng.Assert.assertTrue; -import static org.testng.Assert.fail; - -import java.lang.foreign.MemorySegment; -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.ThetaUtil; -import org.testng.annotations.Test; - -/** - * @author Lee Rhodes - */ -public class HeapAlphaSketchTest { - private Family fam_ = ALPHA; - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkBadSerVer() { - int k = 512; - int u = k; - long seed = Util.DEFAULT_UPDATE_SEED; - UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed) - .setNominalEntries(k).build(); - 
HeapAlphaSketch sk1 = (HeapAlphaSketch)usk; //for internal checks - - assertTrue(usk.isEmpty()); - - for (int i = 0; i< u; i++) { - sk1.update(i); - } - - assertFalse(usk.isEmpty()); - assertEquals(usk.getEstimate(), u, 0.0); - assertEquals(sk1.getRetainedEntries(false), u); - - byte[] byteArray = usk.toByteArray(); - MemorySegment seg = MemorySegment.ofArray(byteArray); - seg.set(JAVA_BYTE, SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte - - Sketch.heapify(seg, seed); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkConstructorKtooSmall() { - int k = 256; - UpdateSketch.builder().setFamily(fam_).setNominalEntries(k).build(); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkAlphaIncompatibleWithSeg() { - MemorySegment seg = MemorySegment.ofArray(new byte[(512*16)+24]); - UpdateSketch.builder().setFamily(Family.ALPHA).setNominalEntries(512).build(seg); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkIllegalSketchID_UpdateSketch() { - int k = 512; - int u = k; - long seed = Util.DEFAULT_UPDATE_SEED; - UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed) - .setNominalEntries(k).build(); - HeapAlphaSketch sk1 = (HeapAlphaSketch)usk; //for internal checks - assertTrue(usk.isEmpty()); - - for (int i = 0; i< u; i++) { - usk.update(i); - } - - assertFalse(usk.isEmpty()); - assertEquals(usk.getEstimate(), u, 0.0); - assertEquals(sk1.getRetainedEntries(false), u); - byte[] byteArray = usk.toByteArray(); - MemorySegment seg = MemorySegment.ofArray(byteArray); - seg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte - - //try to heapify the corrupted seg - Sketch.heapify(seg, seed); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkHeapifySeedConflict() { - int k = 512; - long seed1 = 1021; - long seed2 = Util.DEFAULT_UPDATE_SEED; - UpdateSketch usk = 
UpdateSketch.builder().setFamily(fam_).setSeed(seed1) - .setNominalEntries(k).build(); - byte[] byteArray = usk.toByteArray(); - MemorySegment srcSeg = MemorySegment.ofArray(byteArray).asReadOnly(); - Sketch.heapify(srcSeg, seed2); - } - - @Test - public void checkHeapifyByteArrayExact() { - int k = 512; - int u = k; - long seed = Util.DEFAULT_UPDATE_SEED; - UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setSeed(seed) - .setNominalEntries(k).build(); - - for (int i=0; i k); - } - - @Test - public void checkSamplingMode() { - int k = 4096; - int u = k; - float p = (float)0.5; - - UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setP(p) - .setNominalEntries(k).build(); - HeapAlphaSketch sk1 = (HeapAlphaSketch)usk; //for internal checks - - for (int i = 0; i < u; i++ ) { - usk.update(i); - } - - double p2 = sk1.getP(); - double theta = sk1.getTheta(); - assertTrue(theta <= p2); - - double est = usk.getEstimate(); - double kdbl = k; - assertEquals(kdbl, est, kdbl*.05); - double ub = usk.getUpperBound(1); - assertTrue(ub > est); - double lb = usk.getLowerBound(1); - assertTrue(lb < est); - } - - @Test - public void checkErrorBounds() { - int k = 512; - - UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setResizeFactor(X1) - .setNominalEntries(k).build(); - - //Exact mode - for (int i = 0; i < k; i++ ) { - usk.update(i); - } - - double est = usk.getEstimate(); - double lb = usk.getLowerBound(2); - double ub = usk.getUpperBound(2); - assertEquals(est, ub, 0.0); - assertEquals(est, lb, 0.0); - - //Est mode - int u = 10*k; - for (int i = k; i < u; i++ ) { - usk.update(i); - usk.update(i); //test duplicate rejection - } - est = usk.getEstimate(); - lb = usk.getLowerBound(2); - ub = usk.getUpperBound(2); - assertTrue(est <= ub); - assertTrue(est >= lb); - } - - //Empty Tests - @Test - public void checkEmptyAndP() { - //virgin, p = 1.0 - int k = 1024; - - UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setNominalEntries(k).build(); - 
HeapAlphaSketch sk1 = (HeapAlphaSketch)usk; //for internal checks - - assertTrue(usk.isEmpty()); - usk.update(1); - assertEquals(sk1.getRetainedEntries(true), 1); - assertFalse(usk.isEmpty()); - - //virgin, p = .001 - UpdateSketch usk2 = UpdateSketch.builder().setFamily(fam_).setP((float)0.001) - .setNominalEntries(k).build(); - sk1 = (HeapAlphaSketch)usk2; - assertTrue(usk2.isEmpty()); - usk2.update(1); //will be rejected - assertEquals(sk1.getRetainedEntries(true), 0); - assertFalse(usk2.isEmpty()); - double est = usk2.getEstimate(); - //println("Est: "+est); - assertEquals(est, 0.0, 0.0); //because curCount = 0 - double ub = usk2.getUpperBound(2); //huge because theta is tiny! - //println("UB: "+ub); - assertTrue(ub > 0.0); - double lb = usk2.getLowerBound(2); - assertTrue(lb <= est); - //println("LB: "+lb); - } - - @Test - public void checkUpperAndLowerBounds() { - int k = 512; - int u = 2*k; - - UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setResizeFactor(X2) - .setNominalEntries(k).build(); - - for (int i = 0; i < u; i++ ) { - usk.update(i); - } - - double est = usk.getEstimate(); - double ub = usk.getUpperBound(1); - double lb = usk.getLowerBound(1); - assertTrue(ub > est); - assertTrue(lb < est); - } - - @Test - public void checkRebuild() { - int k = 512; - int u = 4*k; - - UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setNominalEntries(k).build(); - HeapAlphaSketch sk1 = (HeapAlphaSketch)usk; //for internal checks - - assertTrue(usk.isEmpty()); - - for (int i = 0; i< u; i++) { - usk.update(i); - } - - assertFalse(usk.isEmpty()); - assertTrue(usk.getEstimate() > 0.0); - assertNotEquals(sk1.getRetainedEntries(false), sk1.getRetainedEntries(true)); - - sk1.rebuild(); - assertEquals(sk1.getRetainedEntries(false), sk1.getRetainedEntries(true)); - sk1.rebuild(); - assertEquals(sk1.getRetainedEntries(false), sk1.getRetainedEntries(true)); - } - - @Test - public void checkResetAndStartingSubMultiple() { - int k = 1024; - int u = 4*k; - - 
UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setResizeFactor(X8) - .setNominalEntries(k).build(); - HeapAlphaSketch sk1 = (HeapAlphaSketch)usk; //for internal checks - - assertTrue(usk.isEmpty()); - - for (int i=0; i k); - println("Est: "+est); - } - - @Test - public void checkHeapifyAndWrap() { - final int lgK = 9; - final int k = 1< k); // in general it might be exactly k, but in this case must be greater - } - - @Test - public void checkSamplingMode() { - int k = 4096; - int u = k; - float p = (float)0.5; - - UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setP(p).setNominalEntries(k).build(); - HeapQuickSelectSketch sk1 = (HeapQuickSelectSketch)usk; //for internal checks - - for (int i = 0; i < u; i++ ) { - usk.update(i); - } - - double p2 = sk1.getP(); - double theta = sk1.getTheta(); - assertTrue(theta <= p2); - - double est = usk.getEstimate(); - double kdbl = k; - assertEquals(kdbl, est, kdbl*.05); - double ub = usk.getUpperBound(1); - assertTrue(ub > est); - double lb = usk.getLowerBound(1); - assertTrue(lb < est); - } - - @Test - public void checkErrorBounds() { - int k = 512; - - UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setResizeFactor(X1).setNominalEntries(k).build(); - - //Exact mode - for (int i = 0; i < k; i++ ) { - usk.update(i); - } - - double est = usk.getEstimate(); - double lb = usk.getLowerBound(2); - double ub = usk.getUpperBound(2); - assertEquals(est, ub, 0.0); - assertEquals(est, lb, 0.0); - - //Est mode - int u = 10*k; - for (int i = k; i < u; i++ ) { - usk.update(i); - usk.update(i); //test duplicate rejection - } - est = usk.getEstimate(); - lb = usk.getLowerBound(2); - ub = usk.getUpperBound(2); - assertTrue(est <= ub); - assertTrue(est >= lb); - } - - //Empty Tests - @Test - public void checkEmptyAndP() { - //virgin, p = 1.0 - int k = 1024; - - UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setNominalEntries(k).build(); - HeapQuickSelectSketch sk1 = (HeapQuickSelectSketch)usk; //for 
internal checks - - assertTrue(usk.isEmpty()); - usk.update(1); - assertEquals(sk1.getRetainedEntries(true), 1); - assertFalse(usk.isEmpty()); - - //virgin, p = .001 - UpdateSketch usk2 = UpdateSketch.builder().setFamily(fam_).setP((float)0.001).setNominalEntries(k).build(); - sk1 = (HeapQuickSelectSketch)usk2; - assertTrue(usk2.isEmpty()); - usk2.update(1); //will be rejected - assertEquals(sk1.getRetainedEntries(true), 0); - assertFalse(usk2.isEmpty()); - double est = usk2.getEstimate(); - //println("Est: "+est); - assertEquals(est, 0.0, 0.0); //because curCount = 0 - double ub = usk2.getUpperBound(2); //huge because theta is tiny! - //println("UB: "+ub); - assertTrue(ub > 0.0); - double lb = usk2.getLowerBound(2); - assertTrue(lb <= est); - //println("LB: "+lb); - } - - @Test - public void checkUpperAndLowerBounds() { - int k = 512; - int u = 2*k; - - UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setResizeFactor(X2).setNominalEntries(k).build(); - - for (int i = 0; i < u; i++ ) { - usk.update(i); - } - - double est = usk.getEstimate(); - double ub = usk.getUpperBound(1); - double lb = usk.getLowerBound(1); - assertTrue(ub > est); - assertTrue(lb < est); - } - - @Test - public void checkRebuild() { - int k = 16; - int u = 4*k; - - UpdateSketch usk = UpdateSketch.builder().setFamily(fam_).setNominalEntries(k).build(); - HeapQuickSelectSketch sk1 = (HeapQuickSelectSketch)usk; //for internal checks - - assertTrue(usk.isEmpty()); - - for (int i = 0; i< u; i++) { - usk.update(i); - } - - assertFalse(usk.isEmpty()); - assertTrue(usk.getEstimate() > 0.0); - assertTrue(sk1.getRetainedEntries(false) > k); - - sk1.rebuild(); - assertEquals(sk1.getRetainedEntries(false), k); - assertEquals(sk1.getRetainedEntries(true), k); - sk1.rebuild(); - assertEquals(sk1.getRetainedEntries(false), k); - assertEquals(sk1.getRetainedEntries(true), k); - } - - @Test - public void checkResetAndStartingSubMultiple() { - int k = 1024; - int u = 4*k; - - UpdateSketch usk = 
UpdateSketch.builder().setFamily(fam_).setResizeFactor(X8).setNominalEntries(k).build(); - HeapQuickSelectSketch sk1 = (HeapQuickSelectSketch)usk; //for internal checks - - assertTrue(usk.isEmpty()); - - for (int i=0; i threshold; - println("null \t null:\t" + state + "\t" + jaccardString(jResults)); - assertFalse(state); - - state = exactlyEqual(null, null); - assertFalse(state); - - UpdateSketch measured = UpdateSketch.builder().setNominalEntries(minK).build(); - UpdateSketch expected = UpdateSketch.builder().setNominalEntries(minK).build(); - - //check both empty - jResults = jaccard(measured, expected); - state = jResults[1] > threshold; - println("empty\tempty:\t" + state + "\t" + jaccardString(jResults)); - assertTrue(state); - - state = exactlyEqual(measured, expected); - assertTrue(state); - - state = exactlyEqual(measured, measured); - assertTrue(state); - - //adjust one - expected.update(1); - jResults = jaccard(measured, expected); - state = jResults[1] > threshold; - println("empty\t 1:\t" + state + "\t" + jaccardString(jResults)); - assertFalse(state); - - state = exactlyEqual(measured, expected); - assertFalse(state); - - println(""); - } - - @Test - public void checkExactMode() { - int k = 1 << 12; - int u = k; - double threshold = 0.9999; - println("Exact Mode, minK: " + k + "\t Th: " + threshold); - - UpdateSketch measured = UpdateSketch.builder().setNominalEntries(k).build(); - UpdateSketch expected = UpdateSketch.builder().setNominalEntries(k).build(); - - for (int i = 0; i < (u-1); i++) { //one short - measured.update(i); - expected.update(i); - } - - double[] jResults = jaccard(measured, expected); - boolean state = jResults[1] > threshold; - println(state + "\t" + jaccardString(jResults)); - assertTrue(state); - - state = exactlyEqual(measured, expected); - assertTrue(state); - - measured.update(u-1); //now exactly k entries - expected.update(u); //now exactly k entries but differs by one - jResults = jaccard(measured, expected); - state = 
jResults[1] > threshold; - println(state + "\t" + jaccardString(jResults)); - assertFalse(state); - - state = exactlyEqual(measured, expected); - assertFalse(state); - - println(""); - } - - @Test - public void checkEstMode() { - int k = 1 << 12; - int u = 1 << 20; - double threshold = 0.9999; - println("Estimation Mode, minK: " + k + "\t Th: " + threshold); - - UpdateSketch measured = UpdateSketch.builder().setNominalEntries(k).build(); - UpdateSketch expected = UpdateSketch.builder().setNominalEntries(k).build(); - - for (int i = 0; i < u; i++) { - measured.update(i); - expected.update(i); - } - - double[] jResults = jaccard(measured, expected); - boolean state = jResults[1] > threshold; - println(state + "\t" + jaccardString(jResults)); - assertTrue(state); - - state = exactlyEqual(measured, expected); - assertTrue(state); - - for (int i = u; i < (u + 50); i++) { //empirically determined - measured.update(i); - } - - jResults = jaccard(measured, expected); - state = jResults[1] >= threshold; - println(state + "\t" + jaccardString(jResults)); - assertFalse(state); - - state = exactlyEqual(measured, expected); - assertFalse(state); - - println(""); - } - - /** - * Enable printing on this test and you will see that the distribution is pretty tight, - * about +/- 0.7%, which is pretty good since the accuracy of the underlying sketch is about - * +/- 1.56%. 
- */ - @Test - public void checkSimilarity() { - int minK = 1 << 12; - int u1 = 1 << 20; - int u2 = (int) (u1 * 0.95); - double threshold = 0.943; - println("Estimation Mode, minK: " + minK + "\t Th: " + threshold); - - UpdateSketch expected = UpdateSketch.builder().setNominalEntries(minK).build(); - UpdateSketch measured = UpdateSketch.builder().setNominalEntries(minK).build(); - - for (int i = 0; i < u1; i++) { - expected.update(i); - } - - for (int i = 0; i < u2; i++) { - measured.update(i); - } - - double[] jResults = JaccardSimilarity.jaccard(measured, expected); - boolean state = JaccardSimilarity.similarityTest(measured, expected, threshold); - println(state + "\t" + jaccardString(jResults)); - assertTrue(state); - //check identity case - state = JaccardSimilarity.similarityTest(measured, measured, threshold); - assertTrue(state); - } - - /** - * Enable printing on this test and you will see that the distribution is much looser, - * about +/- 14%. This is due to the fact that intersections loose accuracy as the ratio of - * intersection to the union becomes a small number. 
- */ - @Test - public void checkDissimilarity() { - int minK = 1 << 12; - int u1 = 1 << 20; - int u2 = (int) (u1 * 0.05); - double threshold = 0.061; - println("Estimation Mode, minK: " + minK + "\t Th: " + threshold); - - UpdateSketch expected = UpdateSketch.builder().setNominalEntries(minK).build(); - UpdateSketch measured = UpdateSketch.builder().setNominalEntries(minK).build(); - - for (int i = 0; i < u1; i++) { - expected.update(i); - } - - for (int i = 0; i < u2; i++) { - measured.update(i); - } - - double[] jResults = JaccardSimilarity.jaccard(measured, expected); - boolean state = JaccardSimilarity.dissimilarityTest(measured, expected, threshold); - println(state + "\t" + jaccardString(jResults)); - assertTrue(state); - } - - private static String jaccardString(double[] jResults) { - double lb = jResults[0]; - double est = jResults[1]; - double ub = jResults[2]; - return lb + "\t" + est + "\t" + ub + "\t" + ((lb/est) - 1.0) + "\t" + ((ub/est) - 1.0); - } - - @Test - public void checkMinK() { - UpdateSketch skA = UpdateSketch.builder().build(); //4096 - UpdateSketch skB = UpdateSketch.builder().build(); //4096 - skA.update(1); - skB.update(1); - double[] result = JaccardSimilarity.jaccard(skA, skB); - println(result[0] + ", " + result[1] + ", " + result[2]); - for (int i = 1; i < 4096; i++) { - skA.update(i); - skB.update(i); - } - result = JaccardSimilarity.jaccard(skA, skB); - println(result[0] + ", " + result[1] + ", " + result[2]); - } - - @Test - public void printlnTest() { - println("PRINTING: "+this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(String s) { - //System.out.println(s); //disable here - } - -} diff --git a/src/test/java/org/apache/datasketches/theta2/PairwiseSetOperationsTest.java b/src/test/java/org/apache/datasketches/theta2/PairwiseSetOperationsTest.java deleted file mode 100644 index c0d9faeed..000000000 --- a/src/test/java/org/apache/datasketches/theta2/PairwiseSetOperationsTest.java +++ 
/dev/null @@ -1,410 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.fail; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.testng.annotations.Test; - -public class PairwiseSetOperationsTest { - - // Intersection - - @Test - public void checkIntersectionNoOverlap() { - int lgK = 9; - int k = 1< k); - println(quick1.toString()); - println(PreambleUtil.preambleToString(seg)); - - final MemorySegment uSeg = MemorySegment.ofArray(new byte[getMaxUnionBytes(k)]); - final Union union = SetOperation.builder().setNominalEntries(k).buildUnion(uSeg); - union.union(quick1); - println(PreambleUtil.preambleToString(uSeg)); - } - - @Test - public void checkToStringWithPrelongsOf2() { - final int k = 16; - final int u = k; - final UpdateSketch quick1 = UpdateSketch.builder().setNominalEntries(k).build(); - for (int i = 0; i< u; i++) { - quick1.update(i); - } - final byte[] bytes = quick1.compact().toByteArray(); - println(Sketch.toString(bytes)); - } - - @Test - public void checkPreambleToStringExceptions() { - byte[] byteArr = new byte[7]; - try { //check 
preLongs < 8 fails - Sketch.toString(byteArr); - fail("Did not throw SketchesArgumentException."); - } catch (final SketchesArgumentException e) { - //expected - } - byteArr = new byte[8]; - byteArr[0] = (byte) 2; //needs min capacity of 16 - try { //check preLongs == 2 fails - Sketch.toString(MemorySegment.ofArray(byteArr).asReadOnly()); - fail("Did not throw SketchesArgumentException."); - } catch (final SketchesArgumentException e) { - //expected - } - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkBadSeedHashFromSeed() { - //In the first 64K values 50541 produces a seedHash of 0, - Util.computeSeedHash(50541); - } - - @Test - public void checkPreLongs() { - final UpdateSketch sketch = UpdateSketch.builder().setNominalEntries(16).build(); - CompactSketch comp = sketch.compact(false, null); - byte[] byteArr = comp.toByteArray(); - println(Sketch.toString(byteArr)); //PreLongs = 1 - - sketch.update(1); - comp = sketch.compact(false, null); - byteArr = comp.toByteArray(); - println(Sketch.toString(byteArr)); //PreLongs = 2 - - for (int i=2; i<=32; i++) { - sketch.update(i); - } - comp = sketch.compact(false, null); - byteArr = comp.toByteArray(); - println(Sketch.toString(MemorySegment.ofArray(byteArr).asReadOnly())); //PreLongs = 3 - } - - @Test - public void checkInsertsAndExtracts() { - final byte[] arr = new byte[32]; - final MemorySegment wseg = MemorySegment.ofArray(arr); - - int v = 0; - insertPreLongs(wseg, ++v); - assertEquals(extractPreLongs(wseg), v); - insertPreLongs(wseg, 0); - - insertLgResizeFactor(wseg, 3); //limited to 2 bits - assertEquals(extractLgResizeFactor(wseg), 3); - insertLgResizeFactor(wseg, 0); - - insertSerVer(wseg, ++v); - assertEquals(extractSerVer(wseg), v); - insertSerVer(wseg, 0); - - insertFamilyID(wseg, ++v); - assertEquals(extractFamilyID(wseg), v); - insertFamilyID(wseg, 0); - - insertLgNomLongs(wseg, ++v); - assertEquals(extractLgNomLongs(wseg), v); - insertLgNomLongs(wseg, 0); - - 
insertLgArrLongs(wseg, ++v); - assertEquals(extractLgArrLongs(wseg), v); - insertLgArrLongs(wseg, 0); - - insertFlags(wseg, 3); - assertEquals(extractFlags(wseg), 3); - assertEquals(extractLgResizeRatioV1(wseg), 3); //also at byte 5, limited to 2 bits - insertFlags(wseg, 0); - - insertSeedHash(wseg, ++v); - assertEquals(extractSeedHash(wseg), v); - assertEquals(extractFlagsV1(wseg), v); //also at byte 6 - insertSeedHash(wseg, 0); - - insertCurCount(wseg, ++v); - assertEquals(extractCurCount(wseg), v); - insertCurCount(wseg, 0); - - insertP(wseg, (float) 1.0); - assertEquals(extractP(wseg), (float) 1.0); - insertP(wseg, (float) 0.0); - - insertThetaLong(wseg, ++v); - assertEquals(extractThetaLong(wseg), v); - insertThetaLong(wseg, 0L); - - insertUnionThetaLong(wseg, ++v); - assertEquals(extractUnionThetaLong(wseg), v); - insertUnionThetaLong(wseg, 0L); - - setEmpty(wseg); - assertTrue(isEmptyFlag(wseg)); - - clearEmpty(wseg); - assertFalse(isEmptyFlag(wseg)); - } - - @Test - public void printlnTest() { - println("PRINTING: "+this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(final String s) { - //System.out.println(s); //disable here - } - -} diff --git a/src/test/java/org/apache/datasketches/theta2/SetOperationTest.java b/src/test/java/org/apache/datasketches/theta2/SetOperationTest.java deleted file mode 100644 index 0b1657679..000000000 --- a/src/test/java/org/apache/datasketches/theta2/SetOperationTest.java +++ /dev/null @@ -1,438 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import static org.apache.datasketches.common.ResizeFactor.X4; -import static org.apache.datasketches.theta2.Sketch.getMaxUpdateSketchBytes; -import static org.apache.datasketches.thetacommon2.HashOperations.minLgHashTableSize; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertFalse; -import static org.testng.Assert.assertTrue; - -import java.lang.foreign.MemorySegment; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.ThetaUtil; -import org.testng.annotations.Test; - -/** - * @author Lee Rhodes - */ -public class SetOperationTest { - - @Test - public void checkBuilder() { - final int k = 2048; - final long seed = 1021; - - final UpdateSketch usk1 = UpdateSketch.builder().setSeed(seed).setNominalEntries(k).build(); - final UpdateSketch usk2 = UpdateSketch.builder().setSeed(seed).setNominalEntries(k).build(); - - for (int i=0; i Next, we recover the Union SetOp and the 3 sketches and the space for the result. Then - * recompute the union using a Union of the same size as the input sketches, where the end result - * will be an estimate. - */ - @Test - public void checkDirectUnionExample() { - //The first task is to compute how much off-heap space we need and set the heap large enough. 
- //For the first trial, we will set the Union large enough for an exact result for THIS example. - final int sketchNomEntries = 1 << 14; //16K - int unionNomEntries = 1 << 15; //32K - final int[] heapLayout = getHeapLayout(sketchNomEntries, unionNomEntries); - - //This BB belongs to you and you always retain a link to it until you are completely - // done and then let java garbage collect it. - //I use a heap backing array, because for this example it is easier to peak into it and - // see what is going on. - final byte[] backingArr = new byte[heapLayout[5]]; - final ByteBuffer heapBuf = ByteBuffer.wrap(backingArr).order(ByteOrder.nativeOrder()); - - // Attaches a MemorySegment object to the underlying heap space of heapBuf. - // heapSeg will have a Read/Write view of the complete backing segment of heapBuf (direct or not). - // Any R/W action from heapSeg will be visible via heapBuf and visa versa. - // - // However, if you had created this WM object off-heap - // you would have the responsibility to close it when you are done. - // But, since it was allocated via BB, it closes it for you. - final MemorySegment heapSeg = MemorySegment.ofBuffer(heapBuf); - - double result = directUnionTrial1(heapSeg, heapLayout, sketchNomEntries, unionNomEntries); - println("1st est: "+result); - final int expected = sketchNomEntries*2; - assertEquals(result, expected, 0.0); //est must be exact. - - //For trial 2, we will use the same union space but use only part of it. 
- unionNomEntries = 1 << 14; //16K - result = directUnionTrial2(heapSeg, heapLayout, sketchNomEntries, unionNomEntries); - - //intentionally loose bounds - assertEquals(result, expected, expected*0.05); - println("2nd est: "+result); - println("Error %: "+(result/expected -1.0)*100); - } - - @Test - public void setOpsExample() { - println("Set Operations Example:"); - final int k = 4096; - final UpdateSketch skA = Sketches.updateSketchBuilder().setNominalEntries(k).build(); - final UpdateSketch skB = Sketches.updateSketchBuilder().setNominalEntries(k).build(); - final UpdateSketch skC = Sketches.updateSketchBuilder().setNominalEntries(k).build(); - - for (int i=1; i<=10; i++) { skA.update(i); } - for (int i=1; i<=20; i++) { skB.update(i); } - for (int i=6; i<=15; i++) { skC.update(i); } //overlapping set - - final Union union = Sketches.setOperationBuilder().setNominalEntries(k).buildUnion(); - union.union(skA); - union.union(skB); - // ... continue to iterate on the input sketches to union - - final CompactSketch unionSk = union.getResult(); //the result union sketch - println("A U B : "+unionSk.getEstimate()); //the estimate of the union - - //Intersection is similar - - final Intersection inter = Sketches.setOperationBuilder().buildIntersection(); - inter.intersect(unionSk); - inter.intersect(skC); - // ... 
continue to iterate on the input sketches to intersect - - final CompactSketch interSk = inter.getResult(); //the result intersection sketch - println("(A U B) ^ C: "+interSk.getEstimate()); //the estimate of the intersection - - //The AnotB operation is a little different as it is stateless: - - final AnotB aNotB = Sketches.setOperationBuilder().buildANotB(); - final CompactSketch not = aNotB.aNotB(skA, skC); - - println("A \\ C : "+not.getEstimate()); //the estimate of the AnotB operation - } - - @Test - public void checkIsSameResource() { - final int k = 16; - final MemorySegment wseg = MemorySegment.ofArray(new byte[k*16 + 32]);//288 - final MemorySegment emptySeg = MemorySegment.ofArray(new byte[8]); - final Union union = Sketches.setOperationBuilder().setNominalEntries(k).buildUnion(wseg); - assertTrue(union.isSameResource(wseg)); - assertFalse(union.isSameResource(emptySeg)); - - final Intersection inter = Sketches.setOperationBuilder().buildIntersection(wseg); - assertTrue(inter.isSameResource(wseg)); - assertFalse(inter.isSameResource(emptySeg)); - - final AnotB aNotB = Sketches.setOperationBuilder().buildANotB(); - - assertFalse(aNotB.isSameResource(emptySeg)); - } - - @Test - public void printlnTest() { - println("PRINTING: "+this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(final String s) { - //System.out.println(s); //disable here - } - - /** - * Compute offsets for MyHeap for Union, sketch1, sketch2, sketch3, resultSketch, total layout. 
- * @param sketchNomEntries the configured nominal entries of the sketch - * @param unionNomEntries configured nominal entries of the union - * @return array of offsets for Union, sketch1, sketch2, sketch3, resultSketch, total layout - */ - private static int[] getHeapLayout(final int sketchNomEntries, final int unionNomEntries) { - final int[] heapLayout = new int[6]; - final int unionBytes = SetOperation.getMaxUnionBytes(unionNomEntries); - final int sketchBytes = getMaxUpdateSketchBytes(sketchNomEntries); - final int resultBytes = Sketch.getMaxCompactSketchBytes(unionNomEntries); - heapLayout[0] = 0; //offset for Union - heapLayout[1] = unionBytes; //offset for sketch1 - heapLayout[2] = unionBytes + sketchBytes; //offset for sketch2 - heapLayout[3] = unionBytes + 2*sketchBytes; //offset for sketch3 - heapLayout[4] = unionBytes + 3*sketchBytes; //offset for result - heapLayout[5] = unionBytes + 3*sketchBytes + resultBytes; //total - return heapLayout; - } - - private static double directUnionTrial1( - final MemorySegment heapSeg, final int[] heapLayout, final int sketchNomEntries, final int unionNomEntries) { - - final int offset = heapLayout[0]; - final int bytes = heapLayout[1] - offset; - final MemorySegment unionSeg = heapSeg.asSlice(offset, bytes); - - Union union = SetOperation.builder().setNominalEntries(unionNomEntries).buildUnion(unionSeg); - - final MemorySegment sketch1seg = heapSeg.asSlice(heapLayout[1], heapLayout[2]-heapLayout[1]); - final MemorySegment sketch2seg = heapSeg.asSlice(heapLayout[2], heapLayout[3]-heapLayout[2]); - final MemorySegment sketch3seg = heapSeg.asSlice(heapLayout[3], heapLayout[4]-heapLayout[3]); - final MemorySegment resultSeg = heapSeg.asSlice(heapLayout[4], heapLayout[5]-heapLayout[4]); - - //Initialize the 3 sketches - final UpdateSketch sk1 = UpdateSketch.builder().setNominalEntries(sketchNomEntries).build(sketch1seg); - final UpdateSketch sk2 = 
UpdateSketch.builder().setNominalEntries(sketchNomEntries).build(sketch2seg); - final UpdateSketch sk3 = UpdateSketch.builder().setNominalEntries(sketchNomEntries).build(sketch3seg); - - //This little trial has sk1 and sk2 distinct and sk2 overlap both. - //Build the sketches. - for (int i=0; i< sketchNomEntries; i++) { - sk1.update(i); - sk2.update(i + sketchNomEntries/2); - sk3.update(i + sketchNomEntries); - } - - //confirm that each of these 3 sketches is exact. - assertEquals(sk1.getEstimate(), sketchNomEntries, 0.0); - assertEquals(sk2.getEstimate(), sketchNomEntries, 0.0); - assertEquals(sk3.getEstimate(), sketchNomEntries, 0.0); - - //Let's union the first 2 sketches - union.union(sk1); - union.union(sk2); - - //Let's recover the union and the 3rd sketch - union = Sketches.wrapUnion(unionSeg); - union.union(Sketch.wrap(sketch3seg)); - - final Sketch resSk = union.getResult(true, resultSeg); - final double est = resSk.getEstimate(); - - return est; - } - - private static double directUnionTrial2( - final MemorySegment heapSeg, final int[] heapLayout, final int sketchNomEntries, final int unionNomEntries) { - - final MemorySegment unionSeg = heapSeg.asSlice(heapLayout[0], heapLayout[1]-heapLayout[0]); - final MemorySegment sketch1seg = heapSeg.asSlice(heapLayout[1], heapLayout[2]-heapLayout[1]); - final MemorySegment sketch2seg = heapSeg.asSlice(heapLayout[2], heapLayout[3]-heapLayout[2]); - final MemorySegment sketch3seg = heapSeg.asSlice(heapLayout[3], heapLayout[4]-heapLayout[3]); - final MemorySegment resultSeg = heapSeg.asSlice(heapLayout[4], heapLayout[5]-heapLayout[4]); - - //Recover the 3 sketches - final UpdateSketch sk1 = (UpdateSketch) Sketch.wrap(sketch1seg); - final UpdateSketch sk2 = (UpdateSketch) Sketch.wrap(sketch2seg); - final UpdateSketch sk3 = (UpdateSketch) Sketch.wrap(sketch3seg); - - //confirm that each of these 3 sketches is exact. 
- assertEquals(sk1.getEstimate(), sketchNomEntries, 0.0); - assertEquals(sk2.getEstimate(), sketchNomEntries, 0.0); - assertEquals(sk3.getEstimate(), sketchNomEntries, 0.0); - - //Create a new union in the same space with a smaller size. - Util.clear(unionSeg); - final Union union = SetOperation.builder().setNominalEntries(unionNomEntries).buildUnion(unionSeg); - union.union(sk1); - union.union(sk2); - union.union(sk3); - - final Sketch resSk = union.getResult(true, resultSeg); - final double est = resSk.getEstimate(); - - return est; - } - -} diff --git a/src/test/java/org/apache/datasketches/theta2/SetOpsCornerCasesTest.java b/src/test/java/org/apache/datasketches/theta2/SetOpsCornerCasesTest.java deleted file mode 100644 index 12d3591aa..000000000 --- a/src/test/java/org/apache/datasketches/theta2/SetOpsCornerCasesTest.java +++ /dev/null @@ -1,501 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import static org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.EMPTY; -import static org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.EST_HEAP; -import static org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.EST_SEGMENT_UNORDERED; -import static org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.EXACT; -import static org.apache.datasketches.theta2.SetOpsCornerCasesTest.State.NULL; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertNull; - -import java.lang.foreign.MemorySegment; -import java.util.Random; - -import org.testng.Assert; -import org.testng.annotations.Test; - -public class SetOpsCornerCasesTest { - - /*******************************************/ - Random rand = new Random(9001); //deterministic - - @Test - public void checkSetOpsRandom() { - int hiA = 0, loB = 0, hiB = 0; - for (int i = 0; i < 1000; i++) { - hiA = rand.nextInt(128); //skA fed values between 0 and 127 - loB = rand.nextInt(64); - hiB = loB + rand.nextInt(64); //skB fed up to 63 values starting at loB - compareSetOpsRandom(64, 0, hiA, loB, hiB); - } - } - - private static void compareSetOpsRandom(int k, int loA, int hiA, int loB, int hiB) { - UpdateSketch tskA = Sketches.updateSketchBuilder().setNominalEntries(k).build(); - UpdateSketch tskB = Sketches.updateSketchBuilder().setNominalEntries(k).build(); - - for (int i = loA; i < hiA; i++) { tskA.update(i); } - for (int i = loB; i < hiB; i++) { tskB.update(i); } - - CompactSketch rcskStdU = doStdUnion(tskA, tskB, k, null); - CompactSketch rcskPwU = doPwUnion(tskA, tskB, k); - checkCornerCase(rcskPwU, rcskStdU); - - CompactSketch rcskStdPairU = doStdPairUnion(tskA, tskB, k, null); - checkCornerCase(rcskStdPairU, rcskStdU); - - CompactSketch rcskStdI = doStdIntersection(tskA, tskB, null); - CompactSketch rcskPwI = doPwIntersection(tskA, tskB); - checkCornerCase(rcskPwI, rcskStdI); - - CompactSketch rcskStdPairI = 
doStdPairIntersection(tskA, tskB, null); - checkCornerCase(rcskStdPairI, rcskStdI); - - CompactSketch rcskStdAnotB = doStdAnotB(tskA, tskB, null); - CompactSketch rcskPwAnotB = doPwAnotB(tskA, tskB); - checkCornerCase(rcskPwAnotB, rcskStdAnotB); - - CompactSketch rcskStdStatefulAnotB = doStdStatefulAnotB(tskA, tskB, null); - checkCornerCase(rcskStdStatefulAnotB, rcskStdAnotB); - } - - /*******************************************/ - - @Test - //Check all corner cases against standard Union, Intersection, and AnotB. - //The unordered case is not tested - public void compareCornerCases() { - int k = 64; - for (State stateA : State.values()) { - for (State stateB : State.values()) { - if ((stateA == EST_SEGMENT_UNORDERED) || (stateB == EST_SEGMENT_UNORDERED)) { continue; } - if ((stateA == NULL) || (stateB == NULL)) { continue; } - cornerCaseChecks(stateA, stateB, k); - cornerCaseChecksMemorySegment(stateA, stateB, k); - } - } - } - -// @Test -// public void checkExactNullSpecificCase() { -// cornerCaseChecksMemorySegment(State.EXACT, State.NULL, 64); -// } - - private static void cornerCaseChecksMemorySegment(State stateA, State stateB, int k) { - println("StateA: " + stateA + ", StateB: " + stateB); - CompactSketch tcskA = generate(stateA, k); - CompactSketch tcskB = generate(stateB, k); - - MemorySegment wseg = MemorySegment.ofArray(new byte[SetOperation.getMaxUnionBytes(k)]); - - CompactSketch rcskStdU = doStdUnion(tcskA, tcskB, k, null); - CompactSketch rcskPwU = doPwUnion(tcskA, tcskB, k); - checkCornerCase(rcskPwU, rcskStdU); //heap, heap - - rcskStdU = doStdUnion(tcskA, tcskB, k, wseg); - CompactSketch rcskStdPairU = doStdPairUnion(tcskA, tcskB, k, wseg); - checkCornerCase(rcskStdPairU, rcskStdU); //direct, direct - - wseg = MemorySegment.ofArray(new byte[SetOperation.getMaxIntersectionBytes(k)]); - - CompactSketch rcskStdI = doStdIntersection(tcskA, tcskB, null); - CompactSketch rcskPwI = doPwIntersection(tcskA, tcskB); - checkCornerCase(rcskPwI, rcskStdI); 
//empty, empty - - rcskStdI = doStdIntersection(tcskA, tcskB, wseg); - CompactSketch rcskStdPairI = doStdPairIntersection(tcskA, tcskB, wseg); - checkCornerCase(rcskStdPairI, rcskStdI); //empty, empty //direct, direct??? - - wseg = MemorySegment.ofArray(new byte[SetOperation.getMaxAnotBResultBytes(k)]); - - CompactSketch rcskStdAnotB = doStdAnotB(tcskA, tcskB, null); - CompactSketch rcskPwAnotB = doPwAnotB(tcskA, tcskB); - checkCornerCase(rcskPwAnotB, rcskStdAnotB); //heap, heap - - rcskStdAnotB = doStdAnotB(tcskA, tcskB, wseg); - CompactSketch rcskStdStatefulAnotB = doStdStatefulAnotB(tcskA, tcskB, wseg); - checkCornerCase(rcskStdStatefulAnotB, rcskStdAnotB); //direct, heap - } - - private static void cornerCaseChecks(State stateA, State stateB, int k) { - println("StateA: " + stateA + ", StateB: " + stateB); - CompactSketch tcskA = generate(stateA, k); - CompactSketch tcskB = generate(stateB, k); - - CompactSketch rcskStdU = doStdUnion(tcskA, tcskB, k, null); - CompactSketch rcskPwU = doPwUnion(tcskA, tcskB, k); - checkCornerCase(rcskPwU, rcskStdU); - - CompactSketch rcskStdPairU = doStdPairUnion(tcskA, tcskB, k, null); - checkCornerCase(rcskStdPairU, rcskStdU); - - CompactSketch rcskStdI = doStdIntersection(tcskA, tcskB, null); - CompactSketch rcskPwI = doPwIntersection(tcskA, tcskB); - checkCornerCase(rcskPwI, rcskStdI); - - CompactSketch rcskStdPairI = doStdPairIntersection(tcskA, tcskB, null); - checkCornerCase(rcskStdPairI, rcskStdI); - - CompactSketch rcskStdAnotB = doStdAnotB(tcskA, tcskB, null); - CompactSketch rcskPwAnotB = doPwAnotB(tcskA, tcskB); - checkCornerCase(rcskPwAnotB, rcskStdAnotB); - - CompactSketch rcskStdStatefulAnotB = doStdStatefulAnotB(tcskA, tcskB, null); - checkCornerCase(rcskStdStatefulAnotB, rcskStdAnotB); - } - - private static CompactSketch doStdUnion(Sketch tskA, Sketch tskB, int k, MemorySegment wseg) { - Union union = Sketches.setOperationBuilder().setNominalEntries(k).buildUnion(); - union.union(tskA); - union.union(tskB); - 
return union.getResult(true, wseg); - } - - private static CompactSketch doStdPairUnion(Sketch tskA, Sketch tskB, int k, MemorySegment wseg) { - Union union = Sketches.setOperationBuilder().setNominalEntries(k).buildUnion(); - return union.union(tskA, tskB, true, wseg); - } - - private static CompactSketch doStdIntersection(Sketch tskA, Sketch tskB, MemorySegment wseg) { - Intersection inter = Sketches.setOperationBuilder().buildIntersection(); - inter.intersect(tskA); - inter.intersect(tskB); - return inter.getResult(true, wseg); - } - - private static CompactSketch doStdPairIntersection(Sketch tskA, Sketch tskB, MemorySegment wseg) { - Intersection inter = Sketches.setOperationBuilder().buildIntersection(); - return inter.intersect(tskA, tskB, true, wseg); - } - - private static CompactSketch doStdAnotB(Sketch tskA, Sketch tskB, MemorySegment wseg) { - AnotB anotb = Sketches.setOperationBuilder().buildANotB(); - return anotb.aNotB(tskA, tskB, true, wseg); - } - - private static CompactSketch doStdStatefulAnotB(Sketch tskA, Sketch tskB, MemorySegment wseg) { - AnotB anotb = Sketches.setOperationBuilder().buildANotB(); - anotb.setA(tskA); - anotb.notB(tskB); - anotb.getResult(false); - return anotb.getResult(true, wseg, true); - } - - private static CompactSketch doPwUnion(Sketch tskA, Sketch tskB, int k) { - CompactSketch tcskA, tcskB; - if (tskA == null) { tcskA = null; } - else { tcskA = (tskA instanceof CompactSketch) ? (CompactSketch) tskA : tskA.compact(); } - if (tskB == null) { tcskB = null; } - else { tcskB = (tskB instanceof CompactSketch) ? 
(CompactSketch) tskB : tskB.compact(); } - Union union = SetOperation.builder().setNominalEntries(k).buildUnion(); - return union.union(tcskA, tcskB); - } - - private static CompactSketch doPwIntersection(Sketch tskA, Sketch tskB) { - Intersection inter = SetOperation.builder().buildIntersection(); - return inter.intersect(tskA, tskB); - } - - private static CompactSketch doPwAnotB(Sketch tskA, Sketch tskB) { - AnotB aNotB = SetOperation.builder().buildANotB(); - return aNotB.aNotB(tskA, tskB); - } - - - private static void checkCornerCase(Sketch rskA, Sketch rskB) { - double estA = rskA.getEstimate(); - double estB = rskB.getEstimate(); - boolean emptyA = rskA.isEmpty(); - boolean emptyB = rskB.isEmpty(); - long thetaLongA = rskA.getThetaLong(); - long thetaLongB = rskB.getThetaLong(); - int countA = rskA.getRetainedEntries(true); - int countB = rskB.getRetainedEntries(true); - Assert.assertEquals(estB, estA, 0.0); - Assert.assertEquals(emptyB, emptyA); - Assert.assertEquals(thetaLongB, thetaLongA); - Assert.assertEquals(countB, countA); - Assert.assertEquals(rskA.getClass().getSimpleName(), rskB.getClass().getSimpleName()); - } - - /*******************************************/ - - @Test - public void checkUnionNotOrdered() { - int k = 64; - CompactSketch skNull = generate(NULL, k); - CompactSketch skEmpty = generate(EMPTY, k); - CompactSketch skHeap = generate(EST_HEAP, k); - CompactSketch skHeapUO = generate(EST_SEGMENT_UNORDERED, k); - Union union = SetOperation.builder().setNominalEntries(k).buildUnion(); - union.union(skNull, skHeapUO); - union.union(skEmpty, skHeapUO); - union.union(skHeapUO, skNull); - union.union(skHeapUO, skEmpty); - union.union(skHeapUO, skHeap); - union.union(skHeap, skHeapUO); - } - - @Test - public void checkSeedHash() { - int k = 64; - UpdateSketch tmp1 = Sketches.updateSketchBuilder().setNominalEntries(k).setSeed(123).build(); - tmp1.update(1); - tmp1.update(3); - CompactSketch skSmallSeed2A = tmp1.compact(true, null); - - 
UpdateSketch tmp2 = Sketches.updateSketchBuilder().setNominalEntries(k).setSeed(123).build(); - tmp2.update(1); - tmp2.update(2); - CompactSketch skSmallSeed2B = tmp2.compact(true, null); - - CompactSketch skExact = generate(EXACT, k); - CompactSketch skHeap = generate(EST_HEAP, 2 * k); - - Intersection inter = SetOperation.builder().buildIntersection(); - AnotB aNotB = SetOperation.builder().buildANotB(); - Union union = SetOperation.builder().setNominalEntries(k).buildUnion(); - - //Intersect - try { - inter.intersect(skExact, skSmallSeed2A); - Assert.fail(); - } catch (Exception e) { } //pass - try { - inter.intersect(skExact, skSmallSeed2B); - Assert.fail(); - } catch (Exception e) { } //pass - try { - inter.intersect(skSmallSeed2B, skExact); - Assert.fail(); - } catch (Exception e) { } //pass - try { - inter.intersect(skHeap, skSmallSeed2B); - Assert.fail(); - } catch (Exception e) { } //pass - //A NOT B - try { - aNotB.aNotB(skExact, skSmallSeed2A); - Assert.fail(); - } catch (Exception e) { } //pass - try { - aNotB.aNotB(skExact, skSmallSeed2B); - Assert.fail(); - } catch (Exception e) { } //pass - try { - aNotB.aNotB(skSmallSeed2B, skExact); - Assert.fail(); - } catch (Exception e) { } //pass - try { - aNotB.aNotB(skHeap, skSmallSeed2B); - Assert.fail(); - } catch (Exception e) { } //pass - //Union - try { - union.union(skExact, skSmallSeed2A); - Assert.fail(); - } catch (Exception e) { } //pass - try { - union.union(skExact, skSmallSeed2B); - Assert.fail(); - } catch (Exception e) { } //pass - try { - union.union(skSmallSeed2B, skExact); - Assert.fail(); - } catch (Exception e) { } //pass - try { - union.union(skHeap, skSmallSeed2B); - Assert.fail(); - } catch (Exception e) { } //pass - } - - @Test - public void checkPwUnionReduceToK() { - int k = 16; - CompactSketch skNull = generate(NULL, k); - CompactSketch skEmpty = generate(EMPTY, k); - CompactSketch skHeap1 = generate(EST_HEAP, k); - CompactSketch skHeap2 = generate(EST_HEAP, k); - Union union = 
SetOperation.builder().setNominalEntries(k).buildUnion(); - CompactSketch csk; - csk = union.union(skNull, skHeap1); - Assert.assertEquals(csk.getRetainedEntries(true), k); - csk = union.union(skEmpty, skHeap1); - Assert.assertEquals(csk.getRetainedEntries(true), k); - csk = union.union(skHeap1, skNull); - Assert.assertEquals(csk.getRetainedEntries(true), k); - csk = union.union(skHeap1, skEmpty); - Assert.assertEquals(csk.getRetainedEntries(true), k); - csk = union.union(skHeap1, skHeap2); - Assert.assertEquals(csk.getRetainedEntries(true), k); - } - - @Test - public void printlnTest() { - println("PRINTING: "+this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(String s) { - //System.out.println(s); //disable here - } - - @Test - public void checkGenerator() { - int k = 16; - CompactSketch csk; - - csk = generate(State.NULL, 0); - assertNull(csk); - - csk = generate(State.EMPTY, k); - assertEquals(csk.isEmpty(), true); - assertEquals(csk.isEstimationMode(), false); - assertEquals(csk.getRetainedEntries(true), 0); - assertEquals(csk.getThetaLong(), Long.MAX_VALUE); - assertEquals(csk.isDirect(), false); - assertEquals(csk.hasMemorySegment(), false); - assertEquals(csk.isOrdered(), true); - - csk = generate(State.SINGLE, k); - assertEquals(csk.isEmpty(), false); - assertEquals(csk.isEstimationMode(), false); - assertEquals(csk.getRetainedEntries(true), 1); - assertEquals(csk.getThetaLong(), Long.MAX_VALUE); - assertEquals(csk.isDirect(), false); - assertEquals(csk.hasMemorySegment(), false); - assertEquals(csk.isOrdered(), true); - - csk = generate(State.EXACT, k); - assertEquals(csk.isEmpty(), false); - assertEquals(csk.isEstimationMode(), false); - assertEquals(csk.getRetainedEntries(true), k); - assertEquals(csk.getThetaLong(), Long.MAX_VALUE); - assertEquals(csk.isDirect(), false); - assertEquals(csk.hasMemorySegment(), false); - assertEquals(csk.isOrdered(), true); - - csk = generate(State.EST_HEAP, k); - 
assertEquals(csk.isEmpty(), false); - assertEquals(csk.isEstimationMode(), true); - assertEquals(csk.getRetainedEntries(true) > k, true); - assertEquals(csk.getThetaLong() < Long.MAX_VALUE, true); - assertEquals(csk.isDirect(), false); - assertEquals(csk.hasMemorySegment(), false); - assertEquals(csk.isOrdered(), true); - - csk = generate(State.THLT1_CNT0_FALSE, k); - assertEquals(csk.isEmpty(), false); - assertEquals(csk.isEstimationMode(), true); - assertEquals(csk.getRetainedEntries(true), 0); - assertEquals(csk.getThetaLong() < Long.MAX_VALUE, true); - assertEquals(csk.isDirect(), false); - assertEquals(csk.hasMemorySegment(), false); - assertEquals(csk.isOrdered(), true); - - csk = generate(State.THEQ1_CNT0_TRUE, k); - assertEquals(csk.isEmpty(), true); - assertEquals(csk.isEstimationMode(), false); - assertEquals(csk.getRetainedEntries(true), 0); - assertEquals(csk.getThetaLong() < Long.MAX_VALUE, false); - assertEquals(csk.isDirect(), false); - assertEquals(csk.hasMemorySegment(), false); - assertEquals(csk.isOrdered(), true); - - csk = generate(State.EST_SEGMENT_UNORDERED, k); - assertEquals(csk.isEmpty(), false); - assertEquals(csk.isEstimationMode(), true); - assertEquals(csk.getRetainedEntries(true) > k, true); - assertEquals(csk.getThetaLong() < Long.MAX_VALUE, true); - assertEquals(csk.isDirect(), false); - assertEquals(csk.hasMemorySegment(), true); - assertEquals(csk.isOrdered(), false); - } - - enum State {NULL, EMPTY, SINGLE, EXACT, EST_HEAP, THLT1_CNT0_FALSE, THEQ1_CNT0_TRUE, EST_SEGMENT_UNORDERED} - - private static CompactSketch generate(State state, int k) { - UpdateSketch sk = null; - CompactSketch csk = null; - - switch(state) { - case NULL : { - //already null - break; - } - case EMPTY : { //results in EmptyCompactSketch - csk = Sketches.updateSketchBuilder().setNominalEntries(k).build().compact(true, null); - break; - } - case SINGLE : { //results in SingleItemSketches most of the time - sk = 
Sketches.updateSketchBuilder().setNominalEntries(k).build(); - sk.update(1); - csk = sk.compact(true, null); - break; - } - case EXACT : { - sk = Sketches.updateSketchBuilder().setNominalEntries(k).build(); - for (int i = 0; i < k; i++) { - sk.update(i); - } - csk = sk.compact(true, null); - break; - } - case EST_HEAP : { - sk = Sketches.updateSketchBuilder().setNominalEntries(k).build(); - for (int i = 0; i < (4 * k); i++) { - sk.update(i); - } - csk = sk.compact(true, null); - break; - } - case THLT1_CNT0_FALSE : { - sk = Sketches.updateSketchBuilder().setP((float)0.5).setNominalEntries(k).build(); - sk.update(7); //above theta - assert(sk.getRetainedEntries(true) == 0); - csk = sk.compact(true, null); //compact as {Th < 1.0, 0, F} - break; - } - case THEQ1_CNT0_TRUE : { - sk = Sketches.updateSketchBuilder().setP((float)0.5).setNominalEntries(k).build(); - assert(sk.getRetainedEntries(true) == 0); - csk = sk.compact(true, null); //compact as {Th < 1.0, 0, T} - break; - } - case EST_SEGMENT_UNORDERED : { - sk = Sketches.updateSketchBuilder().setNominalEntries(k).build(); - for (int i = 0; i < (4 * k); i++) { - sk.update(i); - } - int bytes = Sketch.getMaxCompactSketchBytes(sk.getRetainedEntries(true)); - byte[] byteArr = new byte[bytes]; - MemorySegment wseg = MemorySegment.ofArray(byteArr); - csk = sk.compact(false, wseg); - break; - } - } - return csk; - } - -} diff --git a/src/test/java/org/apache/datasketches/theta2/SingleItemSketchTest.java b/src/test/java/org/apache/datasketches/theta2/SingleItemSketchTest.java deleted file mode 100644 index bada1876c..000000000 --- a/src/test/java/org/apache/datasketches/theta2/SingleItemSketchTest.java +++ /dev/null @@ -1,377 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static org.apache.datasketches.hash.MurmurHash3.hash; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertFalse; -import static org.testng.Assert.assertNull; -import static org.testng.Assert.assertTrue; -import static org.testng.Assert.fail; - -import java.lang.foreign.MemorySegment; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; -import org.testng.annotations.Test; - -/** - * @author Lee Rhodes - */ -public class SingleItemSketchTest { - final static short DEFAULT_SEED_HASH = (short) (Util.computeSeedHash(Util.DEFAULT_UPDATE_SEED) & 0XFFFFL); - - @Test - public void check1() { - Union union = Sketches.setOperationBuilder().buildUnion(); - union.union(SingleItemSketch.create(1)); - union.union(SingleItemSketch.create(1.0)); - union.union(SingleItemSketch.create(0.0)); - union.union(SingleItemSketch.create("1")); - union.union(SingleItemSketch.create(new byte[] {1,2,3,4})); - union.union(SingleItemSketch.create(new char[] {'a'})); - union.union(SingleItemSketch.create(new int[] {2})); - union.union(SingleItemSketch.create(new long[] {3})); - - union.union(SingleItemSketch.create(-0.0)); //duplicate - - double est 
= union.getResult().getEstimate(); - println(""+est); - assertEquals(est, 8.0, 0.0); - - assertNull(SingleItemSketch.create("")); - String str = null; - assertNull(SingleItemSketch.create(str));//returns null - - assertNull(SingleItemSketch.create(new byte[0]));//returns null - byte[] byteArr = null; - assertNull(SingleItemSketch.create(byteArr));//returns null - - assertNull(SingleItemSketch.create(new char[0]));//returns null - char[] charArr = null; - assertNull(SingleItemSketch.create(charArr));//returns null - - assertNull(SingleItemSketch.create(new int[0]));//returns null - int[] intArr = null; - assertNull(SingleItemSketch.create(intArr));//returns null - - assertNull(SingleItemSketch.create(new long[0]));//returns null - long[] longArr = null; - assertNull(SingleItemSketch.create(longArr));//returns null - } - - @Test - public void check2() { - long seed = Util.DEFAULT_UPDATE_SEED; - Union union = Sketches.setOperationBuilder().buildUnion(); - union.union(SingleItemSketch.create(1, seed)); - union.union(SingleItemSketch.create(1.0, seed)); - union.union(SingleItemSketch.create(0.0, seed)); - union.union(SingleItemSketch.create("1", seed)); - union.union(SingleItemSketch.create(new byte[] {1,2,3,4}, seed)); - union.union(SingleItemSketch.create(new char[] {'a'}, seed)); - union.union(SingleItemSketch.create(new int[] {2}, seed)); - union.union(SingleItemSketch.create(new long[] {3}, seed)); - - union.union(SingleItemSketch.create(-0.0, seed)); //duplicate - - double est = union.getResult().getEstimate(); - println(""+est); - assertEquals(est, 8.0, 0.0); - - assertNull(SingleItemSketch.create("", seed)); - String str = null; - assertNull(SingleItemSketch.create(str, seed));//returns null - - assertNull(SingleItemSketch.create(new byte[0], seed));//returns null - byte[] byteArr = null; - assertNull(SingleItemSketch.create(byteArr, seed));//returns null - - assertNull(SingleItemSketch.create(new char[0], seed));//returns null - char[] charArr = null; - 
assertNull(SingleItemSketch.create(charArr, seed));//returns null - - assertNull(SingleItemSketch.create(new int[0], seed));//returns null - int[] intArr = null; - assertNull(SingleItemSketch.create(intArr, seed));//returns null - - assertNull(SingleItemSketch.create(new long[0], seed));//returns null - long[] longArr = null; - assertNull(SingleItemSketch.create(longArr, seed));//returns null - } - - @Test - public void checkSketchInterface() { - SingleItemSketch sis = SingleItemSketch.create(1); - assertEquals(sis.getCompactBytes(), 16); - assertEquals(sis.getEstimate(), 1.0); - assertEquals(sis.getLowerBound(1), 1.0); - assertEquals(sis.getRetainedEntries(true), 1); - assertEquals(sis.getUpperBound(1), 1.0); - assertFalse(sis.isDirect()); - assertFalse(sis.hasMemorySegment()); - assertFalse(sis.isEmpty()); - assertTrue(sis.isOrdered()); - } - - @Test - public void checkLessThanThetaLong() { - for (int i = 0; i < 10; i++) { - long[] data = { i }; - long h = hash(data, Util.DEFAULT_UPDATE_SEED)[0] >>> 1; - SingleItemSketch sis = SingleItemSketch.create(i); - long halfMax = Long.MAX_VALUE >> 1; - int count = sis.getCountLessThanThetaLong(halfMax); - assertEquals(count, (h < halfMax) ? 
1 : 0); - } - } - - @Test - public void checkSerDe() { - SingleItemSketch sis = SingleItemSketch.create(1); - byte[] byteArr = sis.toByteArray(); - MemorySegment seg = MemorySegment.ofArray(byteArr); - final short defaultSeedHash = Util.computeSeedHash(Util.DEFAULT_UPDATE_SEED); - SingleItemSketch sis2 = SingleItemSketch.heapify(seg, defaultSeedHash); - assertEquals(sis2.getEstimate(), 1.0); - - SingleItemSketch sis3 = SingleItemSketch.heapify(seg , defaultSeedHash); - assertEquals(sis3.getEstimate(), 1.0); - - Union union = Sketches.setOperationBuilder().buildUnion(); - union.union(sis); - union.union(sis2); - union.union(sis3); - CompactSketch csk = union.getResult(); - assertTrue(csk instanceof SingleItemSketch); - assertEquals(union.getResult().getEstimate(), 1.0); - } - - @Test - public void checkRestricted() { - SingleItemSketch sis = SingleItemSketch.create(1); - assertNull(sis.getMemorySegment()); - assertEquals(sis.getCompactPreambleLongs(), 1); - } - - @Test - public void unionWrapped() { - Sketch sketch = SingleItemSketch.create(1); - Union union = Sketches.setOperationBuilder().buildUnion(); - MemorySegment seg = MemorySegment.ofArray(sketch.toByteArray()); - union.union(seg ); - assertEquals(union.getResult().getEstimate(), 1, 0); - } - - @Test - public void buildAndCompact() { - UpdateSketch sk1; - CompactSketch csk; - int bytes; - //On-heap - sk1 = Sketches.updateSketchBuilder().setNominalEntries(32).build(); - sk1.update(1); - csk = sk1.compact(true, null); - assertTrue(csk instanceof SingleItemSketch); - csk = sk1.compact(false, null); - assertTrue(csk instanceof SingleItemSketch); - - //Off-heap - bytes = Sketches.getMaxUpdateSketchBytes(32); - MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); - sk1= Sketches.updateSketchBuilder().setNominalEntries(32).build(wseg ); - sk1.update(1); - csk = sk1.compact(true, null); - assertTrue(csk instanceof SingleItemSketch); - csk = sk1.compact(false, null); - assertTrue(csk instanceof 
SingleItemSketch); - - bytes = Sketches.getMaxCompactSketchBytes(1); - wseg = MemorySegment.ofArray(new byte[bytes]); - csk = sk1.compact(true, wseg ); - assertTrue(csk.isOrdered()); - csk = sk1.compact(false, wseg ); - assertTrue(csk.isOrdered()); - } - - @Test - public void intersection() { - UpdateSketch sk1, sk2; - CompactSketch csk; - int bytes; - //Intersection on-heap - sk1 = Sketches.updateSketchBuilder().setNominalEntries(32).build(); - sk2 = Sketches.updateSketchBuilder().setNominalEntries(32).build(); - sk1.update(1); - sk1.update(2); - sk2.update(1); - Intersection inter = Sketches.setOperationBuilder().buildIntersection(); - inter.intersect(sk1); - inter.intersect(sk2); - csk = inter.getResult(true, null); - assertTrue(csk instanceof SingleItemSketch); - - //Intersection off-heap - bytes = Sketches.getMaxIntersectionBytes(32); - MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); - inter = Sketches.setOperationBuilder().buildIntersection(wseg ); - inter.intersect(sk1); - inter.intersect(sk2); - csk = inter.getResult(true, null); - assertTrue(csk instanceof SingleItemSketch); - csk = inter.getResult(false, null); - assertTrue(csk instanceof SingleItemSketch); - } - - @Test - public void union() { - UpdateSketch sk1, sk2; - CompactSketch csk; - int bytes; - //Union on-heap - sk1 = Sketches.updateSketchBuilder().setNominalEntries(32).build(); - sk2 = Sketches.updateSketchBuilder().setNominalEntries(32).build(); - sk1.update(1); - sk2.update(1); - Union union = Sketches.setOperationBuilder().buildUnion(); - union.union(sk1); - union.union(sk2); - csk = union.getResult(true, null); - assertTrue(csk instanceof SingleItemSketch); - - //Union off-heap - bytes = Sketches.getMaxUnionBytes(32); - MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); - union = Sketches.setOperationBuilder().buildUnion(wseg ); - union.union(sk1); - union.union(sk2); - csk = union.getResult(true, null); - assertTrue(csk instanceof SingleItemSketch); - csk = 
union.getResult(false, null); - assertTrue(csk instanceof SingleItemSketch); - } - - @Test - public void aNotB() { - UpdateSketch sk1, sk2; - CompactSketch csk; - //AnotB on-heap - sk1 = Sketches.updateSketchBuilder().setNominalEntries(32).build(); - sk2 = Sketches.updateSketchBuilder().setNominalEntries(32).build(); - sk1.update(1); - sk2.update(2); - AnotB aNotB = Sketches.setOperationBuilder().buildANotB(); - aNotB.setA(sk1); - aNotB.notB(sk2); - csk = aNotB.getResult(true, null, true); - assertTrue(csk instanceof SingleItemSketch); - //not AnotB off-heap form - } - - @Test - public void checkHeapifyInstance() { - UpdateSketch sk1 = new UpdateSketchBuilder().build(); - sk1.update(1); - UpdateSketch sk2 = new UpdateSketchBuilder().build(); - sk2.update(1); - Intersection inter = Sketches.setOperationBuilder().buildIntersection(); - inter.intersect(sk1); - inter.intersect(sk2); - MemorySegment wseg = MemorySegment.ofArray(new byte[16]); - CompactSketch csk = inter.getResult(false, wseg ); - assertTrue(csk.isOrdered()); - Sketch csk2 = Sketches.heapifySketch(wseg ); - assertTrue(csk2 instanceof SingleItemSketch); - println(csk2.toString(true, true, 1, true)); - } - - @Test - public void checkSingleItemBadFlags() { - final short defaultSeedHash = Util.computeSeedHash(Util.DEFAULT_UPDATE_SEED); - UpdateSketch sk1 = new UpdateSketchBuilder().build(); - sk1.update(1); - MemorySegment wseg = MemorySegment.ofArray(new byte[16]); - sk1.compact(true, wseg ); - wseg .set(JAVA_BYTE, 5, (byte) 0); //corrupt flags to zero - try { - SingleItemSketch.heapify(wseg , defaultSeedHash); //fails due to corrupted flags bytes - fail(); - } catch (SketchesArgumentException e) { } - } - - @Test - public void checkDirectUnionSingleItem2() { - Sketch sk = Sketch.wrap(siSkWoutSiFlag24Bytes()); - assertEquals(sk.getEstimate(), 1.0, 0.0); - //println(sk.toString()); - sk = Sketch.wrap(siSkWithSiFlag24Bytes()); - assertEquals(sk.getEstimate(), 1.0, 0.0); - //println(sk.toString()); - } - - 
@Test - public void checkSingleItemCompact() { - UpdateSketch sk1 = new UpdateSketchBuilder().build(); - sk1.update(1); - CompactSketch csk = sk1.compact(); - assertTrue(csk instanceof SingleItemSketch); - CompactSketch csk2 = csk.compact(); - assertEquals(csk, csk2); - CompactSketch csk3 = csk.compact(true, MemorySegment.ofArray(new byte[16])); - assertTrue(csk3 instanceof DirectCompactSketch); - assertEquals(csk2.getCurrentPreambleLongs(), 1); - assertEquals(csk3.getCurrentPreambleLongs(), 1); - } - - - static final long SiSkPre0WithSiFlag = 0x93cc3a0000030301L; - static final long SiSkPre0WoutSiFlag = 0x93cc1a0000030301L; - static final long Hash = 0x05a186bdcb7df915L; - - static MemorySegment siSkWithSiFlag24Bytes() { - int cap = 24; //8 extra bytes - MemorySegment wseg = MemorySegment.ofArray(new byte[cap]); - wseg .set(JAVA_LONG_UNALIGNED, 0, SiSkPre0WithSiFlag); - wseg .set(JAVA_LONG_UNALIGNED, 8, Hash); - return wseg ; - } - - static MemorySegment siSkWoutSiFlag24Bytes() { - int cap = 24; //8 extra bytes - MemorySegment wseg = MemorySegment.ofArray(new byte[cap]); - wseg .set(JAVA_LONG_UNALIGNED, 0, SiSkPre0WoutSiFlag); - wseg .set(JAVA_LONG_UNALIGNED, 8, Hash); - return wseg; - } - - @Test - public void printlnTest() { - println("PRINTING: "+this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(String s) { - //System.out.println(s); //disable here - } - -} diff --git a/src/test/java/org/apache/datasketches/theta2/SketchTest.java b/src/test/java/org/apache/datasketches/theta2/SketchTest.java deleted file mode 100644 index 77a8246e3..000000000 --- a/src/test/java/org/apache/datasketches/theta2/SketchTest.java +++ /dev/null @@ -1,439 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import static org.apache.datasketches.common.Family.ALPHA; -import static org.apache.datasketches.common.Family.COMPACT; -import static org.apache.datasketches.common.Family.QUICKSELECT; -import static org.apache.datasketches.common.ResizeFactor.X1; -import static org.apache.datasketches.common.ResizeFactor.X2; -import static org.apache.datasketches.common.ResizeFactor.X4; -import static org.apache.datasketches.common.ResizeFactor.X8; -import static org.apache.datasketches.common.Util.isSameResource; -import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; -import static org.apache.datasketches.theta2.BackwardConversions.convertSerVer3toSerVer1; -import static org.apache.datasketches.theta2.BackwardConversions.convertSerVer3toSerVer2; -import static org.apache.datasketches.theta2.CompactOperations.computeCompactPreLongs; -import static org.apache.datasketches.theta2.PreambleUtil.COMPACT_FLAG_MASK; -import static org.apache.datasketches.theta2.PreambleUtil.FLAGS_BYTE; -import static org.apache.datasketches.theta2.PreambleUtil.READ_ONLY_FLAG_MASK; -import static org.apache.datasketches.theta2.Sketch.getMaxCompactSketchBytes; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertFalse; -import static org.testng.Assert.assertTrue; -import static org.testng.Assert.fail; - -import 
java.lang.foreign.MemorySegment; -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.ThetaUtil; -import org.testng.annotations.Test; - -/** - * @author Lee Rhodes - */ -public class SketchTest { - - @Test - public void checkGetMaxBytesWithEntries() { - assertEquals(getMaxCompactSketchBytes(10), (10*8) + (Family.COMPACT.getMaxPreLongs() << 3) ); - } - - @Test - public void checkGetCurrentBytes() { - int k = 64; - int lowQSPreLongs = Family.QUICKSELECT.getMinPreLongs(); - int lowCompPreLongs = Family.COMPACT.getMinPreLongs(); - UpdateSketch sketch = UpdateSketch.builder().setNominalEntries(k).build(); // QS Sketch - assertEquals(sketch.getCurrentPreambleLongs(), lowQSPreLongs); - assertEquals(sketch.getCompactPreambleLongs(), 1); //compact form - assertEquals(sketch.getCurrentDataLongs(), k*2); - assertEquals(sketch.getCurrentBytes(), (k*2*8) + (lowQSPreLongs << 3)); - assertEquals(sketch.getCompactBytes(), lowCompPreLongs << 3); - - CompactSketch compSk = sketch.compact(false, null); - assertEquals(compSk.getCompactBytes(), 8); - assertEquals(compSk.getCurrentBytes(), 8); - assertEquals(compSk.getCurrentDataLongs(), 0); - - int compPreLongs = computeCompactPreLongs(sketch.isEmpty(), sketch.getRetainedEntries(true), - sketch.getThetaLong()); - assertEquals(compPreLongs, 1); - - for (int i=0; i 1) { assertEquals(maxCompBytes, 24 + (i * 8)); } //assumes maybe estimation mode - } - } - - @Test - public void checkBuilder() { - int k = 2048; - int lgK = Integer.numberOfTrailingZeros(k); - long seed = 1021; - float p = (float)0.5; - ResizeFactor rf = X4; - Family fam = Family.ALPHA; - - UpdateSketch sk1 = UpdateSketch.builder().setSeed(seed) - .setP(p).setResizeFactor(rf).setFamily(fam).setNominalEntries(k).build(); - String nameS1 = sk1.getClass().getSimpleName(); - 
assertEquals(nameS1, "HeapAlphaSketch"); - assertEquals(sk1.getLgNomLongs(), lgK); - assertEquals(sk1.getSeed(), seed); - assertEquals(sk1.getP(), p); - - //check reset of defaults - - sk1 = UpdateSketch.builder().build(); - nameS1 = sk1.getClass().getSimpleName(); - assertEquals(nameS1, "HeapQuickSelectSketch"); - assertEquals(sk1.getLgNomLongs(), Integer.numberOfTrailingZeros(ThetaUtil.DEFAULT_NOMINAL_ENTRIES)); - assertEquals(sk1.getSeed(), Util.DEFAULT_UPDATE_SEED); - assertEquals(sk1.getP(), (float)1.0); - assertEquals(sk1.getResizeFactor(), ResizeFactor.X8); - } - - @Test - public void checkBuilderNonPowerOf2() { - int k = 1000; - UpdateSketch sk = UpdateSketch.builder().setNominalEntries(k).build(); - assertEquals(sk.getLgNomLongs(), 10); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkBuilderIllegalP() { - float p = (float)1.5; - UpdateSketch.builder().setP(p).build(); - } - - @Test - public void checkBuilderResizeFactor() { - ResizeFactor rf; - rf = X1; - assertEquals(rf.getValue(), 1); - assertEquals(rf.lg(), 0); - assertEquals(ResizeFactor.getRF(0), X1); - rf = X2; - assertEquals(rf.getValue(), 2); - assertEquals(rf.lg(), 1); - assertEquals(ResizeFactor.getRF(1), X2); - rf = X4; - assertEquals(rf.getValue(), 4); - assertEquals(rf.lg(), 2); - assertEquals(ResizeFactor.getRF(2), X4); - rf = X8; - assertEquals(rf.getValue(), 8); - assertEquals(rf.lg(), 3); - assertEquals(ResizeFactor.getRF(3), X8); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkWrapBadFamily() { - UpdateSketch sketch = UpdateSketch.builder().setFamily(Family.ALPHA).setNominalEntries(1024).build(); - byte[] byteArr = sketch.toByteArray(); - MemorySegment srcSeg = MemorySegment.ofArray(byteArr); - Sketch.wrap(srcSeg); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkBadFamily() { - UpdateSketch.builder().setFamily(Family.INTERSECTION).setNominalEntries(1024).build(); - } - - 
@SuppressWarnings("static-access") - @Test - public void checkSerVer() { - UpdateSketch sketch = UpdateSketch.builder().setNominalEntries(1024).build(); - byte[] sketchArray = sketch.toByteArray(); - MemorySegment seg = MemorySegment.ofArray(sketchArray); - int serVer = Sketch.getSerializationVersion(seg); - assertEquals(serVer, 3); - MemorySegment wseg = MemorySegment.ofArray(sketchArray); - UpdateSketch sk2 = UpdateSketch.wrap(wseg); - serVer = sk2.getSerializationVersion(wseg); - assertEquals(serVer, 3); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkHeapifyAlphaCompactExcep() { - int k = 512; - Sketch sketch1 = UpdateSketch.builder().setFamily(ALPHA).setNominalEntries(k).build(); - byte[] byteArray = sketch1.toByteArray(); - MemorySegment seg = MemorySegment.ofArray(byteArray); - //corrupt: - Util.setBits(seg, FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); - Sketch.heapify(seg); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkHeapifyQSCompactExcep() { - int k = 512; - Sketch sketch1 = UpdateSketch.builder().setFamily(QUICKSELECT).setNominalEntries(k).build(); - byte[] byteArray = sketch1.toByteArray(); - MemorySegment seg = MemorySegment.ofArray(byteArray); - //corrupt: - Util.setBits(seg, FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); - Sketch.heapify(seg); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkHeapifyNotCompactExcep() { - int k = 512; - UpdateSketch sketch1 = UpdateSketch.builder().setFamily(QUICKSELECT).setNominalEntries(k).build(); - int bytes = Sketch.getMaxCompactSketchBytes(0); - byte[] byteArray = new byte[bytes]; - MemorySegment seg = MemorySegment.ofArray(byteArray); - sketch1.compact(false, seg); - //corrupt: - Util.clearBits(seg, FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); - Sketch.heapify(seg); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkHeapifyFamilyExcep() { - int k = 512; - Union union = 
SetOperation.builder().setNominalEntries(k).buildUnion(); - byte[] byteArray = union.toByteArray(); - MemorySegment seg = MemorySegment.ofArray(byteArray); - //Improper use - Sketch.heapify(seg); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkWrapAlphaCompactExcep() { - int k = 512; - Sketch sketch1 = UpdateSketch.builder().setFamily(ALPHA).setNominalEntries(k).build(); - byte[] byteArray = sketch1.toByteArray(); - MemorySegment seg = MemorySegment.ofArray(byteArray); - //corrupt: - Util.setBits(seg, FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); - Sketch.wrap(seg); - - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkWrapQSCompactExcep() { - int k = 512; - Sketch sketch1 = UpdateSketch.builder().setFamily(QUICKSELECT).setNominalEntries(k).build(); - byte[] byteArray = sketch1.toByteArray(); - MemorySegment seg = MemorySegment.ofArray(byteArray); - //corrupt: - Util.setBits(seg, FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); - Sketch.wrap(seg); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkWrapNotCompactExcep() { - int k = 512; - UpdateSketch sketch1 = UpdateSketch.builder().setFamily(QUICKSELECT).setNominalEntries(k).build(); - int bytes = Sketch.getMaxCompactSketchBytes(0); - byte[] byteArray = new byte[bytes]; - MemorySegment seg = MemorySegment.ofArray(byteArray); - sketch1.compact(false, seg); - //corrupt: - Util.clearBits(seg, FLAGS_BYTE, (byte) COMPACT_FLAG_MASK); - Sketch.wrap(seg); - } - - @Test - public void checkValidSketchID() { - assertFalse(Sketch.isValidSketchID(0)); - assertTrue(Sketch.isValidSketchID(ALPHA.getID())); - assertTrue(Sketch.isValidSketchID(QUICKSELECT.getID())); - assertTrue(Sketch.isValidSketchID(COMPACT.getID())); - } - - @Test - public void checkWrapToHeapifyConversion1() { - int k = 512; - UpdateSketch sketch1 = UpdateSketch.builder().setNominalEntries(k).build(); - for (int i = 0; i < k; i++) { - sketch1.update(i); - } - double 
uest1 = sketch1.getEstimate(); - - CompactSketch csk = sketch1.compact(); - assertEquals(csk.getEstimate(), uest1); - - MemorySegment v1seg = convertSerVer3toSerVer1(csk); - Sketch csk2 = Sketch.wrap(v1seg); //fails - assertFalse(csk2.isDirect()); - assertFalse(csk2.hasMemorySegment()); - assertEquals(uest1, csk2.getEstimate(), 0.0); - - MemorySegment v2seg = convertSerVer3toSerVer2(csk, Util.DEFAULT_UPDATE_SEED); - csk2 = Sketch.wrap(v2seg); - assertFalse(csk2.isDirect()); - assertFalse(csk2.hasMemorySegment()); - assertEquals(uest1, csk2.getEstimate(), 0.0); - } - - @Test - public void checkIsSameResource() { - int k = 16; - MemorySegment seg = MemorySegment.ofArray(new byte[(k*16) + 24]); //280 - MemorySegment cseg = MemorySegment.ofArray(new byte[32]); - UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(seg); - sketch.update(1); - sketch.update(2); - assertTrue(sketch.isSameResource(seg)); - DirectCompactSketch dcos = (DirectCompactSketch) sketch.compact(true, cseg); - assertTrue(isSameResource(dcos.getMemorySegment(), cseg)); - assertTrue(dcos.isOrdered()); - //never create 2 sketches with the same MemorySegment, so don't do as I do :) - DirectCompactSketch dcs = (DirectCompactSketch) sketch.compact(false, cseg); - assertTrue(isSameResource(dcs.getMemorySegment(), cseg)); - assertFalse(dcs.isOrdered()); - - Sketch sk = Sketches.updateSketchBuilder().setNominalEntries(k).build(); - assertFalse(isSameResource(sk.getMemorySegment(),seg)); - } - - @Test - public void checkCountLessThanTheta() { - int k = 512; - UpdateSketch sketch1 = UpdateSketch.builder().setNominalEntries(k).build(); - for (int i = 0; i < (2*k); i++) { sketch1.update(i); } - - double theta = sketch1.rebuild().getTheta(); - final long thetaLong = (long) (LONG_MAX_VALUE_AS_DOUBLE * theta); - int count = sketch1.getCountLessThanThetaLong(thetaLong); - assertEquals(count, k); - } - - private static MemorySegment createCompactSketchMemorySegment(int k, int u) { - 
UpdateSketch usk = Sketches.updateSketchBuilder().setNominalEntries(k).build(); - for (int i = 0; i < u; i++) { usk.update(i); } - int bytes = Sketch.getMaxCompactSketchBytes(usk.getRetainedEntries(true)); - MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]); - usk.compact(true, wseg); - return wseg; - } - - @Test - public void checkCompactFlagsOnWrap() { - MemorySegment wseg = createCompactSketchMemorySegment(16, 32); - Sketch sk = Sketch.wrap(wseg); - assertTrue(sk instanceof CompactSketch); - int flags = PreambleUtil.extractFlags(wseg); - - int flagsNoCompact = flags & ~COMPACT_FLAG_MASK; - PreambleUtil.insertFlags(wseg, flagsNoCompact); - try { - sk = Sketch.wrap(wseg); - fail(); - } catch (SketchesArgumentException e) { } - - int flagsNoReadOnly = flags & ~READ_ONLY_FLAG_MASK; - PreambleUtil.insertFlags(wseg, flagsNoReadOnly); - try { - sk = Sketch.wrap(wseg); - fail(); - } catch (SketchesArgumentException e) { } - PreambleUtil.insertFlags(wseg, flags); //repair to original - PreambleUtil.insertSerVer(wseg, 5); - try { - sk = Sketch.wrap(wseg); - fail(); - } catch (SketchesArgumentException e) { } - } - - @Test - public void checkCompactSizeAndFlagsOnHeapify() { - MemorySegment wseg = createCompactSketchMemorySegment(16, 32); - Sketch sk = Sketch.heapify(wseg); - assertTrue(sk instanceof CompactSketch); - int flags = PreambleUtil.extractFlags(wseg); - - int flagsNoCompact = flags & ~READ_ONLY_FLAG_MASK; - PreambleUtil.insertFlags(wseg, flagsNoCompact); - try { - sk = Sketch.heapify(wseg); - fail(); - } catch (SketchesArgumentException e) { } - - wseg = MemorySegment.ofArray(new byte[7]); - PreambleUtil.insertSerVer(wseg, 3); - //PreambleUtil.insertFamilyID(wseg, 3); - try { - sk = Sketch.heapify(wseg); - fail(); - } catch (SketchesArgumentException e) { } - } - - @Test - public void check2Methods() { - int k = 16; - Sketch sk = Sketches.updateSketchBuilder().setNominalEntries(k).build(); - int bytes1 = sk.getCompactBytes(); - int bytes2 = 
sk.getCurrentBytes(); - assertEquals(bytes1, 8); - assertEquals(bytes2, 280); //32*8 + 24 - int retEnt = sk.getRetainedEntries(); - assertEquals(retEnt, 0); - } - - @Test - public void printlnTest() { - println("PRINTING: "+this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(String s) { - //System.out.println(s); //disable here - } - -} diff --git a/src/test/java/org/apache/datasketches/theta2/SketchesTest.java b/src/test/java/org/apache/datasketches/theta2/SketchesTest.java deleted file mode 100644 index c9cdef434..000000000 --- a/src/test/java/org/apache/datasketches/theta2/SketchesTest.java +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.theta2; - -import static org.apache.datasketches.theta2.BackwardConversions.convertSerVer3toSerVer1; -import static org.apache.datasketches.theta2.Sketches.getCompactSketchMaxBytes; -import static org.apache.datasketches.theta2.Sketches.getMaxCompactSketchBytes; -import static org.apache.datasketches.theta2.Sketches.getMaxIntersectionBytes; -import static org.apache.datasketches.theta2.Sketches.getMaxUnionBytes; -import static org.apache.datasketches.theta2.Sketches.getMaxUpdateSketchBytes; -import static org.apache.datasketches.theta2.Sketches.getSerializationVersion; -import static org.apache.datasketches.theta2.Sketches.heapifySetOperation; -import static org.apache.datasketches.theta2.Sketches.heapifySketch; -import static org.apache.datasketches.theta2.Sketches.setOperationBuilder; -import static org.apache.datasketches.theta2.Sketches.updateSketchBuilder; -import static org.apache.datasketches.theta2.Sketches.wrapSetOperation; -import static org.apache.datasketches.theta2.Sketches.wrapSketch; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertFalse; -import static org.testng.Assert.assertTrue; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; -import org.apache.datasketches.thetacommon2.ThetaUtil; -import org.testng.annotations.Test; - -/** - * @author Lee Rhodes - */ -public class SketchesTest { - - private static MemorySegment getCompactSketchMemorySegment(final int k, final int from, final int to) { - final UpdateSketch sk1 = updateSketchBuilder().setNominalEntries(k).build(); - for (int i=from; i previous); - previous = it.get(); - } - } - } - - @Test(groups = {CHECK_CPP_FILES}) - public void deserializeFromCppCompressed() throws IOException { - final int[] nArr = {10, 100, 1000, 10000, 100000, 1000000}; - for (int n: nArr) { - 
final byte[] bytes = Files.readAllBytes(cppPath.resolve("theta_compressed_n" + n + "_cpp.sk")); - final CompactSketch sketch = CompactSketch.wrap(MemorySegment.ofArray(bytes)); - assertTrue(n == 0 ? sketch.isEmpty() : !sketch.isEmpty()); - assertEquals(sketch.getEstimate(), n, n * 0.03); - assertTrue(sketch.isOrdered()); - final HashIterator it = sketch.iterator(); - long previous = 0; - while (it.next()) { - assertTrue(it.get() < sketch.getThetaLong()); - assertTrue(it.get() > previous); - previous = it.get(); - } - } - } - - @Test(groups = {CHECK_CPP_FILES}) - public void deserializeFromCppNonEmptyNoEntries() throws IOException { - final byte[] bytes = Files.readAllBytes(cppPath.resolve("theta_non_empty_no_entries_cpp.sk")); - final CompactSketch sketch = CompactSketch.wrap(MemorySegment.ofArray(bytes)); - assertFalse(sketch.isEmpty()); - assertEquals(sketch.getRetainedEntries(), 0); - } - -} diff --git a/src/test/java/org/apache/datasketches/theta2/UnionImplTest.java b/src/test/java/org/apache/datasketches/theta2/UnionImplTest.java deleted file mode 100644 index 0a088f235..000000000 --- a/src/test/java/org/apache/datasketches/theta2/UnionImplTest.java +++ /dev/null @@ -1,319 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.theta2; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; -import static org.apache.datasketches.common.Util.isSameResource; -import static org.apache.datasketches.theta2.BackwardConversions.convertSerVer3toSerVer1; -import static org.apache.datasketches.theta2.BackwardConversions.convertSerVer3toSerVer2; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertFalse; -import static org.testng.Assert.assertTrue; - -import java.lang.foreign.Arena; -import java.nio.ByteOrder; - -import java.lang.foreign.MemorySegment; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; -import org.testng.annotations.Test; - -public class UnionImplTest { - - @Test - public void checkGetCurrentAndMaxBytes() { - final int lgK = 10; - final Union union = Sketches.setOperationBuilder().setLogNominalEntries(lgK).buildUnion(); - assertEquals(union.getCurrentBytes(), 288); - assertEquals(union.getMaxUnionBytes(), 16416); - } - - @Test - public void checkUpdateWithSketch() { - final int k = 16; - final MemorySegment seg = MemorySegment.ofArray(new byte[k*8 + 24]); - final MemorySegment seg2 = MemorySegment.ofArray(new byte[k*8 + 24]); - final UpdateSketch sketch = Sketches.updateSketchBuilder().setNominalEntries(k).build(); - for (int i=0; i 0) && !(((lgT - lgA) % lgR) == 0); - boolean rf0 = (lgR == 0) && (lgA != lgT); - assertTrue((lgRbad == rf0) || (lgRbad == rf123)); - } - } - } - } - - - @SuppressWarnings("unused") - @Test - public void checkCompactOpsMemorySegmentToCompact() { - MemorySegment skwseg, cskwseg1, cskwseg2, cskwseg3; - CompactSketch csk1, csk2, csk3; - int lgK = 6; - UpdateSketch sk = Sketches.updateSketchBuilder().setLogNominalEntries(lgK).build(); - int n = 1 << (lgK + 
1); - for (int i = 2; i < n; i++) { sk.update(i); } - int cbytes = sk.getCompactBytes(); - byte[] byteArr = sk.toByteArray(); - skwseg = MemorySegment.ofArray(byteArr); - cskwseg1 = MemorySegment.ofArray(new byte[cbytes]); - cskwseg2 = MemorySegment.ofArray(new byte[cbytes]); - cskwseg3 = MemorySegment.ofArray(new byte[cbytes]); - csk1 = sk.compact(true, cskwseg1); - csk2 = CompactOperations.segmentToCompact(skwseg, true, cskwseg2); - csk3 = CompactOperations.segmentToCompact(cskwseg1, true, cskwseg3); - assertTrue(equalContents(cskwseg1,cskwseg2)); - assertTrue(equalContents(cskwseg1, cskwseg3)); - } - - @Test - public void printlnTest() { - println("PRINTING: "+this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(String s) { - //System.out.println(s); //disable here - } -} diff --git a/src/test/java/org/apache/datasketches/thetacommon/BinomialBoundsNTest.java b/src/test/java/org/apache/datasketches/thetacommon/BinomialBoundsNTest.java index 823b4104f..c991432a9 100644 --- a/src/test/java/org/apache/datasketches/thetacommon/BinomialBoundsNTest.java +++ b/src/test/java/org/apache/datasketches/thetacommon/BinomialBoundsNTest.java @@ -27,6 +27,7 @@ import static org.testng.Assert.fail; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.thetacommon.BinomialBoundsN; import org.testng.annotations.Test; /** diff --git a/src/test/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSetsTest.java b/src/test/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSetsTest.java index bf9a3d753..ed8833f7a 100644 --- a/src/test/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSetsTest.java +++ b/src/test/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInThetaSketchedSetsTest.java @@ -27,6 +27,7 @@ import org.apache.datasketches.theta.Intersection; import org.apache.datasketches.theta.Sketches; import 
org.apache.datasketches.theta.UpdateSketch; +import org.apache.datasketches.thetacommon.BoundsOnRatiosInThetaSketchedSets; import org.testng.annotations.Test; public class BoundsOnRatiosInThetaSketchedSetsTest { diff --git a/src/test/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInTupleSketchedSetsTest.java b/src/test/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInTupleSketchedSetsTest.java index a0d9cf086..4a083b8e8 100644 --- a/src/test/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInTupleSketchedSetsTest.java +++ b/src/test/java/org/apache/datasketches/thetacommon/BoundsOnRatiosInTupleSketchedSetsTest.java @@ -22,13 +22,14 @@ import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.theta.UpdateSketch; import org.apache.datasketches.theta.UpdateSketchBuilder; +import org.apache.datasketches.thetacommon.BoundsOnRatiosInTupleSketchedSets; +import org.apache.datasketches.tuple.Intersection; import org.apache.datasketches.tuple.Sketch; import org.apache.datasketches.tuple.UpdatableSketch; import org.apache.datasketches.tuple.UpdatableSketchBuilder; import org.apache.datasketches.tuple.adouble.DoubleSummary; import org.apache.datasketches.tuple.adouble.DoubleSummaryFactory; import org.apache.datasketches.tuple.adouble.DoubleSummarySetOperations; -import org.apache.datasketches.tuple.Intersection; import org.testng.annotations.Test; import static org.testng.Assert.assertEquals; diff --git a/src/test/java/org/apache/datasketches/thetacommon/HashOperationsTest.java b/src/test/java/org/apache/datasketches/thetacommon/HashOperationsTest.java index 0d35b8fd5..e13a0b498 100644 --- a/src/test/java/org/apache/datasketches/thetacommon/HashOperationsTest.java +++ b/src/test/java/org/apache/datasketches/thetacommon/HashOperationsTest.java @@ -25,19 +25,20 @@ import static org.apache.datasketches.thetacommon.HashOperations.continueCondition; import static 
org.apache.datasketches.thetacommon.HashOperations.hashArrayInsert; import static org.apache.datasketches.thetacommon.HashOperations.hashInsertOnly; -import static org.apache.datasketches.thetacommon.HashOperations.hashInsertOnlyMemory; +import static org.apache.datasketches.thetacommon.HashOperations.hashInsertOnlyMemorySegment; import static org.apache.datasketches.thetacommon.HashOperations.hashSearch; -import static org.apache.datasketches.thetacommon.HashOperations.hashSearchMemory; +import static org.apache.datasketches.thetacommon.HashOperations.hashSearchMemorySegment; import static org.apache.datasketches.thetacommon.HashOperations.hashSearchOrInsert; -import static org.apache.datasketches.thetacommon.HashOperations.hashSearchOrInsertMemory; +import static org.apache.datasketches.thetacommon.HashOperations.hashSearchOrInsertMemorySegment; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; -import org.apache.datasketches.memory.WritableMemory; import org.testng.annotations.Test; public class HashOperationsTest { @@ -105,20 +106,20 @@ public void testHashInsertOnlyWithStride() { } @Test - public void testHashInsertOnlyMemoryNoStride() { + public void testHashInsertOnlyMemorySegmentNoStride() { final long[] table = new long[32]; - final WritableMemory mem = WritableMemory.writableWrap(table); - final int index = hashInsertOnlyMemory(mem, 5, 1, 0); + final MemorySegment seg = MemorySegment.ofArray(table); + final int index = hashInsertOnlyMemorySegment(seg, 5, 1, 0); assertEquals(index, 1); assertEquals(table[1], 1L); } @Test - public void testHashInsertOnlyMemoryWithStride() { + public void testHashInsertOnlyMemorySegmentWithStride() { final long[] table = new long[32]; 
table[1] = 1; - final WritableMemory mem = WritableMemory.writableWrap(table); - final int index = hashInsertOnlyMemory(mem, 5, 1, 0); + final MemorySegment seg = MemorySegment.ofArray(table); + final int index = hashInsertOnlyMemorySegment(seg, 5, 1, 0); assertEquals(index, 2); assertEquals(table[2], 1L); } @@ -152,24 +153,24 @@ public void checkFullHeapTableCatchesInfiniteLoop() { @Test public void checkFullDirectTableCatchesInfiniteLoop() { final long[] table = new long[32]; - final WritableMemory mem = WritableMemory.writableWrap(table); + final MemorySegment seg = MemorySegment.ofArray(table); for (int i = 1; i <= 32; ++i) { - hashInsertOnlyMemory(mem, 5, i, 0); + hashInsertOnlyMemorySegment(seg, 5, i, 0); } // table full; search returns not found, others throw exception - final int retVal = hashSearchMemory(mem, 5, 33, 0); + final int retVal = hashSearchMemorySegment(seg, 5, 33, 0); assertEquals(retVal, -1); try { - hashInsertOnlyMemory(mem, 5, 33, 0); + hashInsertOnlyMemorySegment(seg, 5, 33, 0); fail(); } catch (final SketchesArgumentException e) { // expected } try { - hashSearchOrInsertMemory(mem, 5, 33, 0); + hashSearchOrInsertMemorySegment(seg, 5, 33, 0); fail(); } catch (final SketchesArgumentException e) { // expected @@ -179,22 +180,22 @@ public void checkFullDirectTableCatchesInfiniteLoop() { @Test public void checkFullFastDirectTableCatchesInfiniteLoop() { final long[] table = new long[32]; - final WritableMemory wmem = WritableMemory.writableWrap(table); + final MemorySegment wseg = MemorySegment.ofArray(table); for (int i = 1; i <= 32; ++i) { - hashInsertOnlyMemory(wmem, 5, i, 0); + hashInsertOnlyMemorySegment(wseg, 5, i, 0); } // table full; throws exception try { - hashInsertOnlyMemory(wmem, 5, 33, 0); + hashInsertOnlyMemorySegment(wseg, 5, 33, 0); fail(); } catch (final SketchesArgumentException e) { // expected } try { - hashSearchOrInsertMemory(wmem, 5, 33, 0); + hashSearchOrInsertMemorySegment(wseg, 5, 33, 0); fail(); } catch (final 
SketchesArgumentException e) { // expected diff --git a/src/test/java/org/apache/datasketches/thetacommon/ThetaUtilTest.java b/src/test/java/org/apache/datasketches/thetacommon/ThetaUtilTest.java index 3a4ed9574..2a7628470 100644 --- a/src/test/java/org/apache/datasketches/thetacommon/ThetaUtilTest.java +++ b/src/test/java/org/apache/datasketches/thetacommon/ThetaUtilTest.java @@ -20,6 +20,7 @@ package org.apache.datasketches.thetacommon; import org.apache.datasketches.quantilescommon.QuantilesUtil; +import org.apache.datasketches.thetacommon.ThetaUtil; import org.testng.Assert; import org.testng.annotations.Test; diff --git a/src/test/java/org/apache/datasketches/thetacommon2/BinomialBoundsNTest.java b/src/test/java/org/apache/datasketches/thetacommon2/BinomialBoundsNTest.java deleted file mode 100644 index 4d656c04b..000000000 --- a/src/test/java/org/apache/datasketches/thetacommon2/BinomialBoundsNTest.java +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.thetacommon2; - -import static org.apache.datasketches.thetacommon2.BinomialBoundsN.checkArgs; -import static org.apache.datasketches.thetacommon2.BinomialBoundsN.getLowerBound; -import static org.apache.datasketches.thetacommon2.BinomialBoundsN.getUpperBound; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertTrue; -import static org.testng.Assert.fail; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.testng.annotations.Test; - -/** - * @author Kevin Lang - */ -public class BinomialBoundsNTest { - - public static double[] runTestAux(final long max_numSamplesI, final int ci, final double min_p) { - long numSamplesI = 0; - double p, lb, ub; - double sum1 = 0.0; - double sum2 = 0.0; - double sum3 = 0.0; - double sum4 = 0.0; - long count = 0; - - while (numSamplesI <= max_numSamplesI) { /* was <= */ - p = 1.0; - - while (p >= min_p) { - lb = BinomialBoundsN.getLowerBound(numSamplesI, p, ci, false); - ub = BinomialBoundsN.getUpperBound(numSamplesI, p, ci, false); - - // if (numSamplesI == 300 && p > 0.365 && p < 0.367) { ub += 0.01; } // artificial discrepancy - - // the logarithm helps discrepancies to not be swamped out of the total - sum1 += Math.log(lb + 1.0); - sum2 += Math.log(ub + 1.0); - count += 2; - - if (p < 1.0) { - lb = BinomialBoundsN.getLowerBound(numSamplesI, 1.0 - p, ci, false); - ub = BinomialBoundsN.getUpperBound(numSamplesI, 1.0 - p, ci, false); - sum3 += Math.log(lb + 1.0); - sum4 += Math.log(ub + 1.0); - count += 2; - } - - p *= 0.99; - } - numSamplesI = Math.max(numSamplesI + 1, (1001 * numSamplesI) / 1000); - } - - println(String.format("{%.15e, %.15e, %.15e, %.15e, %d}", sum1, sum2, sum3, sum4, count)); - final double[] arrOut = {sum1, sum2, sum3, sum4, count}; - return arrOut; - } - - private static final double TOL = 1E-15; - - @Test - public static void checkBounds() { - int i = 0; - for (int ci = 1; ci <= 3; ci++, i++) { - final 
double[] arr = runTestAux(20, ci, 1e-3); - for (int j = 0; j < 5; j++) { - assertTrue(((arr[j] / std[i][j]) - 1.0) < TOL); - } - } - for (int ci = 1; ci <= 3; ci++, i++) { - final double[] arr = runTestAux(200, ci, 1e-5); - for (int j = 0; j < 5; j++) { - assertTrue(((arr[j] / std[i][j]) - 1.0) < TOL); - } - } - //comment last one out for a shorter test - // for (int ci = 1; ci <= 3; ci++, i++) { - // final double[] arr = runTestAux(2000, ci, 1e-7); - // for (int j = 0; j < 5; j++) { - // assertTrue(((arr[j] / std[i][j]) - 1.0) < TOL); - // } - //} - } - - // With all 3 enabled the test should produce in groups of 3 */ - private static final double[][] std = { - {7.083330682531043e+04, 8.530373642825481e+04, 3.273647725073409e+04, 3.734024243699785e+04, 57750}, - {6.539415269641498e+04, 8.945522372568645e+04, 3.222302546497840e+04, 3.904738469737429e+04, 57750}, - {6.006043493107306e+04, 9.318105731423477e+04, 3.186269956585285e+04, 4.096466221922520e+04, 57750}, - - {2.275584770163813e+06, 2.347586549014998e+06, 1.020399409477305e+06, 1.036729927598294e+06, 920982}, - {2.243569126699713e+06, 2.374663344107342e+06, 1.017017233582122e+06, 1.042597845553438e+06, 920982}, - {2.210056231903739e+06, 2.400441267999687e+06, 1.014081235946986e+06, 1.049480769755676e+06, 920982}, - - {4.688240115809608e+07, 4.718067204619278e+07, 2.148362024482338e+07, 2.153118905212302e+07, 12834414}, - {4.674205938540214e+07, 4.731333757486791e+07, 2.146902141966406e+07, 2.154916650733873e+07, 12834414}, - {4.659896614422579e+07, 4.744404182094614e+07, 2.145525391547799e+07, 2.156815612325058e+07, 12834414} - }; - - @Test - public static void checkCheckArgs() { - try { - checkArgs(-1L, 1.0, 1); - checkArgs(10L, 0.0, 1); - checkArgs(10L, 1.01, 1); - checkArgs(10L, 1.0, 3); - checkArgs(10L, 1.0, 0); - checkArgs(10L, 1.0, 4); - fail("Expected SketchesArgumentException"); - } catch (final SketchesArgumentException e) { - //pass - } - } - - @Test - public static void 
checkComputeApproxBino_LB_UB() { - final long n = 100; - final double theta = (2.0 - 1e-5) / 2.0; - double result = getLowerBound(n, theta, 1, false); - assertEquals(result, n, 0.0); - result = getUpperBound(n, theta, 1, false); - assertEquals(result, n + 1, 0.0); - result = getLowerBound(n, theta, 1, true); - assertEquals(result, 0.0, 0.0); - result = getUpperBound(n, theta, 1, true); - assertEquals(result, 0.0, 0.0); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public static void checkThetaLimits1() { - BinomialBoundsN.getUpperBound(100, 1.1, 1, false); - } - - @Test - public static void boundsExample() { - println("BinomialBoundsN Example:"); - final int k = 500; - final double theta = 0.001; - final int stdDev = 2; - final double ub = BinomialBoundsN.getUpperBound(k, theta, stdDev, false); - final double est = k / theta; - final double lb = BinomialBoundsN.getLowerBound(k, theta, stdDev, false); - println("K=" + k + ", Theta=" + theta + ", SD=" + stdDev); - println("UB: " + ub); - println("Est: " + est); - println("LB: " + lb); - println(""); - } - - @Test - public void printlnTest() { - println("PRINTING: " + this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(final String s) { - //System.out.println(s); //disable here - } - -} diff --git a/src/test/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInThetaSketchedSetsTest.java b/src/test/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInThetaSketchedSetsTest.java deleted file mode 100644 index f7ee77b32..000000000 --- a/src/test/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInThetaSketchedSetsTest.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.thetacommon2; - -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertTrue; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.theta2.CompactSketch; -import org.apache.datasketches.theta2.Intersection; -import org.apache.datasketches.theta2.Sketches; -import org.apache.datasketches.theta2.UpdateSketch; -import org.testng.annotations.Test; - -public class BoundsOnRatiosInThetaSketchedSetsTest { - - @Test - public void checkNormalReturns() { - final UpdateSketch skA = Sketches.updateSketchBuilder().build(); //4K - final UpdateSketch skC = Sketches.updateSketchBuilder().build(); - final int uA = 10000; - final int uC = 100000; - for (int i = 0; i < uA; i++) { skA.update(i); } - for (int i = 0; i < uC; i++) { skC.update(i + (uA / 2)); } - final Intersection inter = Sketches.setOperationBuilder().buildIntersection(); - inter.intersect(skA); - inter.intersect(skC); - final CompactSketch skB = inter.getResult(); - - double est = BoundsOnRatiosInThetaSketchedSets.getEstimateOfBoverA(skA, skB); - double lb = BoundsOnRatiosInThetaSketchedSets.getLowerBoundForBoverA(skA, skB); - double ub = BoundsOnRatiosInThetaSketchedSets.getUpperBoundForBoverA(skA, skB); - assertTrue(ub > est); - assertTrue(est > lb); - assertEquals(est, 0.5, .03); - println("ub : " + ub); - println("est: " 
+ est); - println("lb : " + lb); - skA.reset(); //skA is now empty - est = BoundsOnRatiosInThetaSketchedSets.getEstimateOfBoverA(skA, skB); - lb = BoundsOnRatiosInThetaSketchedSets.getLowerBoundForBoverA(skA, skB); - ub = BoundsOnRatiosInThetaSketchedSets.getUpperBoundForBoverA(skA, skB); - println("ub : " + ub); - println("est: " + est); - println("lb : " + lb); - skC.reset(); //Now both are empty - est = BoundsOnRatiosInThetaSketchedSets.getEstimateOfBoverA(skA, skC); - lb = BoundsOnRatiosInThetaSketchedSets.getLowerBoundForBoverA(skA, skC); - ub = BoundsOnRatiosInThetaSketchedSets.getUpperBoundForBoverA(skA, skC); - println("ub : " + ub); - println("est: " + est); - println("lb : " + lb); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkAbnormalReturns() { - final UpdateSketch skA = Sketches.updateSketchBuilder().build(); //4K - final UpdateSketch skC = Sketches.updateSketchBuilder().build(); - final int uA = 100000; - final int uC = 10000; - for (int i = 0; i < uA; i++) { skA.update(i); } - for (int i = 0; i < uC; i++) { skC.update(i + (uA / 2)); } - BoundsOnRatiosInThetaSketchedSets.getEstimateOfBoverA(skA, skC); - } - - @Test - public void printlnTest() { - println("PRINTING: " + this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(final String s) { - //System.out.println(s); //disable here - } -} diff --git a/src/test/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInTupleSketchedSetsTest.java b/src/test/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInTupleSketchedSetsTest.java deleted file mode 100644 index e4bd42478..000000000 --- a/src/test/java/org/apache/datasketches/thetacommon2/BoundsOnRatiosInTupleSketchedSetsTest.java +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.thetacommon2; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.theta2.UpdateSketch; -import org.apache.datasketches.theta2.UpdateSketchBuilder; -import org.apache.datasketches.tuple2.Sketch; -import org.apache.datasketches.tuple2.UpdatableSketch; -import org.apache.datasketches.tuple2.UpdatableSketchBuilder; -import org.apache.datasketches.tuple2.adouble.DoubleSummary; -import org.apache.datasketches.tuple2.adouble.DoubleSummaryFactory; -import org.apache.datasketches.tuple2.adouble.DoubleSummarySetOperations; -import org.apache.datasketches.tuple2.Intersection; -import org.testng.annotations.Test; - -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertTrue; - -/** - * @author Lee Rhodes - * @author David Cromberge - */ -public class BoundsOnRatiosInTupleSketchedSetsTest { - - private final DoubleSummary.Mode umode = DoubleSummary.Mode.Sum; - private final DoubleSummarySetOperations dsso = new DoubleSummarySetOperations(); - private final DoubleSummaryFactory factory = new DoubleSummaryFactory(umode); - private final UpdateSketchBuilder thetaBldr = UpdateSketch.builder(); - private final UpdatableSketchBuilder tupleBldr = new 
UpdatableSketchBuilder<>(factory); - private final Double constSummary = 1.0; - - @Test - public void checkNormalReturns1() { // tuple, tuple - final UpdatableSketch skA = tupleBldr.build(); //4K - final UpdatableSketch skC = tupleBldr.build(); - final int uA = 10000; - final int uC = 100000; - for (int i = 0; i < uA; i++) { skA.update(i, constSummary); } - for (int i = 0; i < uC; i++) { skC.update(i + (uA / 2), constSummary); } - final Intersection inter = new Intersection<>(dsso); - inter.intersect(skA); - inter.intersect(skC); - final Sketch skB = inter.getResult(); - - double est = BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(skA, skB); - double lb = BoundsOnRatiosInTupleSketchedSets.getLowerBoundForBoverA(skA, skB); - double ub = BoundsOnRatiosInTupleSketchedSets.getUpperBoundForBoverA(skA, skB); - assertTrue(ub > est); - assertTrue(est > lb); - assertEquals(est, 0.5, .03); - println("ub : " + ub); - println("est: " + est); - println("lb : " + lb); - skA.reset(); //skA is now empty - est = BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(skA, skB); - lb = BoundsOnRatiosInTupleSketchedSets.getLowerBoundForBoverA(skA, skB); - ub = BoundsOnRatiosInTupleSketchedSets.getUpperBoundForBoverA(skA, skB); - println("ub : " + ub); - println("est: " + est); - println("lb : " + lb); - skC.reset(); //Now both are empty - est = BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(skA, skC); - lb = BoundsOnRatiosInTupleSketchedSets.getLowerBoundForBoverA(skA, skC); - ub = BoundsOnRatiosInTupleSketchedSets.getUpperBoundForBoverA(skA, skC); - println("ub : " + ub); - println("est: " + est); - println("lb : " + lb); - } - - @Test - public void checkNormalReturns2() { // tuple, theta - final UpdatableSketch skA = tupleBldr.build(); //4K - final UpdateSketch skC = thetaBldr.build(); - final int uA = 10000; - final int uC = 100000; - for (int i = 0; i < uA; i++) { skA.update(i, constSummary); } - for (int i = 0; i < uC; i++) { skC.update(i + (uA / 2)); } - final 
Intersection inter = new Intersection<>(dsso); - inter.intersect(skA); - inter.intersect(skC, factory.newSummary()); - final Sketch skB = inter.getResult(); - - double est = BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(skA, skB); - double lb = BoundsOnRatiosInTupleSketchedSets.getLowerBoundForBoverA(skA, skB); - double ub = BoundsOnRatiosInTupleSketchedSets.getUpperBoundForBoverA(skA, skB); - assertTrue(ub > est); - assertTrue(est > lb); - assertEquals(est, 0.5, .03); - println("ub : " + ub); - println("est: " + est); - println("lb : " + lb); - skA.reset(); //skA is now empty - est = BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(skA, skB); - lb = BoundsOnRatiosInTupleSketchedSets.getLowerBoundForBoverA(skA, skB); - ub = BoundsOnRatiosInTupleSketchedSets.getUpperBoundForBoverA(skA, skB); - println("ub : " + ub); - println("est: " + est); - println("lb : " + lb); - skC.reset(); //Now both are empty - est = BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(skA, skC); - lb = BoundsOnRatiosInTupleSketchedSets.getLowerBoundForBoverA(skA, skC); - ub = BoundsOnRatiosInTupleSketchedSets.getUpperBoundForBoverA(skA, skC); - println("ub : " + ub); - println("est: " + est); - println("lb : " + lb); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkAbnormalReturns1() { // tuple, tuple - final UpdatableSketch skA = tupleBldr.build(); //4K - final UpdatableSketch skC = tupleBldr.build(); - final int uA = 100000; - final int uC = 10000; - for (int i = 0; i < uA; i++) { skA.update(i, constSummary); } - for (int i = 0; i < uC; i++) { skC.update(i + (uA / 2), constSummary); } - BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(skA, skC); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkAbnormalReturns2() { // tuple, theta - final UpdatableSketch skA = tupleBldr.build(); //4K - final UpdateSketch skC = thetaBldr.build(); - final int uA = 100000; - final int uC = 10000; - for (int i = 0; 
i < uA; i++) { skA.update(i, constSummary); } - for (int i = 0; i < uC; i++) { skC.update(i + (uA / 2)); } - BoundsOnRatiosInTupleSketchedSets.getEstimateOfBoverA(skA, skC); - } - - @Test - public void printlnTest() { - println("PRINTING: " + this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(final String s) { - //System.out.println(s); //disable here - } -} diff --git a/src/test/java/org/apache/datasketches/thetacommon2/HashOperationsTest.java b/src/test/java/org/apache/datasketches/thetacommon2/HashOperationsTest.java deleted file mode 100644 index 1ff10ef8c..000000000 --- a/src/test/java/org/apache/datasketches/thetacommon2/HashOperationsTest.java +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.thetacommon2; - -import static org.apache.datasketches.hash.MurmurHash3.hash; -import static org.apache.datasketches.thetacommon2.HashOperations.checkHashCorruption; -import static org.apache.datasketches.thetacommon2.HashOperations.checkThetaCorruption; -import static org.apache.datasketches.thetacommon2.HashOperations.continueCondition; -import static org.apache.datasketches.thetacommon2.HashOperations.hashArrayInsert; -import static org.apache.datasketches.thetacommon2.HashOperations.hashInsertOnly; -import static org.apache.datasketches.thetacommon2.HashOperations.hashInsertOnlyMemorySegment; -import static org.apache.datasketches.thetacommon2.HashOperations.hashSearch; -import static org.apache.datasketches.thetacommon2.HashOperations.hashSearchMemorySegment; -import static org.apache.datasketches.thetacommon2.HashOperations.hashSearchOrInsert; -import static org.apache.datasketches.thetacommon2.HashOperations.hashSearchOrInsertMemorySegment; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertFalse; -import static org.testng.Assert.assertTrue; -import static org.testng.Assert.fail; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.SketchesStateException; -import org.testng.annotations.Test; - -public class HashOperationsTest { - - //Not otherwise already covered - - @Test(expectedExceptions = SketchesStateException.class) - public void testThetaCorruption1() { - checkThetaCorruption(0); - } - - @Test(expectedExceptions = SketchesStateException.class) - public void testThetaCorruption2() { - checkThetaCorruption(-1); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void testHashCorruption() { - checkHashCorruption(-1); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkHashSearch() { - hashSearch(new long[4], 2, 0); - } - - 
@Test - public void checkHashArrayInsert() { - final long[] hTable = new long[16]; - final long[] hashIn = new long[1]; - for (int i = 0; i < 8; i++) { - hashIn[0] = i; - final long h = hash(hashIn, 0)[0] >>> 1; - hashInsertOnly(hTable, 4, h); - final int count = hashArrayInsert(hTable, hTable, 4, Long.MAX_VALUE); - assertEquals(count, 0); - } - - } - - @Test - public void testContinueCondtion() { - final long thetaLong = Long.MAX_VALUE / 2; - assertTrue(continueCondition(thetaLong, 0)); - assertTrue(continueCondition(thetaLong, thetaLong)); - assertTrue(continueCondition(thetaLong, thetaLong + 1)); - assertFalse(continueCondition(thetaLong, thetaLong - 1)); - } - - @Test - public void testHashInsertOnlyNoStride() { - final long[] table = new long[32]; - final int index = hashInsertOnly(table, 5, 1); - assertEquals(index, 1); - assertEquals(table[1], 1L); - } - - @Test - public void testHashInsertOnlyWithStride() { - final long[] table = new long[32]; - table[1] = 1; - final int index = hashInsertOnly(table, 5, 1); - assertEquals(index, 2); - assertEquals(table[2], 1L); - } - - @Test - public void testHashInsertOnlyMemorySegmentNoStride() { - final long[] table = new long[32]; - final MemorySegment seg = MemorySegment.ofArray(table); - final int index = hashInsertOnlyMemorySegment(seg, 5, 1, 0); - assertEquals(index, 1); - assertEquals(table[1], 1L); - } - - @Test - public void testHashInsertOnlyMemorySegmentWithStride() { - final long[] table = new long[32]; - table[1] = 1; - final MemorySegment seg = MemorySegment.ofArray(table); - final int index = hashInsertOnlyMemorySegment(seg, 5, 1, 0); - assertEquals(index, 2); - assertEquals(table[2], 1L); - } - - @Test - public void checkFullHeapTableCatchesInfiniteLoop() { - final long[] table = new long[32]; - for (int i = 1; i <= 32; ++i) { - hashInsertOnly(table, 5, i); - } - - // table full; search returns not found, others throw exception - final int retVal = hashSearch(table, 5, 33); - assertEquals(retVal, -1); - - 
try { - hashInsertOnly(table, 5, 33); - fail(); - } catch (final SketchesArgumentException e) { - // expected - } - - try { - hashSearchOrInsert(table, 5, 33); - fail(); - } catch (final SketchesArgumentException e) { - // expected - } - } - - @Test - public void checkFullDirectTableCatchesInfiniteLoop() { - final long[] table = new long[32]; - final MemorySegment seg = MemorySegment.ofArray(table); - for (int i = 1; i <= 32; ++i) { - hashInsertOnlyMemorySegment(seg, 5, i, 0); - } - - // table full; search returns not found, others throw exception - final int retVal = hashSearchMemorySegment(seg, 5, 33, 0); - assertEquals(retVal, -1); - - try { - hashInsertOnlyMemorySegment(seg, 5, 33, 0); - fail(); - } catch (final SketchesArgumentException e) { - // expected - } - - try { - hashSearchOrInsertMemorySegment(seg, 5, 33, 0); - fail(); - } catch (final SketchesArgumentException e) { - // expected - } - } - - @Test - public void checkFullFastDirectTableCatchesInfiniteLoop() { - final long[] table = new long[32]; - final MemorySegment wseg = MemorySegment.ofArray(table); - - for (int i = 1; i <= 32; ++i) { - hashInsertOnlyMemorySegment(wseg, 5, i, 0); - } - - // table full; throws exception - try { - hashInsertOnlyMemorySegment(wseg, 5, 33, 0); - fail(); - } catch (final SketchesArgumentException e) { - // expected - } - - try { - hashSearchOrInsertMemorySegment(wseg, 5, 33, 0); - fail(); - } catch (final SketchesArgumentException e) { - // expected - } - } - - - @Test - public void printlnTest() { - println("PRINTING: " + this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(final String s) { - //System.out.println(s); //disable here - } - -} diff --git a/src/test/java/org/apache/datasketches/thetacommon2/QuickSelectTest.java b/src/test/java/org/apache/datasketches/thetacommon2/QuickSelectTest.java deleted file mode 100644 index 71afa354b..000000000 --- a/src/test/java/org/apache/datasketches/thetacommon2/QuickSelectTest.java 
+++ /dev/null @@ -1,446 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.thetacommon2; - -import static java.lang.String.format; -import static org.apache.datasketches.common.QuickSelect.select; -import static org.apache.datasketches.common.QuickSelect.selectExcludingZeros; -import static org.apache.datasketches.common.QuickSelect.selectIncludingZeros; - -import java.util.Random; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.testng.Assert; -import org.testng.annotations.Test; - -/** - * @author Lee Rhodes - */ -public class QuickSelectTest { - private static final String LS = System.getProperty("line.separator"); - private static final Random random = new Random(); // pseudo-random number generator - - //long[] arrays - - @Test - public void checkQuickSelect0Based() { - final int len = 64; - final long[] arr = new long[len]; - for (int i = 0; i < len; i++ ) { - arr[i] = i; - } - for (int pivot = 0; pivot < 64; pivot++ ) { - final long trueVal = pivot; - for (int i = 0; i < 1000; i++ ) { - shuffle(arr); - final long retVal = select(arr, 0, len - 1, pivot); - Assert.assertEquals(retVal, trueVal); - } - } - } - - @Test - public void 
checkQuickSelect1BasedExcludingZeros() { - final int len = 64; - final int nonZeros = (7 * len) / 8; - final long[] arr = new long[len]; - for (int i = 0; i < nonZeros; i++ ) { - arr[i] = i + 1; - } - final int pivot = len / 2; - final long trueVal = arr[pivot - 1]; - shuffle(arr); - final long retVal = selectExcludingZeros(arr, nonZeros, pivot); - Assert.assertEquals(retVal, trueVal); - } - - @Test - public void checkQuickSelect1BasedExcludingZeros2() { - final int len = 64; - final int nonZeros = 16; - final long[] arr = new long[len]; - for (int i = 0; i < nonZeros; i++ ) { - arr[i] = i + 1; - } - shuffle(arr); - final int pivot = len / 2; - final long retVal = selectExcludingZeros(arr, nonZeros, pivot); - Assert.assertEquals(retVal, 0); - } - - @Test - public void checkQuickSelect1BasedIncludingZeros() { - final int len = 64; - final int zeros = len / 8; - final long[] arr = new long[len]; - for (int i = zeros; i < len; i++ ) { - arr[i] = i + 1; - } - final int pivot = len / 2; - final long trueVal = arr[pivot - 1]; - shuffle(arr); - final long retVal = selectIncludingZeros(arr, pivot); - Assert.assertEquals(retVal, trueVal); - } - - //double[] arrays - - @Test - public void checkQuickSelectDbl0Based() { - final int len = 64; - final double[] arr = new double[len]; - for (int i = 0; i < len; i++ ) { - arr[i] = i; - } - for (int pivot = 0; pivot < 64; pivot++ ) { - final double trueVal = pivot; - for (int i = 0; i < 1000; i++ ) { - shuffle(arr); - final double retVal = select(arr, 0, len - 1, pivot); - Assert.assertEquals(retVal, trueVal, 0.0); - } - } - } - - @Test - public void checkQuickSelectDbl1BasedExcludingZeros() { - final int len = 64; - final int nonZeros = (7 * len) / 8; - final double[] arr = new double[len]; - for (int i = 0; i < nonZeros; i++ ) { - arr[i] = i + 1; - } - final int pivot = len / 2; - final double trueVal = arr[pivot - 1]; - shuffle(arr); - final double retVal = selectExcludingZeros(arr, nonZeros, pivot); - Assert.assertEquals(retVal, 
trueVal, 0.0); - } - - @Test - public void checkQuickSelectDbl1BasedExcludingZeros2() { - final int len = 64; - final int nonZeros = 16; - final double[] arr = new double[len]; - for (int i = 0; i < nonZeros; i++ ) { - arr[i] = i + 1; - } - shuffle(arr); - final int pivot = len / 2; - final double retVal = selectExcludingZeros(arr, nonZeros, pivot); - Assert.assertEquals(retVal, 0, 0.0); - } - - @Test - public void checkQuickSelectDbl1BasedIncludingZeros() { - final int len = 64; - final int zeros = len / 8; - final double[] arr = new double[len]; - for (int i = zeros; i < len; i++ ) { - arr[i] = i + 1; - } - final int pivot = len / 2; - final double trueVal = arr[pivot - 1]; - shuffle(arr); - final double retVal = selectIncludingZeros(arr, pivot); - Assert.assertEquals(retVal, trueVal, 0.0); - } - - - /** - * Rearrange the elements of an array in random order. - * @param a long array - */ - public static void shuffle(final long[] a) { - final int N = a.length; - for (int i = 0; i < N; i++ ) { - final int r = i + uniform(N - i); // between i and N-1 - final long temp = a[i]; - a[i] = a[r]; - a[r] = temp; - } - } - - /** - * Rearrange the elements of an array in random order. 
- * @param a double array - */ - public static void shuffle(final double[] a) { - final int N = a.length; - for (int i = 0; i < N; i++ ) { - final int r = i + uniform(N - i); // between i and N-1 - final double temp = a[i]; - a[i] = a[r]; - a[r] = temp; - } - } - - - /** - * Returns an integer uniformly between 0 (inclusive) and n (exclusive) where {@code n > 0} - * - * @param n the upper exclusive bound - * @return random integer - */ - public static int uniform(final int n) { - if (n <= 0) { - throw new SketchesArgumentException("n must be positive"); - } - return random.nextInt(n); - } - - private static String printArr(final long[] arr) { - final StringBuilder sb = new StringBuilder(); - final int len = arr.length; - sb.append(" Base0").append(" Base1").append(" Value").append(LS); - for (int i = 0; i < len; i++ ) { - sb - .append(format("%6d", i)).append(format("%6d", i + 1)).append(format("%6d", arr[i])) - .append(LS); - } - return sb.toString(); - } - - private static String printArr(final double[] arr) { - final StringBuilder sb = new StringBuilder(); - final int len = arr.length; - sb.append(" Base0").append(" Base1").append(" Value").append(LS); - for (int i = 0; i < len; i++ ) { - sb - .append(format("%6d", i)).append(format("%6d", i + 1)).append(format("%9.3f", arr[i])) - .append(LS); - } - return sb.toString(); - } - - //For console testing - static void test1() { - final int len = 16; - final int nonZeros = (3 * len) / 4; - final int zeros = len - nonZeros; - final long[] arr = new long[len]; - for (int i = 0; i < nonZeros; i++ ) { - arr[i] = i + 1; - } - println("Generated Numbers:"); - println(printArr(arr)); - shuffle(arr); - println("Randomized Ordering:"); - println(printArr(arr)); - final int pivot = len / 2; - println("select(...):"); - println("ArrSize : " + len); - println("NonZeros: " + nonZeros); - println("Zeros : " + zeros); - println("Choose pivot at 1/2 array size, pivot: " + pivot); - final long ret = select(arr, 0, len - 1, pivot); - 
println("Return value of 0-based pivot including zeros:"); - println("select(arr, 0, " + (len - 1) + ", " + pivot + ") => " + ret); - println("0-based index of pivot = pivot = " + (pivot)); - println("Result Array:" + LS); - println(printArr(arr)); - } - - //For console testing - static void test2() { - final int len = 16; - final int nonZeros = (3 * len) / 4; - final int zeros = len - nonZeros; - final long[] arr = new long[len]; - for (int i = 0; i < nonZeros; i++ ) { - arr[i] = i + 1; - } - println("Generated Numbers:"); - println(printArr(arr)); - shuffle(arr); - println("Randomized Ordering:"); - println(printArr(arr)); - final int pivot = len / 2; //= 8 - println("selectDiscountingZeros(...):"); - println("ArrSize : " + len); - println("NonZeros: " + nonZeros); - println("Zeros : " + zeros); - println("Choose pivot at 1/2 array size, pivot= " + pivot); - final long ret = selectExcludingZeros(arr, nonZeros, pivot); - println("Return value of 1-based pivot discounting zeros:"); - println("selectDiscountingZeros(arr, " + nonZeros + ", " + pivot + ") => " + ret); - println("0-based index of pivot= pivot+zeros-1 = " + ((pivot + zeros) - 1)); - println("Result Array:" + LS); - println(printArr(arr)); - } - - //For console testing - static void test3() { - final int len = 16; - final int nonZeros = (3 * len) / 4; - final int zeros = len - nonZeros; - final long[] arr = new long[len]; - for (int i = 0; i < nonZeros; i++ ) { - arr[i] = i + 1; - } - println("Generated Numbers:"); - println(printArr(arr)); - shuffle(arr); - println("Randomized Ordering:"); - println(printArr(arr)); - final int pivot = len / 2; //= 8 - println("selectIncludingZeros(...):"); - println("ArrSize : " + len); - println("NonZeros: " + nonZeros); - println("Zeros : " + zeros); - println("Choose pivot at 1/2 array size, pivot= " + pivot); - final long ret = selectIncludingZeros(arr, pivot); - println("Return value of 1-based pivot including zeros:"); - println("selectIncludingZeros(arr, " + 
pivot + ") => " + ret); - println("0-based index of pivot= pivot-1 = " + (pivot - 1)); - println("Result Array:" + LS); - println(printArr(arr)); - } - - static void testDbl1() { - final int len = 16; - final int nonZeros = (3 * len) / 4; - final int zeros = len - nonZeros; - final double[] arr = new double[len]; - for (int i = 0; i < nonZeros; i++ ) { - arr[i] = i + 1; - } - println("Generated Numbers:"); - println(printArr(arr)); - shuffle(arr); - println("Randomized Ordering:"); - println(printArr(arr)); - final int pivot = len / 2; - println("select(...):"); - println("ArrSize : " + len); - println("NonZeros: " + nonZeros); - println("Zeros : " + zeros); - println("Choose pivot at 1/2 array size, pivot: " + pivot); - final double ret = select(arr, 0, len - 1, pivot); - println("Return value of 0-based pivot including zeros:"); - println("select(arr, 0, " + (len - 1) + ", " + pivot + ") => " + ret); - println("0-based index of pivot = pivot = " + (pivot)); - println("Result Array:" + LS); - println(printArr(arr)); - } - - //For console testing - static void testDbl2() { - final int len = 16; - final int nonZeros = (3 * len) / 4; - final int zeros = len - nonZeros; - final double[] arr = new double[len]; - for (int i = 0; i < nonZeros; i++ ) { - arr[i] = i + 1; - } - println("Generated Numbers:"); - println(printArr(arr)); - shuffle(arr); - println("Randomized Ordering:"); - println(printArr(arr)); - final int pivot = len / 2; //= 8 - println("selectDiscountingZeros(...):"); - println("ArrSize : " + len); - println("NonZeros: " + nonZeros); - println("Zeros : " + zeros); - println("Choose pivot at 1/2 array size, pivot= " + pivot); - final double ret = selectExcludingZeros(arr, nonZeros, pivot); - println("Return value of 1-based pivot discounting zeros:"); - println("selectDiscountingZeros(arr, " + nonZeros + ", " + pivot + ") => " + ret); - println("0-based index of pivot= pivot+zeros-1 = " + ((pivot + zeros) - 1)); - println("Result Array:" + LS); - 
println(printArr(arr)); - } - - //For console testing - static void testDbl3() { - final int len = 16; - final int nonZeros = (3 * len) / 4; - final int zeros = len - nonZeros; - final double[] arr = new double[len]; - for (int i = 0; i < nonZeros; i++ ) { - arr[i] = i + 1; - } - println("Generated Numbers:"); - println(printArr(arr)); - shuffle(arr); - println("Randomized Ordering:"); - println(printArr(arr)); - final int pivot = len / 2; //= 8 - println("selectIncludingZeros(...):"); - println("ArrSize : " + len); - println("NonZeros: " + nonZeros); - println("Zeros : " + zeros); - println("Choose pivot at 1/2 array size, pivot= " + pivot); - final double ret = selectIncludingZeros(arr, pivot); - println("Return value of 1-based pivot including zeros:"); - println("selectIncludingZeros(arr, " + pivot + ") => " + ret); - println("0-based index of pivot= pivot-1 = " + (pivot - 1)); - println("Result Array:" + LS); - println(printArr(arr)); - } - - // public static void main(String[] args) { - // println(LS+"==LONGS 1=========="+LS); - // test1(); - // println(LS+"==LONGS 2=========="+LS); - // test2(); - // println(LS+"==LONGS 3=========="+LS); - // test3(); - // println(LS+"==DOUBLES 1========"+LS); - // testDbl1(); - // println(LS+"==DOUBLES 2========"+LS); - // testDbl2(); - // println(LS+"==DOUBLES 3========"+LS); - // testDbl3(); - // - // - // QuickSelectTest qst = new QuickSelectTest(); - // qst.checkQuickSelect0Based(); - // qst.checkQuickSelect1BasedExcludingZeros(); - // qst.checkQuickSelect1BasedExcludingZeros2(); - // qst.checkQuickSelect1BasedIncludingZeros(); - // qst.checkQuickSelectDbl0Based(); - // qst.checkQuickSelectDbl1BasedExcludingZeros(); - // qst.checkQuickSelectDbl1BasedExcludingZeros2(); - // qst.checkQuickSelectDbl1BasedIncludingZeros(); - // - // } - - @Test - public void printlnTest() { - println("PRINTING: " + this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(final String s) { - 
//System.out.println(s); //disable here - } - - /** - * @param d value to print - */ - static void println(final double d) { - //System.out.println(d); //disable here - } - -} diff --git a/src/test/java/org/apache/datasketches/thetacommon2/ThetaUtilTest.java b/src/test/java/org/apache/datasketches/thetacommon2/ThetaUtilTest.java deleted file mode 100644 index 0943e29d7..000000000 --- a/src/test/java/org/apache/datasketches/thetacommon2/ThetaUtilTest.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.thetacommon2; - -import org.apache.datasketches.quantilescommon.QuantilesUtil; -import org.testng.Assert; -import org.testng.annotations.Test; - -/** - * @author Lee Rhodes - */ -public class ThetaUtilTest { - - @Test - public void checkStartingSubMultiple() { - Assert.assertEquals(ThetaUtil.startingSubMultiple(8, 3, 4), 5); - Assert.assertEquals(ThetaUtil.startingSubMultiple(7, 3, 4), 4); - Assert.assertEquals(ThetaUtil.startingSubMultiple(6, 3, 4), 6); - } - - @Test(expectedExceptions = NullPointerException.class) - public void checkValidateValuesNullException() { - QuantilesUtil.checkDoublesSplitPointsOrder(null); - } - -} - diff --git a/src/test/java/org/apache/datasketches/tuple/CompactSketchWithDoubleSummaryTest.java b/src/test/java/org/apache/datasketches/tuple/CompactSketchWithDoubleSummaryTest.java index 045777c19..a093d4feb 100644 --- a/src/test/java/org/apache/datasketches/tuple/CompactSketchWithDoubleSummaryTest.java +++ b/src/test/java/org/apache/datasketches/tuple/CompactSketchWithDoubleSummaryTest.java @@ -19,12 +19,19 @@ package org.apache.datasketches.tuple; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.tuple.CompactSketch; +import org.apache.datasketches.tuple.Sketch; +import org.apache.datasketches.tuple.Sketches; +import org.apache.datasketches.tuple.TupleSketchIterator; +import org.apache.datasketches.tuple.UpdatableSketch; +import org.apache.datasketches.tuple.UpdatableSketchBuilder; import org.apache.datasketches.tuple.adouble.DoubleSummary; -import org.apache.datasketches.tuple.adouble.DoubleSummary.Mode; import org.apache.datasketches.tuple.adouble.DoubleSummaryDeserializer; import org.apache.datasketches.tuple.adouble.DoubleSummaryFactory; +import org.apache.datasketches.tuple.adouble.DoubleSummary.Mode; import org.testng.Assert; import 
org.testng.annotations.Test; @@ -124,7 +131,7 @@ public void serializeDeserializeSmallExact() { us.update("c", 1.0); CompactSketch sketch1 = us.compact(); Sketch sketch2 = - Sketches.heapifySketch(Memory.wrap(sketch1.toByteArray()), + Sketches.heapifySketch(MemorySegment.ofArray(sketch1.toByteArray()), new DoubleSummaryDeserializer()); Assert.assertFalse(sketch2.isEmpty()); Assert.assertFalse(sketch2.isEstimationMode()); @@ -158,7 +165,7 @@ public void serializeDeserializeEstimation() throws Exception { //TestUtil.writeBytesToFile(bytes, "CompactSketchWithDoubleSummary4K.sk"); Sketch sketch2 = - Sketches.heapifySketch(Memory.wrap(bytes), new DoubleSummaryDeserializer()); + Sketches.heapifySketch(MemorySegment.ofArray(bytes), new DoubleSummaryDeserializer()); Assert.assertFalse(sketch2.isEmpty()); Assert.assertTrue(sketch2.isEstimationMode()); Assert.assertEquals(sketch2.getEstimate(), sketch1.getEstimate()); @@ -180,7 +187,7 @@ public void deserializeWrongType() { us.update(i, 1.0); } CompactSketch sketch1 = us.compact(); - Sketches.heapifyUpdatableSketch(Memory.wrap(sketch1.toByteArray()), + Sketches.heapifyUpdatableSketch(MemorySegment.ofArray(sketch1.toByteArray()), new DoubleSummaryDeserializer(), new DoubleSummaryFactory(mode)); } diff --git a/src/test/java/org/apache/datasketches/tuple/IntegerSummary.java b/src/test/java/org/apache/datasketches/tuple/IntegerSummary.java index e476573d3..68c74b0a5 100644 --- a/src/test/java/org/apache/datasketches/tuple/IntegerSummary.java +++ b/src/test/java/org/apache/datasketches/tuple/IntegerSummary.java @@ -19,8 +19,13 @@ package org.apache.datasketches.tuple; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; + +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.ByteArrayUtil; -import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.tuple.DeserializeResult; +import org.apache.datasketches.tuple.UpdatableSummary; /** * Summary for generic tuple sketches of 
type Integer. @@ -67,12 +72,12 @@ public byte[] toByteArray() { /** * Creates an instance of the IntegerSummary given a serialized representation - * @param mem Memory object with serialized IntegerSummary + * @param seg MemorySegment object with serialized IntegerSummary * @return DeserializedResult object, which contains a IntegerSummary object and number of bytes - * read from the Memory + * read from the MemorySegment */ - public static DeserializeResult fromMemory(final Memory mem) { - return new DeserializeResult<>(new IntegerSummary(mem.getInt(VALUE_INDEX)), SERIALIZED_SIZE_BYTES); + public static DeserializeResult fromMemorySegment(final MemorySegment seg) { + return new DeserializeResult<>(new IntegerSummary(seg.get(JAVA_INT_UNALIGNED, VALUE_INDEX)), SERIALIZED_SIZE_BYTES); } } diff --git a/src/test/java/org/apache/datasketches/tuple/IntegerSummaryDeserializer.java b/src/test/java/org/apache/datasketches/tuple/IntegerSummaryDeserializer.java index 33c0e34f8..56a222758 100644 --- a/src/test/java/org/apache/datasketches/tuple/IntegerSummaryDeserializer.java +++ b/src/test/java/org/apache/datasketches/tuple/IntegerSummaryDeserializer.java @@ -19,13 +19,16 @@ package org.apache.datasketches.tuple; -import org.apache.datasketches.memory.Memory; +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.tuple.DeserializeResult; +import org.apache.datasketches.tuple.SummaryDeserializer; public class IntegerSummaryDeserializer implements SummaryDeserializer { @Override - public DeserializeResult heapifySummary(final Memory mem) { - return IntegerSummary.fromMemory(mem); + public DeserializeResult heapifySummary(final MemorySegment seg) { + return IntegerSummary.fromMemorySegment(seg); } } diff --git a/src/test/java/org/apache/datasketches/tuple/IntegerSummaryFactory.java b/src/test/java/org/apache/datasketches/tuple/IntegerSummaryFactory.java index cc21ffffd..3a61a4aee 100644 --- 
a/src/test/java/org/apache/datasketches/tuple/IntegerSummaryFactory.java +++ b/src/test/java/org/apache/datasketches/tuple/IntegerSummaryFactory.java @@ -19,6 +19,8 @@ package org.apache.datasketches.tuple; +import org.apache.datasketches.tuple.SummaryFactory; + /** * Factory for IntegerSummary. */ diff --git a/src/test/java/org/apache/datasketches/tuple/JaccardSimilarityTest.java b/src/test/java/org/apache/datasketches/tuple/JaccardSimilarityTest.java index d3c9a1bd0..1553596c9 100644 --- a/src/test/java/org/apache/datasketches/tuple/JaccardSimilarityTest.java +++ b/src/test/java/org/apache/datasketches/tuple/JaccardSimilarityTest.java @@ -19,13 +19,15 @@ package org.apache.datasketches.tuple; +import org.apache.datasketches.theta.UpdateSketch; +import org.apache.datasketches.theta.UpdateSketchBuilder; +import org.apache.datasketches.tuple.UpdatableSketch; +import org.apache.datasketches.tuple.UpdatableSketchBuilder; import org.apache.datasketches.tuple.adouble.DoubleSummary; import org.apache.datasketches.tuple.adouble.DoubleSummaryFactory; import org.apache.datasketches.tuple.adouble.DoubleSummarySetOperations; import org.testng.annotations.Test; -import org.apache.datasketches.theta.UpdateSketch; -import org.apache.datasketches.theta.UpdateSketchBuilder; import static org.apache.datasketches.tuple.JaccardSimilarity.dissimilarityTest; import static org.apache.datasketches.tuple.JaccardSimilarity.exactlyEqual; import static org.apache.datasketches.tuple.JaccardSimilarity.jaccard; @@ -450,8 +452,8 @@ public void printlnTest() { /** * @param s value to print */ - static void println(String s) { - //System.out.println(s); //disable here + static void println(Object o) { + //System.out.println(o.toString()); //disable here } } diff --git a/src/test/java/org/apache/datasketches/tuple/MiscTest.java b/src/test/java/org/apache/datasketches/tuple/MiscTest.java index 23ff08b2d..e4eb97064 100644 --- a/src/test/java/org/apache/datasketches/tuple/MiscTest.java +++ 
b/src/test/java/org/apache/datasketches/tuple/MiscTest.java @@ -23,9 +23,12 @@ import static org.testng.Assert.assertTrue; import org.apache.datasketches.thetacommon.SetOperationCornerCases.CornerCase; +import org.apache.datasketches.tuple.UpdatableSketch; +import org.apache.datasketches.tuple.UpdatableSketchBuilder; +import org.apache.datasketches.tuple.Util; import org.apache.datasketches.tuple.adouble.DoubleSummary; -import org.apache.datasketches.tuple.adouble.DoubleSummary.Mode; import org.apache.datasketches.tuple.adouble.DoubleSummaryFactory; +import org.apache.datasketches.tuple.adouble.DoubleSummary.Mode; import org.testng.annotations.Test; /** diff --git a/src/test/java/org/apache/datasketches/tuple/ReadOnlyMemoryTest.java b/src/test/java/org/apache/datasketches/tuple/ReadOnlyMemorySegmentTest.java similarity index 92% rename from src/test/java/org/apache/datasketches/tuple/ReadOnlyMemoryTest.java rename to src/test/java/org/apache/datasketches/tuple/ReadOnlyMemorySegmentTest.java index 3da256d6d..9e6d4b231 100644 --- a/src/test/java/org/apache/datasketches/tuple/ReadOnlyMemoryTest.java +++ b/src/test/java/org/apache/datasketches/tuple/ReadOnlyMemorySegmentTest.java @@ -19,8 +19,9 @@ package org.apache.datasketches.tuple; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.SketchesReadOnlyException; -import org.apache.datasketches.memory.Memory; import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSetOperationBuilder; import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch; import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches; @@ -30,14 +31,14 @@ import org.testng.Assert; import org.testng.annotations.Test; -public class ReadOnlyMemoryTest { +public class ReadOnlyMemorySegmentTest { @Test public void wrapAndTryUpdatingSketch() { final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); sketch1.update(1, new double[] {1}); final 
ArrayOfDoublesUpdatableSketch sketch2 = (ArrayOfDoublesUpdatableSketch) - ArrayOfDoublesSketches.wrapSketch(Memory.wrap(sketch1.toByteArray())); + ArrayOfDoublesSketches.wrapSketch(MemorySegment.ofArray(sketch1.toByteArray())); Assert.assertEquals(sketch2.getEstimate(), 1.0); sketch2.toByteArray(); boolean thrown = false; @@ -60,7 +61,7 @@ public void heapifyAndUpdateSketch() { sketch1.update(1, new double[] {1}); // downcasting is not recommended, for testing only final ArrayOfDoublesUpdatableSketch sketch2 = (ArrayOfDoublesUpdatableSketch) - ArrayOfDoublesSketches.heapifySketch(Memory.wrap(sketch1.toByteArray())); + ArrayOfDoublesSketches.heapifySketch(MemorySegment.ofArray(sketch1.toByteArray())); sketch2.update(2, new double[] {1}); Assert.assertEquals(sketch2.getEstimate(), 2.0); } @@ -75,7 +76,7 @@ public void wrapAndTryUpdatingUnionEstimationMode() { } final ArrayOfDoublesUnion union1 = new ArrayOfDoublesSetOperationBuilder().buildUnion(); union1.union(sketch1); - final ArrayOfDoublesUnion union2 = ArrayOfDoublesSketches.wrapUnion(Memory.wrap(union1.toByteArray())); + final ArrayOfDoublesUnion union2 = ArrayOfDoublesSketches.wrapUnion(MemorySegment.ofArray(union1.toByteArray()).asReadOnly()); final ArrayOfDoublesSketch resultSketch = union2.getResult(); Assert.assertTrue(resultSketch.isEstimationMode()); Assert.assertEquals(resultSketch.getEstimate(), numUniques, numUniques * 0.04); @@ -105,7 +106,7 @@ public void heapifyAndUpdateUnion() { } final ArrayOfDoublesUnion union1 = new ArrayOfDoublesSetOperationBuilder().buildUnion(); union1.union(sketch1); - final ArrayOfDoublesUnion union2 = ArrayOfDoublesSketches.heapifyUnion(Memory.wrap(union1.toByteArray())); + final ArrayOfDoublesUnion union2 = ArrayOfDoublesSketches.heapifyUnion(MemorySegment.ofArray(union1.toByteArray())); final ArrayOfDoublesSketch resultSketch = union2.getResult(); Assert.assertTrue(resultSketch.isEstimationMode()); Assert.assertEquals(resultSketch.getEstimate(), numUniques, numUniques * 
0.04); diff --git a/src/test/java/org/apache/datasketches/tuple/SerializerDeserializerTest.java b/src/test/java/org/apache/datasketches/tuple/SerializerDeserializerTest.java index 5a896ba86..92a8599b7 100644 --- a/src/test/java/org/apache/datasketches/tuple/SerializerDeserializerTest.java +++ b/src/test/java/org/apache/datasketches/tuple/SerializerDeserializerTest.java @@ -19,9 +19,13 @@ package org.apache.datasketches.tuple; +import static org.apache.datasketches.common.Util.computeSeedHash; + +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.tuple.SerializerDeserializer; import org.testng.Assert; import org.testng.annotations.Test; @@ -31,22 +35,16 @@ public class SerializerDeserializerTest { public void validSketchType() { byte[] bytes = new byte[4]; bytes[SerializerDeserializer.TYPE_BYTE_OFFSET] = (byte) SerializerDeserializer.SketchType.CompactSketch.ordinal(); - Assert.assertEquals(SerializerDeserializer.getSketchType(Memory.wrap(bytes)), SerializerDeserializer.SketchType.CompactSketch); + Assert.assertEquals(SerializerDeserializer.getSketchType(MemorySegment.ofArray(bytes)), SerializerDeserializer.SketchType.CompactSketch); } @Test(expectedExceptions = SketchesArgumentException.class) public void invalidSketchType() { byte[] bytes = new byte[4]; bytes[SerializerDeserializer.TYPE_BYTE_OFFSET] = 33; - SerializerDeserializer.getSketchType(Memory.wrap(bytes)); + SerializerDeserializer.getSketchType(MemorySegment.ofArray(bytes)); } -// @Test(expectedExceptions = SketchesArgumentException.class) -// public void deserializeFromMemoryUsupportedClass() { -// Memory mem = null; -// SerializerDeserializer.deserializeFromMemory(mem, 0, "bogus"); -// } - @Test(expectedExceptions = SketchesArgumentException.class) public void validateFamilyNotTuple() { 
SerializerDeserializer.validateFamily((byte) 1, (byte) 0); @@ -59,6 +57,6 @@ public void validateFamilyWrongPreambleLength() { @Test(expectedExceptions = SketchesArgumentException.class) public void checkBadSeedHash() { - org.apache.datasketches.common.Util.computeSeedHash(50541); + computeSeedHash(50541); } } diff --git a/src/test/java/org/apache/datasketches/tuple/TupleCrossLanguageTest.java b/src/test/java/org/apache/datasketches/tuple/TupleCrossLanguageTest.java index e6ea1e79f..f20cb99dc 100644 --- a/src/test/java/org/apache/datasketches/tuple/TupleCrossLanguageTest.java +++ b/src/test/java/org/apache/datasketches/tuple/TupleCrossLanguageTest.java @@ -27,13 +27,17 @@ import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; +import java.lang.foreign.MemorySegment; import java.io.IOException; import java.nio.file.Files; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.TestUtil; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.tuple.Sketch; +import org.apache.datasketches.tuple.Sketches; +import org.apache.datasketches.tuple.TupleSketchIterator; +import org.apache.datasketches.tuple.UpdatableSketch; +import org.apache.datasketches.tuple.UpdatableSketchBuilder; import org.apache.datasketches.tuple.adouble.DoubleSummary; import org.apache.datasketches.tuple.adouble.DoubleSummaryDeserializer; import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUnion; @@ -45,7 +49,7 @@ public class TupleCrossLanguageTest { @Test(groups = {CHECK_CPP_HISTORICAL_FILES}) public void serialVersion1Compatibility() { final byte[] byteArr = TestUtil.getResourceBytes("CompactSketchWithDoubleSummary4K_serialVersion1.sk"); - Sketch sketch = Sketches.heapifySketch(Memory.wrap(byteArr), new DoubleSummaryDeserializer()); + Sketch sketch = Sketches.heapifySketch(MemorySegment.ofArray(byteArr), new 
DoubleSummaryDeserializer()); Assert.assertTrue(sketch.isEstimationMode()); Assert.assertEquals(sketch.getEstimate(), 8192, 8192 * 0.99); Assert.assertEquals(sketch.getRetainedEntries(), 4096); @@ -61,7 +65,7 @@ public void serialVersion1Compatibility() { @Test(groups = {CHECK_CPP_HISTORICAL_FILES}) public void version2Compatibility() { final byte[] byteArr = TestUtil.getResourceBytes("TupleWithTestIntegerSummary4kTrimmedSerVer2.sk"); - Sketch sketch1 = Sketches.heapifySketch(Memory.wrap(byteArr), new IntegerSummaryDeserializer()); + Sketch sketch1 = Sketches.heapifySketch(MemorySegment.ofArray(byteArr), new IntegerSummaryDeserializer()); // construct the same way final int lgK = 12; @@ -87,7 +91,7 @@ public void deserializeFromCppIntegerSummary() throws IOException { for (int n: nArr) { final byte[] bytes = Files.readAllBytes(cppPath.resolve("tuple_int_n" + n + "_cpp.sk")); final Sketch sketch = - Sketches.heapifySketch(Memory.wrap(bytes), new IntegerSummaryDeserializer()); + Sketches.heapifySketch(MemorySegment.ofArray(bytes), new IntegerSummaryDeserializer()); assertTrue(n == 0 ? sketch.isEmpty() : !sketch.isEmpty()); assertTrue(n > 1000 ? 
sketch.isEstimationMode() : !sketch.isEstimationMode()); assertEquals(sketch.getEstimate(), n, n * 0.03); @@ -105,7 +109,9 @@ public void generateForCppIntegerSummary() throws IOException { for (int n: nArr) { final UpdatableSketch sk = new UpdatableSketchBuilder<>(new IntegerSummaryFactory()).build(); - for (int i = 0; i < n; i++) sk.update(i, i); + for (int i = 0; i < n; i++) { + sk.update(i, i); + } Files.newOutputStream(javaPath.resolve("tuple_int_n" + n + "_java.sk")).write(sk.compact().toByteArray()); } } @@ -113,13 +119,13 @@ public void generateForCppIntegerSummary() throws IOException { @Test(expectedExceptions = SketchesArgumentException.class, groups = {CHECK_CPP_HISTORICAL_FILES}) public void noSupportHeapifyV0_9_1() throws Exception { final byte[] byteArr = TestUtil.getResourceBytes("ArrayOfDoublesUnion_v0.9.1.sk"); - ArrayOfDoublesUnion.heapify(Memory.wrap(byteArr)); + ArrayOfDoublesUnion.heapify(MemorySegment.ofArray(byteArr)); } @Test(expectedExceptions = SketchesArgumentException.class, groups = {CHECK_CPP_HISTORICAL_FILES}) public void noSupportWrapV0_9_1() throws Exception { final byte[] byteArr = TestUtil.getResourceBytes("ArrayOfDoublesUnion_v0.9.1.sk"); - ArrayOfDoublesUnion.wrap(WritableMemory.writableWrap(byteArr)); + ArrayOfDoublesUnion.wrap(MemorySegment.ofArray(byteArr)); } } diff --git a/src/test/java/org/apache/datasketches/tuple/TupleExamples2Test.java b/src/test/java/org/apache/datasketches/tuple/TupleExamples2Test.java index 802fe1404..c26f6f394 100644 --- a/src/test/java/org/apache/datasketches/tuple/TupleExamples2Test.java +++ b/src/test/java/org/apache/datasketches/tuple/TupleExamples2Test.java @@ -23,10 +23,16 @@ import org.apache.datasketches.theta.UpdateSketch; import org.apache.datasketches.theta.UpdateSketchBuilder; +import org.apache.datasketches.tuple.CompactSketch; +import org.apache.datasketches.tuple.Intersection; +import org.apache.datasketches.tuple.TupleSketchIterator; +import org.apache.datasketches.tuple.Union; 
+import org.apache.datasketches.tuple.UpdatableSketch; +import org.apache.datasketches.tuple.UpdatableSketchBuilder; import org.apache.datasketches.tuple.adouble.DoubleSummary; -import org.apache.datasketches.tuple.adouble.DoubleSummary.Mode; import org.apache.datasketches.tuple.adouble.DoubleSummaryFactory; import org.apache.datasketches.tuple.adouble.DoubleSummarySetOperations; +import org.apache.datasketches.tuple.adouble.DoubleSummary.Mode; import org.testng.annotations.Test; /** diff --git a/src/test/java/org/apache/datasketches/tuple/TupleExamplesTest.java b/src/test/java/org/apache/datasketches/tuple/TupleExamplesTest.java index ace6c5ff4..f6941e334 100644 --- a/src/test/java/org/apache/datasketches/tuple/TupleExamplesTest.java +++ b/src/test/java/org/apache/datasketches/tuple/TupleExamplesTest.java @@ -23,10 +23,16 @@ import org.apache.datasketches.theta.UpdateSketch; import org.apache.datasketches.theta.UpdateSketchBuilder; +import org.apache.datasketches.tuple.CompactSketch; +import org.apache.datasketches.tuple.Intersection; +import org.apache.datasketches.tuple.TupleSketchIterator; +import org.apache.datasketches.tuple.Union; +import org.apache.datasketches.tuple.UpdatableSketch; +import org.apache.datasketches.tuple.UpdatableSketchBuilder; import org.apache.datasketches.tuple.aninteger.IntegerSummary; -import org.apache.datasketches.tuple.aninteger.IntegerSummary.Mode; import org.apache.datasketches.tuple.aninteger.IntegerSummaryFactory; import org.apache.datasketches.tuple.aninteger.IntegerSummarySetOperations; +import org.apache.datasketches.tuple.aninteger.IntegerSummary.Mode; import org.testng.annotations.Test; /** diff --git a/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleAnotBTest.java b/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleAnotBTest.java index 7f20a3f90..a878e50dc 100644 --- a/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleAnotBTest.java +++ 
b/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleAnotBTest.java @@ -32,6 +32,8 @@ import org.apache.datasketches.tuple.TupleSketchIterator; import org.apache.datasketches.tuple.UpdatableSketch; import org.apache.datasketches.tuple.UpdatableSketchBuilder; +import org.apache.datasketches.tuple.adouble.DoubleSummary; +import org.apache.datasketches.tuple.adouble.DoubleSummaryFactory; import org.apache.datasketches.tuple.adouble.DoubleSummary.Mode; import org.testng.Assert; import org.testng.annotations.Test; diff --git a/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleIntersectionTest.java b/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleIntersectionTest.java index 4950eb332..b7ee014d6 100644 --- a/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleIntersectionTest.java +++ b/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleIntersectionTest.java @@ -28,10 +28,13 @@ import org.apache.datasketches.tuple.CompactSketch; import org.apache.datasketches.tuple.Intersection; import org.apache.datasketches.tuple.Sketch; -import org.apache.datasketches.tuple.TupleSketchIterator; import org.apache.datasketches.tuple.Sketches; +import org.apache.datasketches.tuple.TupleSketchIterator; import org.apache.datasketches.tuple.UpdatableSketch; import org.apache.datasketches.tuple.UpdatableSketchBuilder; +import org.apache.datasketches.tuple.adouble.DoubleSummary; +import org.apache.datasketches.tuple.adouble.DoubleSummaryFactory; +import org.apache.datasketches.tuple.adouble.DoubleSummarySetOperations; import org.apache.datasketches.tuple.adouble.DoubleSummary.Mode; import org.testng.Assert; import org.testng.annotations.Test; diff --git a/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleTest.java b/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleTest.java index a1cfc1a7d..d66f2a044 100644 --- a/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleTest.java +++ 
b/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleTest.java @@ -21,16 +21,20 @@ import static org.testng.Assert.assertEquals; +import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; import org.apache.datasketches.tuple.Sketch; import org.apache.datasketches.tuple.Sketches; import org.apache.datasketches.tuple.TupleSketchIterator; import org.apache.datasketches.tuple.UpdatableSketch; import org.apache.datasketches.tuple.UpdatableSketchBuilder; +import org.apache.datasketches.tuple.adouble.DoubleSketch; +import org.apache.datasketches.tuple.adouble.DoubleSummary; +import org.apache.datasketches.tuple.adouble.DoubleSummaryDeserializer; +import org.apache.datasketches.tuple.adouble.DoubleSummaryFactory; import org.apache.datasketches.tuple.adouble.DoubleSummary.Mode; import org.testng.Assert; import org.testng.annotations.Test; @@ -75,8 +79,8 @@ public void serDeTest() { a1Sk.update(key, 1.0); } final double est1 = a1Sk.getEstimate(); - final Memory mem = Memory.wrap(a1Sk.toByteArray()); - final DoubleSketch a1Sk2 = new DoubleSketch(mem, Mode.AlwaysOne); + final MemorySegment seg = MemorySegment.ofArray(a1Sk.toByteArray()); + final DoubleSketch a1Sk2 = new DoubleSketch(seg, Mode.AlwaysOne); final double est2 = a1Sk2.getEstimate(); assertEquals(est1, est2); } @@ -331,7 +335,7 @@ public void serializeDeserializeExact() throws Exception { sketch1.update(1, 1.0); final UpdatableSketch sketch2 = Sketches.heapifyUpdatableSketch( - Memory.wrap(sketch1.toByteArray()), + MemorySegment.ofArray(sketch1.toByteArray()), new DoubleSummaryDeserializer(), new DoubleSummaryFactory(mode)); Assert.assertEquals(sketch2.getEstimate(), 1.0); @@ -366,7 +370,7 @@ public void serializeDeserializeEstimationNoResizing() throws Exception { //TestUtil.writeBytesToFile(bytes, 
"UpdatableSketchWithDoubleSummary4K.sk"); final Sketch sketch2 = - Sketches.heapifySketch(Memory.wrap(bytes), new DoubleSummaryDeserializer()); + Sketches.heapifySketch(MemorySegment.ofArray(bytes), new DoubleSummaryDeserializer()); Assert.assertTrue(sketch2.isEstimationMode()); Assert.assertEquals(sketch2.getEstimate(), 8192, 8192 * 0.99); Assert.assertEquals(sketch1.getTheta(), sketch2.getTheta()); @@ -391,7 +395,7 @@ public void serializeDeserializeSampling() throws Exception { sketch1.update(i, 1.0); } final Sketch sketch2 = Sketches.heapifySketch( - Memory.wrap(sketch1.toByteArray()), new DoubleSummaryDeserializer()); + MemorySegment.ofArray(sketch1.toByteArray()), new DoubleSummaryDeserializer()); Assert.assertTrue(sketch2.isEstimationMode()); Assert.assertEquals(sketch2.getEstimate() / numberOfUniques, 1.0, 0.01); Assert.assertEquals(sketch2.getRetainedEntries() / (double) numberOfUniques, 0.5, 0.01); diff --git a/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleUnionTest.java b/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleUnionTest.java index dcacf8eed..c9a3d5887 100644 --- a/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleUnionTest.java +++ b/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleUnionTest.java @@ -29,6 +29,9 @@ import org.apache.datasketches.tuple.Union; import org.apache.datasketches.tuple.UpdatableSketch; import org.apache.datasketches.tuple.UpdatableSketchBuilder; +import org.apache.datasketches.tuple.adouble.DoubleSummary; +import org.apache.datasketches.tuple.adouble.DoubleSummaryFactory; +import org.apache.datasketches.tuple.adouble.DoubleSummarySetOperations; import org.apache.datasketches.tuple.adouble.DoubleSummary.Mode; import org.testng.Assert; import org.testng.annotations.Test; diff --git a/src/test/java/org/apache/datasketches/tuple/adouble/FilterTest.java b/src/test/java/org/apache/datasketches/tuple/adouble/FilterTest.java index be7aa264b..5e4f24d8e 100644 --- 
a/src/test/java/org/apache/datasketches/tuple/adouble/FilterTest.java +++ b/src/test/java/org/apache/datasketches/tuple/adouble/FilterTest.java @@ -26,6 +26,8 @@ import org.apache.datasketches.tuple.Sketches; import org.apache.datasketches.tuple.UpdatableSketch; import org.apache.datasketches.tuple.UpdatableSketchBuilder; +import org.apache.datasketches.tuple.adouble.DoubleSummary; +import org.apache.datasketches.tuple.adouble.DoubleSummaryFactory; import org.apache.datasketches.tuple.adouble.DoubleSummary.Mode; import org.testng.Assert; import org.testng.annotations.Test; diff --git a/src/test/java/org/apache/datasketches/tuple/aninteger/CornerCaseTupleSetOperationsTest.java b/src/test/java/org/apache/datasketches/tuple/aninteger/CornerCaseTupleSetOperationsTest.java index e05574595..0df275001 100644 --- a/src/test/java/org/apache/datasketches/tuple/aninteger/CornerCaseTupleSetOperationsTest.java +++ b/src/test/java/org/apache/datasketches/tuple/aninteger/CornerCaseTupleSetOperationsTest.java @@ -30,6 +30,9 @@ import org.apache.datasketches.tuple.CompactSketch; import org.apache.datasketches.tuple.Intersection; import org.apache.datasketches.tuple.Union; +import org.apache.datasketches.tuple.aninteger.IntegerSketch; +import org.apache.datasketches.tuple.aninteger.IntegerSummary; +import org.apache.datasketches.tuple.aninteger.IntegerSummarySetOperations; import org.testng.annotations.Test; public class CornerCaseTupleSetOperationsTest { diff --git a/src/test/java/org/apache/datasketches/tuple/aninteger/EngagementTest.java b/src/test/java/org/apache/datasketches/tuple/aninteger/EngagementTest.java index 871822e5b..c467fb7af 100644 --- a/src/test/java/org/apache/datasketches/tuple/aninteger/EngagementTest.java +++ b/src/test/java/org/apache/datasketches/tuple/aninteger/EngagementTest.java @@ -28,6 +28,9 @@ import org.apache.datasketches.tuple.CompactSketch; import org.apache.datasketches.tuple.TupleSketchIterator; import org.apache.datasketches.tuple.Union; +import 
org.apache.datasketches.tuple.aninteger.IntegerSketch; +import org.apache.datasketches.tuple.aninteger.IntegerSummary; +import org.apache.datasketches.tuple.aninteger.IntegerSummarySetOperations; import org.testng.annotations.Test; /** diff --git a/src/test/java/org/apache/datasketches/tuple/aninteger/IntegerSketchTest.java b/src/test/java/org/apache/datasketches/tuple/aninteger/IntegerSketchTest.java index 1d3a5dc7d..951129425 100644 --- a/src/test/java/org/apache/datasketches/tuple/aninteger/IntegerSketchTest.java +++ b/src/test/java/org/apache/datasketches/tuple/aninteger/IntegerSketchTest.java @@ -21,10 +21,14 @@ import static org.testng.Assert.assertEquals; -import org.apache.datasketches.memory.Memory; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.tuple.AnotB; import org.apache.datasketches.tuple.CompactSketch; import org.apache.datasketches.tuple.Intersection; +import org.apache.datasketches.tuple.aninteger.IntegerSketch; +import org.apache.datasketches.tuple.aninteger.IntegerSummary; +import org.apache.datasketches.tuple.aninteger.IntegerSummarySetOperations; import org.testng.annotations.Test; /** @@ -44,8 +48,8 @@ public void serDeTest() { a1Sk.update(i, 1); } final double est1 = a1Sk.getEstimate(); - final Memory mem = Memory.wrap(a1Sk.toByteArray()); - final IntegerSketch a1Sk2 = new IntegerSketch(mem, a1Mode); + final MemorySegment seg = MemorySegment.ofArray(a1Sk.toByteArray()); + final IntegerSketch a1Sk2 = new IntegerSketch(seg, a1Mode); final double est2 = a1Sk2.getEstimate(); assertEquals(est1, est2); } diff --git a/src/test/java/org/apache/datasketches/tuple/aninteger/MikhailsBugTupleTest.java b/src/test/java/org/apache/datasketches/tuple/aninteger/MikhailsBugTupleTest.java index 1e5aad3d2..0809e972d 100644 --- a/src/test/java/org/apache/datasketches/tuple/aninteger/MikhailsBugTupleTest.java +++ b/src/test/java/org/apache/datasketches/tuple/aninteger/MikhailsBugTupleTest.java @@ -22,6 +22,9 @@ import 
org.apache.datasketches.tuple.AnotB; import org.apache.datasketches.tuple.CompactSketch; import org.apache.datasketches.tuple.Intersection; +import org.apache.datasketches.tuple.aninteger.IntegerSketch; +import org.apache.datasketches.tuple.aninteger.IntegerSummary; +import org.apache.datasketches.tuple.aninteger.IntegerSummarySetOperations; import org.testng.annotations.Test; /** diff --git a/src/test/java/org/apache/datasketches/tuple/aninteger/ParameterLeakageTest.java b/src/test/java/org/apache/datasketches/tuple/aninteger/ParameterLeakageTest.java index 3411cea4f..51eee988b 100644 --- a/src/test/java/org/apache/datasketches/tuple/aninteger/ParameterLeakageTest.java +++ b/src/test/java/org/apache/datasketches/tuple/aninteger/ParameterLeakageTest.java @@ -25,10 +25,12 @@ import org.apache.datasketches.tuple.AnotB; import org.apache.datasketches.tuple.CompactSketch; import org.apache.datasketches.tuple.Intersection; -//import org.apache.datasketches.tuple.UpdatableSketch; import org.apache.datasketches.tuple.Sketch; import org.apache.datasketches.tuple.TupleSketchIterator; import org.apache.datasketches.tuple.Union; +import org.apache.datasketches.tuple.aninteger.IntegerSketch; +import org.apache.datasketches.tuple.aninteger.IntegerSummary; +import org.apache.datasketches.tuple.aninteger.IntegerSummarySetOperations; import org.testng.annotations.Test; /** diff --git a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/AodSketchCrossLanguageTest.java b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/AodSketchCrossLanguageTest.java index e3b0db819..faca658d8 100644 --- a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/AodSketchCrossLanguageTest.java +++ b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/AodSketchCrossLanguageTest.java @@ -27,10 +27,14 @@ import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; +import java.lang.foreign.MemorySegment; import java.io.IOException; import 
java.nio.file.Files; -import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketchIterator; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder; import org.testng.annotations.Test; /** @@ -44,7 +48,9 @@ public void generateBinariesForCompatibilityTestingOneValue() throws IOException final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000}; for (int n: nArr) { final ArrayOfDoublesUpdatableSketch sk = new ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < n; i++) sk.update(i, new double[] {i}); + for (int i = 0; i < n; i++) { + sk.update(i, new double[] {i}); + } Files.newOutputStream(javaPath.resolve("aod_1_n" + n + "_java.sk")).write(sk.compact().toByteArray()); } } @@ -54,7 +60,9 @@ public void generateBinariesForCompatibilityTestingThreeValues() throws IOExcept final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000}; for (int n: nArr) { final ArrayOfDoublesUpdatableSketch sk = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(3).build(); - for (int i = 0; i < n; i++) sk.update(i, new double[] {i, i, i}); + for (int i = 0; i < n; i++) { + sk.update(i, new double[] {i, i, i}); + } Files.newOutputStream(javaPath.resolve("aod_3_n" + n + "_java.sk")).write(sk.compact().toByteArray()); } } @@ -74,7 +82,7 @@ public void deserializeFromCppOneValue() throws IOException { final int[] nArr = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; for (int n: nArr) { final byte[] bytes = Files.readAllBytes(cppPath.resolve("aod_1_n" + n + "_cpp.sk")); - final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketch.wrap(Memory.wrap(bytes)); + final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketch.wrap(MemorySegment.ofArray(bytes)); assertTrue(n == 0 ? 
sketch.isEmpty() : !sketch.isEmpty()); assertEquals(sketch.getEstimate(), n, n * 0.03); assertEquals(sketch.getNumValues(), 1); @@ -90,7 +98,7 @@ public void deserializeFromCppThreeValues() throws IOException { final int[] nArr = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; for (int n: nArr) { final byte[] bytes = Files.readAllBytes(cppPath.resolve("aod_3_n" + n + "_cpp.sk")); - final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketch.wrap(Memory.wrap(bytes)); + final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketch.wrap(MemorySegment.ofArray(bytes)); assertTrue(n == 0 ? sketch.isEmpty() : !sketch.isEmpty()); assertEquals(sketch.getEstimate(), n, n * 0.03); assertEquals(sketch.getNumValues(), 3); @@ -106,7 +114,7 @@ public void deserializeFromCppThreeValues() throws IOException { @Test(groups = {CHECK_CPP_FILES}) public void deserializeFromCppOneValueNonEmptyNoEntries() throws IOException { final byte[] bytes = Files.readAllBytes(cppPath.resolve("aod_1_non_empty_no_entries_cpp.sk")); - final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketch.wrap(Memory.wrap(bytes)); + final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketch.wrap(MemorySegment.ofArray(bytes)); assertFalse(sketch.isEmpty()); assertEquals(sketch.getRetainedEntries(), 0); } diff --git a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesAnotBTest.java b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesAnotBTest.java index 571558c92..48f60cdcc 100644 --- a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesAnotBTest.java +++ b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesAnotBTest.java @@ -21,9 +21,17 @@ import static org.testng.Assert.fail; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.WritableMemory; +import 
org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesAnotB; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesCompactSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSetOperationBuilder; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketchIterator; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder; import org.testng.Assert; import org.testng.annotations.Test; @@ -238,7 +246,7 @@ public void estimationMode() { // same operation, but compact sketches and off-heap result aNotB.update(sketchA.compact(), sketchB.compact()); - result = aNotB.getResult(WritableMemory.writableWrap(new byte[1000000])); + result = aNotB.getResult(MemorySegment.ofArray(new byte[1000000])); Assert.assertFalse(result.isEmpty()); Assert.assertEquals(result.getEstimate(), 4096.0, 4096 * 0.03); // crude estimate of RSE(95%) = 2 / sqrt(result.getRetainedEntries()) Assert.assertTrue(result.getLowerBound(1) <= result.getEstimate()); @@ -278,7 +286,7 @@ public void estimationModeLargeB() { // same operation, but compact sketches and off-heap result aNotB.update(sketchA.compact(), sketchB.compact()); - result = aNotB.getResult(WritableMemory.writableWrap(new byte[1000000])); + result = aNotB.getResult(MemorySegment.ofArray(new byte[1000000])); Assert.assertFalse(result.isEmpty()); Assert.assertEquals(result.getEstimate(), expected, expected * 0.1); // crude estimate of RSE(95%) = 2 / sqrt(result.getRetainedEntries()) Assert.assertTrue(result.getLowerBound(1) <= result.getEstimate()); diff --git a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java index f4a7b765d..7bd2ee14b 100644 --- 
a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java +++ b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java @@ -21,9 +21,17 @@ import static org.testng.Assert.assertEquals; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesCompactSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder; +import org.apache.datasketches.tuple.arrayofdoubles.DirectArrayOfDoublesCompactSketch; +import org.apache.datasketches.tuple.arrayofdoubles.DirectArrayOfDoublesQuickSelectSketchR; +import org.apache.datasketches.tuple.arrayofdoubles.HeapArrayOfDoublesCompactSketch; +import org.apache.datasketches.tuple.arrayofdoubles.HeapArrayOfDoublesQuickSelectSketch; import org.testng.Assert; import org.testng.annotations.Test; @@ -41,8 +49,8 @@ public void heapToDirectExactTwoDoubles() { sketch1.update("c", new double[] {1, 2}); sketch1.update("d", new double[] {1, 2}); ArrayOfDoublesCompactSketch csk = sketch1.compact(); - Memory mem = Memory.wrap(csk.toByteArray()); - ArrayOfDoublesSketch sketch2 = new DirectArrayOfDoublesCompactSketch(mem); + MemorySegment seg = MemorySegment.ofArray(csk.toByteArray()); + ArrayOfDoublesSketch sketch2 = new DirectArrayOfDoublesCompactSketch(seg); Assert.assertFalse(sketch2.isEmpty()); Assert.assertFalse(sketch2.isEstimationMode()); Assert.assertEquals(sketch2.getEstimate(), 4.0); @@ -62,7 +70,7 @@ public void heapToDirectExactTwoDoubles() { @Test public void directToHeapExactTwoDoubles() { ArrayOfDoublesUpdatableSketch sketch1 = - new 
ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build(WritableMemory.writableWrap(new byte[1000000])); + new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build(MemorySegment.ofArray(new byte[1000000])); sketch1.update("a", new double[] {1, 2}); sketch1.update("b", new double[] {1, 2}); sketch1.update("c", new double[] {1, 2}); @@ -73,7 +81,7 @@ public void directToHeapExactTwoDoubles() { sketch1.update("d", new double[] {1, 2}); ArrayOfDoublesSketch sketch2 = new HeapArrayOfDoublesCompactSketch( - Memory.wrap(sketch1.compact(WritableMemory.writableWrap(new byte[1000000])).toByteArray())); + MemorySegment.ofArray(sketch1.compact(MemorySegment.ofArray(new byte[1000000])).toByteArray())); Assert.assertFalse(sketch2.isEmpty()); Assert.assertFalse(sketch2.isEstimationMode()); Assert.assertEquals(sketch2.getEstimate(), 4.0); @@ -113,10 +121,10 @@ public void checkGetValuesAndKeysMethods() { assertEquals(values1d2, values1d); assertEquals(keys2, keys); - Memory hqsskMem = Memory.wrap(hqssk.toByteArray()); + MemorySegment hqsskSeg = MemorySegment.ofArray(hqssk.toByteArray()); DirectArrayOfDoublesQuickSelectSketchR dqssk = - (DirectArrayOfDoublesQuickSelectSketchR)ArrayOfDoublesSketch.wrap(hqsskMem, Util.DEFAULT_UPDATE_SEED); + (DirectArrayOfDoublesQuickSelectSketchR)ArrayOfDoublesSketch.wrap(hqsskSeg, Util.DEFAULT_UPDATE_SEED); final double[][] values3 = dqssk.getValues(); final double[] values1d3 = dqssk.getValuesAsOneDimension(); final long[] keys3 = dqssk.getKeys(); @@ -124,10 +132,10 @@ public void checkGetValuesAndKeysMethods() { assertEquals(values1d3, values1d); assertEquals(keys3, keys); - Memory hcskMem = Memory.wrap(hcsk.toByteArray()); + MemorySegment hcskSeg = MemorySegment.ofArray(hcsk.toByteArray()); DirectArrayOfDoublesCompactSketch dcsk2 = - (DirectArrayOfDoublesCompactSketch)ArrayOfDoublesSketch.wrap(hcskMem, Util.DEFAULT_UPDATE_SEED); + (DirectArrayOfDoublesCompactSketch)ArrayOfDoublesSketch.wrap(hcskSeg, 
Util.DEFAULT_UPDATE_SEED); final double[][] values4 = dqssk.getValues(); final double[] values1d4 = dqssk.getValuesAsOneDimension(); final long[] keys4 = dqssk.getKeys(); diff --git a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesIntersectionTest.java b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesIntersectionTest.java index 164557b98..6a54adbdd 100644 --- a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesIntersectionTest.java +++ b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesIntersectionTest.java @@ -21,8 +21,15 @@ import static org.testng.Assert.fail; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesCombiner; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesCompactSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesIntersection; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSetOperationBuilder; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder; import org.testng.Assert; import org.testng.annotations.Test; @@ -105,19 +112,19 @@ public void heapExactWithEmpty() { @Test public void directExactWithEmpty() { final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder() - .build(WritableMemory.writableWrap(new byte[1000000])); + .build(MemorySegment.ofArray(new byte[1000000])); sketch1.update(1, new double[] {1}); sketch1.update(2, new double[] {1}); sketch1.update(3, new double[] {1}); final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder() - .build(WritableMemory.writableWrap(new byte[1000000])); + .build(MemorySegment.ofArray(new 
byte[1000000])); final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder(). - buildIntersection(WritableMemory.writableWrap(new byte[1000000])); + buildIntersection(MemorySegment.ofArray(new byte[1000000])); intersection.intersect(sketch1, null); intersection.intersect(sketch2, null); - final ArrayOfDoublesCompactSketch result = intersection.getResult(WritableMemory.writableWrap(new byte[1000000])); + final ArrayOfDoublesCompactSketch result = intersection.getResult(MemorySegment.ofArray(new byte[1000000])); Assert.assertTrue(result.isEmpty()); Assert.assertEquals(result.getRetainedEntries(), 0); Assert.assertEquals(result.getEstimate(), 0.0); @@ -190,22 +197,22 @@ public void heapDisjointEstimationMode() { public void directDisjointEstimationMode() { int key = 0; final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder(). - build(WritableMemory.writableWrap(new byte[1000000])); + build(MemorySegment.ofArray(new byte[1000000])); for (int i = 0; i < 8192; i++) { sketch1.update(key++, new double[] {1.0}); } final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder(). - build(WritableMemory.writableWrap(new byte[1000000])); + build(MemorySegment.ofArray(new byte[1000000])); for (int i = 0; i < 8192; i++) { sketch2.update(key++, new double[] {1.0}); } final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder(). 
- buildIntersection(WritableMemory.writableWrap(new byte[1000000])); + buildIntersection(MemorySegment.ofArray(new byte[1000000])); intersection.intersect(sketch1, combiner); intersection.intersect(sketch2, combiner); - final ArrayOfDoublesCompactSketch result = intersection.getResult(WritableMemory.writableWrap(new byte[1000000])); + final ArrayOfDoublesCompactSketch result = intersection.getResult(MemorySegment.ofArray(new byte[1000000])); Assert.assertFalse(result.isEmpty()); Assert.assertEquals(result.getRetainedEntries(), 0); Assert.assertEquals(result.getEstimate(), 0.0); @@ -245,21 +252,21 @@ public void heapEstimationMode() { @Test public void directEstimationMode() { int key = 0; - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(WritableMemory.writableWrap(new byte[1000000])); + final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); for (int i = 0; i < 8192; i++) { sketch1.update(key++, new double[] {1.0}); } key -= 4096; // overlap half of the entries - final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(WritableMemory.writableWrap(new byte[1000000])); + final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); for (int i = 0; i < 8192; i++) { sketch2.update(key++, new double[] {1.0}); } - final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().buildIntersection(WritableMemory.writableWrap(new byte[1000000])); + final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().buildIntersection(MemorySegment.ofArray(new byte[1000000])); intersection.intersect(sketch1, combiner); intersection.intersect(sketch2, combiner); - final ArrayOfDoublesCompactSketch result = intersection.getResult(WritableMemory.writableWrap(new byte[1000000])); + final 
ArrayOfDoublesCompactSketch result = intersection.getResult(MemorySegment.ofArray(new byte[1000000])); Assert.assertFalse(result.isEmpty()); Assert.assertEquals(result.getEstimate(), 4096.0, 4096 * 0.03); // crude estimate of RSE(95%) = 2 / sqrt(result.getRetainedEntries()) Assert.assertTrue(result.getLowerBound(1) <= result.getEstimate()); diff --git a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketchTest.java index 46b1a0631..a54c12d20 100644 --- a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketchTest.java @@ -19,9 +19,12 @@ package org.apache.datasketches.tuple.arrayofdoubles; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesQuickSelectSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder; import org.testng.Assert; import org.testng.annotations.Test; @@ -43,7 +46,7 @@ public void heapToDirectExactTwoDoubles() { sketch1.update("d", valuesArr); sketch1.update("a", valuesArr); noopUpdates(sketch1, valuesArr); - ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.wrap(WritableMemory.writableWrap(sketch1.toByteArray())); + ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.wrap(MemorySegment.ofArray(sketch1.toByteArray())); sketch2.update("b", valuesArr); sketch2.update("c", valuesArr); sketch2.update("d", valuesArr); @@ -74,7 +77,7 @@ public void heapToDirectWithSeed() { sketch1.update("b", 
values); sketch1.update("c", values); - ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.wrap(WritableMemory.writableWrap(sketch1.toByteArray()), seed); + ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.wrap(MemorySegment.ofArray(sketch1.toByteArray()), seed); sketch2.update("b", values); sketch2.update("c", values); sketch2.update("d", values); @@ -94,14 +97,14 @@ public void directToHeapExactTwoDoubles() { double[] valuesArr = {1.0, 2.0}; ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder(). - setNumberOfValues(2).build(WritableMemory.writableWrap(new byte[1000000])); + setNumberOfValues(2).build(MemorySegment.ofArray(new byte[1000000])); sketch1.update("a", valuesArr); sketch1.update("b", valuesArr); sketch1.update("c", valuesArr); sketch1.update("d", valuesArr); sketch1.update("a", valuesArr); noopUpdates(sketch1, valuesArr); - ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.heapify(Memory.wrap(sketch1.toByteArray())); + ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.heapify(MemorySegment.ofArray(sketch1.toByteArray())); sketch2.update("b", valuesArr); sketch2.update("c", valuesArr); sketch2.update("d", valuesArr); @@ -128,12 +131,12 @@ public void directToHeapWithSeed() { ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed).build( - WritableMemory.writableWrap(new byte[1000000])); + MemorySegment.ofArray(new byte[1000000])); sketch1.update("a", values); sketch1.update("b", values); sketch1.update("c", values); - ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.heapify(Memory.wrap(sketch1.toByteArray()), seed); + ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.heapify(MemorySegment.ofArray(sketch1.toByteArray()), seed); sketch2.update("b", values); sketch2.update("c", values); sketch2.update("d", values); diff --git 
a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUnionTest.java b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUnionTest.java index 82e3936d4..ea507b89c 100644 --- a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUnionTest.java +++ b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesUnionTest.java @@ -19,11 +19,18 @@ package org.apache.datasketches.tuple.arrayofdoubles; +import java.lang.foreign.MemorySegment; import java.util.Arrays; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesCompactSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSetOperationBuilder; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketchIterator; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUnion; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder; import org.testng.Assert; import org.testng.annotations.Test; @@ -58,8 +65,8 @@ public void heapExactMode() { Assert.assertEquals(values[1][0], 3.0); Assert.assertEquals(values[2][0], 3.0); - final WritableMemory wmem = WritableMemory.writableWrap(union.toByteArray()); - final ArrayOfDoublesUnion wrappedUnion = ArrayOfDoublesSketches.wrapUnion(wmem); + final MemorySegment wseg = MemorySegment.ofArray(union.toByteArray()); + final ArrayOfDoublesUnion wrappedUnion = ArrayOfDoublesSketches.wrapUnion(wseg); result = wrappedUnion.getResult(); Assert.assertEquals(result.getEstimate(), 3.0); values = result.getValues(); @@ -181,7 
+188,7 @@ public void heapSerializeDeserialize() { union1.union(sketch1); union1.union(sketch2); - final ArrayOfDoublesUnion union2 = ArrayOfDoublesUnion.heapify(Memory.wrap(union1.toByteArray())); + final ArrayOfDoublesUnion union2 = ArrayOfDoublesUnion.heapify(MemorySegment.ofArray(union1.toByteArray())); ArrayOfDoublesCompactSketch result = union2.getResult(); Assert.assertEquals(result.getEstimate(), 12288.0, 12288 * 0.01); @@ -218,7 +225,7 @@ public void heapSerializeDeserializeWithSeed() { union1.union(sketch1); union1.union(sketch2); - final ArrayOfDoublesUnion union2 = ArrayOfDoublesUnion.heapify(Memory.wrap(union1.toByteArray()), seed); + final ArrayOfDoublesUnion union2 = ArrayOfDoublesUnion.heapify(MemorySegment.ofArray(union1.toByteArray()), seed); final ArrayOfDoublesCompactSketch result = union2.getResult(); Assert.assertEquals(result.getEstimate(), 12288.0, 12288 * 0.01); } @@ -227,25 +234,25 @@ public void heapSerializeDeserializeWithSeed() { public void directSerializeDeserialize() { int key = 0; final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build( - WritableMemory.writableWrap(new byte[1000000])); + MemorySegment.ofArray(new byte[1000000])); for (int i = 0; i < 8192; i++) { sketch1.update(key++, new double[] {1.0}); } key -= 4096; // overlap half of the entries final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build( - WritableMemory.writableWrap(new byte[1000000])); + MemorySegment.ofArray(new byte[1000000])); for (int i = 0; i < 8192; i++) { sketch2.update(key++, new double[] {1.0}); } final ArrayOfDoublesUnion union1 = new ArrayOfDoublesSetOperationBuilder().buildUnion( - WritableMemory.writableWrap(new byte[1000000])); + MemorySegment.ofArray(new byte[1000000])); union1.union(sketch1); union1.union(sketch2); - final ArrayOfDoublesUnion union2 = ArrayOfDoublesUnion.wrap(WritableMemory.writableWrap(union1.toByteArray())); - ArrayOfDoublesCompactSketch result = 
union2.getResult(WritableMemory.writableWrap(new byte[1000000])); + final ArrayOfDoublesUnion union2 = ArrayOfDoublesUnion.wrap(MemorySegment.ofArray(union1.toByteArray())); + ArrayOfDoublesCompactSketch result = union2.getResult(MemorySegment.ofArray(new byte[1000000])); Assert.assertEquals(result.getEstimate(), 12288.0, 12288 * 0.01); union2.reset(); @@ -267,39 +274,39 @@ public void directSerializeDeserializeWithSeed() { final long seed = 1; int key = 0; final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed) - .build(WritableMemory.writableWrap(new byte[1000000])); + .build(MemorySegment.ofArray(new byte[1000000])); for (int i = 0; i < 8192; i++) { sketch1.update(key++, new double[] {1.0}); } key -= 4096; // overlap half of the entries final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed) - .build(WritableMemory.writableWrap(new byte[1000000])); + .build(MemorySegment.ofArray(new byte[1000000])); for (int i = 0; i < 8192; i++) { sketch2.update(key++, new double[] {1.0}); } final ArrayOfDoublesUnion union1 = new ArrayOfDoublesSetOperationBuilder().setSeed(seed) - .buildUnion(WritableMemory.writableWrap(new byte[1000000])); + .buildUnion(MemorySegment.ofArray(new byte[1000000])); union1.union(sketch1); union1.union(sketch2); - final ArrayOfDoublesUnion union2 = ArrayOfDoublesUnion.wrap(WritableMemory.writableWrap(union1.toByteArray()), seed); - final ArrayOfDoublesCompactSketch result = union2.getResult(WritableMemory.writableWrap(new byte[1000000])); + final ArrayOfDoublesUnion union2 = ArrayOfDoublesUnion.wrap(MemorySegment.ofArray(union1.toByteArray()), seed); + final ArrayOfDoublesCompactSketch result = union2.getResult(MemorySegment.ofArray(new byte[1000000])); Assert.assertEquals(result.getEstimate(), 12288.0, 12288 * 0.01); } @Test public void directExactMode() { final ArrayOfDoublesUpdatableSketch sketch1 = - new 
ArrayOfDoublesUpdatableSketchBuilder().build(WritableMemory.writableWrap(new byte[1000000])); + new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); sketch1.update(1, new double[] {1.0}); sketch1.update(1, new double[] {1.0}); sketch1.update(1, new double[] {1.0}); sketch1.update(2, new double[] {1.0}); final ArrayOfDoublesUpdatableSketch sketch2 = - new ArrayOfDoublesUpdatableSketchBuilder().build(WritableMemory.writableWrap(new byte[1000000])); + new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); sketch2.update(2, new double[] {1.0}); sketch2.update(2, new double[] {1.0}); sketch2.update(3, new double[] {1.0}); @@ -307,10 +314,10 @@ public void directExactMode() { sketch2.update(3, new double[] {1.0}); final ArrayOfDoublesUnion union = - new ArrayOfDoublesSetOperationBuilder().buildUnion(WritableMemory.writableWrap(new byte[1000000])); + new ArrayOfDoublesSetOperationBuilder().buildUnion(MemorySegment.ofArray(new byte[1000000])); union.union(sketch1); union.union(sketch2); - ArrayOfDoublesCompactSketch result = union.getResult(WritableMemory.writableWrap(new byte[1000000])); + ArrayOfDoublesCompactSketch result = union.getResult(MemorySegment.ofArray(new byte[1000000])); Assert.assertEquals(result.getEstimate(), 3.0); final double[][] values = result.getValues(); Assert.assertEquals(values[0][0], 3.0); @@ -331,23 +338,23 @@ public void directExactMode() { public void directEstimationMode() { int key = 0; final ArrayOfDoublesUpdatableSketch sketch1 = - new ArrayOfDoublesUpdatableSketchBuilder().build(WritableMemory.writableWrap(new byte[1000000])); + new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); for (int i = 0; i < 8192; i++) { sketch1.update(key++, new double[] {1.0}); } key -= 4096; // overlap half of the entries final ArrayOfDoublesUpdatableSketch sketch2 = - new 
ArrayOfDoublesUpdatableSketchBuilder().build(WritableMemory.writableWrap(new byte[1000000])); + new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); for (int i = 0; i < 8192; i++) { sketch2.update(key++, new double[] {1.0}); } final ArrayOfDoublesUnion union = - new ArrayOfDoublesSetOperationBuilder().buildUnion(WritableMemory.writableWrap(new byte[1000000])); + new ArrayOfDoublesSetOperationBuilder().buildUnion(MemorySegment.ofArray(new byte[1000000])); union.union(sketch1); union.union(sketch2); - ArrayOfDoublesCompactSketch result = union.getResult(WritableMemory.writableWrap(new byte[1000000])); + ArrayOfDoublesCompactSketch result = union.getResult(MemorySegment.ofArray(new byte[1000000])); Assert.assertEquals(result.getEstimate(), 12288.0, 12288 * 0.01); union.reset(); @@ -379,10 +386,10 @@ public void heapToDirect() { heapUnion.union(sketch1); final ArrayOfDoublesUnion directUnion = - ArrayOfDoublesUnion.wrap(WritableMemory.writableWrap(heapUnion.toByteArray())); - directUnion.union(sketch2); + ArrayOfDoublesUnion.wrap(MemorySegment.ofArray(heapUnion.toByteArray())); + directUnion.union(sketch2); //throws - final ArrayOfDoublesCompactSketch result = directUnion.getResult(WritableMemory.writableWrap(new byte[1000000])); + final ArrayOfDoublesCompactSketch result = directUnion.getResult(MemorySegment.ofArray(new byte[1000000])); Assert.assertFalse(result.isEmpty()); Assert.assertEquals(result.getEstimate(), 3.0); final double[][] values = result.getValues(); @@ -408,10 +415,10 @@ public void directToHeap() { sketch2.update(3, new double[] {1.0}); final ArrayOfDoublesUnion directUnion = - new ArrayOfDoublesSetOperationBuilder().buildUnion(WritableMemory.writableWrap(new byte[1000000])); + new ArrayOfDoublesSetOperationBuilder().buildUnion(MemorySegment.ofArray(new byte[1000000])); directUnion.union(sketch1); - final ArrayOfDoublesUnion heapUnion = ArrayOfDoublesUnion.heapify(Memory.wrap(directUnion.toByteArray())); + final 
ArrayOfDoublesUnion heapUnion = ArrayOfDoublesUnion.heapify(MemorySegment.ofArray(directUnion.toByteArray())); heapUnion.union(sketch2); final ArrayOfDoublesCompactSketch result = heapUnion.getResult(); @@ -448,8 +455,8 @@ public void incompatibleInputSketchMoreValues() { @Test public void directDruidUsageOneSketch() { - final WritableMemory mem = WritableMemory.writableWrap(new byte[1_000_000]); - new ArrayOfDoublesSetOperationBuilder().buildUnion(mem); // just set up memory to wrap later + final MemorySegment seg = MemorySegment.ofArray(new byte[1_000_000]); + new ArrayOfDoublesSetOperationBuilder().buildUnion(seg); // just set up MemorySegment to wrap later final int n = 100_000; // estimation mode final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build(); @@ -458,22 +465,22 @@ public void directDruidUsageOneSketch() { } sketch.trim(); // pretend this is a result from a union - // as Druid wraps memory - WritableMemory mem2 = WritableMemory.writableWrap(new byte[1_000_000]); - ArrayOfDoublesCompactSketch dcsk = sketch.compact(mem2); - ArrayOfDoublesUnion union = ArrayOfDoublesSketches.wrapUnion(mem); //empty union + // as Druid wraps MemorySegment + MemorySegment seg2 = MemorySegment.ofArray(new byte[1_000_000]); + ArrayOfDoublesCompactSketch dcsk = sketch.compact(seg2); + ArrayOfDoublesUnion union = ArrayOfDoublesSketches.wrapUnion(seg); //empty union union.union(dcsk); - //ArrayOfDoublesSketches.wrapUnion(mem).union(sketch.compact(WritableMemory.writableWrap(new byte[1_000_000]))); + //ArrayOfDoublesSketches.wrapUnion(seg).union(sketch.compact(MemorySegment.ofArray(new byte[1_000_000]))); - final ArrayOfDoublesSketch result = ArrayOfDoublesUnion.wrap(mem).getResult(); + final ArrayOfDoublesSketch result = ArrayOfDoublesUnion.wrap(seg).getResult(); Assert.assertEquals(result.getEstimate(), sketch.getEstimate());//expected [98045.91060164096] but found [4096.0] Assert.assertEquals(result.isEstimationMode(), 
sketch.isEstimationMode()); } @Test public void directDruidUsageTwoSketches() { - final WritableMemory mem = WritableMemory.writableWrap(new byte[1000000]); - new ArrayOfDoublesSetOperationBuilder().buildUnion(mem); // just set up memory to wrap later + final MemorySegment seg = MemorySegment.ofArray(new byte[1000000]); + new ArrayOfDoublesSetOperationBuilder().buildUnion(seg); // just set up MemorySegment to wrap later int key = 0; @@ -482,16 +489,16 @@ public void directDruidUsageTwoSketches() { for (int i = 0; i < n1; i++) { sketch1.update(key++, new double[] {1.0}); } - // as Druid wraps memory - ArrayOfDoublesSketches.wrapUnion(mem).union(sketch1.compact(WritableMemory.writableWrap(new byte[1000000]))); + // as Druid wraps MemorySegment + ArrayOfDoublesSketches.wrapUnion(seg).union(sketch1.compact(MemorySegment.ofArray(new byte[1000000]))); final int n2 = 1000000; // estimation mode final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); for (int i = 0; i < n2; i++) { sketch2.update(key++, new double[] {1.0}); } - // as Druid wraps memory - ArrayOfDoublesSketches.wrapUnion(mem).union(sketch2.compact(WritableMemory.writableWrap(new byte[1000000]))); + // as Druid wraps MemorySegment + ArrayOfDoublesSketches.wrapUnion(seg).union(sketch2.compact(MemorySegment.ofArray(new byte[1000000]))); // build one sketch that must be the same as union key = 0; // reset to have the same keys @@ -502,7 +509,7 @@ public void directDruidUsageTwoSketches() { } expected.trim(); // union result is trimmed, so we need to trim this sketch for valid comparison - final ArrayOfDoublesSketch result = ArrayOfDoublesUnion.wrap(mem).getResult(); + final ArrayOfDoublesSketch result = ArrayOfDoublesUnion.wrap(seg).getResult(); Assert.assertEquals(result.getEstimate(), expected.getEstimate()); Assert.assertEquals(result.isEstimationMode(), expected.isEstimationMode()); Assert.assertEquals(result.getUpperBound(1), expected.getUpperBound(1)); diff --git 
a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java index 5d647716a..f1ede92e6 100644 --- a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java +++ b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java @@ -24,6 +24,14 @@ import static org.testng.Assert.assertTrue; import org.apache.datasketches.common.Util; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesAnotB; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesCombiner; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesCompactSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesIntersection; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSetOperationBuilder; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUnion; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder; import org.testng.annotations.Test; public class CornerCaseArrayOfDoublesSetOperationsTest { diff --git a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java index 34160a41d..4cf9693ca 100644 --- a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java +++ b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java @@ -19,12 +19,16 @@ package org.apache.datasketches.tuple.arrayofdoubles; -import static org.apache.datasketches.common.Util.computeSeedHash; -import static 
org.apache.datasketches.common.Util.DEFAULT_UPDATE_SEED; +import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.WritableMemory; -//import org.apache.datasketches.tuple.Util; +import org.apache.datasketches.common.Util; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesCompactSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketchIterator; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder; import org.testng.Assert; import org.testng.annotations.Test; @@ -33,8 +37,8 @@ public class DirectArrayOfDoublesCompactSketchTest { @Test public void emptyFromQuickSelectSketch() { ArrayOfDoublesUpdatableSketch us = - new ArrayOfDoublesUpdatableSketchBuilder().build(WritableMemory.writableWrap(new byte[1000000])); - ArrayOfDoublesCompactSketch sketch = us.compact(WritableMemory.writableWrap(new byte[1000000])); + new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); + ArrayOfDoublesCompactSketch sketch = us.compact(MemorySegment.ofArray(new byte[1000000])); Assert.assertTrue(sketch.isEmpty()); Assert.assertFalse(sketch.isEstimationMode()); Assert.assertEquals(sketch.getEstimate(), 0.0); @@ -54,14 +58,14 @@ public void emptyFromQuickSelectSketch() { @Test public void exactModeFromQuickSelectSketch() { ArrayOfDoublesUpdatableSketch us = - new ArrayOfDoublesUpdatableSketchBuilder().build(WritableMemory.writableWrap(new byte[1000000])); + new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); us.update(1, new double[] {1.0}); us.update(2, new double[] {1.0}); us.update(3, new double[] {1.0}); us.update(1, new 
double[] {1.0}); us.update(2, new double[] {1.0}); us.update(3, new double[] {1.0}); - ArrayOfDoublesCompactSketch sketch = us.compact(WritableMemory.writableWrap(new byte[1000000])); + ArrayOfDoublesCompactSketch sketch = us.compact(MemorySegment.ofArray(new byte[1000000])); Assert.assertFalse(sketch.isEmpty()); Assert.assertFalse(sketch.isEstimationMode()); Assert.assertEquals(sketch.getEstimate(), 3.0); @@ -70,7 +74,7 @@ public void exactModeFromQuickSelectSketch() { Assert.assertEquals(sketch.getRetainedEntries(), 3); Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); Assert.assertEquals(sketch.getTheta(), 1.0); - Assert.assertEquals(sketch.getSeedHash(), computeSeedHash(DEFAULT_UPDATE_SEED)); + Assert.assertEquals(sketch.getSeedHash(), Util.computeSeedHash(Util.DEFAULT_UPDATE_SEED)); double[][] values = sketch.getValues(); Assert.assertEquals(values.length, 3); for (double[] array: values) { @@ -80,12 +84,12 @@ public void exactModeFromQuickSelectSketch() { @Test public void serializeDeserializeSmallExact() { - ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(WritableMemory.writableWrap(new byte[1000000])); + ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); us.update("a", new double[] {1.0}); us.update("b", new double[] {1.0}); us.update("c", new double[] {1.0}); - ArrayOfDoublesCompactSketch sketch1 = us.compact(WritableMemory.writableWrap(new byte[1000000])); - ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketches.wrapSketch(WritableMemory.writableWrap(sketch1.toByteArray())); + ArrayOfDoublesCompactSketch sketch1 = us.compact(MemorySegment.ofArray(new byte[1000000])); + ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketches.wrapSketch(MemorySegment.ofArray(sketch1.toByteArray())); Assert.assertFalse(sketch2.isEmpty()); Assert.assertFalse(sketch2.isEstimationMode()); Assert.assertEquals(sketch2.getEstimate(), 3.0); @@ -103,12 +107,12 @@ 
public void serializeDeserializeSmallExact() { @Test public void serializeDeserializeEstimation() { - ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(WritableMemory.writableWrap(new byte[1000000])); + ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); for (int i = 0; i < 8192; i++) { us.update(i, new double[] {1.0}); } - ArrayOfDoublesCompactSketch sketch1 = us.compact(WritableMemory.writableWrap(new byte[1000000])); - ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketches.wrapSketch(WritableMemory.writableWrap(sketch1.toByteArray())); + ArrayOfDoublesCompactSketch sketch1 = us.compact(MemorySegment.ofArray(new byte[1000000])); + ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketches.wrapSketch(MemorySegment.ofArray(sketch1.toByteArray())); Assert.assertFalse(sketch2.isEmpty()); Assert.assertTrue(sketch2.isEstimationMode()); Assert.assertEquals(sketch2.getEstimate(), sketch1.getEstimate()); @@ -117,19 +121,19 @@ public void serializeDeserializeEstimation() { @Test(expectedExceptions = SketchesArgumentException.class) public void deserializeWithWrongSeed() { - ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(WritableMemory.writableWrap(new byte[1000000])); + ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); for (int i = 0; i < 8192; i++) { us.update(i, new double[] {1.0}); } - ArrayOfDoublesCompactSketch sketch1 = us.compact(WritableMemory.writableWrap(new byte[1000000])); - ArrayOfDoublesSketches.wrapSketch(WritableMemory.writableWrap(sketch1.toByteArray()), 123); + ArrayOfDoublesCompactSketch sketch1 = us.compact(MemorySegment.ofArray(new byte[1000000])); + ArrayOfDoublesSketches.wrapSketch(MemorySegment.ofArray(sketch1.toByteArray()), 123); } @Test(expectedExceptions = SketchesArgumentException.class) - public void 
fromQuickSelectSketchNotEnoughMemory() { - ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(WritableMemory.writableWrap(new byte[1000000])); + public void fromQuickSelectSketchNotEnoughMemorySegment() { + ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); us.update(1, new double[] {1.0}); - us.compact(WritableMemory.writableWrap(new byte[39])); + us.compact(MemorySegment.ofArray(new byte[39])); } } diff --git a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchTest.java index a8e066f4d..781f3893b 100644 --- a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchTest.java @@ -19,11 +19,16 @@ package org.apache.datasketches.tuple.arrayofdoubles; +import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketchIterator; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder; +import org.apache.datasketches.tuple.arrayofdoubles.DirectArrayOfDoublesQuickSelectSketch; import org.testng.Assert; import org.testng.annotations.Test; @@ -32,7 +37,7 @@ public class DirectArrayOfDoublesQuickSelectSketchTest { public void isEmpty() { final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder(). 
- build(WritableMemory.writableWrap(new byte[1000000])); + build(MemorySegment.ofArray(new byte[1000000])); Assert.assertTrue(sketch.isEmpty()); Assert.assertFalse(sketch.isEstimationMode()); Assert.assertEquals(sketch.getEstimate(), 0.0); @@ -52,7 +57,7 @@ public void isEmptyWithSampling() { final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder(). setSamplingProbability(samplingProbability). - build(WritableMemory.writableWrap(new byte[1000000])); + build(MemorySegment.ofArray(new byte[1000000])); Assert.assertTrue(sketch.isEmpty()); Assert.assertTrue(((DirectArrayOfDoublesQuickSelectSketch)sketch).isInSamplingMode()); Assert.assertFalse(sketch.isEstimationMode()); @@ -72,7 +77,7 @@ public void sampling() { final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder(). setSamplingProbability(samplingProbability). - build(WritableMemory.writableWrap(new byte[1000000])); + build(MemorySegment.ofArray(new byte[1000000])); sketch.update("a", new double[] {1.0}); Assert.assertFalse(sketch.isEmpty()); Assert.assertTrue(sketch.isEstimationMode()); @@ -88,7 +93,7 @@ public void sampling() { public void exactMode() { final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder(). - build(WritableMemory.writableWrap(new byte[1000000])); + build(MemorySegment.ofArray(new byte[1000000])); Assert.assertTrue(sketch.isEmpty()); Assert.assertEquals(sketch.getEstimate(), 0.0); for (int i = 0; i < 4096; i++) { @@ -136,7 +141,7 @@ public void exactMode() { public void estimationMode() { final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder(). 
- build(WritableMemory.writableWrap(new byte[4096 * 2 * 16 + 32])); + build(MemorySegment.ofArray(new byte[4096 * 2 * 16 + 32])); Assert.assertEquals(sketch.getEstimate(), 0.0); for (int i = 1; i <= 8192; i++) { sketch.update(i, new double[] {1.0}); @@ -177,7 +182,7 @@ public void estimationMode() { public void updatesOfAllKeyTypes() { final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder(). - build(WritableMemory.writableWrap(new byte[1000000])); + build(MemorySegment.ofArray(new byte[1000000])); sketch.update(1L, new double[] {1.0}); sketch.update(2.0, new double[] {1.0}); final byte[] bytes = new byte[] {3, 4}; @@ -194,7 +199,7 @@ public void updatesOfAllKeyTypes() { public void doubleSum() { final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder(). - build(WritableMemory.writableWrap(new byte[1000000])); + build(MemorySegment.ofArray(new byte[1000000])); sketch.update(1, new double[] {1.0}); Assert.assertEquals(sketch.getRetainedEntries(), 1); Assert.assertEquals(sketch.getValues()[0][0], 1.0); @@ -210,10 +215,10 @@ public void doubleSum() { public void serializeDeserializeExact() throws Exception { final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder(). 
- build(WritableMemory.writableWrap(new byte[1000000])); + build(MemorySegment.ofArray(new byte[1000000])); sketch1.update(1, new double[] {1.0}); - final ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.wrap(WritableMemory.writableWrap(sketch1.toByteArray())); + final ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.wrap(MemorySegment.ofArray(sketch1.toByteArray())); Assert.assertEquals(sketch2.getEstimate(), 1.0); final double[][] values = sketch2.getValues(); @@ -232,7 +237,7 @@ public void serializeDeserializeExact() throws Exception { public void serializeDeserializeEstimationNoResize() throws Exception { final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setResizeFactor(ResizeFactor.X1). - build(WritableMemory.writableWrap(new byte[1000000])); + build(MemorySegment.ofArray(new byte[1000000])); for (int j = 0; j < 10; j++) { for (int i = 0; i < 8192; i++) { sketch1.update(i, new double[] {1.0}); @@ -243,7 +248,7 @@ public void serializeDeserializeEstimationNoResize() throws Exception { //for visual testing //TestUtil.writeBytesToFile(byteArray, "ArrayOfDoublesQuickSelectSketch4K.data"); - final ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketch.wrap(WritableMemory.writableWrap(byteArray)); + final ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketch.wrap(MemorySegment.ofArray(byteArray)); Assert.assertTrue(sketch2.isEstimationMode()); Assert.assertEquals(sketch2.getEstimate(), 8192, 8192 * 0.99); Assert.assertEquals(sketch1.getTheta(), sketch2.getTheta()); @@ -261,12 +266,12 @@ public void serializeDeserializeSampling() { final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder(). setNominalEntries(sketchSize).setSamplingProbability(0.5f). 
- build(WritableMemory.writableWrap(new byte[1000000])); + build(MemorySegment.ofArray(new byte[1000000])); for (int i = 0; i < numberOfUniques; i++) { sketch1.update(i, new double[] {1.0}); } final ArrayOfDoublesSketch sketch2 = - ArrayOfDoublesSketch.wrap(WritableMemory.writableWrap(sketch1.toByteArray())); + ArrayOfDoublesSketch.wrap(MemorySegment.ofArray(sketch1.toByteArray())); Assert.assertTrue(sketch2.isEstimationMode()); Assert.assertEquals(sketch2.getEstimate() / numberOfUniques, 1.0, 0.01); Assert.assertEquals(sketch2.getRetainedEntries() / (double) numberOfUniques, 0.5, 0.01); @@ -274,8 +279,8 @@ public void serializeDeserializeSampling() { } @Test(expectedExceptions = SketchesArgumentException.class) - public void notEnoughMemory() { + public void memorySegmentNotLargeEnough() { new ArrayOfDoublesUpdatableSketchBuilder(). - setNominalEntries(32).build(WritableMemory.writableWrap(new byte[1055])); + setNominalEntries(32).build(MemorySegment.ofArray(new byte[1055])); } } diff --git a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesCompactSketchTest.java b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesCompactSketchTest.java index 055914d3a..0583c139f 100644 --- a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesCompactSketchTest.java +++ b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesCompactSketchTest.java @@ -19,9 +19,15 @@ package org.apache.datasketches.tuple.arrayofdoubles; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesCompactSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketchIterator; +import 
org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketches; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder; import org.testng.Assert; import org.testng.annotations.Test; @@ -80,7 +86,7 @@ public void serializeDeserializeSmallExact() { us.update("c", new double[] {1.0}); ArrayOfDoublesCompactSketch sketch1 = us.compact(); ArrayOfDoublesSketch sketch2 = - ArrayOfDoublesSketches.heapifySketch(Memory.wrap(sketch1.toByteArray())); + ArrayOfDoublesSketches.heapifySketch(MemorySegment.ofArray(sketch1.toByteArray())); Assert.assertFalse(sketch2.isEmpty()); Assert.assertFalse(sketch2.isEstimationMode()); Assert.assertEquals(sketch2.getEstimate(), 3.0); @@ -102,14 +108,14 @@ public void serializeDeserializeEstimation() { for (int i = 0; i < 8192; i++) { us.update(i, new double[] {1.0}); } - WritableMemory wmem = WritableMemory.writableWrap(us.toByteArray()); - ArrayOfDoublesUpdatableSketch wrappedUS = ArrayOfDoublesSketches.wrapUpdatableSketch(wmem); + MemorySegment wseg = MemorySegment.ofArray(us.toByteArray()); + ArrayOfDoublesUpdatableSketch wrappedUS = ArrayOfDoublesSketches.wrapUpdatableSketch(wseg); Assert.assertFalse(wrappedUS.isEmpty()); Assert.assertTrue(wrappedUS.isEstimationMode()); Assert.assertEquals(wrappedUS.getEstimate(), us.getEstimate()); Assert.assertEquals(wrappedUS.getThetaLong(), us.getThetaLong()); - ArrayOfDoublesUpdatableSketch heapUS = ArrayOfDoublesSketches.heapifyUpdatableSketch(wmem); + ArrayOfDoublesUpdatableSketch heapUS = ArrayOfDoublesSketches.heapifyUpdatableSketch(wseg); Assert.assertFalse(heapUS.isEmpty()); Assert.assertTrue(heapUS.isEstimationMode()); Assert.assertEquals(heapUS.getEstimate(), us.getEstimate()); @@ -117,7 +123,7 @@ public void serializeDeserializeEstimation() { ArrayOfDoublesCompactSketch sketch1 = us.compact(); ArrayOfDoublesSketch sketch2 = - 
ArrayOfDoublesSketches.heapifySketch(Memory.wrap(sketch1.toByteArray())); + ArrayOfDoublesSketches.heapifySketch(MemorySegment.ofArray(sketch1.toByteArray())); Assert.assertFalse(sketch2.isEmpty()); Assert.assertTrue(sketch2.isEstimationMode()); Assert.assertEquals(sketch2.getEstimate(), sketch1.getEstimate()); @@ -131,7 +137,7 @@ public void deserializeWithWrongSeed() { us.update(i, new double[] {1.0}); } ArrayOfDoublesCompactSketch sketch1 = us.compact(); - Memory mem = Memory.wrap(sketch1.toByteArray()); - ArrayOfDoublesSketches.heapifySketch(mem, 123); + MemorySegment seg = MemorySegment.ofArray(sketch1.toByteArray()); + ArrayOfDoublesSketches.heapifySketch(seg, 123); } } diff --git a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketchTest.java index 15de291ee..9fe991b36 100644 --- a/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketchTest.java @@ -19,9 +19,13 @@ package org.apache.datasketches.tuple.arrayofdoubles; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketchIterator; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketch; +import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder; import org.testng.Assert; import org.testng.annotations.Test; @@ -186,7 +190,7 @@ public void serializeDeserializeExact() { final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); 
sketch1.update(1, new double[] {1.0}); - final ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.heapify(WritableMemory.writableWrap(sketch1.toByteArray())); + final ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.heapify(MemorySegment.ofArray(sketch1.toByteArray())); Assert.assertEquals(sketch2.getEstimate(), 1.0); final double[][] values = sketch2.getValues(); @@ -215,7 +219,7 @@ public void serializeDeserializeEstimationNoResize() throws Exception { //for visual testing //TestUtil.writeBytesToFile(byteArray, "ArrayOfDoublesQuickSelectSketch4K.data"); - final ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketch.heapify(Memory.wrap(byteArray)); + final ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketch.heapify(MemorySegment.ofArray(byteArray)); Assert.assertTrue(sketch2.isEstimationMode()); Assert.assertEquals(sketch2.getEstimate(), 8192, 8192 * 0.99); Assert.assertEquals(sketch1.getTheta(), sketch2.getTheta()); @@ -234,7 +238,7 @@ public void serializeDeserializeSampling() { for (int i = 0; i < numberOfUniques; i++) { sketch1.update(i, new double[] {1.0}); } - final ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketch.heapify(Memory.wrap(sketch1.toByteArray())); + final ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketch.heapify(MemorySegment.ofArray(sketch1.toByteArray())); Assert.assertTrue(sketch2.isEstimationMode()); Assert.assertEquals(sketch2.getEstimate() / numberOfUniques, 1.0, 0.01); Assert.assertEquals(sketch2.getRetainedEntries() / (double) numberOfUniques, 0.5, 0.01); diff --git a/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketchTest.java b/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketchTest.java index 803c2f591..7287c3429 100644 --- a/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketchTest.java +++ b/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketchTest.java @@ -22,12 +22,16 @@ import static 
org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; -import org.apache.datasketches.memory.WritableMemory; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.tuple.AnotB; import org.apache.datasketches.tuple.CompactSketch; import org.apache.datasketches.tuple.Intersection; import org.apache.datasketches.tuple.TupleSketchIterator; import org.apache.datasketches.tuple.Union; +import org.apache.datasketches.tuple.strings.ArrayOfStringsSketch; +import org.apache.datasketches.tuple.strings.ArrayOfStringsSummary; +import org.apache.datasketches.tuple.strings.ArrayOfStringsSummarySetOperations; import org.testng.annotations.Test; /** @@ -48,8 +52,8 @@ public void checkSketch() { sketch1.update(strArrArr[0], strArrArr[0]); //insert duplicate printSummaries(sketch1.iterator()); byte[] array = sketch1.toByteArray(); - WritableMemory wmem = WritableMemory.writableWrap(array); - ArrayOfStringsSketch sketch2 = new ArrayOfStringsSketch(wmem); + MemorySegment wseg = MemorySegment.ofArray(array); + ArrayOfStringsSketch sketch2 = new ArrayOfStringsSketch(wseg); printSummaries(sketch2.iterator()); checkSummaries(sketch2, sketch2); diff --git a/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummaryTest.java b/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummaryTest.java index 99cc2bc9d..1b1d94bed 100644 --- a/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummaryTest.java +++ b/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummaryTest.java @@ -23,10 +23,13 @@ import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; +import java.lang.foreign.MemorySegment; + import org.testng.annotations.Test; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; import org.apache.datasketches.tuple.DeserializeResult; +import org.apache.datasketches.tuple.strings.ArrayOfStringsSummary; 
+import org.apache.datasketches.tuple.strings.ArrayOfStringsSummaryDeserializer; /** * @author Lee Rhodes @@ -41,15 +44,15 @@ public void checkToByteArray() { assertTrue(copy.equals(nsum)); byte[] out = nsum.toByteArray(); - Memory mem = Memory.wrap(out); - ArrayOfStringsSummary nsum2 = new ArrayOfStringsSummary(mem); + MemorySegment seg = MemorySegment.ofArray(out); + ArrayOfStringsSummary nsum2 = new ArrayOfStringsSummary(seg); String[] nodesArr = nsum2.getValue(); for (String s : nodesArr) { println(s); } - println("\nfromMemory(mem)"); - DeserializeResult dres = ArrayOfStringsSummaryDeserializer.fromMemory(mem); + println("\nfromMemorySegment(seg)"); + DeserializeResult dres = ArrayOfStringsSummaryDeserializer.fromMemorySegment(seg); ArrayOfStringsSummary nsum3 = dres.getObject(); nodesArr = nsum3.getValue(); for (String s : nodesArr) { @@ -64,8 +67,8 @@ public void checkNumNodes() { @Test(expectedExceptions = SketchesArgumentException.class) public void checkInBytes() { - Memory mem = Memory.wrap(new byte[100]); - ArrayOfStringsSummary.checkInBytes(mem, 200); + MemorySegment seg = MemorySegment.ofArray(new byte[100]); + ArrayOfStringsSummary.checkInBytes(seg, 200); } @SuppressWarnings("unlikely-arg-type") diff --git a/src/test/java/org/apache/datasketches/tuple2/CompactSketchWithDoubleSummaryTest.java b/src/test/java/org/apache/datasketches/tuple2/CompactSketchWithDoubleSummaryTest.java deleted file mode 100644 index fdaf1de26..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/CompactSketchWithDoubleSummaryTest.java +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.tuple2.adouble.DoubleSummary; -import org.apache.datasketches.tuple2.adouble.DoubleSummary.Mode; -import org.apache.datasketches.tuple2.adouble.DoubleSummaryDeserializer; -import org.apache.datasketches.tuple2.adouble.DoubleSummaryFactory; -import org.testng.Assert; -import org.testng.annotations.Test; - -public class CompactSketchWithDoubleSummaryTest { - private final DoubleSummary.Mode mode = Mode.Sum; - - @Test - public void emptyFromNonPublicConstructorNullArray() { - CompactSketch sketch = - new CompactSketch<>(null, null, Long.MAX_VALUE, true); - Assert.assertTrue(sketch.isEmpty()); - Assert.assertFalse(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 0.0); - Assert.assertEquals(sketch.getLowerBound(1), 0.0); - Assert.assertEquals(sketch.getUpperBound(1), 0.0); - Assert.assertEquals(sketch.getRetainedEntries(), 0); - Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); - Assert.assertEquals(sketch.getTheta(), 1.0); - TupleSketchIterator it = sketch.iterator(); - Assert.assertNotNull(it); - Assert.assertFalse(it.next()); - sketch.toString(); - } - - @Test - public void emptyFromNonPublicConstructor() { - long[] keys = new long[0]; - DoubleSummary[] summaries = - (DoubleSummary[]) java.lang.reflect.Array.newInstance(DoubleSummary.class, 0); - CompactSketch sketch = - new CompactSketch<>(keys, summaries, Long.MAX_VALUE, true); 
- Assert.assertTrue(sketch.isEmpty()); - Assert.assertFalse(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 0.0); - Assert.assertEquals(sketch.getLowerBound(1), 0.0); - Assert.assertEquals(sketch.getUpperBound(1), 0.0); - Assert.assertEquals(sketch.getRetainedEntries(), 0); - Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); - Assert.assertEquals(sketch.getTheta(), 1.0); - TupleSketchIterator it = sketch.iterator(); - Assert.assertNotNull(it); - Assert.assertFalse(it.next()); - } - - @Test - public void emptyFromQuickSelectSketch() { - UpdatableSketch us = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); - CompactSketch sketch = us.compact(); - Assert.assertTrue(sketch.isEmpty()); - Assert.assertFalse(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 0.0); - Assert.assertEquals(sketch.getLowerBound(1), 0.0); - Assert.assertEquals(sketch.getUpperBound(1), 0.0); - Assert.assertEquals(sketch.getRetainedEntries(), 0); - Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); - Assert.assertEquals(sketch.getTheta(), 1.0); - TupleSketchIterator it = sketch.iterator(); - Assert.assertNotNull(it); - Assert.assertFalse(it.next()); - } - - @Test - public void exactModeFromQuickSelectSketch() { - UpdatableSketch us = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); - us.update(1, 1.0); - us.update(2, 1.0); - us.update(3, 1.0); - us.update(1, 1.0); - us.update(2, 1.0); - us.update(3, 1.0); - CompactSketch sketch = us.compact(); - Assert.assertFalse(sketch.isEmpty()); - Assert.assertFalse(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 3.0); - Assert.assertEquals(sketch.getLowerBound(1), 3.0); - Assert.assertEquals(sketch.getUpperBound(1), 3.0); - Assert.assertEquals(sketch.getRetainedEntries(), 3); - Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); - Assert.assertEquals(sketch.getTheta(), 1.0); - TupleSketchIterator it = 
sketch.iterator(); - int count = 0; - while (it.next()) { - Assert.assertEquals(it.getSummary().getValue(), 2.0); - count++; - } - Assert.assertEquals(count, 3); - } - - @Test - public void serializeDeserializeSmallExact() { - UpdatableSketch us = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); - us.update("a", 1.0); - us.update("b", 1.0); - us.update("c", 1.0); - CompactSketch sketch1 = us.compact(); - Sketch sketch2 = - Sketches.heapifySketch(MemorySegment.ofArray(sketch1.toByteArray()), - new DoubleSummaryDeserializer()); - Assert.assertFalse(sketch2.isEmpty()); - Assert.assertFalse(sketch2.isEstimationMode()); - Assert.assertEquals(sketch2.getEstimate(), 3.0); - Assert.assertEquals(sketch2.getLowerBound(1), 3.0); - Assert.assertEquals(sketch2.getUpperBound(1), 3.0); - Assert.assertEquals(sketch2.getRetainedEntries(), 3); - Assert.assertEquals(sketch2.getThetaLong(), Long.MAX_VALUE); - Assert.assertEquals(sketch2.getTheta(), 1.0); - TupleSketchIterator it = sketch2.iterator(); - int count = 0; - while (it.next()) { - Assert.assertEquals(it.getSummary().getValue(), 1.0); - count++; - } - Assert.assertEquals(count, 3); - } - - @Test - public void serializeDeserializeEstimation() throws Exception { - UpdatableSketch us = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); - for (int i = 0; i < 8192; i++) { - us.update(i, 1.0); - } - us.trim(); - CompactSketch sketch1 = us.compact(); - byte[] bytes = sketch1.toByteArray(); - - // for binary testing - //TestUtil.writeBytesToFile(bytes, "CompactSketchWithDoubleSummary4K.sk"); - - Sketch sketch2 = - Sketches.heapifySketch(MemorySegment.ofArray(bytes), new DoubleSummaryDeserializer()); - Assert.assertFalse(sketch2.isEmpty()); - Assert.assertTrue(sketch2.isEstimationMode()); - Assert.assertEquals(sketch2.getEstimate(), sketch1.getEstimate()); - Assert.assertEquals(sketch2.getThetaLong(), sketch1.getThetaLong()); - TupleSketchIterator it = sketch2.iterator(); - int count = 0; - 
while (it.next()) { - Assert.assertEquals(it.getSummary().getValue(), 1.0); - count++; - } - Assert.assertEquals(count, 4096); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void deserializeWrongType() { - UpdatableSketch us = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); - for (int i = 0; i < 8192; i++) { - us.update(i, 1.0); - } - CompactSketch sketch1 = us.compact(); - Sketches.heapifyUpdatableSketch(MemorySegment.ofArray(sketch1.toByteArray()), - new DoubleSummaryDeserializer(), - new DoubleSummaryFactory(mode)); - } - -} diff --git a/src/test/java/org/apache/datasketches/tuple2/IntegerSummary.java b/src/test/java/org/apache/datasketches/tuple2/IntegerSummary.java deleted file mode 100644 index 7fd85554a..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/IntegerSummary.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2; - -import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.ByteArrayUtil; - -/** - * Summary for generic tuple sketches of type Integer. 
- * This summary keeps an Integer value. - */ -public class IntegerSummary implements UpdatableSummary { - private int value_; - - /** - * Creates an instance of IntegerSummary with a given starting value. - * @param value starting value - */ - public IntegerSummary(final int value) { - value_ = value; - } - - @Override - public IntegerSummary update(final Integer value) { - value_ += value; - return this; - } - - @Override - public IntegerSummary copy() { - return new IntegerSummary(value_); - } - - /** - * @return current value of the IntegerSummary - */ - public int getValue() { - return value_; - } - - private static final int SERIALIZED_SIZE_BYTES = 4; - private static final int VALUE_INDEX = 0; - - @Override - public byte[] toByteArray() { - final byte[] bytes = new byte[SERIALIZED_SIZE_BYTES]; - ByteArrayUtil.putIntLE(bytes, VALUE_INDEX, value_); - return bytes; - } - - /** - * Creates an instance of the IntegerSummary given a serialized representation - * @param seg MemorySegment object with serialized IntegerSummary - * @return DeserializedResult object, which contains a IntegerSummary object and number of bytes - * read from the MemorySegment - */ - public static DeserializeResult fromMemorySegment(final MemorySegment seg) { - return new DeserializeResult<>(new IntegerSummary(seg.get(JAVA_INT_UNALIGNED, VALUE_INDEX)), SERIALIZED_SIZE_BYTES); - } - -} diff --git a/src/test/java/org/apache/datasketches/tuple2/IntegerSummaryDeserializer.java b/src/test/java/org/apache/datasketches/tuple2/IntegerSummaryDeserializer.java deleted file mode 100644 index 885bbf0cf..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/IntegerSummaryDeserializer.java +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2; - -import java.lang.foreign.MemorySegment; - -public class IntegerSummaryDeserializer implements SummaryDeserializer { - - @Override - public DeserializeResult heapifySummary(final MemorySegment seg) { - return IntegerSummary.fromMemorySegment(seg); - } - -} diff --git a/src/test/java/org/apache/datasketches/tuple2/IntegerSummaryFactory.java b/src/test/java/org/apache/datasketches/tuple2/IntegerSummaryFactory.java deleted file mode 100644 index 6edf88608..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/IntegerSummaryFactory.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2; - -/** - * Factory for IntegerSummary. - */ -public class IntegerSummaryFactory implements SummaryFactory { - - @Override - public IntegerSummary newSummary() { - return new IntegerSummary(0); - } - -} diff --git a/src/test/java/org/apache/datasketches/tuple2/JaccardSimilarityTest.java b/src/test/java/org/apache/datasketches/tuple2/JaccardSimilarityTest.java deleted file mode 100644 index a4dab704e..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/JaccardSimilarityTest.java +++ /dev/null @@ -1,457 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2; - -import org.apache.datasketches.tuple2.adouble.DoubleSummary; -import org.apache.datasketches.tuple2.adouble.DoubleSummaryFactory; -import org.apache.datasketches.tuple2.adouble.DoubleSummarySetOperations; -import org.testng.annotations.Test; - -import org.apache.datasketches.theta2.UpdateSketch; -import org.apache.datasketches.theta2.UpdateSketchBuilder; -import static org.apache.datasketches.tuple2.JaccardSimilarity.dissimilarityTest; -import static org.apache.datasketches.tuple2.JaccardSimilarity.exactlyEqual; -import static org.apache.datasketches.tuple2.JaccardSimilarity.jaccard; -import static org.apache.datasketches.tuple2.JaccardSimilarity.similarityTest; -import static org.testng.Assert.assertFalse; -import static org.testng.Assert.assertTrue; - -/** - * @author Lee Rhodes - * @author David Cromberge - */ -public class JaccardSimilarityTest { - private final DoubleSummary.Mode umode = DoubleSummary.Mode.Sum; - private final DoubleSummarySetOperations dsso = new DoubleSummarySetOperations(); - private final DoubleSummaryFactory factory = new DoubleSummaryFactory(umode); - private final UpdateSketchBuilder thetaBldr = UpdateSketch.builder(); - private final UpdatableSketchBuilder tupleBldr = new UpdatableSketchBuilder<>(factory); - private final Double constSummary = 1.0; - - @Test - public void checkNullsEmpties1() { // tuple, tuple - int minK = 1 << 12; - double threshold = 0.95; - println("Check nulls & empties, minK: " + minK + "\t Th: " + threshold); - //check both null - double[] jResults = jaccard(null, null, dsso); - boolean state = jResults[1] > threshold; - println("null \t null:\t" + state + "\t" + jaccardString(jResults)); - assertFalse(state); - - state = exactlyEqual(null, null, dsso); - assertFalse(state); - - final UpdatableSketch measured = tupleBldr.setNominalEntries(minK).build(); - final UpdatableSketch expected = tupleBldr.setNominalEntries(minK).build(); - - //check both empty - jResults 
= jaccard(measured, expected, dsso); - state = jResults[1] > threshold; - println("empty\tempty:\t" + state + "\t" + jaccardString(jResults)); - assertTrue(state); - - state = exactlyEqual(measured, expected, dsso); - assertTrue(state); - - state = exactlyEqual(measured, measured, dsso); - assertTrue(state); - - //adjust one - expected.update(1, constSummary); - jResults = jaccard(measured, expected, dsso); - state = jResults[1] > threshold; - println("empty\t 1:\t" + state + "\t" + jaccardString(jResults)); - assertFalse(state); - - state = exactlyEqual(measured, expected, dsso); - assertFalse(state); - - println(""); - } - - @Test - public void checkNullsEmpties2() { // tuple, theta - int minK = 1 << 12; - double threshold = 0.95; - println("Check nulls & empties, minK: " + minK + "\t Th: " + threshold); - //check both null - double[] jResults = jaccard(null, null, factory.newSummary(), dsso); - boolean state = jResults[1] > threshold; - println("null \t null:\t" + state + "\t" + jaccardString(jResults)); - assertFalse(state); - - state = exactlyEqual(null, null, factory.newSummary(), dsso); - assertFalse(state); - - final UpdatableSketch measured = tupleBldr.setNominalEntries(minK).build(); - final UpdateSketch expected = thetaBldr.setNominalEntries(minK).build(); - - //check both empty - jResults = jaccard(measured, expected, factory.newSummary(), dsso); - state = jResults[1] > threshold; - println("empty\tempty:\t" + state + "\t" + jaccardString(jResults)); - assertTrue(state); - - state = exactlyEqual(measured, expected, factory.newSummary(), dsso); - assertTrue(state); - - state = exactlyEqual(measured, measured, dsso); - assertTrue(state); - - //adjust one - expected.update(1); - jResults = jaccard(measured, expected, factory.newSummary(), dsso); - state = jResults[1] > threshold; - println("empty\t 1:\t" + state + "\t" + jaccardString(jResults)); - assertFalse(state); - - state = exactlyEqual(measured, expected, factory.newSummary(), dsso); - 
assertFalse(state); - - println(""); - } - - @Test - public void checkExactMode1() { // tuple, tuple - int k = 1 << 12; - int u = k; - double threshold = 0.9999; - println("Exact Mode, minK: " + k + "\t Th: " + threshold); - - final UpdatableSketch measured = tupleBldr.setNominalEntries(k).build(); - final UpdatableSketch expected = tupleBldr.setNominalEntries(k).build(); - - for (int i = 0; i < (u-1); i++) { //one short - measured.update(i, constSummary); - expected.update(i, constSummary); - } - - double[] jResults = jaccard(measured, expected, dsso); - boolean state = jResults[1] > threshold; - println(state + "\t" + jaccardString(jResults)); - assertTrue(state); - - state = exactlyEqual(measured, expected, dsso); - assertTrue(state); - - measured.update(u-1, constSummary); //now exactly k entries - expected.update(u, constSummary); //now exactly k entries but differs by one - jResults = jaccard(measured, expected, dsso); - state = jResults[1] > threshold; - println(state + "\t" + jaccardString(jResults)); - assertFalse(state); - - state = exactlyEqual(measured, expected, dsso); - assertFalse(state); - - println(""); - } - - @Test - public void checkExactMode2() { // tuple, theta - int k = 1 << 12; - int u = k; - double threshold = 0.9999; - println("Exact Mode, minK: " + k + "\t Th: " + threshold); - - final UpdatableSketch measured = tupleBldr.setNominalEntries(k).build(); - final UpdateSketch expected = thetaBldr.setNominalEntries(k).build(); - - for (int i = 0; i < (u-1); i++) { //one short - measured.update(i, constSummary); - expected.update(i); - } - - double[] jResults = jaccard(measured, expected, factory.newSummary(), dsso); - boolean state = jResults[1] > threshold; - println(state + "\t" + jaccardString(jResults)); - assertTrue(state); - - state = exactlyEqual(measured, expected, factory.newSummary(), dsso); - assertTrue(state); - - measured.update(u-1, constSummary); //now exactly k entries - expected.update(u); //now exactly k entries but differs 
by one - jResults = jaccard(measured, expected, factory.newSummary(), dsso); - state = jResults[1] > threshold; - println(state + "\t" + jaccardString(jResults)); - assertFalse(state); - - state = exactlyEqual(measured, expected, factory.newSummary(), dsso); - assertFalse(state); - - println(""); - } - - @Test - public void checkEstMode1() { // tuple, tuple - int k = 1 << 12; - int u = 1 << 20; - double threshold = 0.9999; - println("Estimation Mode, minK: " + k + "\t Th: " + threshold); - - final UpdatableSketch measured = tupleBldr.setNominalEntries(k).build(); - final UpdatableSketch expected = tupleBldr.setNominalEntries(k).build(); - - for (int i = 0; i < u; i++) { - measured.update(i, constSummary); - expected.update(i, constSummary); - } - - double[] jResults = jaccard(measured, expected, dsso); - boolean state = jResults[1] > threshold; - println(state + "\t" + jaccardString(jResults)); - assertTrue(state); - - state = exactlyEqual(measured, expected, dsso); - assertTrue(state); - - for (int i = u; i < (u + 50); i++) { //empirically determined - measured.update(i, constSummary); - } - - jResults = jaccard(measured, expected, dsso); - state = jResults[1] >= threshold; - println(state + "\t" + jaccardString(jResults)); - assertFalse(state); - - state = exactlyEqual(measured, expected, dsso); - assertFalse(state); - - println(""); - } - - @Test - public void checkEstMode2() { // tuple, theta - int k = 1 << 12; - int u = 1 << 20; - double threshold = 0.9999; - println("Estimation Mode, minK: " + k + "\t Th: " + threshold); - - final UpdatableSketch measured = tupleBldr.setNominalEntries(k).build(); - final UpdateSketch expected = thetaBldr.setNominalEntries(k).build(); - - for (int i = 0; i < u; i++) { - measured.update(i, constSummary); - expected.update(i); - } - - double[] jResults = jaccard(measured, expected, factory.newSummary(), dsso); - boolean state = jResults[1] > threshold; - println(state + "\t" + jaccardString(jResults)); - assertTrue(state); - - 
state = exactlyEqual(measured, expected, factory.newSummary(), dsso); - assertTrue(state); - - for (int i = u; i < (u + 50); i++) { //empirically determined - measured.update(i, constSummary); - } - - jResults = jaccard(measured, expected, factory.newSummary(), dsso); - state = jResults[1] >= threshold; - println(state + "\t" + jaccardString(jResults)); - assertFalse(state); - - state = exactlyEqual(measured, expected, factory.newSummary(), dsso); - assertFalse(state); - - println(""); - } - - /** - * Enable printing on this test and you will see that the distribution is pretty tight, - * about +/- 0.7%, which is pretty good since the accuracy of the underlying sketch is about - * +/- 1.56%. - */ - @Test - public void checkSimilarity1() { // tuple, tuple - int minK = 1 << 12; - int u1 = 1 << 20; - int u2 = (int) (u1 * 0.95); - double threshold = 0.943; - println("Estimation Mode, minK: " + minK + "\t Th: " + threshold); - - final UpdatableSketch measured = tupleBldr.setNominalEntries(minK).build(); - final UpdatableSketch expected = tupleBldr.setNominalEntries(minK).build(); - - for (int i = 0; i < u1; i++) { - expected.update(i, constSummary); - } - - for (int i = 0; i < u2; i++) { - measured.update(i, constSummary); - } - - double[] jResults = jaccard(measured, expected, dsso); - boolean state = similarityTest(measured, expected, dsso, threshold); - println(state + "\t" + jaccardString(jResults)); - assertTrue(state); - //check identity case - state = similarityTest(measured, measured, dsso, threshold); - assertTrue(state); - } - - /** - * Enable printing on this test and you will see that the distribution is pretty tight, - * about +/- 0.7%, which is pretty good since the accuracy of the underlying sketch is about - * +/- 1.56%. 
- */ - @Test - public void checkSimilarity2() { // tuple, theta - int minK = 1 << 12; - int u1 = 1 << 20; - int u2 = (int) (u1 * 0.95); - double threshold = 0.943; - println("Estimation Mode, minK: " + minK + "\t Th: " + threshold); - - final UpdatableSketch measured = tupleBldr.setNominalEntries(minK).build(); - final UpdateSketch expected = thetaBldr.setNominalEntries(minK).build(); - - for (int i = 0; i < u1; i++) { - expected.update(i); - } - - for (int i = 0; i < u2; i++) { - measured.update(i, constSummary); - } - - double[] jResults = jaccard(measured, expected, factory.newSummary(), dsso); - boolean state = similarityTest(measured, expected, factory.newSummary(), dsso, threshold); - println(state + "\t" + jaccardString(jResults)); - assertTrue(state); - //check identity case - state = similarityTest(measured, measured, dsso, threshold); - assertTrue(state); - } - - /** - * Enable printing on this test and you will see that the distribution is much looser, - * about +/- 14%. This is due to the fact that intersections loose accuracy as the ratio of - * intersection to the union becomes a small number. 
- */ - @Test - public void checkDissimilarity1() { // tuple, tuple - int minK = 1 << 12; - int u1 = 1 << 20; - int u2 = (int) (u1 * 0.05); - double threshold = 0.061; - println("Estimation Mode, minK: " + minK + "\t Th: " + threshold); - - final UpdatableSketch measured = tupleBldr.setNominalEntries(minK).setNominalEntries(minK).build(); - final UpdatableSketch expected = tupleBldr.setNominalEntries(minK).setNominalEntries(minK).build(); - - for (int i = 0; i < u1; i++) { - expected.update(i, constSummary); - } - - for (int i = 0; i < u2; i++) { - measured.update(i, constSummary); - } - - double[] jResults = jaccard(measured, expected, dsso); - boolean state = dissimilarityTest(measured, expected, dsso, threshold); - println(state + "\t" + jaccardString(jResults)); - assertTrue(state); - } - - /** - * Enable printing on this test and you will see that the distribution is much looser, - * about +/- 14%. This is due to the fact that intersections loose accuracy as the ratio of - * intersection to the union becomes a small number. 
- */ - @Test - public void checkDissimilarity2() { // tuple, theta - int minK = 1 << 12; - int u1 = 1 << 20; - int u2 = (int) (u1 * 0.05); - double threshold = 0.061; - println("Estimation Mode, minK: " + minK + "\t Th: " + threshold); - - final UpdatableSketch measured = tupleBldr.setNominalEntries(minK).setNominalEntries(minK).build(); - final UpdateSketch expected = thetaBldr.setNominalEntries(minK).build(); - - for (int i = 0; i < u1; i++) { - expected.update(i); - } - - for (int i = 0; i < u2; i++) { - measured.update(i, constSummary); - } - - double[] jResults = jaccard(measured, expected, factory.newSummary(), dsso); - boolean state = dissimilarityTest(measured, expected, factory.newSummary(), dsso, threshold); - println(state + "\t" + jaccardString(jResults)); - assertTrue(state); - } - - private static String jaccardString(double[] jResults) { - double lb = jResults[0]; - double est = jResults[1]; - double ub = jResults[2]; - return lb + "\t" + est + "\t" + ub + "\t" + ((lb/est) - 1.0) + "\t" + ((ub/est) - 1.0); - } - - @Test - public void checkMinK1() { // tuple, tuple - final UpdatableSketch skA = tupleBldr.build(); //4096 - final UpdatableSketch skB = tupleBldr.build(); //4096 - skA.update(1, constSummary); - skB.update(1, constSummary); - double[] result = jaccard(skA, skB, dsso); - println(result[0] + ", " + result[1] + ", " + result[2]); - for (int i = 1; i < 4096; i++) { - skA.update(i, constSummary); - skB.update(i, constSummary); - } - result = jaccard(skA, skB, dsso); - println(result[0] + ", " + result[1] + ", " + result[2]); - } - - @Test - public void checkMinK2() { // tuple, theta - final UpdatableSketch skA = tupleBldr.build(); //4096 - final UpdateSketch skB = UpdateSketch.builder().build(); //4096 - skA.update(1, constSummary); - skB.update(1); - double[] result = jaccard(skA, skB, factory.newSummary(), dsso); - println(result[0] + ", " + result[1] + ", " + result[2]); - for (int i = 1; i < 4096; i++) { - skA.update(i, constSummary); - 
skB.update(i); - } - result = jaccard(skA, skB, factory.newSummary(), dsso); - println(result[0] + ", " + result[1] + ", " + result[2]); - } - - @Test - public void printlnTest() { - println("PRINTING: "+this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(Object o) { - //System.out.println(o.toString()); //disable here - } - -} diff --git a/src/test/java/org/apache/datasketches/tuple2/MiscTest.java b/src/test/java/org/apache/datasketches/tuple2/MiscTest.java deleted file mode 100644 index 6c56c852e..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/MiscTest.java +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2; - -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertTrue; - -import org.apache.datasketches.thetacommon2.SetOperationCornerCases.CornerCase; -import org.apache.datasketches.tuple2.adouble.DoubleSummary; -import org.apache.datasketches.tuple2.adouble.DoubleSummary.Mode; -import org.apache.datasketches.tuple2.adouble.DoubleSummaryFactory; -import org.testng.annotations.Test; - -/** - * @author Lee Rhodes - */ -public class MiscTest { - - @Test - public void checkUpdatableSketchBuilderReset() { - final DoubleSummary.Mode mode = Mode.Sum; - final UpdatableSketchBuilder bldr = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)); - bldr.reset(); - final UpdatableSketch sk = bldr.build(); - assertTrue(sk.isEmpty()); - } - - @Test - public void checkStringToByteArray() { - Util.stringToByteArray(""); - } - - @Test - public void checkDoubleToLongArray() { - final long[] v = Util.doubleToLongArray(-0.0); - assertEquals(v[0], 0); - } - - //@Test - public void checkById() { - final int[] ids = {0,1,2, 5, 6 }; - final int len = ids.length; - for (int i = 0; i < len; i++) { - for (int j = 0; j < len; j++) { - final int id = ids[i] << 3 | ids[j]; - final CornerCase cCase = CornerCase.caseIdToCornerCase(id); - final String interResStr = cCase.getIntersectAction().getActionDescription(); - final String anotbResStr = cCase.getAnotbAction().getActionDescription(); - println(Integer.toOctalString(id) + "\t" + cCase + "\t" + cCase.getCaseDescription() - + "\t" + interResStr + "\t" + anotbResStr); - } - } - } - - @Test - public void checkCopyCtor() { - final DoubleSummary.Mode mode = Mode.Sum; - final UpdatableSketchBuilder bldr = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)); - bldr.reset(); - final UpdatableSketch sk = bldr.build(); - sk.update(1.0, 1.0); - assertEquals(sk.getRetainedEntries(), 1); - final UpdatableSketch sk2 = sk.copy(); - 
assertEquals(sk2.getRetainedEntries(), 1); - } - - - /** - * - * @param o object to print - */ - private static void println(final Object o) { - //System.out.println(o.toString()); //disable here - } - -} diff --git a/src/test/java/org/apache/datasketches/tuple2/ReadOnlyMemorySegmentTest.java b/src/test/java/org/apache/datasketches/tuple2/ReadOnlyMemorySegmentTest.java deleted file mode 100644 index e2eaa3900..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/ReadOnlyMemorySegmentTest.java +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.SketchesReadOnlyException; -import org.apache.datasketches.tuple2.arrayofdoubles.ArrayOfDoublesSetOperationBuilder; -import org.apache.datasketches.tuple2.arrayofdoubles.ArrayOfDoublesSketch; -import org.apache.datasketches.tuple2.arrayofdoubles.ArrayOfDoublesSketches; -import org.apache.datasketches.tuple2.arrayofdoubles.ArrayOfDoublesUnion; -import org.apache.datasketches.tuple2.arrayofdoubles.ArrayOfDoublesUpdatableSketch; -import org.apache.datasketches.tuple2.arrayofdoubles.ArrayOfDoublesUpdatableSketchBuilder; -import org.testng.Assert; -import org.testng.annotations.Test; - -public class ReadOnlyMemorySegmentTest { - - @Test - public void wrapAndTryUpdatingSketch() { - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - sketch1.update(1, new double[] {1}); - final ArrayOfDoublesUpdatableSketch sketch2 = (ArrayOfDoublesUpdatableSketch) - ArrayOfDoublesSketches.wrapSketch(MemorySegment.ofArray(sketch1.toByteArray())); - Assert.assertEquals(sketch2.getEstimate(), 1.0); - sketch2.toByteArray(); - boolean thrown = false; - try { - sketch2.update(2, new double[] {1}); - } catch (final SketchesReadOnlyException e) { - thrown = true; - } - try { - sketch2.trim(); - } catch (final SketchesReadOnlyException e) { - thrown = true; - } - Assert.assertTrue(thrown); - } - - @Test - public void heapifyAndUpdateSketch() { - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - sketch1.update(1, new double[] {1}); - // downcasting is not recommended, for testing only - final ArrayOfDoublesUpdatableSketch sketch2 = (ArrayOfDoublesUpdatableSketch) - ArrayOfDoublesSketches.heapifySketch(MemorySegment.ofArray(sketch1.toByteArray())); - sketch2.update(2, new double[] {1}); - Assert.assertEquals(sketch2.getEstimate(), 2.0); - } - - @Test - public void 
wrapAndTryUpdatingUnionEstimationMode() { - final int numUniques = 10000; - int key = 0; - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < numUniques; i++) { - sketch1.update(key++, new double[] {1}); - } - final ArrayOfDoublesUnion union1 = new ArrayOfDoublesSetOperationBuilder().buildUnion(); - union1.union(sketch1); - final ArrayOfDoublesUnion union2 = ArrayOfDoublesSketches.wrapUnion(MemorySegment.ofArray(union1.toByteArray()).asReadOnly()); - final ArrayOfDoublesSketch resultSketch = union2.getResult(); - Assert.assertTrue(resultSketch.isEstimationMode()); - Assert.assertEquals(resultSketch.getEstimate(), numUniques, numUniques * 0.04); - - // make sure union update actually needs to modify the union - final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < numUniques; i++) { - sketch2.update(key++, new double[] {1}); - } - - boolean thrown = false; - try { - union2.union(sketch2); - } catch (final SketchesReadOnlyException e) { - thrown = true; - } - Assert.assertTrue(thrown); - } - - @Test - public void heapifyAndUpdateUnion() { - final int numUniques = 10000; - int key = 0; - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < numUniques; i++) { - sketch1.update(key++, new double[] {1}); - } - final ArrayOfDoublesUnion union1 = new ArrayOfDoublesSetOperationBuilder().buildUnion(); - union1.union(sketch1); - final ArrayOfDoublesUnion union2 = ArrayOfDoublesSketches.heapifyUnion(MemorySegment.ofArray(union1.toByteArray())); - final ArrayOfDoublesSketch resultSketch = union2.getResult(); - Assert.assertTrue(resultSketch.isEstimationMode()); - Assert.assertEquals(resultSketch.getEstimate(), numUniques, numUniques * 0.04); - - // make sure union update actually needs to modify the union - final ArrayOfDoublesUpdatableSketch sketch2 = new 
ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < numUniques; i++) { - sketch2.update(key++, new double[] {1}); - } - union2.union(sketch2); - } - -} diff --git a/src/test/java/org/apache/datasketches/tuple2/SerializerDeserializerTest.java b/src/test/java/org/apache/datasketches/tuple2/SerializerDeserializerTest.java deleted file mode 100644 index 7fab43ac4..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/SerializerDeserializerTest.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2; - -import static org.apache.datasketches.common.Util.computeSeedHash; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.SketchesArgumentException; -import org.testng.Assert; -import org.testng.annotations.Test; - -public class SerializerDeserializerTest { - - @Test - public void validSketchType() { - byte[] bytes = new byte[4]; - bytes[SerializerDeserializer.TYPE_BYTE_OFFSET] = (byte) SerializerDeserializer.SketchType.CompactSketch.ordinal(); - Assert.assertEquals(SerializerDeserializer.getSketchType(MemorySegment.ofArray(bytes)), SerializerDeserializer.SketchType.CompactSketch); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void invalidSketchType() { - byte[] bytes = new byte[4]; - bytes[SerializerDeserializer.TYPE_BYTE_OFFSET] = 33; - SerializerDeserializer.getSketchType(MemorySegment.ofArray(bytes)); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void validateFamilyNotTuple() { - SerializerDeserializer.validateFamily((byte) 1, (byte) 0); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void validateFamilyWrongPreambleLength() { - SerializerDeserializer.validateFamily((byte) Family.TUPLE.getID(), (byte) 0); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkBadSeedHash() { - computeSeedHash(50541); - } -} diff --git a/src/test/java/org/apache/datasketches/tuple2/TupleCrossLanguageTest.java b/src/test/java/org/apache/datasketches/tuple2/TupleCrossLanguageTest.java deleted file mode 100644 index 196c82510..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/TupleCrossLanguageTest.java +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2; - -import static org.apache.datasketches.common.TestUtil.CHECK_CPP_FILES; -import static org.apache.datasketches.common.TestUtil.CHECK_CPP_HISTORICAL_FILES; -import static org.apache.datasketches.common.TestUtil.GENERATE_JAVA_FILES; -import static org.apache.datasketches.common.TestUtil.cppPath; -import static org.apache.datasketches.common.TestUtil.javaPath; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertTrue; - -import java.lang.foreign.MemorySegment; -import java.io.IOException; -import java.nio.file.Files; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.TestUtil; -import org.apache.datasketches.tuple2.adouble.DoubleSummary; -import org.apache.datasketches.tuple2.adouble.DoubleSummaryDeserializer; -import org.apache.datasketches.tuple2.arrayofdoubles.ArrayOfDoublesUnion; -import org.testng.Assert; -import org.testng.annotations.Test; - -public class TupleCrossLanguageTest { - - @Test(groups = {CHECK_CPP_HISTORICAL_FILES}) - public void serialVersion1Compatibility() { - final byte[] byteArr = TestUtil.getResourceBytes("CompactSketchWithDoubleSummary4K_serialVersion1.sk"); - Sketch sketch = 
Sketches.heapifySketch(MemorySegment.ofArray(byteArr), new DoubleSummaryDeserializer()); - Assert.assertTrue(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 8192, 8192 * 0.99); - Assert.assertEquals(sketch.getRetainedEntries(), 4096); - int count = 0; - TupleSketchIterator it = sketch.iterator(); - while (it.next()) { - Assert.assertEquals(it.getSummary().getValue(), 1.0); - count++; - } - Assert.assertEquals(count, 4096); - } - - @Test(groups = {CHECK_CPP_HISTORICAL_FILES}) - public void version2Compatibility() { - final byte[] byteArr = TestUtil.getResourceBytes("TupleWithTestIntegerSummary4kTrimmedSerVer2.sk"); - Sketch sketch1 = Sketches.heapifySketch(MemorySegment.ofArray(byteArr), new IntegerSummaryDeserializer()); - - // construct the same way - final int lgK = 12; - final int K = 1 << lgK; - final UpdatableSketchBuilder builder = - new UpdatableSketchBuilder<>(new IntegerSummaryFactory()); - final UpdatableSketch updatableSketch = builder.build(); - for (int i = 0; i < 2 * K; i++) { - updatableSketch.update(i, 1); - } - updatableSketch.trim(); - Sketch sketch2 = updatableSketch.compact(); - - Assert.assertEquals(sketch1.getRetainedEntries(), sketch2.getRetainedEntries()); - Assert.assertEquals(sketch1.getThetaLong(), sketch2.getThetaLong()); - Assert.assertEquals(sketch1.isEmpty(), sketch2.isEmpty()); - Assert.assertEquals(sketch1.isEstimationMode(), sketch2.isEstimationMode()); - } - - @Test(groups = {CHECK_CPP_FILES}) - public void deserializeFromCppIntegerSummary() throws IOException { - final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000}; - for (int n: nArr) { - final byte[] bytes = Files.readAllBytes(cppPath.resolve("tuple_int_n" + n + "_cpp.sk")); - final Sketch sketch = - Sketches.heapifySketch(MemorySegment.ofArray(bytes), new IntegerSummaryDeserializer()); - assertTrue(n == 0 ? sketch.isEmpty() : !sketch.isEmpty()); - assertTrue(n > 1000 ? 
sketch.isEstimationMode() : !sketch.isEstimationMode()); - assertEquals(sketch.getEstimate(), n, n * 0.03); - final TupleSketchIterator it = sketch.iterator(); - while (it.next()) { - assertTrue(it.getHash() < sketch.getThetaLong()); - assertTrue(it.getSummary().getValue() < n); - } - } - } - - @Test(groups = {GENERATE_JAVA_FILES}) - public void generateForCppIntegerSummary() throws IOException { - final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000}; - for (int n: nArr) { - final UpdatableSketch sk = - new UpdatableSketchBuilder<>(new IntegerSummaryFactory()).build(); - for (int i = 0; i < n; i++) { - sk.update(i, i); - } - Files.newOutputStream(javaPath.resolve("tuple_int_n" + n + "_java.sk")).write(sk.compact().toByteArray()); - } - } - - @Test(expectedExceptions = SketchesArgumentException.class, groups = {CHECK_CPP_HISTORICAL_FILES}) - public void noSupportHeapifyV0_9_1() throws Exception { - final byte[] byteArr = TestUtil.getResourceBytes("ArrayOfDoublesUnion_v0.9.1.sk"); - ArrayOfDoublesUnion.heapify(MemorySegment.ofArray(byteArr)); - } - - @Test(expectedExceptions = SketchesArgumentException.class, groups = {CHECK_CPP_HISTORICAL_FILES}) - public void noSupportWrapV0_9_1() throws Exception { - final byte[] byteArr = TestUtil.getResourceBytes("ArrayOfDoublesUnion_v0.9.1.sk"); - ArrayOfDoublesUnion.wrap(MemorySegment.ofArray(byteArr)); - } - -} diff --git a/src/test/java/org/apache/datasketches/tuple2/TupleExamples2Test.java b/src/test/java/org/apache/datasketches/tuple2/TupleExamples2Test.java deleted file mode 100644 index 8cc8b7975..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/TupleExamples2Test.java +++ /dev/null @@ -1,287 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2; - - import static org.testng.Assert.assertEquals; - -import org.apache.datasketches.theta2.UpdateSketch; -import org.apache.datasketches.theta2.UpdateSketchBuilder; -import org.apache.datasketches.tuple2.adouble.DoubleSummary; -import org.apache.datasketches.tuple2.adouble.DoubleSummary.Mode; -import org.apache.datasketches.tuple2.adouble.DoubleSummaryFactory; -import org.apache.datasketches.tuple2.adouble.DoubleSummarySetOperations; -import org.testng.annotations.Test; - - /** - * Tests for Version 2.0.0 - * @author Lee Rhodes - */ - public class TupleExamples2Test { - private final DoubleSummary.Mode umode = Mode.Sum; - private final DoubleSummary.Mode imode = Mode.AlwaysOne; - private final DoubleSummarySetOperations dsso0 = new DoubleSummarySetOperations(); - private final DoubleSummarySetOperations dsso1 = new DoubleSummarySetOperations(umode); - private final DoubleSummarySetOperations dsso2 = new DoubleSummarySetOperations(umode, imode); - private final DoubleSummaryFactory ufactory = new DoubleSummaryFactory(umode); - private final DoubleSummaryFactory ifactory = new DoubleSummaryFactory(imode); - private final UpdateSketchBuilder thetaBldr = UpdateSketch.builder(); - private final UpdatableSketchBuilder tupleBldr = - new UpdatableSketchBuilder<>(ufactory); - - - @Test - public void example1() { // stateful: tuple, theta, use 
dsso2 - //Load source sketches - final UpdatableSketch tupleSk = tupleBldr.build(); - final UpdateSketch thetaSk = thetaBldr.build(); - for (int i = 1; i <= 12; i++) { - tupleSk.update(i, 1.0); - thetaSk.update(i + 3); - } - - //Union - final Union union = new Union<>(dsso2); - union.union(tupleSk); - union.union(thetaSk, ufactory.newSummary().update(1.0)); - final CompactSketch ucsk = union.getResult(); - int entries = ucsk.getRetainedEntries(); - println("Union Stateful: tuple, theta: " + entries); - final TupleSketchIterator uiter = ucsk.iterator(); - int counter = 1; - int twos = 0; - int ones = 0; - while (uiter.next()) { - final int i = (int)uiter.getSummary().getValue(); - println(counter++ + ", " + i); //9 entries = 2, 6 entries = 1 - if (i == 1) { ones++; } - if (i == 2) { twos++; } - } - assertEquals(ones, 6); - assertEquals(twos, 9); - - //Intersection - final Intersection inter = new Intersection<>(dsso2); - inter.intersect(tupleSk); - inter.intersect(thetaSk, ifactory.newSummary().update(1.0)); - final CompactSketch icsk = inter.getResult(); - entries = icsk.getRetainedEntries(); - println("Intersection Stateful: tuple, theta: " + entries); - final TupleSketchIterator iiter = icsk.iterator(); - counter = 1; - while (iiter.next()) { - final int i = (int)iiter.getSummary().getValue(); - println(counter++ + ", " + i); //9 entries = 1 - assertEquals(i, 1); - } - } - - @Test - public void example2() { //stateless: tuple1, tuple2, use dsso2 - //Load source sketches - final UpdatableSketch tupleSk1 = tupleBldr.build(); - final UpdatableSketch tupleSk2 = tupleBldr.build(); - - for (int i = 1; i <= 12; i++) { - tupleSk1.update(i, 1.0); - tupleSk2.update(i + 3, 1.0); - } - - //Union - final Union union = new Union<>(dsso2); - final CompactSketch ucsk = union.union(tupleSk1, tupleSk2); - int entries = ucsk.getRetainedEntries(); - println("Union: " + entries); - final TupleSketchIterator uiter = ucsk.iterator(); - int counter = 1; - int twos = 0; - int ones = 0; - 
while (uiter.next()) { - final int i = (int)uiter.getSummary().getValue(); - println(counter++ + ", " + i); //9 entries = 2, 6 entries = 1 - if (i == 1) { ones++; } - if (i == 2) { twos++; } - } - assertEquals(ones, 6); - assertEquals(twos, 9); - - //Intersection - final Intersection inter = new Intersection<>(dsso2); - final CompactSketch icsk = inter.intersect(tupleSk1, tupleSk2); - entries = icsk.getRetainedEntries(); - println("Intersection: " + entries); - final TupleSketchIterator iiter = icsk.iterator(); - counter = 1; - while (iiter.next()) { - final int i = (int)iiter.getSummary().getValue(); - println(counter++ + ", " + i); //9 entries = 2 - assertEquals(i, 1); - } - } - - @Test - public void example3() { //stateless: tuple1, tuple2, use dsso2 - //Load source sketches - final UpdatableSketch tupleSk = tupleBldr.build(); - final UpdateSketch thetaSk = thetaBldr.build(); - for (int i = 1; i <= 12; i++) { - tupleSk.update(i, 1.0); - thetaSk.update(i + 3); - } - - //Union - final Union union = new Union<>(dsso2); - final CompactSketch ucsk = - union.union(tupleSk, thetaSk, ufactory.newSummary().update(1.0)); - int entries = ucsk.getRetainedEntries(); - println("Union: " + entries); - final TupleSketchIterator uiter = ucsk.iterator(); - int counter = 1; - int twos = 0; - int ones = 0; - while (uiter.next()) { - final int i = (int)uiter.getSummary().getValue(); - println(counter++ + ", " + i); //9 entries = 2, 6 entries = 1 - if (i == 1) { ones++; } - if (i == 2) { twos++; } - } - assertEquals(ones, 6); - assertEquals(twos, 9); - - //Intersection - final Intersection inter = new Intersection<>(dsso2); - final CompactSketch icsk = - inter.intersect(tupleSk, thetaSk, ufactory.newSummary().update(1.0)); - entries = icsk.getRetainedEntries(); - println("Intersection: " + entries); - final TupleSketchIterator iiter = icsk.iterator(); - counter = 1; - while (iiter.next()) { - final int i = (int)iiter.getSummary().getValue(); - println(counter++ + ", " + i); //9 
entries = 2 - assertEquals(i, 1); - } - } - - @Test - public void example4() { //stateful: tuple, theta, Mode=sum for both, use dsso0 - //Load source sketches - final UpdatableSketch tupleSk = tupleBldr.build(); - final UpdateSketch thetaSk = thetaBldr.build(); - for (int i = 1; i <= 12; i++) { - tupleSk.update(i, 1.0); - thetaSk.update(i + 3); - } - - //Union - final Union union = new Union<>(dsso0); - union.union(tupleSk); - union.union(thetaSk, ufactory.newSummary().update(1.0)); - final CompactSketch ucsk = union.getResult(); - int entries = ucsk.getRetainedEntries(); - println("Union Stateful: tuple, theta: " + entries); - final TupleSketchIterator uiter = ucsk.iterator(); - int counter = 1; - int twos = 0; - int ones = 0; - while (uiter.next()) { - final int i = (int)uiter.getSummary().getValue(); - println(counter++ + ", " + i); //9 entries = 2, 6 entries = 1 - if (i == 1) { ones++; } - if (i == 2) { twos++; } - } - assertEquals(ones, 6); - assertEquals(twos, 9); - - //Intersection - final Intersection inter = new Intersection<>(dsso0); - inter.intersect(tupleSk); - inter.intersect(thetaSk, ifactory.newSummary().update(1.0)); - final CompactSketch icsk = inter.getResult(); - entries = icsk.getRetainedEntries(); - println("Intersection Stateful: tuple, theta: " + entries); - final TupleSketchIterator iiter = icsk.iterator(); - counter = 1; - while (iiter.next()) { - final int i = (int)iiter.getSummary().getValue(); - println(counter++ + ", " + i); //9 entries = 1 - assertEquals(i, 2); - } - } - - @Test - public void example5() { //stateful, tuple, theta, Mode=sum for both, use dsso1 - //Load source sketches - final UpdatableSketch tupleSk = tupleBldr.build(); - final UpdateSketch thetaSk = thetaBldr.build(); - for (int i = 1; i <= 12; i++) { - tupleSk.update(i, 1.0); - thetaSk.update(i + 3); - } - - //Union - final Union union = new Union<>(dsso1); - union.union(tupleSk); - union.union(thetaSk, ufactory.newSummary().update(1.0)); - final CompactSketch ucsk = 
union.getResult(); - int entries = ucsk.getRetainedEntries(); - println("Union Stateful: tuple, theta: " + entries); - final TupleSketchIterator uiter = ucsk.iterator(); - int counter = 1; - int twos = 0; - int ones = 0; - while (uiter.next()) { - final int i = (int)uiter.getSummary().getValue(); - println(counter++ + ", " + i); //9 entries = 2, 6 entries = 1 - if (i == 1) { ones++; } - if (i == 2) { twos++; } - } - assertEquals(ones, 6); - assertEquals(twos, 9); - - //Intersection - final Intersection inter = new Intersection<>(dsso1); - inter.intersect(tupleSk); - inter.intersect(thetaSk, ifactory.newSummary().update(1.0)); - final CompactSketch icsk = inter.getResult(); - entries = icsk.getRetainedEntries(); - println("Intersection Stateful: tuple, theta: " + entries); - final TupleSketchIterator iiter = icsk.iterator(); - counter = 1; - while (iiter.next()) { - final int i = (int)iiter.getSummary().getValue(); - println(counter++ + ", " + i); //9 entries = 1 - assertEquals(i, 2); - } - } - - - @Test - public void printlnTest() { - println("PRINTING: "+this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(final String s) { - //System.out.println(s); //enable/disable here - } - -} diff --git a/src/test/java/org/apache/datasketches/tuple2/TupleExamplesTest.java b/src/test/java/org/apache/datasketches/tuple2/TupleExamplesTest.java deleted file mode 100644 index a1409ac37..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/TupleExamplesTest.java +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2; - -import static org.testng.Assert.assertEquals; - -import org.apache.datasketches.theta2.UpdateSketch; -import org.apache.datasketches.theta2.UpdateSketchBuilder; -import org.apache.datasketches.tuple2.aninteger.IntegerSummary; -import org.apache.datasketches.tuple2.aninteger.IntegerSummary.Mode; -import org.apache.datasketches.tuple2.aninteger.IntegerSummaryFactory; -import org.apache.datasketches.tuple2.aninteger.IntegerSummarySetOperations; -import org.testng.annotations.Test; - -/** - * Tests for Version 2.0.0 - * @author Lee Rhodes - */ -public class TupleExamplesTest { - private final IntegerSummary.Mode umode = Mode.Sum; - private final IntegerSummary.Mode imode = Mode.AlwaysOne; - private final IntegerSummarySetOperations isso = new IntegerSummarySetOperations(umode, imode); - private final IntegerSummaryFactory ufactory = new IntegerSummaryFactory(umode); - private final IntegerSummaryFactory ifactory = new IntegerSummaryFactory(imode); - private final UpdateSketchBuilder thetaBldr = UpdateSketch.builder(); - private final UpdatableSketchBuilder tupleBldr = - new UpdatableSketchBuilder<>(ufactory); - - - @Test - public void example1() { - //Load source sketches - final UpdatableSketch tupleSk = tupleBldr.build(); - final UpdateSketch thetaSk = thetaBldr.build(); - for (int i = 1; i <= 12; i++) { - tupleSk.update(i, 1); - thetaSk.update(i + 3); - } - - //Union stateful: tuple, theta - final Union union = new Union<>(isso); - union.union(tupleSk); - union.union(thetaSk, 
ufactory.newSummary().update(1)); - final CompactSketch ucsk = union.getResult(); - int entries = ucsk.getRetainedEntries(); - println("Union Stateful: tuple, theta: " + entries); - final TupleSketchIterator uiter = ucsk.iterator(); - int counter = 1; - int twos = 0; - int ones = 0; - while (uiter.next()) { - final int i = uiter.getSummary().getValue(); - println(counter++ + ", " + i); //9 entries = 2, 6 entries = 1 - if (i == 1) { ones++; } - if (i == 2) { twos++; } - } - assertEquals(ones, 6); - assertEquals(twos, 9); - - //Intersection stateful: tuple, theta - final Intersection inter = new Intersection<>(isso); - inter.intersect(tupleSk); - inter.intersect(thetaSk, ifactory.newSummary().update(1)); - final CompactSketch icsk = inter.getResult(); - entries = icsk.getRetainedEntries(); - println("Intersection Stateful: tuple, theta: " + entries); - final TupleSketchIterator iiter = icsk.iterator(); - counter = 1; - while (iiter.next()) { - final int i = iiter.getSummary().getValue(); - println(counter++ + ", " + i); //9 entries = 1 - assertEquals(i, 1); - } - } - - @Test - public void example2() { - //Load source sketches - final UpdatableSketch tupleSk1 = tupleBldr.build(); - final UpdatableSketch tupleSk2 = tupleBldr.build(); - - for (int i = 1; i <= 12; i++) { - tupleSk1.update(i, 1); - tupleSk2.update(i + 3, 1); - } - - //Union, stateless: tuple1, tuple2 - final Union union = new Union<>(isso); - final CompactSketch ucsk = union.union(tupleSk1, tupleSk2); - int entries = ucsk.getRetainedEntries(); - println("Union: " + entries); - final TupleSketchIterator uiter = ucsk.iterator(); - int counter = 1; - int twos = 0; - int ones = 0; - while (uiter.next()) { - final int i = uiter.getSummary().getValue(); - println(counter++ + ", " + i); //9 entries = 2, 6 entries = 1 - if (i == 1) { ones++; } - if (i == 2) { twos++; } - } - assertEquals(ones, 6); - assertEquals(twos, 9); - - //Intersection stateless: tuple1, tuple2 - final Intersection inter = new 
Intersection<>(isso); - final CompactSketch icsk = inter.intersect(tupleSk1, tupleSk2); - entries = icsk.getRetainedEntries(); - println("Intersection: " + entries); - final TupleSketchIterator iiter = icsk.iterator(); - counter = 1; - while (iiter.next()) { - final int i = iiter.getSummary().getValue(); - println(counter++ + ", " + i); //9 entries = 2 - assertEquals(i, 1); - } - } - - @Test - public void example3() { - //Load source sketches - final UpdatableSketch tupleSk = tupleBldr.build(); - final UpdateSketch thetaSk = thetaBldr.build(); - for (int i = 1; i <= 12; i++) { - tupleSk.update(i, 1); - thetaSk.update(i + 3); - } - - //Union, stateless: tuple1, tuple2 - final Union union = new Union<>(isso); - final CompactSketch ucsk = - union.union(tupleSk, thetaSk, ufactory.newSummary().update(1)); - int entries = ucsk.getRetainedEntries(); - println("Union: " + entries); - final TupleSketchIterator uiter = ucsk.iterator(); - int counter = 1; - int twos = 0; - int ones = 0; - while (uiter.next()) { - final int i = uiter.getSummary().getValue(); - println(counter++ + ", " + i); //9 entries = 2, 6 entries = 1 - if (i == 1) { ones++; } - if (i == 2) { twos++; } - } - assertEquals(ones, 6); - assertEquals(twos, 9); - - //Intersection stateless: tuple1, tuple2 - final Intersection inter = new Intersection<>(isso); - final CompactSketch icsk = - inter.intersect(tupleSk, thetaSk, ufactory.newSummary().update(1)); - entries = icsk.getRetainedEntries(); - println("Intersection: " + entries); - final TupleSketchIterator iiter = icsk.iterator(); - counter = 1; - while (iiter.next()) { - final int i = iiter.getSummary().getValue(); - println(counter++ + ", " + i); //9 entries = 2 - assertEquals(i, 1); - } - } - - @Test - public void printlnTest() { - println("PRINTING: "+this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(final String s) { - //System.out.println(s); //enable/disable here - } -} diff --git 
a/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleAnotBTest.java b/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleAnotBTest.java deleted file mode 100644 index 8951c528c..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleAnotBTest.java +++ /dev/null @@ -1,299 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2.adouble; - -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertTrue; -import static org.testng.Assert.fail; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.theta2.UpdateSketch; -import org.apache.datasketches.theta2.UpdateSketchBuilder; -import org.apache.datasketches.tuple2.AnotB; -import org.apache.datasketches.tuple2.CompactSketch; -import org.apache.datasketches.tuple2.Sketch; -import org.apache.datasketches.tuple2.TupleSketchIterator; -import org.apache.datasketches.tuple2.UpdatableSketch; -import org.apache.datasketches.tuple2.UpdatableSketchBuilder; -import org.apache.datasketches.tuple2.adouble.DoubleSummary.Mode; -import org.testng.Assert; -import org.testng.annotations.Test; - -/** - * @author Lee Rhodes - */ -public class AdoubleAnotBTest { - private static final DoubleSummary.Mode mode = Mode.Sum; - private final Results results = new Results(); - - private static void threeMethodsWithTheta( - final AnotB aNotB, - final Sketch skA, - final Sketch skB, - final org.apache.datasketches.theta2.Sketch skThetaB, - final Results results) - { - CompactSketch result; - - //Stateful, A = Tuple, B = Tuple - if (skA != null) { - try { - aNotB.setA(skA); - aNotB.notB(skB); - result = aNotB.getResult(true); - results.check(result); - } - catch (final SketchesArgumentException e) { } - } - - //Stateless A = Tuple, B = Tuple - if (skA == null || skB == null) { - try { - result = AnotB.aNotB(skA, skB); - fail(); - } - catch (final SketchesArgumentException e) { } - } else { - result = AnotB.aNotB(skA, skB); - results.check(result); - } - - //Stateless A = Tuple, B = Theta - if (skA == null || skThetaB == null) { - try { result = AnotB.aNotB(skA, skThetaB); fail(); } - catch (final SketchesArgumentException e) { } - } else { - result = AnotB.aNotB(skA, skThetaB); - results.check(result); - } - - //Stateful A = Tuple, B = Tuple - if (skA == 
null) { - try { aNotB.setA(skA); fail(); } - catch (final SketchesArgumentException e) { } - } else { - aNotB.setA(skA); - aNotB.notB(skB); - result = aNotB.getResult(true); - results.check(result); - } - - //Stateful A = Tuple, B = Theta - if (skA == null) { - try { aNotB.setA(skA); fail(); } - catch (final SketchesArgumentException e) { } - } else { - aNotB.setA(skA); - aNotB.notB(skThetaB); - result = aNotB.getResult(false); - results.check(result); - result = aNotB.getResult(true); - results.check(result); - } - } - - private static class Results { - private int retEnt = 0; - private boolean empty = true; - private double expect = 0.0; - private double tol = 0.0; - private double sum = 0.0; - - Results() {} - - Results set(final int retEnt, final boolean empty, - final double expect, final double tol, final double sum) { - this.retEnt = retEnt; //retained Entries - this.empty = empty; - this.expect = expect; //expected estimate - this.tol = tol; //tolerance - this.sum = sum; - return this; - } - - void check(final CompactSketch result) { - assertEquals(result.getRetainedEntries(), retEnt); - assertEquals(result.isEmpty(), empty); - if (result.getTheta() < 1.0) { - final double est = result.getEstimate(); - assertEquals(est, expect, expect * tol); - assertTrue(result.getUpperBound(1) > est); - assertTrue(result.getLowerBound(1) <= est); - } else { - assertEquals(result.getEstimate(), expect, 0.0); - assertEquals(result.getUpperBound(1), expect, 0.0); - assertEquals(result.getLowerBound(1), expect, 0.0); - } - final TupleSketchIterator it = result.iterator(); - while (it.next()) { - Assert.assertEquals(it.getSummary().getValue(), sum); - } - } - } //End class Results - - private static UpdatableSketch buildUpdatableTuple() { - return new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); - } - - private static UpdateSketch buildUpdateTheta() { - return new UpdateSketchBuilder().build(); - } - - /*****************************************/ - - @Test 
- public void aNotBNullEmptyCombinations() { - final AnotB aNotB = new AnotB<>(); - // calling getResult() before calling update() should yield an empty set - final CompactSketch result = aNotB.getResult(true); - results.set(0, true, 0.0, 0.0, 0.0).check(result); - - final UpdatableSketch sketch = buildUpdatableTuple(); - final UpdateSketch skTheta = buildUpdateTheta(); - - threeMethodsWithTheta(aNotB, null, null, null, results); - threeMethodsWithTheta(aNotB, sketch, null, null, results); - threeMethodsWithTheta(aNotB, null, sketch, null, results); - threeMethodsWithTheta(aNotB, sketch, sketch, null, results); - threeMethodsWithTheta(aNotB, null, null, skTheta, results); - threeMethodsWithTheta(aNotB, sketch, null, skTheta, results); - threeMethodsWithTheta(aNotB, null, sketch, skTheta, results); - threeMethodsWithTheta(aNotB, sketch, sketch, skTheta, results); - } - - @Test - public void aNotBCheckDoubleSetAs() { - final UpdatableSketch skA = buildUpdatableTuple(); - skA.update(1, 1.0); - skA.update(2, 1.0); - final UpdatableSketch skA2 = buildUpdatableTuple(); - final AnotB aNotB = new AnotB<>(); - aNotB.setA(skA); - assertEquals(aNotB.getResult(false).isEmpty(), false); - aNotB.setA(skA2); - assertEquals(aNotB.getResult(false).isEmpty(), true); - } - - @Test - public void aNotBEmptyExact() { - final UpdatableSketch sketchA = buildUpdatableTuple(); - final UpdatableSketch sketchB = buildUpdatableTuple(); - sketchB.update(1, 1.0); - sketchB.update(2, 1.0); - final UpdateSketch skThetaB = buildUpdateTheta(); - skThetaB.update(1); - skThetaB.update(2); - - final AnotB aNotB = new AnotB<>(); - results.set(0, true, 0.0, 0.0, 0.0); - threeMethodsWithTheta(aNotB, sketchA, sketchB, skThetaB, results); - } - - @Test - public void aNotBExactEmpty() { - final UpdatableSketch sketchA = buildUpdatableTuple(); - sketchA.update(1, 1.0); - sketchA.update(2, 1.0); - final UpdatableSketch sketchB = buildUpdatableTuple(); - final UpdateSketch skThetaB = buildUpdateTheta(); - - 
final AnotB aNotB = new AnotB<>(); - results.set(2, false, 2.0, 0.0, 1.0); - threeMethodsWithTheta(aNotB, sketchA, sketchB, skThetaB, results); - - // same thing, but compact sketches - threeMethodsWithTheta(aNotB, sketchA.compact(), sketchB.compact(), skThetaB.compact(), results); - } - - @Test - public void aNotBExactOverlap() { - final UpdatableSketch sketchA = buildUpdatableTuple(); - sketchA.update(1, 1.0); - sketchA.update(1, 1.0); - sketchA.update(2, 1.0); - sketchA.update(2, 1.0); - - final UpdatableSketch sketchB = buildUpdatableTuple(); - sketchB.update(2, 1.0); - sketchB.update(2, 1.0); - sketchB.update(3, 1.0); - sketchB.update(3, 1.0); - - final UpdateSketch skThetaB = buildUpdateTheta(); - skThetaB.update(2); - skThetaB.update(3); - - final AnotB aNotB = new AnotB<>(); - results.set(1, false, 1.0, 0.0, 2.0); - threeMethodsWithTheta(aNotB, sketchA, sketchB, skThetaB, results); - } - - @Test - public void aNotBEstimationOverlap() { - final UpdatableSketch sketchA = buildUpdatableTuple(); - for (int i = 0; i < 8192; i++) { - sketchA.update(i, 1.0); - } - - final UpdatableSketch sketchB = buildUpdatableTuple(); - for (int i = 0; i < 4096; i++) { - sketchB.update(i, 1.0); - } - - final UpdateSketch skThetaB = buildUpdateTheta(); - for (int i = 0; i < 4096; i++) { - skThetaB.update(i); - } - - final AnotB aNotB = new AnotB<>(); - results.set(2123, false, 4096.0, 0.03, 1.0); - threeMethodsWithTheta(aNotB, sketchA, sketchB, skThetaB, results); - - // same thing, but compact sketches - threeMethodsWithTheta(aNotB, sketchA.compact(), sketchB.compact(), skThetaB.compact(), results); - } - - @Test - public void aNotBEstimationOverlapLargeB() { - final UpdatableSketch sketchA = buildUpdatableTuple(); - for (int i = 0; i < 10_000; i++) { - sketchA.update(i, 1.0); - } - - final UpdatableSketch sketchB = buildUpdatableTuple(); - for (int i = 0; i < 100_000; i++) { - sketchB.update(i + 8000, 1.0); - } - - final UpdateSketch skThetaB = buildUpdateTheta(); - for (int i 
= 0; i < 100_000; i++) { - skThetaB.update(i + 8000); - } - - final int expected = 8_000; - final AnotB aNotB = new AnotB<>(); - results.set(376, false, expected, 0.1, 1.0); - threeMethodsWithTheta(aNotB, sketchA, sketchB, skThetaB, results); - - // same thing, but compact sketches - threeMethodsWithTheta(aNotB, sketchA.compact(), sketchB.compact(), skThetaB.compact(), results); - } - -} diff --git a/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleIntersectionTest.java b/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleIntersectionTest.java deleted file mode 100644 index 1a85334fc..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleIntersectionTest.java +++ /dev/null @@ -1,305 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2.adouble; - -import static org.testng.Assert.fail; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.SketchesStateException; -import org.apache.datasketches.theta2.UpdateSketch; -import org.apache.datasketches.theta2.UpdateSketchBuilder; -import org.apache.datasketches.tuple2.CompactSketch; -import org.apache.datasketches.tuple2.Intersection; -import org.apache.datasketches.tuple2.Sketch; -import org.apache.datasketches.tuple2.TupleSketchIterator; -import org.apache.datasketches.tuple2.Sketches; -import org.apache.datasketches.tuple2.UpdatableSketch; -import org.apache.datasketches.tuple2.UpdatableSketchBuilder; -import org.apache.datasketches.tuple2.adouble.DoubleSummary.Mode; -import org.testng.Assert; -import org.testng.annotations.Test; - -/** - * @author Lee Rhodes - */ -public class AdoubleIntersectionTest { - private final DoubleSummary.Mode mode = Mode.Sum; - - @Test - public void intersectionNotEmptyNoEntries() { - final UpdatableSketch sketch1 = - new UpdatableSketchBuilder<> - (new DoubleSummaryFactory(mode)).setSamplingProbability(0.01f).build(); - sketch1.update("a", 1.0); // this happens to get rejected because of sampling with low probability - final Intersection intersection = - new Intersection<>(new DoubleSummarySetOperations(mode, mode)); - intersection.intersect(sketch1); - final CompactSketch result = intersection.getResult(); - Assert.assertEquals(result.getRetainedEntries(), 0); - Assert.assertFalse(result.isEmpty()); - Assert.assertEquals(result.getEstimate(), 0.0); - Assert.assertEquals(result.getLowerBound(1), 0.0, 0.0001); - Assert.assertTrue(result.getUpperBound(1) > 0); - } - - @Test - public void intersectionExactWithEmpty() { - final UpdatableSketch sketch1 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); - sketch1.update(1, 1.0); - sketch1.update(2, 1.0); - sketch1.update(3, 1.0); - - final Sketch sketch2 = 
Sketches.createEmptySketch(); - - final Intersection intersection = - new Intersection<>(new DoubleSummarySetOperations(mode, mode)); - intersection.intersect(sketch1); - intersection.intersect(sketch2); - final CompactSketch result = intersection.getResult(); - Assert.assertEquals(result.getRetainedEntries(), 0); - Assert.assertTrue(result.isEmpty()); - Assert.assertEquals(result.getEstimate(), 0.0); - Assert.assertEquals(result.getLowerBound(1), 0.0); - Assert.assertEquals(result.getUpperBound(1), 0.0); - } - - @Test - public void intersectionExactMode() { - UpdatableSketch sketch1 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); - sketch1.update(1, 1.0); - sketch1.update(1, 1.0); - sketch1.update(2, 1.0); - sketch1.update(2, 1.0); - - final UpdatableSketch sketch2 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); - sketch2.update(2, 1.0); - sketch2.update(2, 1.0); - sketch2.update(3, 1.0); - sketch2.update(3, 1.0); - - final Intersection intersection = - new Intersection<>(new DoubleSummarySetOperations(mode, mode)); - intersection.intersect(sketch1); - intersection.intersect(sketch2); - final CompactSketch result = intersection.getResult(); - Assert.assertEquals(result.getRetainedEntries(), 1); - Assert.assertFalse(result.isEmpty()); - Assert.assertEquals(result.getEstimate(), 1.0); - Assert.assertEquals(result.getLowerBound(1), 1.0); - Assert.assertEquals(result.getUpperBound(1), 1.0); - final TupleSketchIterator it = result.iterator(); - Assert.assertTrue(it.next()); - Assert.assertTrue(it.getHash() > 0); - Assert.assertEquals(it.getSummary().getValue(), 4.0); - Assert.assertFalse(it.next()); - - intersection.reset(); - sketch1 = null; - try { intersection.intersect(sketch1); fail();} - catch (final SketchesArgumentException e) { } - -} - - @Test - public void intersectionDisjointEstimationMode() { - int key = 0; - final UpdatableSketch sketch1 = - new UpdatableSketchBuilder<>(new 
DoubleSummaryFactory(mode)).build(); - for (int i = 0; i < 8192; i++) { - sketch1.update(key++, 1.0); - } - - final UpdatableSketch sketch2 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); - for (int i = 0; i < 8192; i++) { - sketch2.update(key++, 1.0); - } - - final Intersection intersection = - new Intersection<>(new DoubleSummarySetOperations(mode, mode)); - intersection.intersect(sketch1); - intersection.intersect(sketch2); - CompactSketch result = intersection.getResult(); - Assert.assertEquals(result.getRetainedEntries(), 0); - Assert.assertFalse(result.isEmpty()); - Assert.assertEquals(result.getEstimate(), 0.0); - Assert.assertEquals(result.getLowerBound(1), 0.0); - Assert.assertTrue(result.getUpperBound(1) > 0); - - // an intersection with no entries must survive more updates - intersection.intersect(sketch1); - result = intersection.getResult(); - Assert.assertEquals(result.getRetainedEntries(), 0); - Assert.assertFalse(result.isEmpty()); - Assert.assertEquals(result.getEstimate(), 0.0); - Assert.assertEquals(result.getLowerBound(1), 0.0); - Assert.assertTrue(result.getUpperBound(1) > 0); - } - - @Test - public void intersectionEstimationMode() { - int key = 0; - final UpdatableSketch sketch1 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); - for (int i = 0; i < 8192; i++) { - sketch1.update(key++, 1.0); - } - - key -= 4096; // overlap half of the entries - final UpdatableSketch sketch2 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); - for (int i = 0; i < 8192; i++) { - sketch2.update(key++, 1.0); - } - - final Intersection intersection = - new Intersection<>(new DoubleSummarySetOperations(mode, mode)); - intersection.intersect(sketch1); - intersection.intersect(sketch2); - final CompactSketch result = intersection.getResult(); - Assert.assertFalse(result.isEmpty()); - // crude estimate of RSE(95%) = 2 / sqrt(result.getRetainedEntries()) - Assert.assertEquals(result.getEstimate(), 
4096.0, 4096 * 0.03); - Assert.assertTrue(result.getLowerBound(1) <= result.getEstimate()); - Assert.assertTrue(result.getUpperBound(1) > result.getEstimate()); - final TupleSketchIterator it = result.iterator(); - while (it.next()) { - Assert.assertEquals(it.getSummary().getValue(), 2.0); - } - } - - @Test - public void checkExactIntersectionWithTheta() { - final UpdateSketch thSkNull = null; - final UpdateSketch thSkEmpty = new UpdateSketchBuilder().build(); - final UpdateSketch thSk10 = new UpdateSketchBuilder().build(); - final UpdateSketch thSk15 = new UpdateSketchBuilder().build(); - for (int i = 0; i < 10; i++) { thSk10.update(i); } - for (int i = 0; i < 10; i++) { thSk15.update(i + 5); } //overlap = 5 - - DoubleSummary dsum = new DoubleSummaryFactory(mode).newSummary(); - final Intersection intersection = - new Intersection<>(new DoubleSummarySetOperations(mode, mode)); - CompactSketch result; - - try { intersection.getResult(); fail(); } - catch (final SketchesStateException e ) { } //OK. 
- - try { intersection.intersect(thSkNull, dsum); fail(); } - catch (final SketchesArgumentException e) { } //OK - - intersection.intersect(thSkEmpty, dsum); - result = intersection.getResult(); - Assert.assertTrue(result.isEmpty()); //Empty after empty first call - intersection.reset(); - - intersection.intersect(thSk10, dsum); - result = intersection.getResult(); - Assert.assertEquals(result.getEstimate(), 10.0); //Returns valid first call - intersection.reset(); - - intersection.intersect(thSk10, dsum); // Valid first call - intersection.intersect(thSkEmpty, dsum); - result = intersection.getResult(); - Assert.assertTrue(result.isEmpty()); //Returns Empty after empty second call - intersection.reset(); - - intersection.intersect(thSk10, dsum); - intersection.intersect(thSk15, dsum); - result = intersection.getResult(); - Assert.assertEquals(result.getEstimate(), 5.0); //Returns intersection - intersection.reset(); - - dsum = null; - try { intersection.intersect(thSk10, dsum); fail(); } - catch (final SketchesArgumentException e) { } - } - - @Test - public void checkExactIntersectionWithThetaDisjoint() { - final UpdateSketch thSkA = new UpdateSketchBuilder().setLogNominalEntries(10).build(); - final UpdateSketch thSkB = new UpdateSketchBuilder().setLogNominalEntries(10).build(); - int key = 0; - for (int i = 0; i < 32; i++) { thSkA.update(key++); } - for (int i = 0; i < 32; i++) { thSkB.update(key++); } - - final DoubleSummary dsum = new DoubleSummaryFactory(mode).newSummary(); - final Intersection intersection = - new Intersection<>(new DoubleSummarySetOperations(mode, mode)); - CompactSketch result; - - intersection.intersect(thSkA, dsum); - intersection.intersect(thSkB, dsum); - result = intersection.getResult(); - Assert.assertEquals(result.getRetainedEntries(), 0); - - // an intersection with no entries must survive more updates - intersection.intersect(thSkA, dsum); - result = intersection.getResult(); - Assert.assertEquals(result.getRetainedEntries(), 0); 
- intersection.reset(); - } - - @Test - public void checkEstimatingIntersectionWithThetaOverlapping() { - final UpdateSketch thSkA = new UpdateSketchBuilder().setLogNominalEntries(4).build(); - final UpdateSketch thSkB = new UpdateSketchBuilder().setLogNominalEntries(10).build(); - for (int i = 0; i < 64; i++) { thSkA.update(i); } //dense mode, low theta - for (int i = 32; i < 96; i++) { thSkB.update(i); } //exact overlapping - - final DoubleSummary dsum = new DoubleSummaryFactory(mode).newSummary(); - final Intersection intersection = - new Intersection<>(new DoubleSummarySetOperations(mode, mode)); - CompactSketch result; - - intersection.intersect(thSkA, dsum); - intersection.intersect(thSkB, dsum); - result = intersection.getResult(); - Assert.assertEquals(result.getRetainedEntries(), 14); - - thSkB.reset(); - for (int i = 100; i < 164; i++) { thSkB.update(i); } //exact, disjoint - intersection.intersect(thSkB, dsum); //remove existing entries - result = intersection.getResult(); - Assert.assertEquals(result.getRetainedEntries(), 0); - intersection.intersect(thSkB, dsum); - result = intersection.getResult(); - Assert.assertEquals(result.getRetainedEntries(), 0); - } - - @Test - public void intersectionEmpty() { - final UpdatableSketch sketch = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); - final Intersection intersection = - new Intersection<>(new DoubleSummarySetOperations(mode, mode)); - intersection.intersect(sketch); - final CompactSketch result = intersection.getResult(); - Assert.assertEquals(result.getRetainedEntries(), 0); - Assert.assertTrue(result.isEmpty()); - Assert.assertEquals(result.getEstimate(), 0.0); - Assert.assertEquals(result.getLowerBound(1), 0.0); - Assert.assertEquals(result.getUpperBound(1), 0.0); - } - -} diff --git a/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleTest.java b/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleTest.java deleted file mode 100644 index d3f747f1f..000000000 
--- a/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleTest.java +++ /dev/null @@ -1,421 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.adouble; - -import static org.testng.Assert.assertEquals; - -import java.lang.foreign.MemorySegment; -import java.nio.ByteBuffer; - -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.tuple2.Sketch; -import org.apache.datasketches.tuple2.Sketches; -import org.apache.datasketches.tuple2.TupleSketchIterator; -import org.apache.datasketches.tuple2.UpdatableSketch; -import org.apache.datasketches.tuple2.UpdatableSketchBuilder; -import org.apache.datasketches.tuple2.adouble.DoubleSummary.Mode; -import org.testng.Assert; -import org.testng.annotations.Test; - -public class AdoubleTest { - private final DoubleSummary.Mode mode = Mode.Sum; - - @Test - public void isEmpty() { - final int lgK = 12; - final DoubleSketch sketch = new DoubleSketch(lgK, mode); - Assert.assertTrue(sketch.isEmpty()); - Assert.assertFalse(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 0.0); - 
Assert.assertEquals(sketch.getUpperBound(1), 0.0); - Assert.assertEquals(sketch.getLowerBound(1), 0.0); - Assert.assertEquals(sketch.getTheta(), 1.0); - Assert.assertEquals(sketch.getTheta(), 1.0); - Assert.assertNotNull(sketch.toString()); - final TupleSketchIterator it = sketch.iterator(); - Assert.assertNotNull(it); - Assert.assertFalse(it.next()); - } - - @Test - public void checkLowK() { - final UpdatableSketchBuilder bldr = new UpdatableSketchBuilder<>( - new DoubleSummaryFactory(Mode.Sum)); - bldr.setNominalEntries(16); - final UpdatableSketch sk = bldr.build(); - assertEquals(sk.getLgK(), 4); - } - - @SuppressWarnings("deprecation") - @Test - public void serDeTest() { - final int lgK = 12; - final int K = 1 << lgK; - final DoubleSketch a1Sk = new DoubleSketch(lgK, Mode.AlwaysOne); - final int m = 2 * K; - for (int key = 0; key < m; key++) { - a1Sk.update(key, 1.0); - } - final double est1 = a1Sk.getEstimate(); - final MemorySegment seg = MemorySegment.ofArray(a1Sk.toByteArray()); - final DoubleSketch a1Sk2 = new DoubleSketch(seg, Mode.AlwaysOne); - final double est2 = a1Sk2.getEstimate(); - assertEquals(est1, est2); - } - - @Test - public void checkStringKey() { - final int lgK = 12; - final int K = 1 << lgK; - final DoubleSketch a1Sk1 = new DoubleSketch(lgK, Mode.AlwaysOne); - final int m = K / 2; - for (int key = 0; key < m; key++) { - a1Sk1.update(Integer.toHexString(key), 1.0); - } - assertEquals(a1Sk1.getEstimate(), K / 2.0); - } - - - @Test - public void isEmptyWithSampling() { - final float samplingProbability = 0.1f; - final UpdatableSketch sketch = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)) - .setSamplingProbability(samplingProbability).build(); - Assert.assertTrue(sketch.isEmpty()); - Assert.assertFalse(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 0.0); - Assert.assertEquals(sketch.getUpperBound(1), 0.0); - Assert.assertEquals(sketch.getLowerBound(1), 0.0); - Assert.assertEquals(sketch.getThetaLong(), 
Long.MAX_VALUE); - Assert.assertEquals(sketch.getTheta(), 1.0); - } - - @Test - public void sampling() { - final float samplingProbability = 0.001f; - final UpdatableSketch sketch = - new UpdatableSketchBuilder<>( - new DoubleSummaryFactory(mode)).setSamplingProbability(samplingProbability).build(); - sketch.update("a", 1.0); - Assert.assertFalse(sketch.isEmpty()); - Assert.assertTrue(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 0.0); - Assert.assertTrue(sketch.getUpperBound(1) > 0.0); - Assert.assertEquals(sketch.getLowerBound(1), 0.0, 0.0000001); - Assert.assertEquals((float)sketch.getTheta(), samplingProbability); - Assert.assertEquals((float)sketch.getTheta(), samplingProbability); - } - - @Test - public void exactMode() { - final UpdatableSketch sketch = - new UpdatableSketchBuilder<>( - new DoubleSummaryFactory(mode)).build(); - Assert.assertTrue(sketch.isEmpty()); - Assert.assertEquals(sketch.getEstimate(), 0.0); - for (int i = 1; i <= 4096; i++) { - sketch.update(i, 1.0); - } - Assert.assertFalse(sketch.isEmpty()); - Assert.assertFalse(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 4096.0); - Assert.assertEquals(sketch.getUpperBound(1), 4096.0); - Assert.assertEquals(sketch.getLowerBound(1), 4096.0); - Assert.assertEquals(sketch.getTheta(), 1.0); - Assert.assertEquals(sketch.getTheta(), 1.0); - - int count = 0; - final TupleSketchIterator it = sketch.iterator(); - while (it.next()) { - Assert.assertEquals(it.getSummary().getValue(), 1.0); - count++; - } - Assert.assertEquals(count, 4096); - - sketch.reset(); - Assert.assertTrue(sketch.isEmpty()); - Assert.assertFalse(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 0.0); - Assert.assertEquals(sketch.getUpperBound(1), 0.0); - Assert.assertEquals(sketch.getLowerBound(1), 0.0); - Assert.assertEquals(sketch.getTheta(), 1.0); - Assert.assertEquals(sketch.getTheta(), 1.0); - } - - @Test - // The moment of going into the estimation mode is, 
to some extent, an implementation detail - // Here we assume that presenting as many unique values as twice the nominal - // size of the sketch will result in estimation mode - public void estimationMode() { - final UpdatableSketch sketch = - new UpdatableSketchBuilder<>( - new DoubleSummaryFactory(mode)).build(); - Assert.assertEquals(sketch.getEstimate(), 0.0); - for (int i = 1; i <= 8192; i++) { - sketch.update(i, 1.0); - } - Assert.assertTrue(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 8192, 8192 * 0.01); - Assert.assertTrue(sketch.getEstimate() >= sketch.getLowerBound(1)); - Assert.assertTrue(sketch.getEstimate() < sketch.getUpperBound(1)); - - int count = 0; - final TupleSketchIterator it = sketch.iterator(); - while (it.next()) { - Assert.assertEquals(it.getSummary().getValue(), 1.0); - count++; - } - Assert.assertTrue(count >= 4096); - - sketch.reset(); - Assert.assertTrue(sketch.isEmpty()); - Assert.assertFalse(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 0.0); - Assert.assertEquals(sketch.getUpperBound(1), 0.0); - Assert.assertEquals(sketch.getLowerBound(1), 0.0); - Assert.assertEquals(sketch.getTheta(), 1.0); - Assert.assertEquals(sketch.getTheta(), 1.0); -} - - @Test - public void estimationModeWithSamplingNoResizing() { - final UpdatableSketch sketch = - new UpdatableSketchBuilder<>( - new DoubleSummaryFactory(mode)) - .setSamplingProbability(0.5f) - .setResizeFactor(ResizeFactor.X1).build(); - for (int i = 0; i < 16384; i++) { - sketch.update(i, 1.0); - } - Assert.assertTrue(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 16384, 16384 * 0.01); - Assert.assertTrue(sketch.getEstimate() >= sketch.getLowerBound(1)); - Assert.assertTrue(sketch.getEstimate() < sketch.getUpperBound(1)); - } - - @Test - public void updatesOfAllKeyTypes() { - final UpdatableSketch sketch = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); - sketch.update(1L, 1.0); - 
sketch.update(2.0, 1.0); - final byte[] bytes = { 3, 3 }; - sketch.update(bytes, 1.0); - sketch.update(ByteBuffer.wrap(bytes), 1.0); // same as previous - sketch.update(ByteBuffer.wrap(bytes, 0, 1), 1.0); // slice of previous - final int[] ints = { 4 }; - sketch.update(ints, 1.0); - final long[] longs = { 5L }; - sketch.update(longs, 1.0); - sketch.update("a", 1.0); - Assert.assertEquals(sketch.getEstimate(), 7.0); - } - - @Test - public void doubleSummaryDefaultSumMode() { - final UpdatableSketch sketch = - new UpdatableSketchBuilder<>( - new DoubleSummaryFactory(mode)).build(); - { - sketch.update(1, 1.0); - Assert.assertEquals(sketch.getRetainedEntries(), 1); - final TupleSketchIterator it = sketch.iterator(); - Assert.assertTrue(it.next()); - Assert.assertEquals(it.getSummary().getValue(), 1.0); - Assert.assertFalse(it.next()); - } - { - sketch.update(1, 0.7); - Assert.assertEquals(sketch.getRetainedEntries(), 1); - final TupleSketchIterator it = sketch.iterator(); - Assert.assertTrue(it.next()); - Assert.assertEquals(it.getSummary().getValue(), 1.7); - Assert.assertFalse(it.next()); - } - { - sketch.update(1, 0.8); - Assert.assertEquals(sketch.getRetainedEntries(), 1); - final TupleSketchIterator it = sketch.iterator(); - Assert.assertTrue(it.next()); - Assert.assertEquals(it.getSummary().getValue(), 2.5); - Assert.assertFalse(it.next()); - } - } - - @Test - public void doubleSummaryMinMode() { - final UpdatableSketch sketch = - new UpdatableSketchBuilder<>( - new DoubleSummaryFactory(DoubleSummary.Mode.Min)).build(); - { - sketch.update(1, 1.0); - Assert.assertEquals(sketch.getRetainedEntries(), 1); - final TupleSketchIterator it = sketch.iterator(); - Assert.assertTrue(it.next()); - Assert.assertEquals(it.getSummary().getValue(), 1.0); - Assert.assertFalse(it.next()); - } - { - sketch.update(1, 0.7); - Assert.assertEquals(sketch.getRetainedEntries(), 1); - final TupleSketchIterator it = sketch.iterator(); - Assert.assertTrue(it.next()); - 
Assert.assertEquals(it.getSummary().getValue(), 0.7); - Assert.assertFalse(it.next()); - } - { - sketch.update(1, 0.8); - Assert.assertEquals(sketch.getRetainedEntries(), 1); - final TupleSketchIterator it = sketch.iterator(); - Assert.assertTrue(it.next()); - Assert.assertEquals(it.getSummary().getValue(), 0.7); - Assert.assertFalse(it.next()); - } - } - @Test - - public void doubleSummaryMaxMode() { - final UpdatableSketch sketch = - new UpdatableSketchBuilder<>( - new DoubleSummaryFactory(DoubleSummary.Mode.Max)).build(); - { - sketch.update(1, 1.0); - Assert.assertEquals(sketch.getRetainedEntries(), 1); - final TupleSketchIterator it = sketch.iterator(); - Assert.assertTrue(it.next()); - Assert.assertEquals(it.getSummary().getValue(), 1.0); - Assert.assertFalse(it.next()); - } - { - sketch.update(1, 0.7); - Assert.assertEquals(sketch.getRetainedEntries(), 1); - final TupleSketchIterator it = sketch.iterator(); - Assert.assertTrue(it.next()); - Assert.assertEquals(it.getSummary().getValue(), 1.0); - Assert.assertFalse(it.next()); - } - { - sketch.update(1, 2.0); - Assert.assertEquals(sketch.getRetainedEntries(), 1); - final TupleSketchIterator it = sketch.iterator(); - Assert.assertTrue(it.next()); - Assert.assertEquals(it.getSummary().getValue(), 2.0); - Assert.assertFalse(it.next()); - } - } - - @SuppressWarnings("deprecation") - @Test - public void serializeDeserializeExact() throws Exception { - final UpdatableSketch sketch1 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); - sketch1.update(1, 1.0); - - final UpdatableSketch sketch2 = Sketches.heapifyUpdatableSketch( - MemorySegment.ofArray(sketch1.toByteArray()), - new DoubleSummaryDeserializer(), new DoubleSummaryFactory(mode)); - - Assert.assertEquals(sketch2.getEstimate(), 1.0); - final TupleSketchIterator it = sketch2.iterator(); - Assert.assertTrue(it.next()); - Assert.assertEquals(it.getSummary().getValue(), 1.0); - Assert.assertFalse(it.next()); - - // the same key, so still 
one unique - sketch2.update(1, 1.0); - Assert.assertEquals(sketch2.getEstimate(), 1.0); - - sketch2.update(2, 1.0); - Assert.assertEquals(sketch2.getEstimate(), 2.0); - } - - @SuppressWarnings("deprecation") - @Test - public void serializeDeserializeEstimationNoResizing() throws Exception { - final UpdatableSketch sketch1 = - new UpdatableSketchBuilder<>( - new DoubleSummaryFactory(mode)).setResizeFactor(ResizeFactor.X1).build(); - for (int j = 0; j < 10; j++) { - for (int i = 0; i < 8192; i++) { - sketch1.update(i, 1.0); - } - } - sketch1.trim(); - final byte[] bytes = sketch1.toByteArray(); - - //for binary testing - //TestUtil.writeBytesToFile(bytes, "UpdatableSketchWithDoubleSummary4K.sk"); - - final Sketch sketch2 = - Sketches.heapifySketch(MemorySegment.ofArray(bytes), new DoubleSummaryDeserializer()); - Assert.assertTrue(sketch2.isEstimationMode()); - Assert.assertEquals(sketch2.getEstimate(), 8192, 8192 * 0.99); - Assert.assertEquals(sketch1.getTheta(), sketch2.getTheta()); - final TupleSketchIterator it = sketch2.iterator(); - int count = 0; - while (it.next()) { - Assert.assertEquals(it.getSummary().getValue(), 10.0); - count++; - } - Assert.assertEquals(count, 4096); - } - - @SuppressWarnings("deprecation") - @Test - public void serializeDeserializeSampling() throws Exception { - final int sketchSize = 16384; - final int numberOfUniques = sketchSize; - final UpdatableSketch sketch1 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)) - .setNominalEntries(sketchSize).setSamplingProbability(0.5f).build(); - for (int i = 0; i < numberOfUniques; i++) { - sketch1.update(i, 1.0); - } - final Sketch sketch2 = Sketches.heapifySketch( - MemorySegment.ofArray(sketch1.toByteArray()), new DoubleSummaryDeserializer()); - Assert.assertTrue(sketch2.isEstimationMode()); - Assert.assertEquals(sketch2.getEstimate() / numberOfUniques, 1.0, 0.01); - Assert.assertEquals(sketch2.getRetainedEntries() / (double) numberOfUniques, 0.5, 0.01); - 
Assert.assertEquals(sketch1.getTheta(), sketch2.getTheta()); - } - - @Test - public void checkUpdatableSketch() { - final DoubleSummaryFactory dsumFact = new DoubleSummaryFactory(mode); - //DoubleSummary dsum = dsumFact.newSummary(); - final UpdatableSketchBuilder bldr = new UpdatableSketchBuilder<>(dsumFact); - final UpdatableSketch usk = bldr.build(); - final byte[] byteArr = new byte[0]; - usk.update(byteArr, 0.0); - final int[] intArr = new int[0]; - usk.update(intArr, 1.0); - final long[] longArr = new long[0]; - usk.update(longArr, 2.0); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void invalidSamplingProbability() { - new UpdatableSketchBuilder<> - (new DoubleSummaryFactory(mode)).setSamplingProbability(2f).build(); - } - -} diff --git a/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleUnionTest.java b/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleUnionTest.java deleted file mode 100644 index cfbd999af..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/adouble/AdoubleUnionTest.java +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2.adouble; - -import static org.testng.Assert.fail; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.theta2.UpdateSketch; -import org.apache.datasketches.theta2.UpdateSketchBuilder; -import org.apache.datasketches.tuple2.CompactSketch; -import org.apache.datasketches.tuple2.TupleSketchIterator; -import org.apache.datasketches.tuple2.Union; -import org.apache.datasketches.tuple2.UpdatableSketch; -import org.apache.datasketches.tuple2.UpdatableSketchBuilder; -import org.apache.datasketches.tuple2.adouble.DoubleSummary.Mode; -import org.testng.Assert; -import org.testng.annotations.Test; - -/** - * @author Lee Rhodes - */ -public class AdoubleUnionTest { - private final DoubleSummary.Mode mode = Mode.Sum; - - @Test - public void unionEmptySampling() { - final UpdatableSketch sketch = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).setSamplingProbability(0.01f).build(); - sketch.update(1, 1.0); - Assert.assertEquals(sketch.getRetainedEntries(), 0); // not retained due to low sampling probability - - final Union union = new Union<>(new DoubleSummarySetOperations(mode, mode)); - union.union(sketch); - final CompactSketch result = union.getResult(); - Assert.assertEquals(result.getRetainedEntries(), 0); - Assert.assertFalse(result.isEmpty()); - Assert.assertTrue(result.isEstimationMode()); - Assert.assertEquals(result.getEstimate(), 0.0); - } - - @Test - public void unionExactMode() { - final UpdatableSketch sketch1 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); - sketch1.update(1, 1.0); - sketch1.update(1, 1.0); - sketch1.update(1, 1.0); - sketch1.update(2, 1.0); - - final UpdatableSketch sketch2 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); - sketch2.update(2, 1.0); - sketch2.update(2, 1.0); - sketch2.update(3, 1.0); - sketch2.update(3, 1.0); - sketch2.update(3, 1.0); - - final Union union = new Union<>(new 
DoubleSummarySetOperations(mode, mode)); - union.union(sketch1); - union.union(sketch2); - CompactSketch result = union.getResult(); - Assert.assertEquals(result.getEstimate(), 3.0); - - final TupleSketchIterator it = result.iterator(); - Assert.assertTrue(it.next()); - Assert.assertEquals(it.getSummary().getValue(), 3.0); - Assert.assertTrue(it.next()); - Assert.assertEquals(it.getSummary().getValue(), 3.0); - Assert.assertTrue(it.next()); - Assert.assertEquals(it.getSummary().getValue(), 3.0); - Assert.assertFalse(it.next()); - - union.reset(); - result = union.getResult(); - Assert.assertEquals(result.getRetainedEntries(), 0); - Assert.assertTrue(result.isEmpty()); - Assert.assertFalse(result.isEstimationMode()); - Assert.assertEquals(result.getEstimate(), 0.0); - Assert.assertEquals(result.getLowerBound(1), 0.0); - Assert.assertEquals(result.getUpperBound(1), 0.0); - Assert.assertEquals(result.getTheta(), 1.0); - } - - @Test - public void unionEstimationMode() { - int key = 0; - final UpdatableSketch sketch1 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); - for (int i = 0; i < 8192; i++) { - sketch1.update(key++, 1.0); - } - - key -= 4096; // overlap half of the entries - final UpdatableSketch sketch2 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); - for (int i = 0; i < 8192; i++) { - sketch2.update(key++, 1.0); - } - - final Union union = new Union<>(4096, new DoubleSummarySetOperations(mode, mode)); - union.union(sketch1); - union.union(sketch2); - final CompactSketch result = union.getResult(); - Assert.assertEquals(result.getEstimate(), 12288.0, 12288 * 0.01); - Assert.assertTrue(result.getLowerBound(1) <= result.getEstimate()); - Assert.assertTrue(result.getUpperBound(1) > result.getEstimate()); - } - - @Test - public void unionMixedMode() { - int key = 0; - final UpdatableSketch sketch1 = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); - for (int i = 0; i < 1000; i++) { - 
sketch1.update(key++, 1.0); - //System.out.println("theta1=" + sketch1.getTheta() + " " + sketch1.getThetaLong()); - } - - key -= 500; // overlap half of the entries - final UpdatableSketch sketch2 = - new UpdatableSketchBuilder<> - (new DoubleSummaryFactory(mode)).setSamplingProbability(0.2f).build(); - for (int i = 0; i < 20000; i++) { - sketch2.update(key++, 1.0); - //System.out.println("theta2=" + sketch2.getTheta() + " " + sketch2.getThetaLong()); - } - - final Union union = new Union<>(4096, new DoubleSummarySetOperations(mode, mode)); - union.union(sketch1); - union.union(sketch2); - final CompactSketch result = union.getResult(); - Assert.assertEquals(result.getEstimate(), 20500.0, 20500 * 0.01); - Assert.assertTrue(result.getLowerBound(1) <= result.getEstimate()); - Assert.assertTrue(result.getUpperBound(1) > result.getEstimate()); - } - - @Test - public void checkUnionUpdateWithTheta() { - final Union union = new Union<>(new DoubleSummarySetOperations(mode, mode)); - UpdateSketch usk = null; - DoubleSummary dsum = null; - - try { union.union(usk, dsum); fail(); } - catch (final SketchesArgumentException e) { } - - usk = new UpdateSketchBuilder().build(); - try { union.union(usk, dsum); fail(); } - catch (final SketchesArgumentException e) { } - - dsum = new DoubleSummaryFactory(mode).newSummary(); - for (int i = 0; i < 10; i++) { usk.update(i); } - union.union(usk, dsum); - Assert.assertEquals(union.getResult().getEstimate(), 10.0); - } - -} diff --git a/src/test/java/org/apache/datasketches/tuple2/adouble/FilterTest.java b/src/test/java/org/apache/datasketches/tuple2/adouble/FilterTest.java deleted file mode 100644 index b69ef74d3..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/adouble/FilterTest.java +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.adouble; - -import java.util.Random; - -import org.apache.datasketches.tuple2.Filter; -import org.apache.datasketches.tuple2.Sketch; -import org.apache.datasketches.tuple2.Sketches; -import org.apache.datasketches.tuple2.UpdatableSketch; -import org.apache.datasketches.tuple2.UpdatableSketchBuilder; -import org.apache.datasketches.tuple2.adouble.DoubleSummary.Mode; -import org.testng.Assert; -import org.testng.annotations.Test; - -public class FilterTest { - private static final int numberOfElements = 100; - private static final Random random = new Random(1);//deterministic for this class - private final DoubleSummary.Mode mode = Mode.Sum; - - @Test - public void emptySketch() { - final Sketch sketch = Sketches.createEmptySketch(); - - final Filter filter = new Filter<>(o -> true); - - final Sketch filteredSketch = filter.filter(sketch); - - Assert.assertEquals(filteredSketch.getEstimate(), 0.0); - Assert.assertEquals(filteredSketch.getThetaLong(), sketch.getThetaLong()); - Assert.assertTrue(filteredSketch.isEmpty()); - Assert.assertEquals(filteredSketch.getLowerBound(1), 0.0); - Assert.assertEquals(filteredSketch.getUpperBound(1), 0.0); - } - - @Test - public void nullSketch() { - final Filter filter = new Filter<>(o -> true); 
- - final Sketch filteredSketch = filter.filter(null); - - Assert.assertEquals(filteredSketch.getEstimate(), 0.0); - Assert.assertEquals(filteredSketch.getThetaLong(), Long.MAX_VALUE); - Assert.assertTrue(filteredSketch.isEmpty()); - Assert.assertEquals(filteredSketch.getLowerBound(1), 0.0); - Assert.assertEquals(filteredSketch.getUpperBound(1), 0.0); - } - - @Test - public void filledSketchShouldBehaveTheSame() { - final UpdatableSketch sketch = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); - - fillSketch(sketch, numberOfElements, 0.0); - - final Filter filter = new Filter<>(o -> true); - - final Sketch filteredSketch = filter.filter(sketch); - - Assert.assertEquals(filteredSketch.getEstimate(), sketch.getEstimate()); - Assert.assertEquals(filteredSketch.getThetaLong(), sketch.getThetaLong()); - Assert.assertFalse(filteredSketch.isEmpty()); - Assert.assertEquals(filteredSketch.getLowerBound(1), sketch.getLowerBound(1)); - Assert.assertEquals(filteredSketch.getUpperBound(1), sketch.getUpperBound(1)); - } - - @Test - public void filledSketchShouldFilterOutElements() { - final UpdatableSketch sketch = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); - - fillSketch(sketch, numberOfElements, 0.0); - fillSketch(sketch, 2 * numberOfElements, 1.0); - - final Filter filter = new Filter<>(o -> o.getValue() < 0.5); - - final Sketch filteredSketch = filter.filter(sketch); - - Assert.assertEquals(filteredSketch.getEstimate(), numberOfElements); - Assert.assertEquals(filteredSketch.getThetaLong(), sketch.getThetaLong()); - Assert.assertFalse(filteredSketch.isEmpty()); - Assert.assertTrue(filteredSketch.getLowerBound(1) <= filteredSketch.getEstimate()); - Assert.assertTrue(filteredSketch.getUpperBound(1) >= filteredSketch.getEstimate()); - } - - @Test - public void filteringInEstimationMode() { - final UpdatableSketch sketch = - new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build(); - - final int n = 10000; - 
fillSketch(sketch, n, 0.0); - fillSketch(sketch, 2 * n, 1.0); - - final Filter filter = new Filter<>(o -> o.getValue() < 0.5); - - final Sketch filteredSketch = filter.filter(sketch); - - Assert.assertEquals(filteredSketch.getEstimate(), n, n * 0.05); - Assert.assertEquals(filteredSketch.getThetaLong(), sketch.getThetaLong()); - Assert.assertFalse(filteredSketch.isEmpty()); - Assert.assertTrue(filteredSketch.getLowerBound(1) <= filteredSketch.getEstimate()); - Assert.assertTrue(filteredSketch.getUpperBound(1) >= filteredSketch.getEstimate()); - } - - @Test - public void nonEmptySketchWithNoEntries() { - final UpdatableSketch sketch = - new UpdatableSketchBuilder<>( - new DoubleSummaryFactory(mode)).setSamplingProbability(0.0001f).build(); - sketch.update(0, 0.0); - - Assert.assertFalse(sketch.isEmpty()); - Assert.assertEquals(sketch.getRetainedEntries(), 0); - - final Filter filter = new Filter<>(o -> true); - - final Sketch filteredSketch = filter.filter(sketch); - - Assert.assertFalse(filteredSketch.isEmpty()); - Assert.assertEquals(filteredSketch.getEstimate(), sketch.getEstimate()); - Assert.assertEquals(filteredSketch.getThetaLong(), sketch.getThetaLong()); - Assert.assertEquals(filteredSketch.getLowerBound(1), sketch.getLowerBound(1)); - Assert.assertEquals(filteredSketch.getUpperBound(1), sketch.getUpperBound(1)); - } - - private static void fillSketch(final UpdatableSketch sketch, - final int numberOfElements, final Double sketchValue) { - - - for (int cont = 0; cont < numberOfElements; cont++) { - sketch.update(random.nextLong(), sketchValue); - } - } -} diff --git a/src/test/java/org/apache/datasketches/tuple2/aninteger/CornerCaseTupleSetOperationsTest.java b/src/test/java/org/apache/datasketches/tuple2/aninteger/CornerCaseTupleSetOperationsTest.java deleted file mode 100644 index 635fe19b2..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/aninteger/CornerCaseTupleSetOperationsTest.java +++ /dev/null @@ -1,630 +0,0 @@ -/* - * Licensed to the 
Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.aninteger; - -import static org.apache.datasketches.hash.MurmurHash3.hash; -import static org.testng.Assert.assertTrue; - -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.common.Util; -import org.apache.datasketches.theta2.UpdateSketch; -import org.apache.datasketches.theta2.UpdateSketchBuilder; -import org.apache.datasketches.tuple2.AnotB; -import org.apache.datasketches.tuple2.CompactSketch; -import org.apache.datasketches.tuple2.Intersection; -import org.apache.datasketches.tuple2.Union; -import org.testng.annotations.Test; - -public class CornerCaseTupleSetOperationsTest { - - /* Hash Values - * 9223372036854775807 Theta = 1.0 - * - * 6730918654704304314 hash(3L)[0] >>> 1 GT_MIDP - * 4611686018427387904 Theta for p = 0.5f = MIDP - * 2206043092153046979 hash(2L)[0] >>> 1 LT_MIDP_V - * 1498732507761423037 hash(5L)[0] >>> 1 LTLT_MIDP_V - * - * 1206007004353599230 hash(6L)[0] >>> 1 GT_LOWP_V - * 922337217429372928 Theta for p = 0.1f = LOWP - * 593872385995628096 hash(4L)[0] >>> 1 LT_LOWP_V - * 405753591161026837 hash(1L)[0] >>> 1 LTLT_LOWP_V - */ - - private static final long GT_MIDP_V = 3L; - private static 
final float MIDP_FLT = 0.5f; - - private static final long GT_LOWP_V = 6L; - private static final float LOWP_FLT = 0.1f; - private static final long LT_LOWP_V = 4L; - - - private IntegerSummary.Mode mode = IntegerSummary.Mode.Min; - private IntegerSummary integerSummary = new IntegerSummary(mode); - private IntegerSummarySetOperations setOperations = new IntegerSummarySetOperations(mode, mode); - - private enum SkType { - EMPTY, // { 1.0, 0, T} Bin: 101 Oct: 05 - EXACT, // { 1.0, >0, F} Bin: 110 Oct: 06, specify only value - ESTIMATION, // {<1.0, >0, F} Bin: 010 Oct: 02, specify only value - DEGENERATE // {<1.0, 0, F} Bin: 000 Oct: 0, specify p, value - } - - //================================= - - @Test - public void emptyEmpty() { - IntegerSketch tupleA = getTupleSketch(SkType.EMPTY, 0, 0); - IntegerSketch tupleB = getTupleSketch(SkType.EMPTY, 0, 0); - UpdateSketch thetaB = getThetaSketch(SkType.EMPTY, 0, 0); - final double expectedIntersectTheta = 1.0; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = true; - final double expectedAnotbTheta = 1.0; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = true; - final double expectedUnionTheta = 1.0; - final int expectedUnionCount = 0; - final boolean expectedUnionEmpty = true; - - checks(tupleA, tupleB, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void emptyExact() { - IntegerSketch tupleA = getTupleSketch(SkType.EMPTY, 0, 0); - IntegerSketch tupleB = getTupleSketch(SkType.EXACT, 0, GT_MIDP_V); - UpdateSketch thetaB = getThetaSketch(SkType.EXACT, 0, GT_MIDP_V); - final double expectedIntersectTheta = 1.0; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = true; - final double expectedAnotbTheta = 1.0; - final int expectedAnotbCount = 0; - final boolean 
expectedAnotbEmpty = true; - final double expectedUnionTheta = 1.0; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(tupleA, tupleB, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void EmptyDegenerate() { - IntegerSketch tupleA = getTupleSketch(SkType.EMPTY, 0, 0); - IntegerSketch tupleB = getTupleSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_V); - UpdateSketch thetaB = getThetaSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_V); - final double expectedIntersectTheta = 1.0; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = true; - final double expectedAnotbTheta = 1.0; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = true; - final double expectedUnionTheta = LOWP_FLT; - final int expectedUnionCount = 0; - final boolean expectedUnionEmpty = false; - - checks(tupleA, tupleB, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void emptyEstimation() { - IntegerSketch tupleA = getTupleSketch(SkType.EMPTY, 0, 0); - IntegerSketch tupleB = getTupleSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_V); - UpdateSketch thetaB = getThetaSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_V); - final double expectedIntersectTheta = 1.0; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = true; - final double expectedAnotbTheta = 1.0; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = true; - final double expectedUnionTheta = LOWP_FLT; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(tupleA, tupleB, thetaB, - expectedIntersectTheta, expectedIntersectCount, 
expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - //================================= - - @Test - public void exactEmpty() { - IntegerSketch tupleA = getTupleSketch(SkType.EXACT, 0, GT_MIDP_V); - IntegerSketch tupleB = getTupleSketch(SkType.EMPTY, 0, 0); - UpdateSketch thetaB = getThetaSketch(SkType.EMPTY, 0, 0); - final double expectedIntersectTheta = 1.0; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = true; - final double expectedAnotbTheta = 1.0; - final int expectedAnotbCount = 1; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = 1.0; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(tupleA, tupleB, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void exactExact() { - IntegerSketch tupleA = getTupleSketch(SkType.EXACT, 0, GT_MIDP_V); - IntegerSketch tupleB = getTupleSketch(SkType.EXACT, 0, GT_MIDP_V); - UpdateSketch thetaB = getThetaSketch(SkType.EXACT, 0, GT_MIDP_V); - final double expectedIntersectTheta = 1.0; - final int expectedIntersectCount = 1; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = 1.0; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = true; - final double expectedUnionTheta = 1.0; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(tupleA, tupleB, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void exactDegenerate() { - IntegerSketch tupleA = getTupleSketch(SkType.EXACT, 0, 
LT_LOWP_V); - IntegerSketch tupleB = getTupleSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_V); //entries = 0 - UpdateSketch thetaB = getThetaSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_V); - final double expectedIntersectTheta = LOWP_FLT; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = LOWP_FLT; - final int expectedAnotbCount = 1; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP_FLT; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(tupleA, tupleB, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void exactEstimation() { - IntegerSketch tupleA = getTupleSketch(SkType.EXACT, 0, LT_LOWP_V); - IntegerSketch tupleB = getTupleSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_V); - UpdateSketch thetaB = getThetaSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_V); - final double expectedIntersectTheta = LOWP_FLT; - final int expectedIntersectCount = 1; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = LOWP_FLT; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP_FLT; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(tupleA, tupleB, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - //================================= - - @Test - public void estimationEmpty() { - IntegerSketch tupleA = getTupleSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_V); - IntegerSketch tupleB = getTupleSketch(SkType.EMPTY, 0, 0); - UpdateSketch thetaB = getThetaSketch(SkType.EMPTY, 0, 
0); - final double expectedIntersectTheta = 1.0; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = true; - final double expectedAnotbTheta = LOWP_FLT; - final int expectedAnotbCount = 1; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP_FLT; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(tupleA, tupleB, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void estimationExact() { - IntegerSketch tupleA = getTupleSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_V); - IntegerSketch tupleB = getTupleSketch(SkType.EXACT, 0, LT_LOWP_V); - UpdateSketch thetaB = getThetaSketch(SkType.EXACT, 0, LT_LOWP_V); - final double expectedIntersectTheta = LOWP_FLT; - final int expectedIntersectCount = 1; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = LOWP_FLT; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP_FLT; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(tupleA, tupleB, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void estimationDegenerate() { - IntegerSketch tupleA = getTupleSketch(SkType.ESTIMATION, MIDP_FLT, LT_LOWP_V); - IntegerSketch tupleB = getTupleSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_V); - UpdateSketch thetaB = getThetaSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_V); - final double expectedIntersectTheta = LOWP_FLT; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = LOWP_FLT; - final 
int expectedAnotbCount = 1; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP_FLT; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(tupleA, tupleB, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void estimationEstimation() { - IntegerSketch tupleA = getTupleSketch(SkType.ESTIMATION, MIDP_FLT, LT_LOWP_V); - IntegerSketch tupleB = getTupleSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_V); - UpdateSketch thetaB = getThetaSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_V); - final double expectedIntersectTheta = LOWP_FLT; - final int expectedIntersectCount = 1; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = LOWP_FLT; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP_FLT; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(tupleA, tupleB, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - //================================= - - @Test - public void degenerateEmpty() { - IntegerSketch tupleA = getTupleSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_V); //entries = 0 - IntegerSketch tupleB = getTupleSketch(SkType.EMPTY, 0, 0); - UpdateSketch thetaB = getThetaSketch(SkType.EMPTY, 0, 0); - final double expectedIntersectTheta = 1.0; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = true; - final double expectedAnotbTheta = LOWP_FLT; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP_FLT; - final int expectedUnionCount = 0; - 
final boolean expectedUnionEmpty = false; - - checks(tupleA, tupleB, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void degenerateExact() { - IntegerSketch tupleA = getTupleSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_V); //entries = 0 - IntegerSketch tupleB = getTupleSketch(SkType.EXACT, 0, LT_LOWP_V); - UpdateSketch thetaB = getThetaSketch(SkType.EXACT, 0, LT_LOWP_V); - final double expectedIntersectTheta = LOWP_FLT; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = LOWP_FLT; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP_FLT; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(tupleA, tupleB, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void degenerateDegenerate() { - IntegerSketch tupleA = getTupleSketch(SkType.DEGENERATE, MIDP_FLT, GT_MIDP_V); //entries = 0 - IntegerSketch tupleB = getTupleSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_V); - UpdateSketch thetaB = getThetaSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_V); - final double expectedIntersectTheta = LOWP_FLT; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = LOWP_FLT; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP_FLT; - final int expectedUnionCount = 0; - final boolean expectedUnionEmpty = false; - - checks(tupleA, tupleB, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - 
expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void degenerateEstimation() { - IntegerSketch tupleA = getTupleSketch(SkType.DEGENERATE, MIDP_FLT, GT_MIDP_V); //entries = 0 - IntegerSketch tupleB = getTupleSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_V); - UpdateSketch thetaB = getThetaSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_V); - final double expectedIntersectTheta = LOWP_FLT; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = LOWP_FLT; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP_FLT; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(tupleA, tupleB, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - //================================= - //================================= - - private void checks( - IntegerSketch tupleA, - IntegerSketch tupleB, - UpdateSketch thetaB, - double expectedIntersectTheta, - int expectedIntersectCount, - boolean expectedIntersectEmpty, - double expectedAnotbTheta, - int expectedAnotbCount, - boolean expectedAnotbEmpty, - double expectedUnionTheta, - int expectedUnionCount, - boolean expectedUnionEmpty) { - CompactSketch csk; - Intersection inter = new Intersection<>(setOperations); - AnotB anotb = new AnotB<>(); - Union union = new Union<>(16, setOperations); - - //Intersection Stateless Tuple, Tuple Updatable - csk = inter.intersect(tupleA, tupleB); - checkResult("Intersect Stateless Tuple, Tuple", csk, expectedIntersectTheta, expectedIntersectCount, - expectedIntersectEmpty); - //Intersection Stateless Tuple, Tuple Compact - csk = inter.intersect(tupleA.compact(), tupleB.compact()); - 
checkResult("Intersect Stateless Tuple, Tuple", csk, expectedIntersectTheta, expectedIntersectCount, - expectedIntersectEmpty); - //Intersection Stateless Tuple, Theta Updatable - csk = inter.intersect(tupleA, thetaB, integerSummary); //Tuple, Theta - checkResult("Intersect Stateless Tuple, Theta", csk, expectedIntersectTheta, expectedIntersectCount, - expectedIntersectEmpty); - //Intersection Stateless Tuple, Theta Compact - csk = inter.intersect(tupleA.compact(), thetaB.compact(), integerSummary); - checkResult("Intersect Stateless Tuple, Theta", csk, expectedIntersectTheta, expectedIntersectCount, - expectedIntersectEmpty); - - //AnotB Stateless Tuple, Tuple Updatable - csk = AnotB.aNotB(tupleA, tupleB); - checkResult("AnotB Stateless Tuple, Tuple", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); - //AnotB Stateless Tuple, Tuple Compact - csk = AnotB.aNotB(tupleA.compact(), tupleB.compact()); - checkResult("AnotB Stateless Tuple, Tuple", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); - //AnotB Stateless Tuple, Theta Updatable - csk = AnotB.aNotB(tupleA, thetaB); - checkResult("AnotB Stateless Tuple, Theta", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); - //AnotB Stateless Tuple, Theta Compact - csk = AnotB.aNotB(tupleA.compact(), thetaB.compact()); - checkResult("AnotB Stateless Tuple, Theta", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); - - //AnotB Stateful Tuple, Tuple Updatable - anotb.setA(tupleA); - anotb.notB(tupleB); - csk = anotb.getResult(true); - checkResult("AnotB Stateful Tuple, Tuple", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); - //AnotB Stateful Tuple, Tuple Compact - anotb.setA(tupleA.compact()); - anotb.notB(tupleB.compact()); - csk = anotb.getResult(true); - checkResult("AnotB Stateful Tuple, Tuple", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); - //AnotB Stateful Tuple, Theta Updatable - anotb.setA(tupleA); - 
anotb.notB(thetaB); - csk = anotb.getResult(true); - checkResult("AnotB Stateful Tuple, Theta", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); - //AnotB Stateful Tuple, Theta Compact - anotb.setA(tupleA.compact()); - anotb.notB(thetaB.compact()); - csk = anotb.getResult(true); - checkResult("AnotB Stateful Tuple, Theta", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); - - //Union Stateless Tuple, Tuple Updatable - csk = union.union(tupleA, tupleB); - checkResult("Union Stateless Tuple, Tuple", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - //Union Stateless Tuple, Tuple Compact - csk = union.union(tupleA.compact(), tupleB.compact()); - checkResult("Union Stateless Tuple, Tuple", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - //Union Stateless Tuple, Theta Updatable - csk = union.union(tupleA, thetaB, integerSummary); - checkResult("Union Stateless Tuple, Theta", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - //Union Stateless Tuple, Theta Compact - csk = union.union(tupleA.compact(), thetaB.compact(), integerSummary); - checkResult("Union Stateless Tuple, Theta", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - - //Union Stateful Tuple, Tuple Updatable - union.union(tupleA); - union.union(tupleB); - csk = union.getResult(true); - checkResult("Union Stateful Tuple, Tuple", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - //AnotB Stateful Tuple, Tuple Compact - union.union(tupleA.compact()); - union.union(tupleB.compact()); - csk = union.getResult(true); - checkResult("Union Stateful Tuple, Tuple", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - //AnotB Stateful Tuple, Theta Updatable - union.union(tupleA); - union.union(thetaB, integerSummary); - csk = union.getResult(true); - checkResult("Union Stateful Tuple, Theta", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - //AnotB Stateful Tuple, 
Theta Compact - union.union(tupleA.compact()); - union.union(thetaB.compact(), integerSummary); - csk = union.getResult(true); - checkResult("Union Stateful Tuple, Theta", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - - } - - private static void checkResult( - String comment, - CompactSketch csk, - double expectedTheta, - int expectedEntries, - boolean expectedEmpty) { - double actualTheta = csk.getTheta(); - int actualEntries = csk.getRetainedEntries(); - boolean actualEmpty = csk.isEmpty(); - - boolean thetaOk = actualTheta == expectedTheta; - boolean entriesOk = actualEntries == expectedEntries; - boolean emptyOk = actualEmpty == expectedEmpty; - if (!thetaOk || !entriesOk || !emptyOk) { - StringBuilder sb = new StringBuilder(); - sb.append(comment + ": "); - if (!thetaOk) { sb.append("Theta: expected " + expectedTheta + ", got " + actualTheta + "; "); } - if (!entriesOk) { sb.append("Entries: expected " + expectedEntries + ", got " + actualEntries + "; "); } - if (!emptyOk) { sb.append("Empty: expected " + expectedEmpty + ", got " + actualEmpty + "."); } - throw new IllegalArgumentException(sb.toString()); - } - } - - private static IntegerSketch getTupleSketch( - SkType skType, - float p, - long updateKey) { - - IntegerSketch sk; - switch(skType) { - case EMPTY: { // { 1.0, 0, T} p and value are not used - sk = new IntegerSketch(4, 2, 1.0f, IntegerSummary.Mode.Min); - break; - } - case EXACT: { // { 1.0, >0, F} p is not used - sk = new IntegerSketch(4, 2, 1.0f, IntegerSummary.Mode.Min); - sk.update(updateKey, 1); - break; - } - case ESTIMATION: { // {<1.0, >0, F} - checkValidUpdate(p, updateKey); - sk = new IntegerSketch(4, 2, p, IntegerSummary.Mode.Min); - sk.update(updateKey, 1); - break; - } - case DEGENERATE: { // {<1.0, 0, F} - checkInvalidUpdate(p, updateKey); - sk = new IntegerSketch(4, 2, p, IntegerSummary.Mode.Min); - sk.update(updateKey, 1); // > theta - break; - } - - default: { return null; } // should not happen - } - return 
sk; - } - - //NOTE: p and value arguments are used for every case - private static UpdateSketch getThetaSketch( - SkType skType, - float p, - long updateKey) { - UpdateSketchBuilder bldr = new UpdateSketchBuilder(); - bldr.setLogNominalEntries(4); - bldr.setResizeFactor(ResizeFactor.X4); - - UpdateSketch sk; - switch(skType) { - case EMPTY: { // { 1.0, 0, T} p and value are not used - sk = bldr.build(); - break; - } - case EXACT: { // { 1.0, >0, F} p is not used - sk = bldr.build(); - sk.update(updateKey); - break; - } - case ESTIMATION: { // {<1.0, >0, F} - checkValidUpdate(p, updateKey); - bldr.setP(p); - sk = bldr.build(); - sk.update(updateKey); - break; - } - case DEGENERATE: { // {<1.0, 0, F} - checkInvalidUpdate(p, updateKey); - bldr.setP(p); - sk = bldr.build(); - sk.update(updateKey); - break; - } - - default: { return null; } // should not happen - } - return sk; - } - - private static void checkValidUpdate(float p, long updateKey) { - assertTrue( getLongHash(updateKey) < (long) (p * Long.MAX_VALUE)); - } - - private static void checkInvalidUpdate(float p, long updateKey) { - assertTrue( getLongHash(updateKey) > (long) (p * Long.MAX_VALUE)); - } - - static long getLongHash(long v) { - return (hash(v, Util.DEFAULT_UPDATE_SEED)[0]) >>> 1; - } -} diff --git a/src/test/java/org/apache/datasketches/tuple2/aninteger/EngagementTest.java b/src/test/java/org/apache/datasketches/tuple2/aninteger/EngagementTest.java deleted file mode 100644 index 30ba32a1f..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/aninteger/EngagementTest.java +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.aninteger; - -import static java.lang.Math.exp; -import static java.lang.Math.log; -import static java.lang.Math.round; -import static org.apache.datasketches.tuple2.aninteger.IntegerSummary.Mode.AlwaysOne; -import static org.apache.datasketches.tuple2.aninteger.IntegerSummary.Mode.Sum; - -import org.apache.datasketches.tuple2.CompactSketch; -import org.apache.datasketches.tuple2.TupleSketchIterator; -import org.apache.datasketches.tuple2.Union; -import org.testng.annotations.Test; - -/** - * @author Lee Rhodes - */ -public class EngagementTest { - public static final int numStdDev = 2; - - @Test - public void computeEngagementHistogram() { - final int lgK = 8; //Using a larger sketch >= 9 will produce exact results for this little example - final int K = 1 << lgK; - final int days = 30; - int v = 0; - final IntegerSketch[] skArr = new IntegerSketch[days]; - for (int i = 0; i < days; i++) { - skArr[i] = new IntegerSketch(lgK, AlwaysOne); - } - for (int i = 0; i <= days; i++) { //31 generating indices for symmetry - final int numIds = numIDs(days, i); - final int numDays = numDays(days, i); - final int myV = v++; - for (int d = 0; d < numDays; d++) { - for (int id = 0; id < numIds; id++) { - skArr[d].update(myV + id, 1); - } - } - v += numIds; - } - unionOps(K, Sum, skArr); - } - - private static int numIDs(final int totalDays, final int index) { - final double d = totalDays; - final double i = index; - return (int)round(exp(i * log(d) / d)); - } - - private static int numDays(final int totalDays, final 
int index) { - final double d = totalDays; - final double i = index; - return (int)round(exp((d - i) * log(d) / d)); - } - - private static void unionOps(final int K, final IntegerSummary.Mode mode, final IntegerSketch ... sketches) { - final IntegerSummarySetOperations setOps = new IntegerSummarySetOperations(mode, mode); - final Union union = new Union<>(K, setOps); - final int len = sketches.length; - - for (final IntegerSketch isk : sketches) { - union.union(isk); - } - final CompactSketch result = union.getResult(); - final TupleSketchIterator itr = result.iterator(); - - final int[] numDaysArr = new int[len + 1]; //zero index is ignored - - while (itr.next()) { - //For each unique visitor from the result sketch, get the # days visited - final int numDaysVisited = itr.getSummary().getValue(); - //increment the number of visitors that visited numDays - numDaysArr[numDaysVisited]++; //values range from 1 to 30 - } - - println("\nEngagement Histogram:"); - println("Number of Unique Visitors by Number of Days Visited"); - printf("%12s%12s%12s%12s\n","Days Visited", "Estimate", "LB", "UB"); - int sumVisits = 0; - final double theta = result.getTheta(); - for (int i = 0; i < numDaysArr.length; i++) { - final int visitorsAtDaysVisited = numDaysArr[i]; - if (visitorsAtDaysVisited == 0) { continue; } - sumVisits += visitorsAtDaysVisited * i; - - final double estVisitorsAtDaysVisited = visitorsAtDaysVisited / theta; - final double lbVisitorsAtDaysVisited = result.getLowerBound(numStdDev, visitorsAtDaysVisited); - final double ubVisitorsAtDaysVisited = result.getUpperBound(numStdDev, visitorsAtDaysVisited); - - printf("%12d%12.0f%12.0f%12.0f\n", - i, estVisitorsAtDaysVisited, lbVisitorsAtDaysVisited, ubVisitorsAtDaysVisited); - } - - //The estimate and bounds of the total number of visitors comes directly from the sketch. 
- final double visitors = result.getEstimate(); - final double lbVisitors = result.getLowerBound(numStdDev); - final double ubVisitors = result.getUpperBound(numStdDev); - printf("\n%12s%12s%12s%12s\n","Totals", "Estimate", "LB", "UB"); - printf("%12s%12.0f%12.0f%12.0f\n", "Visitors", visitors, lbVisitors, ubVisitors); - - //The total number of visits, however, is a scaled metric and takes advantage of the fact that - //the retained entries in the sketch is a uniform random sample of all unique visitors, and - //the the rest of the unique users will likely behave in the same way. - final double estVisits = sumVisits / theta; - final double lbVisits = estVisits * lbVisitors / visitors; - final double ubVisits = estVisits * ubVisitors / visitors; - printf("%12s%12.0f%12.0f%12.0f\n\n", "Visits", estVisits, lbVisits, ubVisits); - } - - /** - * @param o object to print - */ - private static void println(final Object o) { - printf("%s\n", o.toString()); - } - - /** - * @param fmt format - * @param args arguments - */ - private static void printf(final String fmt, final Object ... args) { - //System.out.printf(fmt, args); //Enable/Disable printing here - } -} diff --git a/src/test/java/org/apache/datasketches/tuple2/aninteger/IntegerSketchTest.java b/src/test/java/org/apache/datasketches/tuple2/aninteger/IntegerSketchTest.java deleted file mode 100644 index 676d82556..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/aninteger/IntegerSketchTest.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.aninteger; - -import static org.testng.Assert.assertEquals; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.tuple2.AnotB; -import org.apache.datasketches.tuple2.CompactSketch; -import org.apache.datasketches.tuple2.Intersection; -import org.testng.annotations.Test; - -/** - * @author Lee Rhodes - */ -public class IntegerSketchTest { - - @SuppressWarnings("deprecation") - @Test - public void serDeTest() { - final int lgK = 12; - final int K = 1 << lgK; - final IntegerSummary.Mode a1Mode = IntegerSummary.Mode.AlwaysOne; - final IntegerSketch a1Sk = new IntegerSketch(lgK, a1Mode); - final int m = 2 * K; - for (int i = 0; i < m; i++) { - a1Sk.update(i, 1); - } - final double est1 = a1Sk.getEstimate(); - final MemorySegment seg = MemorySegment.ofArray(a1Sk.toByteArray()); - final IntegerSketch a1Sk2 = new IntegerSketch(seg, a1Mode); - final double est2 = a1Sk2.getEstimate(); - assertEquals(est1, est2); - } - - @Test - public void intersectTest() { - final int lgK = 12; - final int K = 1 << lgK; - final IntegerSummary.Mode a1Mode = IntegerSummary.Mode.AlwaysOne; - final IntegerSketch a1Sk1 = new IntegerSketch(lgK, a1Mode); - final IntegerSketch a1Sk2 = new IntegerSketch(lgK, a1Mode); - final int m = 2 * K; - for (int i = 0; i < m; i++) { - a1Sk1.update(i, 1); - a1Sk2.update(i + m/2, 1); - } - final Intersection inter = - new Intersection<>(new IntegerSummarySetOperations(a1Mode, a1Mode)); - inter.intersect(a1Sk1); - inter.intersect(a1Sk2); - final CompactSketch csk = 
inter.getResult(); - assertEquals(csk.getEstimate(), K * 1.0, K * .03); - } - - @Test - public void aNotBTest() { - final int lgK = 4; - final int u = 5; - final IntegerSummary.Mode a1Mode = IntegerSummary.Mode.AlwaysOne; - final IntegerSketch a1Sk1 = new IntegerSketch(lgK, a1Mode); - final IntegerSketch a1Sk2 = null;//new IntegerSketch(lgK, a1Mode); - final AnotB anotb = new AnotB<>(); - for (int i = 0; i < u; i++) { - a1Sk1.update(i, 1); - } - anotb.setA(a1Sk1); - anotb.notB(a1Sk2); - final CompactSketch cSk = anotb.getResult(true); - assertEquals((int)cSk.getEstimate(), u); - } - - @Test - public void checkMinMaxMode() { - final int lgK = 12; - final int K = 1 << lgK; - final IntegerSummary.Mode minMode = IntegerSummary.Mode.Min; - final IntegerSummary.Mode maxMode = IntegerSummary.Mode.Max; - final IntegerSketch a1Sk1 = new IntegerSketch(lgK, minMode); - final IntegerSketch a1Sk2 = new IntegerSketch(lgK, maxMode); - final int m = K / 2; - for (int key = 0; key < m; key++) { - a1Sk1.update(key, 1); - a1Sk1.update(key, 0); - a1Sk1.update(key, 2); - a1Sk2.update(key + m/2, 1); - a1Sk2.update(key + m/2, 0); - a1Sk2.update(key + m/2, 2); - } - final double est1 = a1Sk1.getEstimate(); - final double est2 = a1Sk2.getEstimate(); - assertEquals(est1, est2); - } - - @Test - public void checkStringKey() { - final int lgK = 12; - final int K = 1 << lgK; - final IntegerSummary.Mode a1Mode = IntegerSummary.Mode.AlwaysOne; - final IntegerSketch a1Sk1 = new IntegerSketch(lgK, a1Mode); - final int m = K / 2; - for (int key = 0; key < m; key++) { - a1Sk1.update(Integer.toHexString(key), 1); - } - assertEquals(a1Sk1.getEstimate(), K / 2.0); - } - - /** - * @param o object to print - */ - static void println(final Object o) { - //System.out.println(o.toString()); //Disable - } - - /** - * @param fmt format - * @param args arguments - */ - static void printf(final String fmt, final Object ... 
args) { - //System.out.printf(fmt, args); //Disable - } -} diff --git a/src/test/java/org/apache/datasketches/tuple2/aninteger/MikhailsBugTupleTest.java b/src/test/java/org/apache/datasketches/tuple2/aninteger/MikhailsBugTupleTest.java deleted file mode 100644 index edf6c6691..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/aninteger/MikhailsBugTupleTest.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.aninteger; - -import org.apache.datasketches.tuple2.AnotB; -import org.apache.datasketches.tuple2.CompactSketch; -import org.apache.datasketches.tuple2.Intersection; -import org.testng.annotations.Test; - -/** - * Issue #368, from Mikhail Lavrinovich 12 OCT 2021 - * The failure was AnotB(estimating {<1.0,1,F}, Intersect(estimating{<1.0,1,F}, newDegenerative{<1.0,0,T}, - * Which should be equal to AnotB(estimating{<1.0,1,F}, new{1.0,0,T} = estimating{<1.0, 1, F}. The AnotB - * threw a null pointer exception because it was not properly handling sketches with zero entries. 
- */ -public class MikhailsBugTupleTest { - - @Test - public void mikhailsBug() { - IntegerSketch x = new IntegerSketch(12, 2, 0.1f, IntegerSummary.Mode.Min); - IntegerSketch y = new IntegerSketch(12, 2, 0.1f, IntegerSummary.Mode.Min); - x.update(1L, 1); - IntegerSummarySetOperations setOperations = - new IntegerSummarySetOperations(IntegerSummary.Mode.Min, IntegerSummary.Mode.Min); - Intersection intersection = new Intersection<>(setOperations); - CompactSketch intersect = intersection.intersect(x, y); - AnotB.aNotB(x, intersect); // NPE was here - } - - //@Test - public void withTuple() { - IntegerSketch x = new IntegerSketch(12, 2, 0.1f, IntegerSummary.Mode.Min); - IntegerSketch y = new IntegerSketch(12, 2, 0.1f, IntegerSummary.Mode.Min); - x.update(1L, 1); - println("Tuple x: Estimating {<1.0,1,F}"); - println(x.toString()); - println("Tuple y: NewDegenerative {<1.0,0,T}"); - println(y.toString()); - IntegerSummarySetOperations setOperations = - new IntegerSummarySetOperations(IntegerSummary.Mode.Min, IntegerSummary.Mode.Min); - Intersection intersection = new Intersection<>(setOperations); - CompactSketch intersect = intersection.intersect(x, y); - println("Tuple Intersect(Estimating, NewDegen) = new {1.0, 0, T}"); - println(intersect.toString()); - CompactSketch csk = AnotB.aNotB(x, intersect); - println("Tuple AnotB(Estimating, New) = estimating {<1.0, 1, F}"); - println(csk.toString()); - } - - /** - * Println an object - * @param o object to print - */ - private static void println(Object o) { - //System.out.println(o.toString()); //disable here - } -} \ No newline at end of file diff --git a/src/test/java/org/apache/datasketches/tuple2/aninteger/ParameterLeakageTest.java b/src/test/java/org/apache/datasketches/tuple2/aninteger/ParameterLeakageTest.java deleted file mode 100644 index 12d0e52f6..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/aninteger/ParameterLeakageTest.java +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Licensed to the Apache 
Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.aninteger; - -import static org.apache.datasketches.tuple2.aninteger.IntegerSummary.Mode.Min; -import static org.apache.datasketches.tuple2.aninteger.IntegerSummary.Mode.Sum; - -import org.apache.datasketches.tuple2.AnotB; -import org.apache.datasketches.tuple2.CompactSketch; -import org.apache.datasketches.tuple2.Intersection; -//import org.apache.datasketches.tuple2.UpdatableSketch; -import org.apache.datasketches.tuple2.Sketch; -import org.apache.datasketches.tuple2.TupleSketchIterator; -import org.apache.datasketches.tuple2.Union; -import org.testng.annotations.Test; - -/** - * These tests check to make sure that no summary objects, which are mutable, and created - * as needed internally within a tuple sketch never leak into the result sketch. 
- * - * @author Lee Rhodes - * - */ -public class ParameterLeakageTest { - IntegerSummarySetOperations setOps = new IntegerSummarySetOperations(Sum, Min); - - @Test - public void checkUnion() { - IntegerSketch sk1 = new IntegerSketch(4, Sum); - sk1.update(1, 1); - IntegerSummary sk1sum = captureSummaries(sk1)[0]; - - IntegerSketch sk2 = new IntegerSketch(4, Sum); - sk2.update(2, 1); - IntegerSummary sk2sum = captureSummaries(sk2)[0]; - - - Union union = new Union<>(setOps); - - CompactSketch csk = union.union(sk1, sk2); - IntegerSummary[] summaries = captureSummaries(csk); - println("Union Count: " + summaries.length); - - for (IntegerSummary isum : summaries) { - if ((isum == sk1sum) || (isum == sk2sum)) { - throw new IllegalArgumentException("Parameter Leakage"); - } - } - } - - @Test - public void checkIntersectStateless() { - IntegerSketch sk1 = new IntegerSketch(4, Sum); - sk1.update(1, 1); - IntegerSummary sk1sum = captureSummaries(sk1)[0]; - - IntegerSketch sk2 = new IntegerSketch(4, Sum); - sk2.update(1, 1); - IntegerSummary sk2sum = captureSummaries(sk2)[0]; - - Intersection intersect = new Intersection<>(setOps); - - CompactSketch csk = intersect.intersect(sk1, sk2); - IntegerSummary[] summaries = captureSummaries(csk); - println("Intersect Stateless Count: " + summaries.length); - - for (IntegerSummary isum : summaries) { - if ((isum == sk1sum) || (isum == sk2sum)) { - throw new IllegalArgumentException("Parameter Leakage"); - } - } - } - - @Test - public void checkIntersectStateful() { - IntegerSketch sk1 = new IntegerSketch(4, Sum); - sk1.update(1, 1); - IntegerSummary sk1sum = captureSummaries(sk1)[0]; - - IntegerSketch sk2 = new IntegerSketch(4, Sum); - sk2.update(1, 1); - IntegerSummary sk2sum = captureSummaries(sk2)[0]; - - Intersection intersect = new Intersection<>(setOps); - - intersect.intersect(sk1); - intersect.intersect(sk2); - CompactSketch csk = intersect.getResult(); - - IntegerSummary[] summaries = captureSummaries(csk); - 
println("Intersect Stateful Count: " + summaries.length); - - for (IntegerSummary isum : summaries) { - if ((isum == sk1sum) || (isum == sk2sum)) { - throw new IllegalArgumentException("Parameter Leakage"); - } - } - } - - @Test - public void checkAnotbStateless() { - IntegerSketch sk1 = new IntegerSketch(4, Sum); - sk1.update(1, 1); - CompactSketch csk1 = sk1.compact(); - IntegerSummary sk1sum = captureSummaries(csk1)[0]; - - IntegerSketch sk2 = new IntegerSketch(4, Sum); //EMPTY - - CompactSketch csk = AnotB.aNotB(csk1, sk2); - IntegerSummary[] summaries = captureSummaries(csk); - println("AnotB Stateless Count: " + summaries.length); - - for (IntegerSummary isum : summaries) { - if (isum == sk1sum) { - throw new IllegalArgumentException("Parameter Leakage"); - } - } - } - - @Test - public void checkAnotbStateful() { - IntegerSketch sk1 = new IntegerSketch(4, Sum); - sk1.update(1, 1); - CompactSketch csk1 = sk1.compact(); - IntegerSummary sk1sum = captureSummaries(csk1)[0]; - - IntegerSketch sk2 = new IntegerSketch(4, Sum); //EMPTY - - AnotB anotb = new AnotB<>(); - - anotb.setA(csk1); - anotb.notB(sk2); - - CompactSketch csk = anotb.getResult(true); - IntegerSummary[] summaries = captureSummaries(csk); - println("AnotB Stateful Count: " + summaries.length); - - for (IntegerSummary isum : summaries) { - if (isum == sk1sum) { - throw new IllegalArgumentException("Parameter Leakage"); - } - } - } - - private static IntegerSummary[] captureSummaries(Sketch sk) { - int entries = sk.getRetainedEntries(); - IntegerSummary[] intSumArr = new IntegerSummary[entries]; - int cnt = 0; - TupleSketchIterator it = sk.iterator(); - while (it.next()) { - intSumArr[cnt] = it.getSummary(); - cnt++; - } - return intSumArr; - } - - /** - * @param o Object to print - */ - static void println(Object o) { - //System.out.println(o.toString()); //disable - } -} diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/AodSketchCrossLanguageTest.java 
b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/AodSketchCrossLanguageTest.java deleted file mode 100644 index 067375ec7..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/AodSketchCrossLanguageTest.java +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import static org.apache.datasketches.common.TestUtil.CHECK_CPP_FILES; -import static org.apache.datasketches.common.TestUtil.GENERATE_JAVA_FILES; -import static org.apache.datasketches.common.TestUtil.cppPath; -import static org.apache.datasketches.common.TestUtil.javaPath; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertFalse; -import static org.testng.Assert.assertTrue; - -import java.lang.foreign.MemorySegment; -import java.io.IOException; -import java.nio.file.Files; - -import org.testng.annotations.Test; - -/** - * Serialize binary sketches to be tested by C++ code. - * Test deserialization of binary sketches serialized by C++ code. 
- */ -public class AodSketchCrossLanguageTest { - - @Test(groups = {GENERATE_JAVA_FILES}) - public void generateBinariesForCompatibilityTestingOneValue() throws IOException { - final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000}; - for (int n: nArr) { - final ArrayOfDoublesUpdatableSketch sk = new ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < n; i++) { - sk.update(i, new double[] {i}); - } - Files.newOutputStream(javaPath.resolve("aod_1_n" + n + "_java.sk")).write(sk.compact().toByteArray()); - } - } - - @Test(groups = {GENERATE_JAVA_FILES}) - public void generateBinariesForCompatibilityTestingThreeValues() throws IOException { - final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000}; - for (int n: nArr) { - final ArrayOfDoublesUpdatableSketch sk = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(3).build(); - for (int i = 0; i < n; i++) { - sk.update(i, new double[] {i, i, i}); - } - Files.newOutputStream(javaPath.resolve("aod_3_n" + n + "_java.sk")).write(sk.compact().toByteArray()); - } - } - - @Test(groups = {GENERATE_JAVA_FILES}) - public void generateBinariesForCompatibilityTestingNonEmptyNoEntries() throws IOException { - final ArrayOfDoublesUpdatableSketch sk = - new ArrayOfDoublesUpdatableSketchBuilder().setSamplingProbability(0.01f).build(); - sk.update(1, new double[] {1}); - assertFalse(sk.isEmpty()); - assertEquals(sk.getRetainedEntries(), 0); - Files.newOutputStream(javaPath.resolve("aod_1_non_empty_no_entries_java.sk")).write(sk.compact().toByteArray()); - } - - @Test(groups = {CHECK_CPP_FILES}) - public void deserializeFromCppOneValue() throws IOException { - final int[] nArr = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; - for (int n: nArr) { - final byte[] bytes = Files.readAllBytes(cppPath.resolve("aod_1_n" + n + "_cpp.sk")); - final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketch.wrap(MemorySegment.ofArray(bytes)); - assertTrue(n == 0 ? 
sketch.isEmpty() : !sketch.isEmpty()); - assertEquals(sketch.getEstimate(), n, n * 0.03); - assertEquals(sketch.getNumValues(), 1); - final ArrayOfDoublesSketchIterator it = sketch.iterator(); - while (it.next()) { - assertTrue(it.getKey() < sketch.getThetaLong()); - } - } - } - - @Test(groups = {CHECK_CPP_FILES}) - public void deserializeFromCppThreeValues() throws IOException { - final int[] nArr = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; - for (int n: nArr) { - final byte[] bytes = Files.readAllBytes(cppPath.resolve("aod_3_n" + n + "_cpp.sk")); - final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketch.wrap(MemorySegment.ofArray(bytes)); - assertTrue(n == 0 ? sketch.isEmpty() : !sketch.isEmpty()); - assertEquals(sketch.getEstimate(), n, n * 0.03); - assertEquals(sketch.getNumValues(), 3); - final ArrayOfDoublesSketchIterator it = sketch.iterator(); - while (it.next()) { - assertTrue(it.getKey() < sketch.getThetaLong()); - assertEquals(it.getValues()[0], it.getValues()[1]); - assertEquals(it.getValues()[0], it.getValues()[2]); - } - } - } - - @Test(groups = {CHECK_CPP_FILES}) - public void deserializeFromCppOneValueNonEmptyNoEntries() throws IOException { - final byte[] bytes = Files.readAllBytes(cppPath.resolve("aod_1_non_empty_no_entries_cpp.sk")); - final ArrayOfDoublesSketch sketch = ArrayOfDoublesSketch.wrap(MemorySegment.ofArray(bytes)); - assertFalse(sketch.isEmpty()); - assertEquals(sketch.getRetainedEntries(), 0); - } - -} diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBTest.java deleted file mode 100644 index dde5f28b9..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesAnotBTest.java +++ /dev/null @@ -1,323 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import static org.testng.Assert.fail; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.common.SketchesArgumentException; -import org.testng.Assert; -import org.testng.annotations.Test; - -public class ArrayOfDoublesAnotBTest { - - @Test - public void nullOrEmptyInput() { - // calling getResult() before calling update() should yield an empty set - ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); - ArrayOfDoublesCompactSketch result = aNotB.getResult(); - Assert.assertTrue(result.isEmpty()); - Assert.assertEquals(result.getRetainedEntries(), 0); - Assert.assertEquals(result.getEstimate(), 0.0); - Assert.assertEquals(result.getLowerBound(1), 0.0); - Assert.assertEquals(result.getUpperBound(1), 0.0); - - ArrayOfDoublesSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build(); - try { - aNotB.update(sketch, null); - fail(); - } catch (SketchesArgumentException e) {} - - try { - aNotB.update(null, sketch); - fail(); - } catch (SketchesArgumentException e) {} - - aNotB.update(sketch, sketch); - result = aNotB.getResult(); - Assert.assertTrue(result.isEmpty()); - 
Assert.assertEquals(result.getRetainedEntries(), 0); - Assert.assertEquals(result.getEstimate(), 0.0); - Assert.assertEquals(result.getLowerBound(1), 0.0); - Assert.assertEquals(result.getUpperBound(1), 0.0); - } - - @Test - public void emptyA() { - ArrayOfDoublesUpdatableSketch sketchB = new ArrayOfDoublesUpdatableSketchBuilder().build(); - sketchB.update(1, new double[] {1.0}); - sketchB.update(2, new double[] {1.0}); - sketchB.update(3, new double[] {1.0}); - sketchB.update(4, new double[] {1.0}); - sketchB.update(5, new double[] {1.0}); - ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); - - ArrayOfDoublesSketch sketchA = new ArrayOfDoublesUpdatableSketchBuilder().build(); - aNotB.update(sketchA, sketchB); - ArrayOfDoublesCompactSketch result = aNotB.getResult(); - Assert.assertTrue(result.isEmpty()); - Assert.assertEquals(result.getRetainedEntries(), 0); - Assert.assertEquals(result.getEstimate(), 0.0); - Assert.assertEquals(result.getLowerBound(1), 0.0); - Assert.assertEquals(result.getUpperBound(1), 0.0); - } - - @Test - public void emptyB() { - ArrayOfDoublesUpdatableSketch sketchA = new ArrayOfDoublesUpdatableSketchBuilder().build(); - sketchA.update(1, new double[] {1.0}); - sketchA.update(2, new double[] {1.0}); - sketchA.update(3, new double[] {1.0}); - sketchA.update(4, new double[] {1.0}); - sketchA.update(5, new double[] {1.0}); - ArrayOfDoublesSketch sketchB = new ArrayOfDoublesUpdatableSketchBuilder().build(); - - ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); - aNotB.update(sketchA, sketchB); - ArrayOfDoublesCompactSketch result = aNotB.getResult(); - Assert.assertFalse(result.isEmpty()); - Assert.assertEquals(result.getRetainedEntries(), 5); - Assert.assertEquals(result.getEstimate(), 5.0); - Assert.assertEquals(result.getLowerBound(1), 5.0); - Assert.assertEquals(result.getUpperBound(1), 5.0); - - ArrayOfDoublesSketchIterator it = result.iterator(); - while (it.next()) { - 
Assert.assertEquals(it.getValues(), new double[] {1}); - } - } - - @Test - public void aSameAsB() { - ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build(); - sketch.update(1, new double[] {1.0}); - sketch.update(2, new double[] {1.0}); - sketch.update(3, new double[] {1.0}); - sketch.update(4, new double[] {1.0}); - sketch.update(5, new double[] {1.0}); - ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); - aNotB.update(sketch, sketch); - ArrayOfDoublesCompactSketch result = aNotB.getResult(); - Assert.assertTrue(result.isEmpty()); - Assert.assertEquals(result.getRetainedEntries(), 0); - Assert.assertEquals(result.getEstimate(), 0.0); - Assert.assertEquals(result.getLowerBound(1), 0.0); - Assert.assertEquals(result.getUpperBound(1), 0.0); - } - - @Test - public void exactMode() { - ArrayOfDoublesUpdatableSketch sketchA = new ArrayOfDoublesUpdatableSketchBuilder().build(); - sketchA.update(1, new double[] {1}); - sketchA.update(2, new double[] {1}); - sketchA.update(3, new double[] {1}); - sketchA.update(4, new double[] {1}); - sketchA.update(5, new double[] {1}); - - ArrayOfDoublesUpdatableSketch sketchB = new ArrayOfDoublesUpdatableSketchBuilder().build(); - sketchB.update(3, new double[] {1}); - sketchB.update(4, new double[] {1}); - sketchB.update(5, new double[] {1}); - sketchB.update(6, new double[] {1}); - sketchB.update(7, new double[] {1}); - - ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); - aNotB.update(sketchA, sketchB); - ArrayOfDoublesCompactSketch result = aNotB.getResult(); - Assert.assertFalse(result.isEmpty()); - Assert.assertEquals(result.getRetainedEntries(), 2); - Assert.assertEquals(result.getEstimate(), 2.0); - Assert.assertEquals(result.getLowerBound(1), 2.0); - Assert.assertEquals(result.getUpperBound(1), 2.0); - ArrayOfDoublesSketchIterator it = result.iterator(); - while (it.next()) { - Assert.assertEquals(it.getValues(), new double[] 
{1}); - } - } - - @Test - public void exactModeTwoDoubles() { - ArrayOfDoublesUpdatableSketchBuilder bldr = new ArrayOfDoublesUpdatableSketchBuilder(); - bldr.setNominalEntries(16); - bldr.setNumberOfValues(2); - bldr.setResizeFactor(ResizeFactor.X1); - - double[] valuesArr1 = {1.0, 2.0}; - double[] valuesArr2 = {2.0, 4.0}; - ArrayOfDoublesUpdatableSketch sketch1 = bldr.build(); - sketch1.update("a", valuesArr1); - sketch1.update("b", valuesArr2); - sketch1.update("c", valuesArr1); - sketch1.update("d", valuesArr1); - ArrayOfDoublesUpdatableSketch sketch2 = bldr.build(); - sketch2.update("c", valuesArr2); - sketch2.update("d", valuesArr2); - ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); - aNotB.update(sketch1, sketch2); - ArrayOfDoublesCompactSketch result = aNotB.getResult(); - Assert.assertFalse(result.isEmpty()); - Assert.assertEquals(result.getRetainedEntries(), 2); - double[] resultArr = new double[] {2.0,4.0,1.0,2.0}; //order specific to this test - Assert.assertEquals(result.getValuesAsOneDimension(), resultArr); - } - - @Test - public void exactModeCustomSeed() { - long seed = 1234567890; - ArrayOfDoublesUpdatableSketch sketchA = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed).build(); - sketchA.update(1, new double[] {1}); - sketchA.update(2, new double[] {1}); - sketchA.update(3, new double[] {1}); - sketchA.update(4, new double[] {1}); - sketchA.update(5, new double[] {1}); - - ArrayOfDoublesUpdatableSketch sketchB = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed).build(); - sketchB.update(3, new double[] {1}); - sketchB.update(4, new double[] {1}); - sketchB.update(5, new double[] {1}); - sketchB.update(6, new double[] {1}); - sketchB.update(7, new double[] {1}); - - ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().setSeed(seed).buildAnotB(); - aNotB.update(sketchA, sketchB); - ArrayOfDoublesCompactSketch result = aNotB.getResult(); - Assert.assertFalse(result.isEmpty()); - 
Assert.assertEquals(result.getRetainedEntries(), 2); - Assert.assertEquals(result.getEstimate(), 2.0); - Assert.assertEquals(result.getLowerBound(1), 2.0); - Assert.assertEquals(result.getUpperBound(1), 2.0); - ArrayOfDoublesSketchIterator it = result.iterator(); - while (it.next()) { - Assert.assertEquals(it.getValues(), new double[] {1}); - } - } - - @Test - public void estimationMode() { - int key = 0; - ArrayOfDoublesUpdatableSketch sketchA = new ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < 8192; i++) { - sketchA.update(key++, new double[] {1}); - } - - key -= 4096; // overlap half of the entries - ArrayOfDoublesUpdatableSketch sketchB = new ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < 8192; i++) { - sketchB.update(key++, new double[] {1}); - } - - ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); - aNotB.update(sketchA, sketchB); - ArrayOfDoublesCompactSketch result = aNotB.getResult(); - Assert.assertFalse(result.isEmpty()); - Assert.assertEquals(result.getEstimate(), 4096.0, 4096 * 0.03); // crude estimate of RSE(95%) = 2 / sqrt(result.getRetainedEntries()) - Assert.assertTrue(result.getLowerBound(1) <= result.getEstimate()); - Assert.assertTrue(result.getUpperBound(1) > result.getEstimate()); - ArrayOfDoublesSketchIterator it = result.iterator(); - while (it.next()) { - Assert.assertEquals(it.getValues(), new double[] {1}); - } - - // same operation, but compact sketches and off-heap result - aNotB.update(sketchA.compact(), sketchB.compact()); - result = aNotB.getResult(MemorySegment.ofArray(new byte[1000000])); - Assert.assertFalse(result.isEmpty()); - Assert.assertEquals(result.getEstimate(), 4096.0, 4096 * 0.03); // crude estimate of RSE(95%) = 2 / sqrt(result.getRetainedEntries()) - Assert.assertTrue(result.getLowerBound(1) <= result.getEstimate()); - Assert.assertTrue(result.getUpperBound(1) > result.getEstimate()); - it = result.iterator(); - while (it.next()) { - 
Assert.assertEquals(it.getValues(), new double[] {1}); - } - } - - @Test - public void estimationModeLargeB() { - int key = 0; - ArrayOfDoublesUpdatableSketch sketchA = new ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < 10000; i++) { - sketchA.update(key++, new double[] {1}); - } - - key -= 2000; // overlap - ArrayOfDoublesUpdatableSketch sketchB = new ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < 100000; i++) { - sketchB.update(key++, new double[] {1}); - } - - final int expected = 10000 - 2000; - ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); - aNotB.update(sketchA, sketchB); - ArrayOfDoublesCompactSketch result = aNotB.getResult(); - Assert.assertFalse(result.isEmpty()); - Assert.assertEquals(result.getEstimate(), expected, expected * 0.1); // crude estimate of RSE(95%) = 2 / sqrt(result.getRetainedEntries()) - Assert.assertTrue(result.getLowerBound(1) <= result.getEstimate()); - Assert.assertTrue(result.getUpperBound(1) > result.getEstimate()); - ArrayOfDoublesSketchIterator it = result.iterator(); - while (it.next()) { - Assert.assertEquals(it.getValues(), new double[] {1}); - } - - // same operation, but compact sketches and off-heap result - aNotB.update(sketchA.compact(), sketchB.compact()); - result = aNotB.getResult(MemorySegment.ofArray(new byte[1000000])); - Assert.assertFalse(result.isEmpty()); - Assert.assertEquals(result.getEstimate(), expected, expected * 0.1); // crude estimate of RSE(95%) = 2 / sqrt(result.getRetainedEntries()) - Assert.assertTrue(result.getLowerBound(1) <= result.getEstimate()); - Assert.assertTrue(result.getUpperBound(1) > result.getEstimate()); - it = result.iterator(); - while (it.next()) { - Assert.assertEquals(it.getValues(), new double[] {1}); - } - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void incompatibleSeedA() { - ArrayOfDoublesSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(1).build(); 
- ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); - aNotB.update(sketch, null); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void incompatibleSeedB() { - ArrayOfDoublesSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(1).build(); - ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); - aNotB.update(null, sketch); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void incompatibleSeeds() { - ArrayOfDoublesSketch sketchA = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(1).build(); - ArrayOfDoublesSketch sketchB = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(2).build(); - ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().setSeed(3).buildAnotB(); - aNotB.update(sketchA, sketchB); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void incompatibleNumValues() { - ArrayOfDoublesSketch sketchA = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(1).build(); - ArrayOfDoublesSketch sketchB = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build(); - ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().setSeed(3).buildAnotB(); - aNotB.update(sketchA, sketchB); - } - -} diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java deleted file mode 100644 index 49699311d..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesCompactSketchTest.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import static org.testng.Assert.assertEquals; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.Util; -import org.testng.Assert; -import org.testng.annotations.Test; - -public class ArrayOfDoublesCompactSketchTest { - - @Test - public void heapToDirectExactTwoDoubles() { - ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build(); - sketch1.update("a", new double[] {1, 2}); - sketch1.update("b", new double[] {1, 2}); - sketch1.update("c", new double[] {1, 2}); - sketch1.update("d", new double[] {1, 2}); - sketch1.update("a", new double[] {1, 2}); - sketch1.update("b", new double[] {1, 2}); - sketch1.update("c", new double[] {1, 2}); - sketch1.update("d", new double[] {1, 2}); - ArrayOfDoublesCompactSketch csk = sketch1.compact(); - MemorySegment seg = MemorySegment.ofArray(csk.toByteArray()); - ArrayOfDoublesSketch sketch2 = new DirectArrayOfDoublesCompactSketch(seg); - Assert.assertFalse(sketch2.isEmpty()); - Assert.assertFalse(sketch2.isEstimationMode()); - Assert.assertEquals(sketch2.getEstimate(), 4.0); - Assert.assertEquals(sketch2.getUpperBound(1), 4.0); - Assert.assertEquals(sketch2.getLowerBound(1), 4.0); - Assert.assertEquals(sketch2.getThetaLong(), Long.MAX_VALUE); - Assert.assertEquals(sketch2.getTheta(), 1.0); - double[][] 
values = sketch2.getValues(); - Assert.assertEquals(values.length, 4); - for (double[] array: values) { - Assert.assertEquals(array.length, 2); - Assert.assertEquals(array[0], 2.0); - Assert.assertEquals(array[1], 4.0); - } - } - - @Test - public void directToHeapExactTwoDoubles() { - ArrayOfDoublesUpdatableSketch sketch1 = - new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build(MemorySegment.ofArray(new byte[1000000])); - sketch1.update("a", new double[] {1, 2}); - sketch1.update("b", new double[] {1, 2}); - sketch1.update("c", new double[] {1, 2}); - sketch1.update("d", new double[] {1, 2}); - sketch1.update("a", new double[] {1, 2}); - sketch1.update("b", new double[] {1, 2}); - sketch1.update("c", new double[] {1, 2}); - sketch1.update("d", new double[] {1, 2}); - ArrayOfDoublesSketch sketch2 = - new HeapArrayOfDoublesCompactSketch( - MemorySegment.ofArray(sketch1.compact(MemorySegment.ofArray(new byte[1000000])).toByteArray())); - Assert.assertFalse(sketch2.isEmpty()); - Assert.assertFalse(sketch2.isEstimationMode()); - Assert.assertEquals(sketch2.getEstimate(), 4.0); - Assert.assertEquals(sketch2.getUpperBound(1), 4.0); - Assert.assertEquals(sketch2.getLowerBound(1), 4.0); - Assert.assertEquals(sketch2.getThetaLong(), Long.MAX_VALUE); - Assert.assertEquals(sketch2.getTheta(), 1.0); - double[][] values = sketch2.getValues(); - Assert.assertEquals(values.length, 4); - for (double[] array: values) { - Assert.assertEquals(array.length, 2); - Assert.assertEquals(array[0], 2.0); - Assert.assertEquals(array[1], 4.0); - } - } - - @SuppressWarnings("unused") - @Test - public void checkGetValuesAndKeysMethods() { - ArrayOfDoublesUpdatableSketchBuilder bldr = new ArrayOfDoublesUpdatableSketchBuilder(); - bldr.setNominalEntries(16).setNumberOfValues(2); - - HeapArrayOfDoublesQuickSelectSketch hqssk = (HeapArrayOfDoublesQuickSelectSketch) bldr.build(); - hqssk.update("a", new double[] {1, 2}); - hqssk.update("b", new double[] {3, 4}); - hqssk.update("c", 
new double[] {5, 6}); - hqssk.update("d", new double[] {7, 8}); - final double[][] values = hqssk.getValues(); - final double[] values1d = hqssk.getValuesAsOneDimension(); - final long[] keys = hqssk.getKeys(); - - HeapArrayOfDoublesCompactSketch hcsk = (HeapArrayOfDoublesCompactSketch)hqssk.compact(); - final double[][] values2 = hcsk.getValues(); - final double[] values1d2 = hcsk.getValuesAsOneDimension(); - final long[] keys2 = hcsk.getKeys(); - assertEquals(values2, values); - assertEquals(values1d2, values1d); - assertEquals(keys2, keys); - - MemorySegment hqsskSeg = MemorySegment.ofArray(hqssk.toByteArray()); - - DirectArrayOfDoublesQuickSelectSketchR dqssk = - (DirectArrayOfDoublesQuickSelectSketchR)ArrayOfDoublesSketch.wrap(hqsskSeg, Util.DEFAULT_UPDATE_SEED); - final double[][] values3 = dqssk.getValues(); - final double[] values1d3 = dqssk.getValuesAsOneDimension(); - final long[] keys3 = dqssk.getKeys(); - assertEquals(values3, values); - assertEquals(values1d3, values1d); - assertEquals(keys3, keys); - - MemorySegment hcskSeg = MemorySegment.ofArray(hcsk.toByteArray()); - - DirectArrayOfDoublesCompactSketch dcsk2 = - (DirectArrayOfDoublesCompactSketch)ArrayOfDoublesSketch.wrap(hcskSeg, Util.DEFAULT_UPDATE_SEED); - final double[][] values4 = dqssk.getValues(); - final double[] values1d4 = dqssk.getValuesAsOneDimension(); - final long[] keys4 = dqssk.getKeys(); - assertEquals(values4, values); - assertEquals(values1d4, values1d); - assertEquals(keys4, keys); - } - -} diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersectionTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersectionTest.java deleted file mode 100644 index 94329f935..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesIntersectionTest.java +++ /dev/null @@ -1,311 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license 
agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import static org.testng.Assert.fail; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.testng.Assert; -import org.testng.annotations.Test; - -public class ArrayOfDoublesIntersectionTest { - - private static ArrayOfDoublesCombiner combiner = new ArrayOfDoublesCombiner() { - - @Override - public double[] combine(final double[] a, final double[] b) { - for (int i = 0; i < a.length; i++) { - a[i] += b[i]; - } - return a; - } - }; - - @Test - public void nullInput() { - final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().buildIntersection(); - try { - intersection.intersect(null, null); - fail(); - } catch (SketchesArgumentException e) {} - - } - - @Test - public void empty() { - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().buildIntersection(); - intersection.intersect(sketch1, null); - final ArrayOfDoublesCompactSketch result = intersection.getResult(); - Assert.assertTrue(result.isEmpty()); - Assert.assertEquals(result.getRetainedEntries(), 0); - 
Assert.assertEquals(result.getEstimate(), 0.0); - Assert.assertEquals(result.getLowerBound(1), 0.0); - Assert.assertEquals(result.getUpperBound(1), 0.0); - Assert.assertEquals(result.getValues().length, 0); - } - - @Test - public void degenerateWithExact() { - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setSamplingProbability(0.01f).build(); - sketch1.update("a", new double[] {1}); // this happens to get rejected because of sampling with low probability - final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - sketch2.update(1, new double[] {1}); - - final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().buildIntersection(); - intersection.intersect(sketch1, null); - intersection.intersect(sketch2, null); - final ArrayOfDoublesCompactSketch result = intersection.getResult(); - Assert.assertFalse(result.isEmpty()); //Degenerate - Assert.assertEquals(result.getRetainedEntries(), 0); - Assert.assertEquals(result.getEstimate(), 0.0); - Assert.assertEquals(result.getLowerBound(1), 0.0); - Assert.assertEquals(result.getUpperBound(1), 184.0); - Assert.assertEquals(result.getValues().length, 0); - } - - @Test - public void heapExactWithEmpty() { - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - sketch1.update(1, new double[] {1}); - sketch1.update(2, new double[] {1}); - sketch1.update(3, new double[] {1}); - - final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - - final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().buildIntersection(); - intersection.intersect(sketch1, null); - intersection.intersect(sketch2, null); - final ArrayOfDoublesCompactSketch result = intersection.getResult(); - Assert.assertTrue(result.isEmpty()); - Assert.assertEquals(result.getRetainedEntries(), 0); - 
Assert.assertEquals(result.getEstimate(), 0.0); - Assert.assertEquals(result.getLowerBound(1), 0.0); - Assert.assertEquals(result.getUpperBound(1), 0.0); - } - - @Test - public void directExactWithEmpty() { - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder() - .build(MemorySegment.ofArray(new byte[1000000])); - sketch1.update(1, new double[] {1}); - sketch1.update(2, new double[] {1}); - sketch1.update(3, new double[] {1}); - - final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder() - .build(MemorySegment.ofArray(new byte[1000000])); - - final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder(). - buildIntersection(MemorySegment.ofArray(new byte[1000000])); - intersection.intersect(sketch1, null); - intersection.intersect(sketch2, null); - final ArrayOfDoublesCompactSketch result = intersection.getResult(MemorySegment.ofArray(new byte[1000000])); - Assert.assertTrue(result.isEmpty()); - Assert.assertEquals(result.getRetainedEntries(), 0); - Assert.assertEquals(result.getEstimate(), 0.0); - Assert.assertEquals(result.getLowerBound(1), 0.0); - Assert.assertEquals(result.getUpperBound(1), 0.0); - } - - @Test - public void heapExactMode() { - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - sketch1.update(1, new double[] {1}); - sketch1.update(1, new double[] {1}); - sketch1.update(2, new double[] {1}); - sketch1.update(2, new double[] {1}); - - final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - sketch2.update(2, new double[] {1}); - sketch2.update(2, new double[] {1}); - sketch2.update(3, new double[] {1}); - sketch2.update(3, new double[] {1}); - - final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().buildIntersection(); - intersection.intersect(sketch1, combiner); - intersection.intersect(sketch2, combiner); - 
ArrayOfDoublesCompactSketch result = intersection.getResult(); - Assert.assertFalse(result.isEmpty()); - Assert.assertEquals(result.getRetainedEntries(), 1); - Assert.assertEquals(result.getEstimate(), 1.0); - Assert.assertEquals(result.getLowerBound(1), 1.0); - Assert.assertEquals(result.getUpperBound(1), 1.0); - final double[][] values = result.getValues(); - for (int i = 0; i < values.length; i++) { - Assert.assertEquals(values[i][0], 4.0); - } - - intersection.reset(); - try { - intersection.intersect(null, null); - fail(); - } catch (SketchesArgumentException e) { } - } - - @Test - public void heapDisjointEstimationMode() { - int key = 0; - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < 8192; i++) { - sketch1.update(key++, new double[] {1.0}); - } - - final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < 8192; i++) { - sketch2.update(key++, new double[] {1.0}); - } - - final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().buildIntersection(); - intersection.intersect(sketch1, combiner); - intersection.intersect(sketch2, combiner); - final ArrayOfDoublesCompactSketch result = intersection.getResult(); - Assert.assertFalse(result.isEmpty()); //Degenerate case - Assert.assertEquals(result.getRetainedEntries(), 0); - Assert.assertEquals(result.getEstimate(), 0.0); - Assert.assertEquals(result.getLowerBound(1), 0.0); - Assert.assertEquals(result.getUpperBound(1), 3.0); - Assert.assertEquals(result.getValues().length, 0); - Assert.assertTrue(result.thetaLong_ < Long.MAX_VALUE); - } - - @Test - public void directDisjointEstimationMode() { - int key = 0; - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder(). 
- build(MemorySegment.ofArray(new byte[1000000])); - for (int i = 0; i < 8192; i++) { - sketch1.update(key++, new double[] {1.0}); - } - - final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder(). - build(MemorySegment.ofArray(new byte[1000000])); - for (int i = 0; i < 8192; i++) { - sketch2.update(key++, new double[] {1.0}); - } - - final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder(). - buildIntersection(MemorySegment.ofArray(new byte[1000000])); - intersection.intersect(sketch1, combiner); - intersection.intersect(sketch2, combiner); - final ArrayOfDoublesCompactSketch result = intersection.getResult(MemorySegment.ofArray(new byte[1000000])); - Assert.assertFalse(result.isEmpty()); - Assert.assertEquals(result.getRetainedEntries(), 0); - Assert.assertEquals(result.getEstimate(), 0.0); - Assert.assertEquals(result.getLowerBound(1), 0.0); - Assert.assertEquals(result.getUpperBound(1), 3.0); - Assert.assertEquals(result.getValues().length, 0); - } - - @Test - public void heapEstimationMode() { - int key = 0; - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < 8192; i++) { - sketch1.update(key++, new double[] {1.0}); - } - - key -= 4096; // overlap half of the entries - final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < 8192; i++) { - sketch2.update(key++, new double[] {1.0}); - } - - final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().buildIntersection(); - intersection.intersect(sketch1, combiner); - intersection.intersect(sketch2, combiner); - final ArrayOfDoublesCompactSketch result = intersection.getResult(); - Assert.assertFalse(result.isEmpty()); - Assert.assertEquals(result.getEstimate(), 4096.0, 4096 * 0.03); // crude estimate of RSE(95%) = 2 / sqrt(result.getRetainedEntries()) - 
Assert.assertTrue(result.getLowerBound(1) <= result.getEstimate()); - Assert.assertTrue(result.getUpperBound(1) > result.getEstimate()); - final double[][] values = result.getValues(); - for (int i = 0; i < values.length; i++) { - Assert.assertEquals(values[i][0], 2.0); - } - } - - @Test - public void directEstimationMode() { - int key = 0; - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); - for (int i = 0; i < 8192; i++) { - sketch1.update(key++, new double[] {1.0}); - } - - key -= 4096; // overlap half of the entries - final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); - for (int i = 0; i < 8192; i++) { - sketch2.update(key++, new double[] {1.0}); - } - - final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().buildIntersection(MemorySegment.ofArray(new byte[1000000])); - intersection.intersect(sketch1, combiner); - intersection.intersect(sketch2, combiner); - final ArrayOfDoublesCompactSketch result = intersection.getResult(MemorySegment.ofArray(new byte[1000000])); - Assert.assertFalse(result.isEmpty()); - Assert.assertEquals(result.getEstimate(), 4096.0, 4096 * 0.03); // crude estimate of RSE(95%) = 2 / sqrt(result.getRetainedEntries()) - Assert.assertTrue(result.getLowerBound(1) <= result.getEstimate()); - Assert.assertTrue(result.getUpperBound(1) > result.getEstimate()); - final double[][] values = result.getValues(); - for (int i = 0; i < values.length; i++) { - Assert.assertEquals(values[i][0], 2.0); - } - } - - @Test - public void heapExactModeCustomSeed() { - final long seed = 1234567890; - - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed).build(); - sketch1.update(1, new double[] {1}); - sketch1.update(1, new double[] {1}); - sketch1.update(2, new double[] {1}); - sketch1.update(2, new double[] 
{1}); - - final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed).build(); - sketch2.update(2, new double[] {1}); - sketch2.update(2, new double[] {1}); - sketch2.update(3, new double[] {1}); - sketch2.update(3, new double[] {1}); - - final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().setSeed(seed).buildIntersection(); - intersection.intersect(sketch1, combiner); - intersection.intersect(sketch2, combiner); - final ArrayOfDoublesCompactSketch result = intersection.getResult(); - Assert.assertFalse(result.isEmpty()); - Assert.assertEquals(result.getRetainedEntries(), 1); - Assert.assertEquals(result.getEstimate(), 1.0); - Assert.assertEquals(result.getLowerBound(1), 1.0); - Assert.assertEquals(result.getUpperBound(1), 1.0); - final double[][] values = result.getValues(); - for (int i = 0; i < values.length; i++) { - Assert.assertEquals(values[i][0], 4.0); - } - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void incompatibleSeeds() { - final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(1).build(); - final ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().setSeed(2).buildIntersection(); - intersection.intersect(sketch, combiner); - } -} diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketchTest.java deleted file mode 100644 index 4992aa4c5..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesQuickSelectSketchTest.java +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.testng.Assert; -import org.testng.annotations.Test; - -public class ArrayOfDoublesQuickSelectSketchTest { - - @Test(expectedExceptions = SketchesArgumentException.class) - public void invalidSamplingProbability() { - new ArrayOfDoublesUpdatableSketchBuilder().setSamplingProbability(2f); - } - - @Test - public void heapToDirectExactTwoDoubles() { - double[] valuesArr = {1.0, 2.0}; - ArrayOfDoublesUpdatableSketch sketch1 = - new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build(); - sketch1.update("a", valuesArr); - sketch1.update("b", valuesArr); - sketch1.update("c", valuesArr); - sketch1.update("d", valuesArr); - sketch1.update("a", valuesArr); - noopUpdates(sketch1, valuesArr); - ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.wrap(MemorySegment.ofArray(sketch1.toByteArray())); - sketch2.update("b", valuesArr); - sketch2.update("c", valuesArr); - sketch2.update("d", valuesArr); - Assert.assertFalse(sketch2.isEmpty()); - Assert.assertFalse(sketch2.isEstimationMode()); - Assert.assertEquals(sketch2.getEstimate(), 4.0); - Assert.assertEquals(sketch2.getUpperBound(1), 4.0); - Assert.assertEquals(sketch2.getLowerBound(1), 4.0); - 
Assert.assertEquals(sketch2.getThetaLong(), Long.MAX_VALUE); - Assert.assertEquals(sketch2.getTheta(), 1.0); - double[][] values = sketch2.getValues(); - Assert.assertEquals(values.length, 4); - for (double[] array: values) { - Assert.assertEquals(array.length, 2); - Assert.assertEquals(array[0], 2.0); - Assert.assertEquals(array[1], 4.0); - } - } - - @Test - public void heapToDirectWithSeed() { - long seed = 1; - double[] values = {1.0}; - - ArrayOfDoublesUpdatableSketch sketch1 = - new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed).build(); - sketch1.update("a", values); - sketch1.update("b", values); - sketch1.update("c", values); - - ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.wrap(MemorySegment.ofArray(sketch1.toByteArray()), seed); - sketch2.update("b", values); - sketch2.update("c", values); - sketch2.update("d", values); - - Assert.assertEquals(sketch2.getEstimate(), 4.0); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkInsertExceptions() { - ArrayOfDoublesUpdatableSketch sketch1 = - new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build(); - sketch1.update("a", new double[] {1.0}); - } - - @Test - public void directToHeapExactTwoDoubles() { - double[] valuesArr = {1.0, 2.0}; - ArrayOfDoublesUpdatableSketch sketch1 = - new ArrayOfDoublesUpdatableSketchBuilder(). 
- setNumberOfValues(2).build(MemorySegment.ofArray(new byte[1000000])); - sketch1.update("a", valuesArr); - sketch1.update("b", valuesArr); - sketch1.update("c", valuesArr); - sketch1.update("d", valuesArr); - sketch1.update("a", valuesArr); - noopUpdates(sketch1, valuesArr); - ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.heapify(MemorySegment.ofArray(sketch1.toByteArray())); - sketch2.update("b", valuesArr); - sketch2.update("c", valuesArr); - sketch2.update("d", valuesArr); - Assert.assertFalse(sketch2.isEmpty()); - Assert.assertFalse(sketch2.isEstimationMode()); - Assert.assertEquals(sketch2.getEstimate(), 4.0); - Assert.assertEquals(sketch2.getUpperBound(1), 4.0); - Assert.assertEquals(sketch2.getLowerBound(1), 4.0); - Assert.assertEquals(sketch2.getThetaLong(), Long.MAX_VALUE); - Assert.assertEquals(sketch2.getTheta(), 1.0); - double[][] values = sketch2.getValues(); - Assert.assertEquals(values.length, 4); - for (double[] array: values) { - Assert.assertEquals(array.length, 2); - Assert.assertEquals(array[0], 2.0); - Assert.assertEquals(array[1], 4.0); - } - } - - @Test - public void directToHeapWithSeed() { - long seed = 1; - double[] values = {1.0}; - - ArrayOfDoublesUpdatableSketch sketch1 = - new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed).build( - MemorySegment.ofArray(new byte[1000000])); - sketch1.update("a", values); - sketch1.update("b", values); - sketch1.update("c", values); - - ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.heapify(MemorySegment.ofArray(sketch1.toByteArray()), seed); - sketch2.update("b", values); - sketch2.update("c", values); - sketch2.update("d", values); - - Assert.assertEquals(sketch2.getEstimate(), 4.0); - } - - @Test - public void maxBytes() { - Assert.assertEquals(ArrayOfDoublesQuickSelectSketch.getMaxBytes(1024, 2), 49184); - } - - private static void noopUpdates(ArrayOfDoublesUpdatableSketch sketch, double[] valuesArr) { - byte[] byteArr = null; - 
sketch.update(byteArr, valuesArr); - byteArr = new byte[0]; - sketch.update(byteArr, valuesArr); - int[] intArr = null; - sketch.update(intArr, valuesArr); - intArr = new int[0]; - sketch.update(intArr, valuesArr); - long[] longArr = null; - sketch.update(longArr, valuesArr); - longArr = new long[0]; - sketch.update(longArr, valuesArr); - } - -} diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnionTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnionTest.java deleted file mode 100644 index ace9e02c0..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/ArrayOfDoublesUnionTest.java +++ /dev/null @@ -1,513 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import java.lang.foreign.MemorySegment; -import java.util.Arrays; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.testng.Assert; -import org.testng.annotations.Test; - -public class ArrayOfDoublesUnionTest { - - @Test - public void heapExactMode() { - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - sketch1.update(1, new double[] {1.0}); - sketch1.update(1, new double[] {1.0}); - sketch1.update(1, new double[] {1.0}); - sketch1.update(2, new double[] {1.0}); - - final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - sketch2.update(2, new double[] {1.0}); - sketch2.update(2, new double[] {1.0}); - sketch2.update(3, new double[] {1.0}); - sketch2.update(3, new double[] {1.0}); - sketch2.update(3, new double[] {1.0}); - - final ArrayOfDoublesUnion union = new ArrayOfDoublesSetOperationBuilder().buildUnion(); - union.union(sketch1); - union.union(sketch2); - final int maxBytes = ArrayOfDoublesUnion.getMaxBytes( - ArrayOfDoublesSetOperationBuilder.DEFAULT_NOMINAL_ENTRIES, - ArrayOfDoublesSetOperationBuilder.DEFAULT_NUMBER_OF_VALUES); - Assert.assertEquals(maxBytes, 131120); // 48 bytes preamble + 2 * nominal entries * (key size + value size) - ArrayOfDoublesCompactSketch result = union.getResult(); - Assert.assertEquals(result.getEstimate(), 3.0); - double[][] values = result.getValues(); - Assert.assertEquals(values[0][0], 3.0); - Assert.assertEquals(values[1][0], 3.0); - Assert.assertEquals(values[2][0], 3.0); - - final MemorySegment wseg = MemorySegment.ofArray(union.toByteArray()); - final ArrayOfDoublesUnion wrappedUnion = ArrayOfDoublesSketches.wrapUnion(wseg); - result = wrappedUnion.getResult(); - Assert.assertEquals(result.getEstimate(), 3.0); - values = result.getValues(); - Assert.assertEquals(values[0][0], 3.0); - Assert.assertEquals(values[1][0], 3.0); - 
Assert.assertEquals(values[2][0], 3.0); - - - union.reset(); - result = union.getResult(); - Assert.assertTrue(result.isEmpty()); - Assert.assertFalse(result.isEstimationMode()); - Assert.assertEquals(result.getEstimate(), 0.0); - Assert.assertEquals(result.getUpperBound(1), 0.0); - Assert.assertEquals(result.getLowerBound(1), 0.0); - Assert.assertEquals(result.getTheta(), 1.0); - } - - @Test - public void heapEstimationMode() { - int key = 0; - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < 8192; i++) { - sketch1.update(key++, new double[] {1.0}); - } - - key -= 4096; // overlap half of the entries - final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < 8192; i++) { - sketch2.update(key++, new double[] {1.0}); - } - - final ArrayOfDoublesUnion union = new ArrayOfDoublesSetOperationBuilder().buildUnion(); - union.union(sketch1); - union.union(sketch2); - ArrayOfDoublesCompactSketch result = union.getResult(); - Assert.assertFalse(result.isEmpty()); - Assert.assertTrue(result.isEstimationMode()); - Assert.assertEquals(result.getEstimate(), 12288.0, 12288 * 0.01); - - union.reset(); - result = union.getResult(); - Assert.assertTrue(result.isEmpty()); - Assert.assertFalse(result.isEstimationMode()); - Assert.assertEquals(result.getEstimate(), 0.0); - Assert.assertEquals(result.getUpperBound(1), 0.0); - Assert.assertEquals(result.getLowerBound(1), 0.0); - Assert.assertEquals(result.getTheta(), 1.0); - } - - @Test - public void heapEstimationModeFullOverlapTwoValuesAndDownsizing() { - int key = 0; - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build(); - for (int i = 0; i < 8192; i++) { - sketch1.update(key++, new double[] {1.0, 2.0}); - } - - key = 0; // full overlap - final ArrayOfDoublesUpdatableSketch sketch2 = new 
ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build(); - for (int i = 0; i < 8192; i++) { - sketch2.update(key++, new double[] {1.0, 2.0}); - } - - final ArrayOfDoublesUnion union = new ArrayOfDoublesSetOperationBuilder().setNumberOfValues(2).setNominalEntries(1024).buildUnion(); - union.union(sketch1); - union.union(sketch2); - final ArrayOfDoublesCompactSketch result = union.getResult(); - Assert.assertFalse(result.isEmpty()); - Assert.assertTrue(result.isEstimationMode()); - Assert.assertEquals(result.getEstimate(), 8192.0, 8192 * 0.01); - Assert.assertEquals(result.getRetainedEntries(), 1024); // union was downsampled - - final ArrayOfDoublesSketchIterator it = result.iterator(); - final double[] expected = {2, 4}; - while (it.next()) { - Assert.assertEquals(it.getValues(), expected, Arrays.toString(it.getValues()) + " != " + Arrays.toString(expected)); - } - } - - @Test - public void heapMixedMode() { - int key = 0; - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < 1000; i++) { - sketch1.update(key++, new double[] {1.0}); - } - - key -= 500; // overlap half of the entries - final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().setSamplingProbability(0.2f).build(); - for (int i = 0; i < 20000; i++) { - sketch2.update(key++, new double[] {1.0}); - } - - final ArrayOfDoublesUnion union = new ArrayOfDoublesSetOperationBuilder().buildUnion(); - union.union(sketch1); - union.union(sketch2); - final ArrayOfDoublesCompactSketch result = union.getResult(); - Assert.assertFalse(result.isEmpty()); - Assert.assertTrue(result.isEstimationMode()); - Assert.assertEquals(result.getEstimate(), 20500.0, 20500 * 0.01); - } - - @Test - public void heapSerializeDeserialize() { - int key = 0; - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < 8192; i++) { - sketch1.update(key++, new double[] 
{1.0}); - } - - key -= 4096; // overlap half of the entries - final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < 8192; i++) { - sketch2.update(key++, new double[] {1.0}); - } - - final ArrayOfDoublesUnion union1 = new ArrayOfDoublesSetOperationBuilder().buildUnion(); - union1.union(sketch1); - union1.union(sketch2); - - final ArrayOfDoublesUnion union2 = ArrayOfDoublesUnion.heapify(MemorySegment.ofArray(union1.toByteArray())); - ArrayOfDoublesCompactSketch result = union2.getResult(); - Assert.assertEquals(result.getEstimate(), 12288.0, 12288 * 0.01); - - union2.reset(); - result = union2.getResult(); - Assert.assertTrue(result.isEmpty()); - Assert.assertFalse(result.isEstimationMode()); - Assert.assertEquals(result.getEstimate(), 0.0); - Assert.assertEquals(result.getUpperBound(1), 0.0); - Assert.assertEquals(result.getLowerBound(1), 0.0); - Assert.assertEquals(result.getTheta(), 1.0); - final double[][] values = result.getValues(); - for (int i = 0; i < values.length; i++) { - Assert.assertEquals(values[i][0], 2.0); - } - } - - @Test - public void heapSerializeDeserializeWithSeed() { - final long seed = 1; - int key = 0; - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed).build(); - for (int i = 0; i < 8192; i++) { - sketch1.update(key++, new double[] {1.0}); - } - - key -= 4096; // overlap half of the entries - final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed).build(); - for (int i = 0; i < 8192; i++) { - sketch2.update(key++, new double[] {1.0}); - } - - final ArrayOfDoublesUnion union1 = new ArrayOfDoublesSetOperationBuilder().setSeed(seed).buildUnion(); - union1.union(sketch1); - union1.union(sketch2); - - final ArrayOfDoublesUnion union2 = ArrayOfDoublesUnion.heapify(MemorySegment.ofArray(union1.toByteArray()), seed); - final ArrayOfDoublesCompactSketch result = union2.getResult(); - 
Assert.assertEquals(result.getEstimate(), 12288.0, 12288 * 0.01); - } - - @Test - public void directSerializeDeserialize() { - int key = 0; - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build( - MemorySegment.ofArray(new byte[1000000])); - for (int i = 0; i < 8192; i++) { - sketch1.update(key++, new double[] {1.0}); - } - - key -= 4096; // overlap half of the entries - final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build( - MemorySegment.ofArray(new byte[1000000])); - for (int i = 0; i < 8192; i++) { - sketch2.update(key++, new double[] {1.0}); - } - - final ArrayOfDoublesUnion union1 = new ArrayOfDoublesSetOperationBuilder().buildUnion( - MemorySegment.ofArray(new byte[1000000])); - union1.union(sketch1); - union1.union(sketch2); - - final ArrayOfDoublesUnion union2 = ArrayOfDoublesUnion.wrap(MemorySegment.ofArray(union1.toByteArray())); - ArrayOfDoublesCompactSketch result = union2.getResult(MemorySegment.ofArray(new byte[1000000])); - Assert.assertEquals(result.getEstimate(), 12288.0, 12288 * 0.01); - - union2.reset(); - result = union2.getResult(); - Assert.assertTrue(result.isEmpty()); - Assert.assertFalse(result.isEstimationMode()); - Assert.assertEquals(result.getEstimate(), 0.0); - Assert.assertEquals(result.getUpperBound(1), 0.0); - Assert.assertEquals(result.getLowerBound(1), 0.0); - Assert.assertEquals(result.getTheta(), 1.0); - final double[][] values = result.getValues(); - for (int i = 0; i < values.length; i++) { - Assert.assertEquals(values[i][0], 2.0); - } - } - - @Test - public void directSerializeDeserializeWithSeed() { - final long seed = 1; - int key = 0; - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed) - .build(MemorySegment.ofArray(new byte[1000000])); - for (int i = 0; i < 8192; i++) { - sketch1.update(key++, new double[] {1.0}); - } - - key -= 4096; // overlap half of the entries - final 
ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed) - .build(MemorySegment.ofArray(new byte[1000000])); - for (int i = 0; i < 8192; i++) { - sketch2.update(key++, new double[] {1.0}); - } - - final ArrayOfDoublesUnion union1 = new ArrayOfDoublesSetOperationBuilder().setSeed(seed) - .buildUnion(MemorySegment.ofArray(new byte[1000000])); - union1.union(sketch1); - union1.union(sketch2); - - final ArrayOfDoublesUnion union2 = ArrayOfDoublesUnion.wrap(MemorySegment.ofArray(union1.toByteArray()), seed); - final ArrayOfDoublesCompactSketch result = union2.getResult(MemorySegment.ofArray(new byte[1000000])); - Assert.assertEquals(result.getEstimate(), 12288.0, 12288 * 0.01); - } - - @Test - public void directExactMode() { - final ArrayOfDoublesUpdatableSketch sketch1 = - new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); - sketch1.update(1, new double[] {1.0}); - sketch1.update(1, new double[] {1.0}); - sketch1.update(1, new double[] {1.0}); - sketch1.update(2, new double[] {1.0}); - - final ArrayOfDoublesUpdatableSketch sketch2 = - new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); - sketch2.update(2, new double[] {1.0}); - sketch2.update(2, new double[] {1.0}); - sketch2.update(3, new double[] {1.0}); - sketch2.update(3, new double[] {1.0}); - sketch2.update(3, new double[] {1.0}); - - final ArrayOfDoublesUnion union = - new ArrayOfDoublesSetOperationBuilder().buildUnion(MemorySegment.ofArray(new byte[1000000])); - union.union(sketch1); - union.union(sketch2); - ArrayOfDoublesCompactSketch result = union.getResult(MemorySegment.ofArray(new byte[1000000])); - Assert.assertEquals(result.getEstimate(), 3.0); - final double[][] values = result.getValues(); - Assert.assertEquals(values[0][0], 3.0); - Assert.assertEquals(values[1][0], 3.0); - Assert.assertEquals(values[2][0], 3.0); - - union.reset(); - result = union.getResult(); - 
Assert.assertTrue(result.isEmpty()); - Assert.assertFalse(result.isEstimationMode()); - Assert.assertEquals(result.getEstimate(), 0.0); - Assert.assertEquals(result.getUpperBound(1), 0.0); - Assert.assertEquals(result.getLowerBound(1), 0.0); - Assert.assertEquals(result.getTheta(), 1.0); - } - - @Test - public void directEstimationMode() { - int key = 0; - final ArrayOfDoublesUpdatableSketch sketch1 = - new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); - for (int i = 0; i < 8192; i++) { - sketch1.update(key++, new double[] {1.0}); - } - - key -= 4096; // overlap half of the entries - final ArrayOfDoublesUpdatableSketch sketch2 = - new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); - for (int i = 0; i < 8192; i++) { - sketch2.update(key++, new double[] {1.0}); - } - - final ArrayOfDoublesUnion union = - new ArrayOfDoublesSetOperationBuilder().buildUnion(MemorySegment.ofArray(new byte[1000000])); - union.union(sketch1); - union.union(sketch2); - ArrayOfDoublesCompactSketch result = union.getResult(MemorySegment.ofArray(new byte[1000000])); - Assert.assertEquals(result.getEstimate(), 12288.0, 12288 * 0.01); - - union.reset(); - result = union.getResult(); - Assert.assertTrue(result.isEmpty()); - Assert.assertFalse(result.isEstimationMode()); - Assert.assertEquals(result.getEstimate(), 0.0); - Assert.assertEquals(result.getUpperBound(1), 0.0); - Assert.assertEquals(result.getLowerBound(1), 0.0); - Assert.assertEquals(result.getTheta(), 1.0); - } - - @Test - public void heapToDirect() { - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - sketch1.update(1, new double[] {1.0}); - sketch1.update(1, new double[] {1.0}); - sketch1.update(1, new double[] {1.0}); - sketch1.update(2, new double[] {1.0}); - - final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - sketch2.update(2, new double[] {1.0}); - 
sketch2.update(2, new double[] {1.0}); - sketch2.update(3, new double[] {1.0}); - sketch2.update(3, new double[] {1.0}); - sketch2.update(3, new double[] {1.0}); - - final ArrayOfDoublesUnion heapUnion = new ArrayOfDoublesSetOperationBuilder().buildUnion(); - heapUnion.union(sketch1); - - final ArrayOfDoublesUnion directUnion = - ArrayOfDoublesUnion.wrap(MemorySegment.ofArray(heapUnion.toByteArray())); - directUnion.union(sketch2); //throws - - final ArrayOfDoublesCompactSketch result = directUnion.getResult(MemorySegment.ofArray(new byte[1000000])); - Assert.assertFalse(result.isEmpty()); - Assert.assertEquals(result.getEstimate(), 3.0); - final double[][] values = result.getValues(); - Assert.assertEquals(values.length, 3); - Assert.assertEquals(values[0][0], 3.0); - Assert.assertEquals(values[1][0], 3.0); - Assert.assertEquals(values[2][0], 3.0); - } - - @Test - public void directToHeap() { - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - sketch1.update(1, new double[] {1.0}); - sketch1.update(1, new double[] {1.0}); - sketch1.update(1, new double[] {1.0}); - sketch1.update(2, new double[] {1.0}); - - final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - sketch2.update(2, new double[] {1.0}); - sketch2.update(2, new double[] {1.0}); - sketch2.update(3, new double[] {1.0}); - sketch2.update(3, new double[] {1.0}); - sketch2.update(3, new double[] {1.0}); - - final ArrayOfDoublesUnion directUnion = - new ArrayOfDoublesSetOperationBuilder().buildUnion(MemorySegment.ofArray(new byte[1000000])); - directUnion.union(sketch1); - - final ArrayOfDoublesUnion heapUnion = ArrayOfDoublesUnion.heapify(MemorySegment.ofArray(directUnion.toByteArray())); - heapUnion.union(sketch2); - - final ArrayOfDoublesCompactSketch result = heapUnion.getResult(); - Assert.assertFalse(result.isEmpty()); - Assert.assertEquals(result.getEstimate(), 3.0); - final double[][] values = 
result.getValues(); - Assert.assertEquals(values.length, 3); - Assert.assertEquals(values[0][0], 3.0); - Assert.assertEquals(values[1][0], 3.0); - Assert.assertEquals(values[2][0], 3.0); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void incompatibleSeeds() { - final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(1).build(); - final ArrayOfDoublesUnion union = new ArrayOfDoublesSetOperationBuilder().setSeed(2).buildUnion(); - union.union(sketch); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void incompatibleInputSketchFewerValues() { - final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build(); - final ArrayOfDoublesUnion union = new ArrayOfDoublesSetOperationBuilder().setNumberOfValues(2).buildUnion(); - union.union(sketch); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void incompatibleInputSketchMoreValues() { - final ArrayOfDoublesUpdatableSketch sketch = - new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build(); - final ArrayOfDoublesUnion union = new ArrayOfDoublesSetOperationBuilder().buildUnion(); - union.union(sketch); - } - - @Test - public void directDruidUsageOneSketch() { - final MemorySegment seg = MemorySegment.ofArray(new byte[1_000_000]); - new ArrayOfDoublesSetOperationBuilder().buildUnion(seg); // just set up MemorySegment to wrap later - - final int n = 100_000; // estimation mode - final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < n; i++) { - sketch.update(i, new double[] {1.0}); - } - sketch.trim(); // pretend this is a result from a union - - // as Druid wraps MemorySegment - MemorySegment seg2 = MemorySegment.ofArray(new byte[1_000_000]); - ArrayOfDoublesCompactSketch dcsk = sketch.compact(seg2); - ArrayOfDoublesUnion union = ArrayOfDoublesSketches.wrapUnion(seg); //empty union - 
union.union(dcsk); - //ArrayOfDoublesSketches.wrapUnion(seg).union(sketch.compact(MemorySegment.ofArray(new byte[1_000_000]))); - - final ArrayOfDoublesSketch result = ArrayOfDoublesUnion.wrap(seg).getResult(); - Assert.assertEquals(result.getEstimate(), sketch.getEstimate());//expected [98045.91060164096] but found [4096.0] - Assert.assertEquals(result.isEstimationMode(), sketch.isEstimationMode()); - } - - @Test - public void directDruidUsageTwoSketches() { - final MemorySegment seg = MemorySegment.ofArray(new byte[1000000]); - new ArrayOfDoublesSetOperationBuilder().buildUnion(seg); // just set up MemorySegment to wrap later - - int key = 0; - - final int n1 = 100000; // estimation mode - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < n1; i++) { - sketch1.update(key++, new double[] {1.0}); - } - // as Druid wraps MemorySegment - ArrayOfDoublesSketches.wrapUnion(seg).union(sketch1.compact(MemorySegment.ofArray(new byte[1000000]))); - - final int n2 = 1000000; // estimation mode - final ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < n2; i++) { - sketch2.update(key++, new double[] {1.0}); - } - // as Druid wraps MemorySegment - ArrayOfDoublesSketches.wrapUnion(seg).union(sketch2.compact(MemorySegment.ofArray(new byte[1000000]))); - - // build one sketch that must be the same as union - key = 0; // reset to have the same keys - final int n = n1 + n2; - final ArrayOfDoublesUpdatableSketch expected = new ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < n; i++) { - expected.update(key++, new double[] {1.0}); - } - expected.trim(); // union result is trimmed, so we need to trim this sketch for valid comparison - - final ArrayOfDoublesSketch result = ArrayOfDoublesUnion.wrap(seg).getResult(); - Assert.assertEquals(result.getEstimate(), expected.getEstimate()); - Assert.assertEquals(result.isEstimationMode(), 
expected.isEstimationMode()); - Assert.assertEquals(result.getUpperBound(1), expected.getUpperBound(1)); - Assert.assertEquals(result.getLowerBound(1), expected.getLowerBound(1)); - Assert.assertEquals(result.getRetainedEntries(), expected.getRetainedEntries()); - Assert.assertEquals(result.getNumValues(), expected.getNumValues()); - } - -} diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java deleted file mode 100644 index d03e9aa52..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/CornerCaseArrayOfDoublesSetOperationsTest.java +++ /dev/null @@ -1,581 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import static org.apache.datasketches.common.Util.zeroPad; -import static org.apache.datasketches.hash.MurmurHash3.hash; -import static org.testng.Assert.assertTrue; - -import org.apache.datasketches.common.Util; -import org.testng.annotations.Test; - -public class CornerCaseArrayOfDoublesSetOperationsTest { - //Stateful Intersection with intersect(sketch A, combiner), followed by getResult() - //Essentially Stateless AnotB with update(Sketch A, Sketch B), followed by getResult() - //Stateful Union with union(Sketch A), followed by getResult() - - /* Hashes and Hash Equivalents - * Top8bits Hex Decimal - * MAX: 01111111, 7fffffffffffffff, 9223372036854775807 - * GT_MIDP: 01011101, 5d6906dac1b340ba, 6730918654704304314 3L - * MIDP_THETALONG:01000000, 4000000000000000, 4611686018427387904 - * GT_LOWP: 00010000, 10bc98fb132116fe, 1206007004353599230 6L - * LOWP_THETALONG:00010000, 1000000000000000, 1152921504606846976 - * LT_LOWP: 00001000, 83ddbc9e12ede40, 593872385995628096 4L - */ - - - private static final float MIDP_FLT = 0.5f; - private static final float LOWP_FLT = 0.125f; - private static final long GT_MIDP_KEY = 3L; - private static final long GT_LOWP_KEY = 6L; - private static final long LT_LOWP_KEY = 4L; - - private static final long MAX_LONG = Long.MAX_VALUE; - - private static final long HASH_GT_MIDP = getLongHash(GT_MIDP_KEY); - private static final long MIDP_THETALONG = (long)(MAX_LONG * MIDP_FLT); - - private static final long HASH_GT_LOWP = getLongHash(GT_LOWP_KEY); - private static final long LOWP_THETALONG = (long)(MAX_LONG * LOWP_FLT); - private static final long HASH_LT_LOWP = getLongHash(LT_LOWP_KEY); - - private static final String LS = System.getProperty("line.separator"); - - private enum SkType { - EMPTY, // { 1.0, 0, T} Bin: 101 Oct: 05 - EXACT, // { 1.0, >0, F} Bin: 110 Oct: 06, specify only value - ESTIMATION, // {<1.0, >0, F} Bin: 010 Oct: 02, specify only value - 
DEGENERATE // {<1.0, 0, F} Bin: 000 Oct: 0, specify p, value - } - - private static class MinCombiner implements ArrayOfDoublesCombiner { - MinCombiner() {} - - @Override - public double[] combine(double[] a, double[] b) { - return new double[] { Math.min(a[0], b[0]) }; - } - } - - private static MinCombiner minCombiner = new MinCombiner(); - - //=================================f - - @Test - public void emptyEmpty() { - ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.EMPTY, 0, 0); - ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.EMPTY, 0, 0); - final double expectedIntersectTheta = 1.0; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = true; - final double expectedAnotbTheta = 1.0; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = true; - final double expectedUnionTheta = 1.0; - final int expectedUnionCount = 0; - final boolean expectedUnionEmpty = true; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void emptyExact() { - ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.EMPTY, 0, 0); - ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.EXACT, 0, GT_MIDP_KEY); - final double expectedIntersectTheta = 1.0; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = true; - final double expectedAnotbTheta = 1.0; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = true; - final double expectedUnionTheta = 1.0; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void 
emptyDegenerate() { - ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.EMPTY, 0, 0); - ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_KEY); - final double expectedIntersectTheta = 1.0; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = true; - final double expectedAnotbTheta = 1.0; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = true; - final double expectedUnionTheta = LOWP_FLT; - final int expectedUnionCount = 0; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void emptyEstimation() { - ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.EMPTY, 0, 0); - ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_KEY); - final double expectedIntersectTheta = 1.0; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = true; - final double expectedAnotbTheta = 1.0; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = true; - final double expectedUnionTheta = LOWP_FLT; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - //================================= - - @Test - public void exactEmpty() { - ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.EXACT, 0, GT_MIDP_KEY); - ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.EMPTY, 0, 0); - final double expectedIntersectTheta = 1.0; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = true; - final double 
expectedAnotbTheta = 1.0; - final int expectedAnotbCount = 1; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = 1.0; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void exactExact() { - ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.EXACT, 0, GT_MIDP_KEY); - ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.EXACT, 0, GT_MIDP_KEY); - final double expectedIntersectTheta = 1.0; - final int expectedIntersectCount = 1; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = 1.0; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = true; - final double expectedUnionTheta = 1.0; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void exactDegenerate() { - ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.EXACT, 0, LT_LOWP_KEY); - ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_KEY); //entries = 0 - final double expectedIntersectTheta = LOWP_FLT; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = LOWP_FLT; - final int expectedAnotbCount = 1; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP_FLT; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - 
expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void exactEstimation() { - ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.EXACT, 0, LT_LOWP_KEY); - ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_KEY); - final double expectedIntersectTheta = LOWP_FLT; - final int expectedIntersectCount = 1; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = LOWP_FLT; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP_FLT; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - //================================= - - @Test - public void estimationEmpty() { - ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_KEY); - ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.EMPTY, 0, 0); - final double expectedIntersectTheta = 1.0; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = true; - final double expectedAnotbTheta = LOWP_FLT; - final int expectedAnotbCount = 1; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP_FLT; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void estimationExact() { - ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_KEY); - 
ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.EXACT, 0, LT_LOWP_KEY); - final double expectedIntersectTheta = LOWP_FLT; - final int expectedIntersectCount = 1; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = LOWP_FLT; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP_FLT; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void estimationDegenerate() { - ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.ESTIMATION, MIDP_FLT, LT_LOWP_KEY); - ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_KEY); - final double expectedIntersectTheta = LOWP_FLT; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = LOWP_FLT; - final int expectedAnotbCount = 1; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP_FLT; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void estimationEstimation() { - ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.ESTIMATION, MIDP_FLT, LT_LOWP_KEY); - ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_KEY); - final double expectedIntersectTheta = LOWP_FLT; - final int expectedIntersectCount = 1; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = LOWP_FLT; - final int 
expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP_FLT; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - //================================= - - @Test - public void degenerateEmpty() { - ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_KEY); //entries = 0 - ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.EMPTY, 0, 0); - final double expectedIntersectTheta = 1.0; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = true; - final double expectedAnotbTheta = LOWP_FLT; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP_FLT; - final int expectedUnionCount = 0; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void degenerateExact() { - ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_KEY); //entries = 0 - ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.EXACT, 0, LT_LOWP_KEY); - final double expectedIntersectTheta = LOWP_FLT; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = LOWP_FLT; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP_FLT; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, 
expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void degenerateDegenerate() { - ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.DEGENERATE, MIDP_FLT, GT_MIDP_KEY); //entries = 0 - ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.DEGENERATE, LOWP_FLT, GT_LOWP_KEY); - final double expectedIntersectTheta = LOWP_FLT; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = LOWP_FLT; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP_FLT; - final int expectedUnionCount = 0; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - @Test - public void degenerateEstimation() { - ArrayOfDoublesUpdatableSketch thetaA = getSketch(SkType.DEGENERATE, MIDP_FLT, GT_MIDP_KEY); //entries = 0 - ArrayOfDoublesUpdatableSketch thetaB = getSketch(SkType.ESTIMATION, LOWP_FLT, LT_LOWP_KEY); - final double expectedIntersectTheta = LOWP_FLT; - final int expectedIntersectCount = 0; - final boolean expectedIntersectEmpty = false; - final double expectedAnotbTheta = LOWP_FLT; - final int expectedAnotbCount = 0; - final boolean expectedAnotbEmpty = false; - final double expectedUnionTheta = LOWP_FLT; - final int expectedUnionCount = 1; - final boolean expectedUnionEmpty = false; - - checks(thetaA, thetaB, - expectedIntersectTheta, expectedIntersectCount, expectedIntersectEmpty, - expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty, - expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - //================================= - //================================= - 
- private static void checks( - ArrayOfDoublesUpdatableSketch tupleA, - ArrayOfDoublesUpdatableSketch tupleB, - double expectedIntersectTheta, - int expectedIntersectCount, - boolean expectedIntersectEmpty, - double expectedAnotbTheta, - int expectedAnotbCount, - boolean expectedAnotbEmpty, - double expectedUnionTheta, - int expectedUnionCount, - boolean expectedUnionEmpty) { - ArrayOfDoublesCompactSketch csk; - ArrayOfDoublesIntersection inter = new ArrayOfDoublesSetOperationBuilder().buildIntersection(); - ArrayOfDoublesAnotB anotb = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); - ArrayOfDoublesUnion union = new ArrayOfDoublesSetOperationBuilder().buildUnion(); - - //Intersection Tuple, Tuple Updatable Stateful - inter.intersect(tupleA, minCombiner); - inter.intersect(tupleB, minCombiner); - csk = inter.getResult(); - inter.reset(); - checkResult("Intersect Stateless Theta, Theta", csk, expectedIntersectTheta, expectedIntersectCount, - expectedIntersectEmpty); - //Intersection Tuple, Tuple Compact Stateful - inter.intersect(tupleA.compact(), minCombiner); - inter.intersect(tupleB.compact(), minCombiner); - csk = inter.getResult(); - inter.reset(); - checkResult("Intersect Stateless Theta, Theta", csk, expectedIntersectTheta, expectedIntersectCount, - expectedIntersectEmpty); - - //AnotB Stateless Tuple, Tuple Updatable - anotb.update(tupleA, tupleB); - csk = anotb.getResult(); - checkResult("AnotB Stateless Theta, Theta", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); - //AnotB Stateless Tuple, Tuple Compact - anotb.update(tupleA, tupleB); - csk = anotb.getResult(); - checkResult("AnotB Stateless Theta, Theta", csk, expectedAnotbTheta, expectedAnotbCount, expectedAnotbEmpty); - - //Union Stateful Tuple, Tuple Updatable - union.union(tupleA); - union.union(tupleB); - csk = union.getResult(); - union.reset(); - checkResult("Union Stateless Theta, Theta", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - //Union Stateful 
Tuple, Tuple Compact - union.union(tupleA.compact()); - union.union(tupleB.compact()); - csk = union.getResult(); - union.reset(); - checkResult("Union Stateless Theta, Theta", csk, expectedUnionTheta, expectedUnionCount, expectedUnionEmpty); - } - - private static void checkResult( - String comment, - ArrayOfDoublesCompactSketch csk, - double expectedTheta, - int expectedEntries, - boolean expectedEmpty) { - double actualTheta = csk.getTheta(); - int actualEntries = csk.getRetainedEntries(); - boolean actualEmpty = csk.isEmpty(); - - boolean thetaOk = actualTheta == expectedTheta; - boolean entriesOk = actualEntries == expectedEntries; - boolean emptyOk = actualEmpty == expectedEmpty; - if (!thetaOk || !entriesOk || !emptyOk) { - StringBuilder sb = new StringBuilder(); - sb.append(comment + ": "); - if (!thetaOk) { sb.append("Theta: expected " + expectedTheta + ", got " + actualTheta + "; "); } - if (!entriesOk) { sb.append("Entries: expected " + expectedEntries + ", got " + actualEntries + "; "); } - if (!emptyOk) { sb.append("Empty: expected " + expectedEmpty + ", got " + actualEmpty + "."); } - throw new IllegalArgumentException(sb.toString()); - } - } - - private static ArrayOfDoublesUpdatableSketch getSketch( - SkType skType, - float p, - long updateKey) { - - ArrayOfDoublesUpdatableSketchBuilder bldr = new ArrayOfDoublesUpdatableSketchBuilder(); - bldr.setNominalEntries(16); - //Assume defaults: 1 double value, resize factor, seed - double[] summaryVal = {1.0}; - - ArrayOfDoublesUpdatableSketch sk; - switch(skType) { - case EMPTY: { // { 1.0, 0, T} p and value are not used - sk = bldr.build(); - break; - } - case EXACT: { // { 1.0, >0, F} p is not used - sk = bldr.build(); - sk.update(updateKey, summaryVal); - break; - } - case ESTIMATION: { // {<1.0, >0, F} - checkValidUpdate(p, updateKey); - bldr.setSamplingProbability(p); - sk = bldr.build(); - sk.update(updateKey, summaryVal); - break; - } - case DEGENERATE: { // {<1.0, 0, F} - checkInvalidUpdate(p, 
updateKey); - bldr.setSamplingProbability(p); - sk = bldr.build(); - sk.update(updateKey, summaryVal); // > theta - break; - } - - default: { return null; } // should not happen - } - return sk; - } - - private static void checkValidUpdate(float p, long updateKey) { - assertTrue( getLongHash(updateKey) < (long) (p * Long.MAX_VALUE)); - } - - private static void checkInvalidUpdate(float p, long updateKey) { - assertTrue( getLongHash(updateKey) > (long) (p * Long.MAX_VALUE)); - } - - //******************************************* - //Helper functions for setting the hash values - - //@Test - public void printTable() { - println(" Top8bits Hex Decimal"); - printf("MAX: %8s, %16x, %19d" + LS, getTop8(MAX_LONG), MAX_LONG, MAX_LONG); - printf("GT_MIDP: %8s, %16x, %19d" + LS, getTop8(HASH_GT_MIDP), HASH_GT_MIDP, HASH_GT_MIDP); - printf("MIDP_THETALONG:%8s, %16x, %19d" + LS, getTop8(MIDP_THETALONG), MIDP_THETALONG, MIDP_THETALONG); - printf("GT_LOWP: %8s, %16x, %19d" + LS, getTop8(HASH_GT_LOWP), HASH_GT_LOWP, HASH_GT_LOWP); - printf("LOWP_THETALONG:%8s, %16x, %19d" + LS, getTop8(LOWP_THETALONG), LOWP_THETALONG, LOWP_THETALONG); - printf("LT_LOWP: %8s, %16x, %19d" + LS, getTop8(HASH_LT_LOWP), HASH_LT_LOWP, HASH_LT_LOWP); - println(LS +"Doubles"); - - println(LS + "Longs"); - for (long v = 1L; v < 10; v++) { - long hash = (hash(v, Util.DEFAULT_UPDATE_SEED)[0]) >>> 1; - printLong(v, hash); - } - } - - static long getLongHash(long v) { - return (hash(v, Util.DEFAULT_UPDATE_SEED)[0]) >>> 1; - } - - static void printLong(long v, long hash) { - System.out.printf(" %8d, %8s, %16x, %19d" + LS,v, getTop8(hash), hash, hash); - } - - static String getTop8(final long v) { - int i = (int) (v >>> 56); - String s = Integer.toBinaryString(i); - return zeroPad(s, 8); - } - - private static void println(Object o) { - System.out.println(o.toString()); - } - - private static void printf(String fmt, Object ...args) { - System.out.printf(fmt, args); - } -} - diff --git 
a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java deleted file mode 100644 index 39a584fec..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesCompactSketchTest.java +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; -import org.testng.Assert; -import org.testng.annotations.Test; - -public class DirectArrayOfDoublesCompactSketchTest { - - @Test - public void emptyFromQuickSelectSketch() { - ArrayOfDoublesUpdatableSketch us = - new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); - ArrayOfDoublesCompactSketch sketch = us.compact(MemorySegment.ofArray(new byte[1000000])); - Assert.assertTrue(sketch.isEmpty()); - Assert.assertFalse(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 0.0); - Assert.assertEquals(sketch.getLowerBound(1), 0.0); - Assert.assertEquals(sketch.getUpperBound(1), 0.0); - Assert.assertEquals(sketch.getRetainedEntries(), 0); - Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); - Assert.assertEquals(sketch.getTheta(), 1.0); - Assert.assertNotNull(sketch.getValues()); - Assert.assertEquals(sketch.getValues().length, 0); - ArrayOfDoublesSketchIterator it = sketch.iterator(); - while (it.next()) { - Assert.fail("empty sketch expected"); - } - } - - @Test - public void exactModeFromQuickSelectSketch() { - ArrayOfDoublesUpdatableSketch us = - new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); - us.update(1, new double[] {1.0}); - us.update(2, new double[] {1.0}); - us.update(3, new double[] {1.0}); - us.update(1, new double[] {1.0}); - us.update(2, new double[] {1.0}); - us.update(3, new double[] {1.0}); - ArrayOfDoublesCompactSketch sketch = us.compact(MemorySegment.ofArray(new byte[1000000])); - Assert.assertFalse(sketch.isEmpty()); - Assert.assertFalse(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 3.0); - Assert.assertEquals(sketch.getLowerBound(1), 3.0); - Assert.assertEquals(sketch.getUpperBound(1), 3.0); - 
Assert.assertEquals(sketch.getRetainedEntries(), 3); - Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); - Assert.assertEquals(sketch.getTheta(), 1.0); - Assert.assertEquals(sketch.getSeedHash(), Util.computeSeedHash(Util.DEFAULT_UPDATE_SEED)); - double[][] values = sketch.getValues(); - Assert.assertEquals(values.length, 3); - for (double[] array: values) { - Assert.assertEquals(array[0], 2.0); - } - } - - @Test - public void serializeDeserializeSmallExact() { - ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); - us.update("a", new double[] {1.0}); - us.update("b", new double[] {1.0}); - us.update("c", new double[] {1.0}); - ArrayOfDoublesCompactSketch sketch1 = us.compact(MemorySegment.ofArray(new byte[1000000])); - ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketches.wrapSketch(MemorySegment.ofArray(sketch1.toByteArray())); - Assert.assertFalse(sketch2.isEmpty()); - Assert.assertFalse(sketch2.isEstimationMode()); - Assert.assertEquals(sketch2.getEstimate(), 3.0); - Assert.assertEquals(sketch2.getLowerBound(1), 3.0); - Assert.assertEquals(sketch2.getUpperBound(1), 3.0); - Assert.assertEquals(sketch2.getRetainedEntries(), 3); - Assert.assertEquals(sketch2.getThetaLong(), Long.MAX_VALUE); - Assert.assertEquals(sketch2.getTheta(), 1.0); - double[][] values = sketch2.getValues(); - Assert.assertEquals(values.length, 3); - for (double[] array: values) { - Assert.assertEquals(array[0], 1.0); - } - } - - @Test - public void serializeDeserializeEstimation() { - ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); - for (int i = 0; i < 8192; i++) { - us.update(i, new double[] {1.0}); - } - ArrayOfDoublesCompactSketch sketch1 = us.compact(MemorySegment.ofArray(new byte[1000000])); - ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketches.wrapSketch(MemorySegment.ofArray(sketch1.toByteArray())); - 
Assert.assertFalse(sketch2.isEmpty()); - Assert.assertTrue(sketch2.isEstimationMode()); - Assert.assertEquals(sketch2.getEstimate(), sketch1.getEstimate()); - Assert.assertEquals(sketch2.getThetaLong(), sketch1.getThetaLong()); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void deserializeWithWrongSeed() { - ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); - for (int i = 0; i < 8192; i++) { - us.update(i, new double[] {1.0}); - } - ArrayOfDoublesCompactSketch sketch1 = us.compact(MemorySegment.ofArray(new byte[1000000])); - ArrayOfDoublesSketches.wrapSketch(MemorySegment.ofArray(sketch1.toByteArray()), 123); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void fromQuickSelectSketchNotEnoughMemorySegment() { - ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(MemorySegment.ofArray(new byte[1000000])); - us.update(1, new double[] {1.0}); - us.compact(MemorySegment.ofArray(new byte[39])); - } - -} diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchTest.java deleted file mode 100644 index 67aaffd05..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchTest.java +++ /dev/null @@ -1,281 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import java.lang.foreign.MemorySegment; -import java.nio.ByteBuffer; - -import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.common.SketchesArgumentException; -import org.testng.Assert; -import org.testng.annotations.Test; - -public class DirectArrayOfDoublesQuickSelectSketchTest { - @Test - public void isEmpty() { - final ArrayOfDoublesUpdatableSketch sketch = - new ArrayOfDoublesUpdatableSketchBuilder(). - build(MemorySegment.ofArray(new byte[1000000])); - Assert.assertTrue(sketch.isEmpty()); - Assert.assertFalse(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 0.0); - Assert.assertEquals(sketch.getUpperBound(1), 0.0); - Assert.assertEquals(sketch.getLowerBound(1), 0.0); - Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); - Assert.assertEquals(sketch.getTheta(), 1.0); - final ArrayOfDoublesSketchIterator it = sketch.iterator(); - while (it.next()) { - Assert.fail("empty sketch expected"); - } - } - - @Test - public void isEmptyWithSampling() { - final float samplingProbability = 0.1f; - final ArrayOfDoublesUpdatableSketch sketch = - new ArrayOfDoublesUpdatableSketchBuilder(). - setSamplingProbability(samplingProbability). 
- build(MemorySegment.ofArray(new byte[1000000])); - Assert.assertTrue(sketch.isEmpty()); - Assert.assertTrue(((DirectArrayOfDoublesQuickSelectSketch)sketch).isInSamplingMode()); - Assert.assertFalse(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 0.0); - Assert.assertEquals(sketch.getUpperBound(1), 0.0); - Assert.assertEquals(sketch.getLowerBound(1), 0.0); - Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); - Assert.assertEquals(sketch.getTheta(), 1.0); - } - - @Test - // very low probability of being sampled - // once the an input value is chosen so that it is rejected, the test will continue to work - // unless the hash function and the seed are the same - public void sampling() { - final float samplingProbability = 0.001f; - final ArrayOfDoublesUpdatableSketch sketch = - new ArrayOfDoublesUpdatableSketchBuilder(). - setSamplingProbability(samplingProbability). - build(MemorySegment.ofArray(new byte[1000000])); - sketch.update("a", new double[] {1.0}); - Assert.assertFalse(sketch.isEmpty()); - Assert.assertTrue(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 0.0); - Assert.assertTrue(sketch.getUpperBound(1) > 0.0); - Assert.assertEquals(sketch.getLowerBound(1), 0.0, 0.0000001); - Assert.assertEquals( - (float)(sketch.getThetaLong() / (double) Long.MAX_VALUE), samplingProbability); - Assert.assertEquals((float)sketch.getTheta(), samplingProbability); - } - - @Test - public void exactMode() { - final ArrayOfDoublesUpdatableSketch sketch = - new ArrayOfDoublesUpdatableSketchBuilder(). 
- build(MemorySegment.ofArray(new byte[1000000])); - Assert.assertTrue(sketch.isEmpty()); - Assert.assertEquals(sketch.getEstimate(), 0.0); - for (int i = 0; i < 4096; i++) { - sketch.update(i, new double[] {1.0}); - } - Assert.assertFalse(sketch.isEmpty()); - Assert.assertFalse(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 4096.0); - Assert.assertEquals(sketch.getUpperBound(1), 4096.0); - Assert.assertEquals(sketch.getLowerBound(1), 4096.0); - Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); - Assert.assertEquals(sketch.getTheta(), 1.0); - - final double[][] values = sketch.getValues(); - Assert.assertEquals(values.length, 4096); - int count = 0; - for (int i = 0; i < values.length; i++) { - if (values[i] != null) { - count++; - } - } - Assert.assertEquals(count, 4096); - for (int i = 0; i < 4096; i++) { - Assert.assertEquals(values[i][0], 1.0); - } - - sketch.reset(); - Assert.assertTrue(sketch.isEmpty()); - Assert.assertFalse(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 0.0); - Assert.assertEquals(sketch.getUpperBound(1), 0.0); - Assert.assertEquals(sketch.getLowerBound(1), 0.0); - Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); - Assert.assertEquals(sketch.getTheta(), 1.0); - final ArrayOfDoublesSketchIterator it = sketch.iterator(); - while (it.next()) { - Assert.fail("empty sketch expected"); - } - } - - @Test - // The moment of going into the estimation mode is, to some extent, an implementation detail - // Here we assume that presenting as many unique values as twice the nominal size of the sketch - // will result in estimation mode - public void estimationMode() { - final ArrayOfDoublesUpdatableSketch sketch = - new ArrayOfDoublesUpdatableSketchBuilder(). 
- build(MemorySegment.ofArray(new byte[4096 * 2 * 16 + 32])); - Assert.assertEquals(sketch.getEstimate(), 0.0); - for (int i = 1; i <= 8192; i++) { - sketch.update(i, new double[] {1.0}); - } - Assert.assertTrue(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 8192, 8192 * 0.01); - Assert.assertTrue(sketch.getEstimate() >= sketch.getLowerBound(1)); - Assert.assertTrue(sketch.getEstimate() < sketch.getUpperBound(1)); - - final double[][] values = sketch.getValues(); - Assert.assertTrue(values.length >= 4096); - int count = 0; - for (final double[] array: values) { - if (array != null) { - count++; - Assert.assertEquals(array.length, 1); - Assert.assertEquals(array[0], 1.0); - } - } - Assert.assertEquals(count, values.length); - - sketch.reset(); - Assert.assertTrue(sketch.isEmpty()); - Assert.assertFalse(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 0.0); - Assert.assertEquals(sketch.getUpperBound(1), 0.0); - Assert.assertEquals(sketch.getLowerBound(1), 0.0); - Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); - Assert.assertEquals(sketch.getTheta(), 1.0); - Assert.assertEquals(sketch.getSamplingProbability(), 1.0F); - final ArrayOfDoublesSketchIterator it = sketch.iterator(); - while (it.next()) { - Assert.fail("empty sketch expected"); - } - } - - @Test - public void updatesOfAllKeyTypes() { - final ArrayOfDoublesUpdatableSketch sketch = - new ArrayOfDoublesUpdatableSketchBuilder(). 
- build(MemorySegment.ofArray(new byte[1000000])); - sketch.update(1L, new double[] {1.0}); - sketch.update(2.0, new double[] {1.0}); - final byte[] bytes = new byte[] {3, 4}; - sketch.update(bytes, new double[] {1.0}); - sketch.update(ByteBuffer.wrap(bytes), new double[] {1.0}); // same as previous - sketch.update(ByteBuffer.wrap(bytes, 0, 1), new double[] {1.0}); // slice - sketch.update(new int[] {4}, new double[] {1.0}); - sketch.update(new long[] {5L}, new double[] {1.0}); - sketch.update("a", new double[] {1.0}); - Assert.assertEquals(sketch.getEstimate(), 7.0); - } - - @Test - public void doubleSum() { - final ArrayOfDoublesUpdatableSketch sketch = - new ArrayOfDoublesUpdatableSketchBuilder(). - build(MemorySegment.ofArray(new byte[1000000])); - sketch.update(1, new double[] {1.0}); - Assert.assertEquals(sketch.getRetainedEntries(), 1); - Assert.assertEquals(sketch.getValues()[0][0], 1.0); - sketch.update(1, new double[] {0.7}); - Assert.assertEquals(sketch.getRetainedEntries(), 1); - Assert.assertEquals(sketch.getValues()[0][0], 1.7); - sketch.update(1, new double[] {0.8}); - Assert.assertEquals(sketch.getRetainedEntries(), 1); - Assert.assertEquals(sketch.getValues()[0][0], 2.5); - } - - @Test - public void serializeDeserializeExact() throws Exception { - final ArrayOfDoublesUpdatableSketch sketch1 = - new ArrayOfDoublesUpdatableSketchBuilder(). 
- build(MemorySegment.ofArray(new byte[1000000])); - sketch1.update(1, new double[] {1.0}); - - final ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.wrap(MemorySegment.ofArray(sketch1.toByteArray())); - - Assert.assertEquals(sketch2.getEstimate(), 1.0); - final double[][] values = sketch2.getValues(); - Assert.assertEquals(values.length, 1); - Assert.assertEquals(values[0][0], 1.0); - - // the same key, so still one unique - sketch2.update(1, new double[] {1.0}); - Assert.assertEquals(sketch2.getEstimate(), 1.0); - - sketch2.update(2, new double[] {1.0}); - Assert.assertEquals(sketch2.getEstimate(), 2.0); - } - - @Test - public void serializeDeserializeEstimationNoResize() throws Exception { - final ArrayOfDoublesUpdatableSketch sketch1 = - new ArrayOfDoublesUpdatableSketchBuilder().setResizeFactor(ResizeFactor.X1). - build(MemorySegment.ofArray(new byte[1000000])); - for (int j = 0; j < 10; j++) { - for (int i = 0; i < 8192; i++) { - sketch1.update(i, new double[] {1.0}); - } - } - final byte[] byteArray = sketch1.toByteArray(); - - //for visual testing - //TestUtil.writeBytesToFile(byteArray, "ArrayOfDoublesQuickSelectSketch4K.data"); - - final ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketch.wrap(MemorySegment.ofArray(byteArray)); - Assert.assertTrue(sketch2.isEstimationMode()); - Assert.assertEquals(sketch2.getEstimate(), 8192, 8192 * 0.99); - Assert.assertEquals(sketch1.getTheta(), sketch2.getTheta()); - final double[][] values = sketch2.getValues(); - Assert.assertTrue(values.length >= 4096); - for (final double[] array: values) { - Assert.assertEquals(array[0], 10.0); - } - } - - @Test - public void serializeDeserializeSampling() { - final int sketchSize = 16384; - final int numberOfUniques = sketchSize; - final ArrayOfDoublesUpdatableSketch sketch1 = - new ArrayOfDoublesUpdatableSketchBuilder(). - setNominalEntries(sketchSize).setSamplingProbability(0.5f). 
- build(MemorySegment.ofArray(new byte[1000000])); - for (int i = 0; i < numberOfUniques; i++) { - sketch1.update(i, new double[] {1.0}); - } - final ArrayOfDoublesSketch sketch2 = - ArrayOfDoublesSketch.wrap(MemorySegment.ofArray(sketch1.toByteArray())); - Assert.assertTrue(sketch2.isEstimationMode()); - Assert.assertEquals(sketch2.getEstimate() / numberOfUniques, 1.0, 0.01); - Assert.assertEquals(sketch2.getRetainedEntries() / (double) numberOfUniques, 0.5, 0.01); - Assert.assertEquals(sketch1.getTheta(), sketch2.getTheta()); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void memorySegmentNotLargeEnough() { - new ArrayOfDoublesUpdatableSketchBuilder(). - setNominalEntries(32).build(MemorySegment.ofArray(new byte[1055])); - } -} diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketchTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketchTest.java deleted file mode 100644 index 77d5a5fe0..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesCompactSketchTest.java +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.testng.Assert; -import org.testng.annotations.Test; - -public class HeapArrayOfDoublesCompactSketchTest { - - @Test - public void emptyFromQuickSelectSketch() { - ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(); - ArrayOfDoublesCompactSketch sketch = us.compact(); - Assert.assertTrue(sketch.isEmpty()); - Assert.assertFalse(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 0.0); - Assert.assertEquals(sketch.getLowerBound(1), 0.0); - Assert.assertEquals(sketch.getUpperBound(1), 0.0); - Assert.assertEquals(sketch.getRetainedEntries(), 0); - Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); - Assert.assertEquals(sketch.getTheta(), 1.0); - Assert.assertNotNull(sketch.getValues()); - Assert.assertEquals(sketch.getValues().length, 0); - ArrayOfDoublesSketchIterator it = sketch.iterator(); - while (it.next()) { - Assert.fail("empty sketch expected"); - } - } - - @Test - public void exactModeFromQuickSelectSketch() { - ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(); - us.update(1, new double[] {1.0}); - us.update(2, new double[] {1.0}); - us.update(3, new double[] {1.0}); - us.update(1, new double[] {1.0}); - us.update(2, new double[] {1.0}); - us.update(3, new double[] {1.0}); - ArrayOfDoublesCompactSketch sketch = us.compact(); - Assert.assertFalse(sketch.isEmpty()); - Assert.assertFalse(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 3.0); - Assert.assertEquals(sketch.getLowerBound(1), 3.0); - Assert.assertEquals(sketch.getUpperBound(1), 3.0); - Assert.assertEquals(sketch.getRetainedEntries(), 3); - Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); - Assert.assertEquals(sketch.getTheta(), 1.0); - double[][] values = sketch.getValues(); - 
Assert.assertEquals(values.length, 3); - for (double[] array: values) { - Assert.assertEquals(array[0], 2.0); - } - } - - @Test - public void serializeDeserializeSmallExact() { - ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(); - us.update("a", new double[] {1.0}); - us.update("b", new double[] {1.0}); - us.update("c", new double[] {1.0}); - ArrayOfDoublesCompactSketch sketch1 = us.compact(); - ArrayOfDoublesSketch sketch2 = - ArrayOfDoublesSketches.heapifySketch(MemorySegment.ofArray(sketch1.toByteArray())); - Assert.assertFalse(sketch2.isEmpty()); - Assert.assertFalse(sketch2.isEstimationMode()); - Assert.assertEquals(sketch2.getEstimate(), 3.0); - Assert.assertEquals(sketch2.getLowerBound(1), 3.0); - Assert.assertEquals(sketch2.getUpperBound(1), 3.0); - Assert.assertEquals(sketch2.getRetainedEntries(), 3); - Assert.assertEquals(sketch2.getThetaLong(), Long.MAX_VALUE); - Assert.assertEquals(sketch2.getTheta(), 1.0); - double[][] values = sketch2.getValues(); - Assert.assertEquals(values.length, 3); - for (double[] array: values) { - Assert.assertEquals(array[0], 1.0); - } - } - - @Test - public void serializeDeserializeEstimation() { - ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < 8192; i++) { - us.update(i, new double[] {1.0}); - } - MemorySegment wseg = MemorySegment.ofArray(us.toByteArray()); - ArrayOfDoublesUpdatableSketch wrappedUS = ArrayOfDoublesSketches.wrapUpdatableSketch(wseg); - Assert.assertFalse(wrappedUS.isEmpty()); - Assert.assertTrue(wrappedUS.isEstimationMode()); - Assert.assertEquals(wrappedUS.getEstimate(), us.getEstimate()); - Assert.assertEquals(wrappedUS.getThetaLong(), us.getThetaLong()); - - ArrayOfDoublesUpdatableSketch heapUS = ArrayOfDoublesSketches.heapifyUpdatableSketch(wseg); - Assert.assertFalse(heapUS.isEmpty()); - Assert.assertTrue(heapUS.isEstimationMode()); - Assert.assertEquals(heapUS.getEstimate(), us.getEstimate()); - 
Assert.assertEquals(heapUS.getThetaLong(), us.getThetaLong()); - - ArrayOfDoublesCompactSketch sketch1 = us.compact(); - ArrayOfDoublesSketch sketch2 = - ArrayOfDoublesSketches.heapifySketch(MemorySegment.ofArray(sketch1.toByteArray())); - Assert.assertFalse(sketch2.isEmpty()); - Assert.assertTrue(sketch2.isEstimationMode()); - Assert.assertEquals(sketch2.getEstimate(), sketch1.getEstimate()); - Assert.assertEquals(sketch2.getThetaLong(), sketch1.getThetaLong()); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void deserializeWithWrongSeed() { - ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(); - for (int i = 0; i < 8192; i++) { - us.update(i, new double[] {1.0}); - } - ArrayOfDoublesCompactSketch sketch1 = us.compact(); - MemorySegment seg = MemorySegment.ofArray(sketch1.toByteArray()); - ArrayOfDoublesSketches.heapifySketch(seg, 123); - } -} diff --git a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketchTest.java deleted file mode 100644 index 5a6b1d3f5..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/arrayofdoubles/HeapArrayOfDoublesQuickSelectSketchTest.java +++ /dev/null @@ -1,244 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.arrayofdoubles; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.common.ResizeFactor; -import org.testng.Assert; -import org.testng.annotations.Test; - -public class HeapArrayOfDoublesQuickSelectSketchTest { - @Test - public void isEmpty() { - final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build(); - Assert.assertTrue(sketch.isEmpty()); - Assert.assertFalse(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 0.0); - Assert.assertEquals(sketch.getUpperBound(1), 0.0); - Assert.assertEquals(sketch.getLowerBound(1), 0.0); - Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); - Assert.assertEquals(sketch.getTheta(), 1.0); - final ArrayOfDoublesSketchIterator it = sketch.iterator(); - while (it.next()) { - Assert.fail("empty sketch expected"); - } - Assert.assertNotNull(sketch.toString()); - } - - @Test - public void isEmptyWithSampling() { - final float samplingProbability = 0.1f; - final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setSamplingProbability(samplingProbability).build(); - Assert.assertTrue(sketch.isEmpty()); - Assert.assertFalse(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 0.0); - Assert.assertEquals(sketch.getUpperBound(1), 0.0); - Assert.assertEquals(sketch.getLowerBound(1), 0.0); - Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); - Assert.assertEquals(sketch.getTheta(), 1.0); - } - - @Test - public void sampling() { - 
final float samplingProbability = 0.001f; - final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setSamplingProbability(samplingProbability).build(); - sketch.update("a", new double[] {1.0}); - Assert.assertFalse(sketch.isEmpty()); - Assert.assertTrue(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 0.0); - Assert.assertTrue(sketch.getUpperBound(1) > 0.0); - Assert.assertEquals(sketch.getLowerBound(1), 0.0, 0.0000001); - Assert.assertEquals((float)(sketch.getThetaLong() / (double) Long.MAX_VALUE), samplingProbability); - Assert.assertEquals((float)sketch.getTheta(), samplingProbability); - } - - @Test - public void exactMode() { - final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build(); - Assert.assertTrue(sketch.isEmpty()); - Assert.assertEquals(sketch.getEstimate(), 0.0); - for (int i = 1; i <= 4096; i++) { - sketch.update(i, new double[] {1.0}); - } - Assert.assertFalse(sketch.isEmpty()); - Assert.assertFalse(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 4096.0); - Assert.assertEquals(sketch.getUpperBound(1), 4096.0); - Assert.assertEquals(sketch.getLowerBound(1), 4096.0); - Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); - Assert.assertEquals(sketch.getTheta(), 1.0); - - final double[][] values = sketch.getValues(); - Assert.assertEquals(values.length, 4096); - int count = 0; - for (int i = 0; i < values.length; i++) { - if (values[i] != null) { - count++; - } - } - Assert.assertEquals(count, 4096); - for (int i = 0; i < 4096; i++) { - Assert.assertEquals(values[i][0], 1.0); - } - - sketch.reset(); - Assert.assertTrue(sketch.isEmpty()); - Assert.assertFalse(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 0.0); - Assert.assertEquals(sketch.getUpperBound(1), 0.0); - Assert.assertEquals(sketch.getLowerBound(1), 0.0); - Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); - 
Assert.assertEquals(sketch.getTheta(), 1.0); - final ArrayOfDoublesSketchIterator it = sketch.iterator(); - while (it.next()) { - Assert.fail("empty sketch expected"); - } - } - - @Test - // The moment of going into the estimation mode is, to some extent, an implementation detail - // Here we assume that presenting as many unique values as twice the nominal size of the sketch will result in estimation mode - public void estimationMode() { - final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build(); - Assert.assertEquals(sketch.getEstimate(), 0.0); - for (int i = 1; i <= 8192; i++) { - sketch.update(i, new double[] {1.0}); - } - Assert.assertTrue(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 8192, 8192 * 0.01); - Assert.assertTrue(sketch.getEstimate() >= sketch.getLowerBound(1)); - Assert.assertTrue(sketch.getEstimate() < sketch.getUpperBound(1)); - Assert.assertTrue(sketch.getRetainedEntries() > 4096); - sketch.trim(); - Assert.assertEquals(sketch.getRetainedEntries(), 4096); - - final double[][] values = sketch.getValues(); - int count = 0; - for (final double[] array: values) { - if (array != null) { - count++; - Assert.assertEquals(array.length, 1); - Assert.assertEquals(array[0], 1.0); - } - } - Assert.assertEquals(count, values.length); - - sketch.reset(); - Assert.assertTrue(sketch.isEmpty()); - Assert.assertFalse(sketch.isEstimationMode()); - Assert.assertEquals(sketch.getEstimate(), 0.0); - Assert.assertEquals(sketch.getUpperBound(1), 0.0); - Assert.assertEquals(sketch.getLowerBound(1), 0.0); - Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); - Assert.assertEquals(sketch.getTheta(), 1.0); - final ArrayOfDoublesSketchIterator it = sketch.iterator(); - while (it.next()) { - Assert.fail("empty sketch expected"); - } - } - - @Test - public void updatesOfAllKeyTypes() { - final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build(); - sketch.update(1L, 
new double[] {1.0}); - sketch.update(2.0, new double[] {1.0}); - sketch.update(new byte[] {3}, new double[] {1.0}); - sketch.update(new int[] {4}, new double[] {1.0}); - sketch.update(new long[] {5L}, new double[] {1.0}); - sketch.update("a", new double[] {1.0}); - Assert.assertEquals(sketch.getEstimate(), 6.0); - } - - @Test - public void doubleSum() { - final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build(); - sketch.update(1, new double[] {1.0}); - Assert.assertEquals(sketch.getRetainedEntries(), 1); - Assert.assertEquals(sketch.getValues()[0][0], 1.0); - sketch.update(1, new double[] {0.7}); - Assert.assertEquals(sketch.getRetainedEntries(), 1); - Assert.assertEquals(sketch.getValues()[0][0], 1.7); - sketch.update(1, new double[] {0.8}); - Assert.assertEquals(sketch.getRetainedEntries(), 1); - Assert.assertEquals(sketch.getValues()[0][0], 2.5); - } - - @Test - public void serializeDeserializeExact() { - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); - sketch1.update(1, new double[] {1.0}); - - final ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.heapify(MemorySegment.ofArray(sketch1.toByteArray())); - - Assert.assertEquals(sketch2.getEstimate(), 1.0); - final double[][] values = sketch2.getValues(); - Assert.assertEquals(values.length, 1); - Assert.assertEquals(values[0][0], 1.0); - - // the same key, so still one unique - sketch2.update(1, new double[] {1.0}); - Assert.assertEquals(sketch2.getEstimate(), 1.0); - - sketch2.update(2, new double[] {1.0}); - Assert.assertEquals(sketch2.getEstimate(), 2.0); - } - - @Test - public void serializeDeserializeEstimationNoResize() throws Exception { - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder(). 
- setResizeFactor(ResizeFactor.X1).build(); - for (int j = 0; j < 10; j++) { - for (int i = 0; i < 8192; i++) { - sketch1.update(i, new double[] {1.0}); - } - } - final byte[] byteArray = sketch1.toByteArray(); - - //for visual testing - //TestUtil.writeBytesToFile(byteArray, "ArrayOfDoublesQuickSelectSketch4K.data"); - - final ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketch.heapify(MemorySegment.ofArray(byteArray)); - Assert.assertTrue(sketch2.isEstimationMode()); - Assert.assertEquals(sketch2.getEstimate(), 8192, 8192 * 0.99); - Assert.assertEquals(sketch1.getTheta(), sketch2.getTheta()); - final double[][] values = sketch2.getValues(); - Assert.assertTrue(values.length >= 4096); - for (final double[] array: values) { - Assert.assertEquals(array[0], 10.0); - } - } - - @Test - public void serializeDeserializeSampling() { - final int sketchSize = 16384; - final int numberOfUniques = sketchSize; - final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(sketchSize).setSamplingProbability(0.5f).build(); - for (int i = 0; i < numberOfUniques; i++) { - sketch1.update(i, new double[] {1.0}); - } - final ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketch.heapify(MemorySegment.ofArray(sketch1.toByteArray())); - Assert.assertTrue(sketch2.isEstimationMode()); - Assert.assertEquals(sketch2.getEstimate() / numberOfUniques, 1.0, 0.01); - Assert.assertEquals(sketch2.getRetainedEntries() / (double) numberOfUniques, 0.5, 0.01); - Assert.assertEquals(sketch1.getTheta(), sketch2.getTheta()); - } - -} diff --git a/src/test/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSketchTest.java b/src/test/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSketchTest.java deleted file mode 100644 index 0617d6758..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSketchTest.java +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more 
contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.tuple2.strings; - -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertTrue; - -import java.lang.foreign.MemorySegment; - -import org.apache.datasketches.tuple2.AnotB; -import org.apache.datasketches.tuple2.CompactSketch; -import org.apache.datasketches.tuple2.Intersection; -import org.apache.datasketches.tuple2.TupleSketchIterator; -import org.apache.datasketches.tuple2.Union; -import org.testng.annotations.Test; - -/** - * @author Lee Rhodes - */ -public class ArrayOfStringsSketchTest { - private static final String LS = System.getProperty("line.separator"); - - @SuppressWarnings("deprecation") - @Test - public void checkSketch() { - ArrayOfStringsSketch sketch1 = new ArrayOfStringsSketch(); - String[][] strArrArr = {{"a","b"},{"c","d"},{"e","f"}}; - int len = strArrArr.length; - for (int i = 0; i < len; i++) { - sketch1.update(strArrArr[i], strArrArr[i]); - } - sketch1.update(strArrArr[0], strArrArr[0]); //insert duplicate - printSummaries(sketch1.iterator()); - byte[] array = sketch1.toByteArray(); - MemorySegment wseg = MemorySegment.ofArray(array); - ArrayOfStringsSketch sketch2 = new ArrayOfStringsSketch(wseg); - printSummaries(sketch2.iterator()); - 
checkSummaries(sketch2, sketch2); - - String[] strArr3 = {"g", "h" }; - sketch2.update(strArr3, strArr3); - - Union union = new Union<>(new ArrayOfStringsSummarySetOperations()); - union.union(sketch1); - union.union(sketch2); - CompactSketch csk = union.getResult(); - //printSummaries(csk.iterator()); - assertEquals(csk.getRetainedEntries(), 4); - - Intersection inter = - new Intersection<>(new ArrayOfStringsSummarySetOperations()); - inter.intersect(sketch1); - inter.intersect(sketch2); - csk = inter.getResult(); - assertEquals(csk.getRetainedEntries(), 3); - - AnotB aNotB = new AnotB<>(); - aNotB.setA(sketch2); - aNotB.notB(sketch1); - csk = aNotB.getResult(true); - assertEquals(csk.getRetainedEntries(), 1); - - } - - private static void checkSummaries(ArrayOfStringsSketch sk1, ArrayOfStringsSketch sk2) { - TupleSketchIterator it1 = sk1.iterator(); - TupleSketchIterator it2 = sk2.iterator(); - while(it1.next() && it2.next()) { - ArrayOfStringsSummary sum1 = it1.getSummary(); - ArrayOfStringsSummary sum2 = it2.getSummary(); - assertTrue(sum1.equals(sum2)); - } - } - - static void printSummaries(TupleSketchIterator it) { - while (it.next()) { - String[] strArr = it.getSummary().getValue(); - for (String s : strArr) { - print(s + ", "); - } - println(""); - } - } - - @Test - public void checkCopyCtor() { - ArrayOfStringsSketch sk1 = new ArrayOfStringsSketch(); - String[][] strArrArr = {{"a","b"},{"c","d"},{"e","f"}}; - int len = strArrArr.length; - for (int i = 0; i < len; i++) { - sk1.update(strArrArr[i], strArrArr[i]); - } - assertEquals(sk1.getRetainedEntries(), 3); - final ArrayOfStringsSketch sk2 = sk1.copy(); - assertEquals(sk2.getRetainedEntries(), 3); - } - - @Test - public void printlnTest() { - println("PRINTING: "+this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(String s) { - print(s + LS); - } - - /** - * @param s value to print - */ - static void print(String s) { - //System.out.print(s); //disable here - } 
- -} diff --git a/src/test/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryTest.java b/src/test/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryTest.java deleted file mode 100644 index 308d154da..000000000 --- a/src/test/java/org/apache/datasketches/tuple2/strings/ArrayOfStringsSummaryTest.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.tuple2.strings; - -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertFalse; -import static org.testng.Assert.assertTrue; - -import java.lang.foreign.MemorySegment; - -import org.testng.annotations.Test; -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.tuple2.DeserializeResult; - -/** - * @author Lee Rhodes - */ -public class ArrayOfStringsSummaryTest { - - @Test - public void checkToByteArray() { - String[] strArr = new String[] {"abcd", "abcd", "abcd"}; - ArrayOfStringsSummary nsum = new ArrayOfStringsSummary(strArr); - ArrayOfStringsSummary copy = nsum.copy(); - assertTrue(copy.equals(nsum)); - byte[] out = nsum.toByteArray(); - - MemorySegment seg = MemorySegment.ofArray(out); - ArrayOfStringsSummary nsum2 = new ArrayOfStringsSummary(seg); - String[] nodesArr = nsum2.getValue(); - for (String s : nodesArr) { - println(s); - } - - println("\nfromMemorySegment(seg)"); - DeserializeResult dres = ArrayOfStringsSummaryDeserializer.fromMemorySegment(seg); - ArrayOfStringsSummary nsum3 = dres.getObject(); - nodesArr = nsum3.getValue(); - for (String s : nodesArr) { - println(s); - } - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkNumNodes() { - ArrayOfStringsSummary.checkNumNodes(200); - } - - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkInBytes() { - MemorySegment seg = MemorySegment.ofArray(new byte[100]); - ArrayOfStringsSummary.checkInBytes(seg, 200); - } - - @SuppressWarnings("unlikely-arg-type") - @Test - public void checkHashCode() { - String[] strArr = new String[] {"abcd", "abcd", "abcd"}; - ArrayOfStringsSummary sum1 = new ArrayOfStringsSummary(strArr); - ArrayOfStringsSummary sum2 = new ArrayOfStringsSummary(strArr); - int hc1 = sum1.hashCode(); - int hc2 = sum2.hashCode(); - assertEquals(hc1, hc2); - assertTrue(sum1.equals(sum2)); - assertFalse(sum1.equals(hc2)); 
- assertFalse(sum1.equals(null)); - } - - @Test - public void printlnTest() { - println("PRINTING: "+this.getClass().getName()); - } - - /** - * @param s value to print - */ - static void println(String s) { - //System.out.println(s); - } - -} From fbab0a0a3157d2cf7047d48782f9829744140962 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 27 Jun 2025 17:37:09 -0700 Subject: [PATCH 24/25] enable github workflows --- .github/workflows/auto-jdk-matrix.yml | 14 +++++++------- .github/workflows/auto-os-matrix.yml | 14 +++++++------- .github/workflows/check_cpp_files.yml | 14 +++++++------- .github/workflows/codeql-analysis.yml | 14 +++++++------- .github/workflows/javadoc.yml | 4 ++-- 5 files changed, 30 insertions(+), 30 deletions(-) diff --git a/.github/workflows/auto-jdk-matrix.yml b/.github/workflows/auto-jdk-matrix.yml index 176d1fd6e..0afbaf065 100644 --- a/.github/workflows/auto-jdk-matrix.yml +++ b/.github/workflows/auto-jdk-matrix.yml @@ -1,13 +1,13 @@ name: Auto JDK Matrix Test & Install on: -# push: -# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] -# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] -# pull_request: -# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] -# # The branches below must be a subset of the branches above -# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] + push: + paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] + branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] + pull_request: + paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] + # The branches below must be a subset of the branches above + branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] workflow_dispatch: env: diff --git a/.github/workflows/auto-os-matrix.yml b/.github/workflows/auto-os-matrix.yml index 413b7a957..83ecc1ffe 100644 --- 
a/.github/workflows/auto-os-matrix.yml +++ b/.github/workflows/auto-os-matrix.yml @@ -1,13 +1,13 @@ name: Auto OS Matrix Test & Install on: -# push: -# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] -# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] -# pull_request: -# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] -# # The branches below must be a subset of the branches above -# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] + push: + paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] + branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] + pull_request: + paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] + # The branches below must be a subset of the branches above + branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] workflow_dispatch: env: diff --git a/.github/workflows/check_cpp_files.yml b/.github/workflows/check_cpp_files.yml index 778859d0d..243eda985 100644 --- a/.github/workflows/check_cpp_files.yml +++ b/.github/workflows/check_cpp_files.yml @@ -1,13 +1,13 @@ name: CPP SerDe Compatibility Test on: -# push: -# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] -# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] -# pull_request: -# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] -# # The branches below must be a subset of the branches above -# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] + push: + paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] + branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] + pull_request: + paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] + 
# The branches below must be a subset of the branches above + branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] workflow_dispatch: jobs: diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index f3fde1de0..bb42fe345 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -1,13 +1,13 @@ name: "CodeQL" on: -# push: -# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] -# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] -# pull_request: -# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] -# # The branches below must be a subset of the branches above -# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] + push: + paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] + branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] + pull_request: + paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] + # The branches below must be a subset of the branches above + branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] workflow_dispatch: jobs: diff --git a/.github/workflows/javadoc.yml b/.github/workflows/javadoc.yml index 977c87b27..4862d64e4 100644 --- a/.github/workflows/javadoc.yml +++ b/.github/workflows/javadoc.yml @@ -1,8 +1,8 @@ name: JavaDoc on: -# push: -# branches: main + push: + branches: main workflow_dispatch: jobs: From 90727a895b20323d32e502e15c01b9cfc34c5d0f Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 27 Jun 2025 18:02:57 -0700 Subject: [PATCH 25/25] Disable the GHA workflows. 
--- .github/workflows/auto-jdk-matrix.yml | 13 ++++++------- .github/workflows/auto-os-matrix.yml | 14 +++++++------- .github/workflows/check_cpp_files.yml | 14 +++++++------- .github/workflows/codeql-analysis.yml | 14 +++++++------- .github/workflows/javadoc.yml | 4 ++-- 5 files changed, 29 insertions(+), 30 deletions(-) diff --git a/.github/workflows/auto-jdk-matrix.yml b/.github/workflows/auto-jdk-matrix.yml index 0afbaf065..a556d3e82 100644 --- a/.github/workflows/auto-jdk-matrix.yml +++ b/.github/workflows/auto-jdk-matrix.yml @@ -1,13 +1,12 @@ name: Auto JDK Matrix Test & Install on: - push: - paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] - branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] - pull_request: - paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] - # The branches below must be a subset of the branches above - branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] +# push: +# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] +# pull_request: +# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] +# # The branches below must be a subset of the branches above +# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] workflow_dispatch: env: diff --git a/.github/workflows/auto-os-matrix.yml b/.github/workflows/auto-os-matrix.yml index 83ecc1ffe..413b7a957 100644 --- a/.github/workflows/auto-os-matrix.yml +++ b/.github/workflows/auto-os-matrix.yml @@ -1,13 +1,13 @@ name: Auto OS Matrix Test & Install on: - push: - paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] - branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] - pull_request: - paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] - # The branches below must be a subset of the branches above - branches: [ 'main', 
'[0-9]+.[0-9]+.[Xx]' ] +# push: +# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] +# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] +# pull_request: +# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] +# # The branches below must be a subset of the branches above +# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] workflow_dispatch: env: diff --git a/.github/workflows/check_cpp_files.yml b/.github/workflows/check_cpp_files.yml index 243eda985..778859d0d 100644 --- a/.github/workflows/check_cpp_files.yml +++ b/.github/workflows/check_cpp_files.yml @@ -1,13 +1,13 @@ name: CPP SerDe Compatibility Test on: - push: - paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] - branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] - pull_request: - paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] - # The branches below must be a subset of the branches above - branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] +# push: +# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] +# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] +# pull_request: +# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] +# # The branches below must be a subset of the branches above +# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] workflow_dispatch: jobs: diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index bb42fe345..f3fde1de0 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -1,13 +1,13 @@ name: "CodeQL" on: - push: - paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] - branches: [ 'main', 
'[0-9]+.[0-9]+.[Xx]' ] - pull_request: - paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] - # The branches below must be a subset of the branches above - branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] +# push: +# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] +# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] +# pull_request: +# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] +# # The branches below must be a subset of the branches above +# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] workflow_dispatch: jobs: diff --git a/.github/workflows/javadoc.yml b/.github/workflows/javadoc.yml index 4862d64e4..977c87b27 100644 --- a/.github/workflows/javadoc.yml +++ b/.github/workflows/javadoc.yml @@ -1,8 +1,8 @@ name: JavaDoc on: - push: - branches: main +# push: +# branches: main workflow_dispatch: jobs: