diff --git a/.asf.yaml b/.asf.yaml
index a149bf396..57e258daa 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -45,11 +45,11 @@ github:
edit_comment_discussion: "Re: [D] {title} ({repository})"
delete_comment_discussion: "Re: [D] {title} ({repository})"
- notifications:
- commits: commits@dataskethces.apache.org
- issues: dev@dataskethces.apache.org
- discussions: dev@dataskethces.apache.org
- pullrequests_status: dev@dataskethces.apache.org
- pullrequests_comment: dev@dataskethces.apache.org
- # Send dependabot PRs to commits@ instead
- pullrequests_bot_dependabot: commits@dataskethces.apache.org
+notifications:
+ commits: commits@datasketches.apache.org
+ issues: dev@datasketches.apache.org
+ discussions: dev@datasketches.apache.org
+ pullrequests_status: dev@datasketches.apache.org
+ pullrequests_comment: dev@datasketches.apache.org
+ # Route dependabot pull-request notifications to commits@ instead of dev@
+ pullrequests_bot_dependabot: commits@datasketches.apache.org
diff --git a/.github/workflows/auto-jdk-matrix.yml b/.github/workflows/auto-jdk-matrix.yml
index 0afbaf065..a556d3e82 100644
--- a/.github/workflows/auto-jdk-matrix.yml
+++ b/.github/workflows/auto-jdk-matrix.yml
@@ -1,13 +1,12 @@
name: Auto JDK Matrix Test & Install
on:
- push:
- paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
- branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
- pull_request:
- paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
- # The branches below must be a subset of the branches above
- branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
+# push:
+# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
+# pull_request:
+# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
+# # The branches below must be a subset of the branches above
+# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
workflow_dispatch:
env:
diff --git a/.github/workflows/auto-os-matrix.yml b/.github/workflows/auto-os-matrix.yml
index 83ecc1ffe..413b7a957 100644
--- a/.github/workflows/auto-os-matrix.yml
+++ b/.github/workflows/auto-os-matrix.yml
@@ -1,13 +1,13 @@
name: Auto OS Matrix Test & Install
on:
- push:
- paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
- branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
- pull_request:
- paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
- # The branches below must be a subset of the branches above
- branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
+# push:
+# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
+# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
+# pull_request:
+# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
+# # The branches below must be a subset of the branches above
+# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
workflow_dispatch:
env:
diff --git a/.github/workflows/check_cpp_files.yml b/.github/workflows/check_cpp_files.yml
index 243eda985..778859d0d 100644
--- a/.github/workflows/check_cpp_files.yml
+++ b/.github/workflows/check_cpp_files.yml
@@ -1,13 +1,13 @@
name: CPP SerDe Compatibility Test
on:
- push:
- paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
- branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
- pull_request:
- paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
- # The branches below must be a subset of the branches above
- branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
+# push:
+# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
+# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
+# pull_request:
+# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
+# # The branches below must be a subset of the branches above
+# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
workflow_dispatch:
jobs:
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index bb42fe345..f3fde1de0 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -1,13 +1,13 @@
name: "CodeQL"
on:
- push:
- paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
- branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
- pull_request:
- paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
- # The branches below must be a subset of the branches above
- branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
+# push:
+# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
+# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
+# pull_request:
+# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ]
+# # The branches below must be a subset of the branches above
+# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ]
workflow_dispatch:
jobs:
diff --git a/.github/workflows/javadoc.yml b/.github/workflows/javadoc.yml
index 4862d64e4..977c87b27 100644
--- a/.github/workflows/javadoc.yml
+++ b/.github/workflows/javadoc.yml
@@ -1,8 +1,8 @@
name: JavaDoc
on:
- push:
- branches: main
+# push:
+# branches: main
workflow_dispatch:
jobs:
diff --git a/pom.xml b/pom.xml
index 84e032947..760acd58d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -83,10 +83,10 @@ under the License.
If both segment are off-heap, they both must have the same starting address and the same size. For on-heap segments, both segments must be based on or derived from the same array object and neither segment
+ * can be read-only. Returns false if either argument is null; In order to perform set operations on two sketches it is critical that the same hash
+ * function and seed are identical for both sketches, otherwise the assumed 1:1 relationship
+ * between the original source key value and the hashed bit string would be violated. Once
+ * you have developed a history of stored sketches you are stuck with it.
+ *
+ * WARNING: This seed is used internally by library sketches in different
+ * packages and thus must be declared public. However, this seed value must not be used by library
+ * users with the MurmurHash3 function. It should be viewed as existing for exclusive, private
+ * use by the library.
+ *
+ * See Default Update Seed
+ */
+ public static final long DEFAULT_UPDATE_SEED = 9001L;
+
private Util() {}
//Byte Conversions
@@ -257,7 +281,7 @@ public static String characterPad(final String s, final int fieldLength, final c
final int sLen = s.length();
if (sLen < fieldLength) {
final char[] cArr = new char[fieldLength - sLen];
- fill(cArr, padChar);
+ java.util.Arrays.fill(cArr, padChar);
final String addstr = String.valueOf(cArr);
return (postpend) ? s.concat(addstr) : addstr.concat(s);
}
@@ -798,4 +822,183 @@ public static If both segment are off-heap, they both must have the same starting address and the same size. For on-heap segments, both segments must be based on or derived from the same array object and neither segment
+ * can be read-only. Returns false if either argument is null; If aligned is true, the returned MemorySegment will be constructed from a long[] array,
+ * and, as a result, it will have a memory alignment of 8 bytes.
+ * If the requested capacity is not exactly divisible by eight, the returned size
+ * will be rolled up to the next multiple of eight bytes. If aligned is false, the returned MemorySegment will be constructed from a byte[] array,
+ * and have a memory alignment of 1 byte.
+ *
+ * @param capacityBytes The new capacity being requested. It must not be negative and cannot exceed Integer.MAX_VALUE.
+ * @param aligned if true, the new heap segment will have an alignment of 8 bytes, otherwise the alignment will be 1 byte.
+ * @return a new MemorySegment with the requested capacity and alignment.
+ */
+ public static MemorySegment alignedHeapSegment(final int capacityBytes, final boolean aligned) {
+ if (aligned) {
+ final int lenLongs = capacityBytes >>> 3;
+ final long[] array = ((capacityBytes & 0x7) == 0)
+ ? new long[lenLongs]
+ : new long[lenLongs + 1];
+ return MemorySegment.ofArray(array);
+ }
+ return MemorySegment.ofArray(new byte[capacityBytes]);
+ }
+
+ /**
+ * Sets the bits defined by the bitMask in the byte at the given offset.
+ * @param seg the given MemorySegment
+ * @param offsetBytes offset bytes relative to this MemorySegment start
+ * @param bitMask the bits that are one in this mask will be set to one in the target byte
+ */
+ public static void setBits(final MemorySegment seg, final long offsetBytes, final byte bitMask) {
+ final byte b = seg.get(JAVA_BYTE, offsetBytes);
+ seg.set(JAVA_BYTE, offsetBytes, (byte)(b | bitMask));
+ }
+
+ /**
+ * Computes and checks the 16-bit seed hash from the given long seed.
+ * The seed hash must not be zero in order to maintain compatibility with older serialized
+ * versions that did not have this concept.
+ * @param seed See Update Hash Seed
+ * @return the seed hash.
+ */
+ public static short computeSeedHash(final long seed) {
+ final long[] seedArr = {seed};
+ final short seedHash = (short)(hash(seedArr, 0L)[0] & 0xFFFFL);
+ if (seedHash == 0) {
+ throw new SketchesArgumentException(
+ "The given seed: " + seed + " produced a seedHash of zero. "
+ + "You must choose a different seed.");
+ }
+ return seedHash;
+ }
+
+ /**
+ * Checks that the two seed hashes are equal. If not, throws a SketchesArgumentException.
+ * @param seedHashA the seedHash A
+ * @param seedHashB the seedHash B
+ * @return seedHashA if they are equal
+ */
+ public static short checkSeedHashes(final short seedHashA, final short seedHashB) {
+ if (seedHashA != seedHashB) {
+ throw new SketchesArgumentException(
+ "Incompatible Seed Hashes. " + Integer.toHexString(seedHashA & 0XFFFF)
+ + ", " + Integer.toHexString(seedHashB & 0XFFFF));
+ }
+ return seedHashA;
+ }
+
}
diff --git a/src/main/java/org/apache/datasketches/cpc/BitMatrix.java b/src/main/java/org/apache/datasketches/cpc/BitMatrix.java
index 419640a4f..ecf27b70f 100644
--- a/src/main/java/org/apache/datasketches/cpc/BitMatrix.java
+++ b/src/main/java/org/apache/datasketches/cpc/BitMatrix.java
@@ -23,7 +23,7 @@
import java.util.Arrays;
-import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.apache.datasketches.common.Util;
/**
* Used only in test.
@@ -38,7 +38,7 @@ class BitMatrix {
private boolean numCouponsInvalid; //only used if we allowed merges
BitMatrix(final int lgK) {
- this(lgK, ThetaUtil.DEFAULT_UPDATE_SEED);
+ this(lgK, Util.DEFAULT_UPDATE_SEED);
}
BitMatrix(final int lgK, final long seed) {
diff --git a/src/main/java/org/apache/datasketches/cpc/CompressedState.java b/src/main/java/org/apache/datasketches/cpc/CompressedState.java
index 6ea6fde4b..b88a71812 100644
--- a/src/main/java/org/apache/datasketches/cpc/CompressedState.java
+++ b/src/main/java/org/apache/datasketches/cpc/CompressedState.java
@@ -47,9 +47,9 @@
import static org.apache.datasketches.cpc.PreambleUtil.putSparseHybridMerged;
import static org.apache.datasketches.cpc.RuntimeAsserts.rtAssert;
+import org.apache.datasketches.common.Util;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
-import org.apache.datasketches.thetacommon.ThetaUtil;
/**
* @author Lee Rhodes
@@ -82,7 +82,7 @@ private CompressedState(final int lgK, final short seedHash) {
}
static CompressedState compress(final CpcSketch source) {
- final short seedHash = ThetaUtil.computeSeedHash(source.seed);
+ final short seedHash = Util.computeSeedHash(source.seed);
final CompressedState target = new CompressedState(source.lgK, seedHash);
target.fiCol = source.fiCol;
target.mergeFlag = source.mergeFlag;
diff --git a/src/main/java/org/apache/datasketches/cpc/CompressionCharacterization.java b/src/main/java/org/apache/datasketches/cpc/CompressionCharacterization.java
index 4f0a93352..f34054d48 100644
--- a/src/main/java/org/apache/datasketches/cpc/CompressionCharacterization.java
+++ b/src/main/java/org/apache/datasketches/cpc/CompressionCharacterization.java
@@ -31,9 +31,9 @@
import java.io.PrintWriter;
import org.apache.datasketches.common.SuppressFBWarnings;
+import org.apache.datasketches.common.Util;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
-import org.apache.datasketches.thetacommon.ThetaUtil;
/**
* This code is used both by unit tests, for short running tests,
@@ -222,7 +222,7 @@ private void doTrialsAtLgKAtN(final int lgK, final long n, final int totalTrials
for (int trial = 0; trial < trialsPerWave; trial++) {
final CompressedState state = compressedStates2[trial];
CpcSketch uncSk = null;
- uncSk = CpcSketch.uncompress(state, ThetaUtil.DEFAULT_UPDATE_SEED);
+ uncSk = CpcSketch.uncompress(state, Util.DEFAULT_UPDATE_SEED);
unCompressedSketches[trial] = uncSk;
}
diff --git a/src/main/java/org/apache/datasketches/cpc/CpcSketch.java b/src/main/java/org/apache/datasketches/cpc/CpcSketch.java
index 4ed89dd19..a87fd1448 100644
--- a/src/main/java/org/apache/datasketches/cpc/CpcSketch.java
+++ b/src/main/java/org/apache/datasketches/cpc/CpcSketch.java
@@ -34,9 +34,9 @@
import java.util.Arrays;
import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.Util;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
-import org.apache.datasketches.thetacommon.ThetaUtil;
/**
* This is a unique-counting sketch that implements the
@@ -89,7 +89,7 @@ public final class CpcSketch {
* Constructor with default log_base2 of k
*/
public CpcSketch() {
- this(DEFAULT_LG_K, ThetaUtil.DEFAULT_UPDATE_SEED);
+ this(DEFAULT_LG_K, Util.DEFAULT_UPDATE_SEED);
}
/**
@@ -97,7 +97,7 @@ public CpcSketch() {
* @param lgK the given log_base2 of k
*/
public CpcSketch(final int lgK) {
- this(lgK, ThetaUtil.DEFAULT_UPDATE_SEED);
+ this(lgK, Util.DEFAULT_UPDATE_SEED);
}
/**
@@ -233,7 +233,7 @@ public double getUpperBound(final int kappa) {
* @return the given Memory as a CpcSketch on the Java heap.
*/
public static CpcSketch heapify(final Memory mem) {
- return heapify(mem, ThetaUtil.DEFAULT_UPDATE_SEED);
+ return heapify(mem, Util.DEFAULT_UPDATE_SEED);
}
/**
@@ -242,7 +242,7 @@ public static CpcSketch heapify(final Memory mem) {
* @return the given byte array as a CpcSketch on the Java heap.
*/
public static CpcSketch heapify(final byte[] byteArray) {
- return heapify(byteArray, ThetaUtil.DEFAULT_UPDATE_SEED);
+ return heapify(byteArray, Util.DEFAULT_UPDATE_SEED);
}
/**
@@ -662,7 +662,7 @@ else if (col < (sketch.windowOffset + 8)) { // track the 8 bits inside the windo
//also used in test
static CpcSketch uncompress(final CompressedState source, final long seed) {
- ThetaUtil.checkSeedHashes(ThetaUtil.computeSeedHash(seed), source.seedHash);
+ Util.checkSeedHashes(Util.computeSeedHash(seed), source.seedHash);
final CpcSketch sketch = new CpcSketch(source.lgK, seed);
sketch.numCoupons = source.numCoupons;
sketch.windowOffset = source.getWindowOffset();
@@ -723,7 +723,7 @@ public String toString() {
*/
public String toString(final boolean detail) {
final int numPairs = (pairTable == null) ? 0 : pairTable.getNumPairs();
- final int seedHash = Short.toUnsignedInt(ThetaUtil.computeSeedHash(seed));
+ final int seedHash = Short.toUnsignedInt(Util.computeSeedHash(seed));
final double errConst = mergeFlag ? log(2) : sqrt(log(2) / 2.0);
final double rse = errConst / Math.sqrt(1 << lgK);
final StringBuilder sb = new StringBuilder();
diff --git a/src/main/java/org/apache/datasketches/cpc/CpcUnion.java b/src/main/java/org/apache/datasketches/cpc/CpcUnion.java
index 4b944c00f..8aeb48bbf 100644
--- a/src/main/java/org/apache/datasketches/cpc/CpcUnion.java
+++ b/src/main/java/org/apache/datasketches/cpc/CpcUnion.java
@@ -27,7 +27,7 @@
import org.apache.datasketches.common.Family;
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.common.SketchesStateException;
-import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.apache.datasketches.common.Util;
/*
* The merging logic is somewhat involved, so it will be summarized here.
@@ -102,7 +102,7 @@ public class CpcUnion {
* Construct this unioning object with the default LgK and the default update seed.
*/
public CpcUnion() {
- this(CpcSketch.DEFAULT_LG_K, ThetaUtil.DEFAULT_UPDATE_SEED);
+ this(CpcSketch.DEFAULT_LG_K, Util.DEFAULT_UPDATE_SEED);
}
/**
@@ -110,7 +110,7 @@ public CpcUnion() {
* @param lgK The given log2 of K.
*/
public CpcUnion(final int lgK) {
- this(lgK, ThetaUtil.DEFAULT_UPDATE_SEED);
+ this(lgK, Util.DEFAULT_UPDATE_SEED);
}
/**
diff --git a/src/main/java/org/apache/datasketches/fdt/FdtSketch.java b/src/main/java/org/apache/datasketches/fdt/FdtSketch.java
index e4cac32a1..9dc6aecca 100644
--- a/src/main/java/org/apache/datasketches/fdt/FdtSketch.java
+++ b/src/main/java/org/apache/datasketches/fdt/FdtSketch.java
@@ -19,10 +19,10 @@
package org.apache.datasketches.fdt;
+import java.lang.foreign.MemorySegment;
import java.util.List;
import org.apache.datasketches.common.SketchesArgumentException;
-import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.thetacommon.ThetaUtil;
import org.apache.datasketches.tuple.strings.ArrayOfStringsSketch;
@@ -59,14 +59,14 @@ public FdtSketch(final int lgK) {
/**
* Used by deserialization.
- * @param mem the image of a FdtSketch
+ * @param seg the image of a FdtSketch
* @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated.
* This capability will be removed in a future release.
* Heapifying a CompactSketch is not deprecated.
*/
@Deprecated
- FdtSketch(final Memory mem) {
- super(mem);
+ FdtSketch(final MemorySegment seg) {
+ super(seg);
}
/**
diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java
index 7c166a29d..a56a5eeef 100644
--- a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java
+++ b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java
@@ -26,11 +26,11 @@
import org.apache.datasketches.common.Family;
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.common.SketchesStateException;
+import org.apache.datasketches.hash.XxHash;
import org.apache.datasketches.memory.Buffer;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableBuffer;
import org.apache.datasketches.memory.WritableMemory;
-import org.apache.datasketches.memory.XxHash;
/**
* A Bloom filter is a data structure that can be used for probabilistic
diff --git a/src/main/java/org/apache/datasketches/frequencies/ReversePurgeItemHashMap.java b/src/main/java/org/apache/datasketches/frequencies/ReversePurgeItemHashMap.java
index 355724e2e..67e946b01 100644
--- a/src/main/java/org/apache/datasketches/frequencies/ReversePurgeItemHashMap.java
+++ b/src/main/java/org/apache/datasketches/frequencies/ReversePurgeItemHashMap.java
@@ -25,7 +25,7 @@
import java.lang.reflect.Array;
-import org.apache.datasketches.thetacommon.QuickSelect;
+import org.apache.datasketches.common.QuickSelect;
/**
* Implements a linear-probing based hash map of (key, value) pairs and is distinguished by a
diff --git a/src/main/java/org/apache/datasketches/frequencies/ReversePurgeLongHashMap.java b/src/main/java/org/apache/datasketches/frequencies/ReversePurgeLongHashMap.java
index dd33589d4..358f85bce 100644
--- a/src/main/java/org/apache/datasketches/frequencies/ReversePurgeLongHashMap.java
+++ b/src/main/java/org/apache/datasketches/frequencies/ReversePurgeLongHashMap.java
@@ -24,8 +24,8 @@
import static org.apache.datasketches.common.Util.INVERSE_GOLDEN;
import static org.apache.datasketches.frequencies.Util.hash;
+import org.apache.datasketches.common.QuickSelect;
import org.apache.datasketches.common.SketchesArgumentException;
-import org.apache.datasketches.thetacommon.QuickSelect;
/**
* Implements a linear-probing based hash map of (key, value) pairs and is distinguished by a
diff --git a/src/main/java/org/apache/datasketches/hash/XxHash.java b/src/main/java/org/apache/datasketches/hash/XxHash.java
index a93d4d348..2185471a8 100644
--- a/src/main/java/org/apache/datasketches/hash/XxHash.java
+++ b/src/main/java/org/apache/datasketches/hash/XxHash.java
@@ -19,41 +19,187 @@
package org.apache.datasketches.hash;
-import org.apache.datasketches.memory.Memory;
+import static org.apache.datasketches.memory.internal.XxHash64.hash;
+import static org.apache.datasketches.memory.internal.XxHash64.hashBytes;
+import static org.apache.datasketches.memory.internal.XxHash64.hashChars;
+import static org.apache.datasketches.memory.internal.XxHash64.hashDoubles;
+import static org.apache.datasketches.memory.internal.XxHash64.hashFloats;
+import static org.apache.datasketches.memory.internal.XxHash64.hashInts;
+import static org.apache.datasketches.memory.internal.XxHash64.hashLongs;
+import static org.apache.datasketches.memory.internal.XxHash64.hashShorts;
+
+import org.apache.datasketches.memory.internal.XxHash64;
/**
* The XxHash is a fast, non-cryptographic, 64-bit hash function that has
* excellent avalanche and 2-way bit independence properties.
+ * This Java version was adapted from the C++ version and from the OpenHFT/Zero-Allocation-Hashing
+ * implementation, both referenced below.
+ *
+ * The C++ source repository:
+ *
+ * https://github.com/Cyan4973/xxHash. It has a BSD 2-Clause License:
+ *
+ * http://www.opensource.org/licenses/bsd-license.php. See LICENSE.
*
- * This class wraps the
- * Memory Component XxHash
- * implementation.
+ * Portions of this code were adapted from
+ *
+ * OpenHFT/Zero-Allocation-Hashing, which has an Apache 2 license as does this site. See LICENSE.
*
* @author Lee Rhodes
*/
-public class XxHash {
+public final class XxHash {
+
+ private XxHash() { /* static utility class, not instantiable */ }
+
+ /**
+ * Hash the given arr starting at the given offset and continuing for the given length using the
+ * given seed.
+ * @param arr the given array
+ * @param offsetBytes starting at this offset
+ * @param lengthBytes continuing for this length
+ * @param seed the given seed
+ * @return the hash
+ */
+ public static long hashByteArr(
+ final byte[] arr,
+ final int offsetBytes,
+ final int lengthBytes,
+ final long seed) {
+ return hashBytes(arr, offsetBytes, lengthBytes, seed);
+ }
+
+ /**
+ * Hash the given arr starting at the given offset and continuing for the given length using the
+ * given seed.
+ * @param arr the given array
+ * @param offsetShorts starting at this offset
+ * @param lengthShorts continuing for this length
+ * @param seed the given seed
+ * @return the hash
+ */
+ public static long hashShortArr(
+ final short[] arr,
+ final int offsetShorts,
+ final int lengthShorts,
+ final long seed) {
+ return hashShorts(arr, offsetShorts, lengthShorts, seed);
+ }
+
+ /**
+ * Hash the given arr starting at the given offset and continuing for the given length using the
+ * given seed.
+ * @param arr the given array
+ * @param offsetChars starting at this offset
+ * @param lengthChars continuing for this length
+ * @param seed the given seed
+ * @return the hash
+ */
+ public static long hashCharArr(
+ final char[] arr,
+ final int offsetChars,
+ final int lengthChars,
+ final long seed) {
+ return hashChars(arr, offsetChars, lengthChars, seed);
+ }
/**
- * Compute the hash of the given Memory object.
- * @param mem The given Memory object
- * @param offsetBytes Starting at this offset in bytes
- * @param lengthBytes Continuing for this number of bytes
- * @param seed use this seed for the hash function
- * @return return the resulting 64-bit hash value.
+ * Hash the given arr starting at the given offset and continuing for the given length using the
+ * given seed.
+ * @param arr the given array
+ * @param offsetInts starting at this offset
+ * @param lengthInts continuing for this length
+ * @param seed the given seed
+ * @return the hash
*/
- public static long hash(final Memory mem, final long offsetBytes, final long lengthBytes,
+ public static long hashIntArr(
+ final int[] arr,
+ final int offsetInts,
+ final int lengthInts,
final long seed) {
- return mem.xxHash64(offsetBytes, lengthBytes, seed);
+ return hashInts(arr, offsetInts, lengthInts, seed);
}
/**
- * Returns a 64-bit hash.
- * @param in a long
+ * Hash the given arr starting at the given offset and continuing for the given length using the
+ * given seed.
+ * @param arr the given array
+ * @param offsetLongs starting at this offset
+ * @param lengthLongs continuing for this length
+ * @param seed the given seed
+ * @return the hash
+ */
+ public static long hashLongArr(
+ final long[] arr,
+ final int offsetLongs,
+ final int lengthLongs,
+ final long seed) {
+ return hashLongs(arr, offsetLongs, lengthLongs, seed);
+ }
+
+ /**
+ * Returns a 64-bit hash from a single long. This method has been optimized for speed when only
+ * a single hash of a long is required.
+ * @param in A long.
* @param seed A long valued seed.
+ * @return the hash.
+ */
+ public static long hashLong(
+ final long in,
+ final long seed) {
+ return hash(in, seed);
+ }
+
+ /**
+ * Hash the given arr starting at the given offset and continuing for the given length using the
+ * given seed.
+ * @param arr the given array
+ * @param offsetFloats starting at this offset
+ * @param lengthFloats continuing for this length
+ * @param seed the given seed
* @return the hash
*/
- public static long hash(final long in, final long seed) {
- return org.apache.datasketches.memory.XxHash.hashLong(in, seed);
+ public static long hashFloatArr(
+ final float[] arr,
+ final int offsetFloats,
+ final int lengthFloats,
+ final long seed) {
+ return hashFloats(arr, offsetFloats, lengthFloats, seed);
+ }
+
+ /**
+ * Hash the given arr starting at the given offset and continuing for the given length using the
+ * given seed.
+ * @param arr the given array
+ * @param offsetDoubles starting at this offset
+ * @param lengthDoubles continuing for this length
+ * @param seed the given seed
+ * @return the hash
+ */
+ public static long hashDoubleArr(
+ final double[] arr,
+ final int offsetDoubles,
+ final int lengthDoubles,
+ final long seed) {
+ return hashDoubles(arr, offsetDoubles, lengthDoubles, seed);
+ }
+
+ /**
+ * Hash the given string starting at the given offset and continuing for the given length using the
+ * given seed.
+ * @param str the given string
+ * @param offsetChars starting at this offset
+ * @param lengthChars continuing for this length
+ * @param seed the given seed
+ * @return the hash
+ */
+ public static long hashString(
+ final String str,
+ final int offsetChars,
+ final int lengthChars,
+ final long seed) {
+ return XxHash64.hashString(str, offsetChars, lengthChars, seed);
}
}
+
diff --git a/src/main/java/org/apache/datasketches/hll/BaseHllSketch.java b/src/main/java/org/apache/datasketches/hll/BaseHllSketch.java
index c602948a8..ad21fc5ae 100644
--- a/src/main/java/org/apache/datasketches/hll/BaseHllSketch.java
+++ b/src/main/java/org/apache/datasketches/hll/BaseHllSketch.java
@@ -28,8 +28,8 @@
import java.nio.ByteBuffer;
+import org.apache.datasketches.common.Util;
import org.apache.datasketches.memory.Memory;
-import org.apache.datasketches.thetacommon.ThetaUtil;
/**
* Although this class is package-private, it provides a single place to define and document
@@ -299,7 +299,7 @@ public abstract String toString(boolean summary, boolean detail, boolean auxDeta
*/
public void update(final long datum) {
final long[] data = { datum };
- couponUpdate(coupon(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)));
+ couponUpdate(coupon(hash(data, Util.DEFAULT_UPDATE_SEED)));
}
/**
@@ -314,7 +314,7 @@ public void update(final long datum) {
public void update(final double datum) {
final double d = (datum == 0.0) ? 0.0 : datum; // canonicalize -0.0, 0.0
final long[] data = { Double.doubleToLongBits(d) };// canonicalize all NaN & +/- infinity forms
- couponUpdate(coupon(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)));
+ couponUpdate(coupon(hash(data, Util.DEFAULT_UPDATE_SEED)));
}
/**
@@ -334,7 +334,7 @@ public void update(final double datum) {
public void update(final String datum) {
if ((datum == null) || datum.isEmpty()) { return; }
final byte[] data = datum.getBytes(UTF_8);
- couponUpdate(coupon(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)));
+ couponUpdate(coupon(hash(data, Util.DEFAULT_UPDATE_SEED)));
}
/**
@@ -351,7 +351,7 @@ public void update(final String datum) {
*/
public void update(final ByteBuffer data) {
if ((data == null) || (data.remaining() == 0)) { return; }
- couponUpdate(coupon(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)));
+ couponUpdate(coupon(hash(data, Util.DEFAULT_UPDATE_SEED)));
}
/**
@@ -362,7 +362,7 @@ public void update(final ByteBuffer data) {
*/
public void update(final byte[] data) {
if ((data == null) || (data.length == 0)) { return; }
- couponUpdate(coupon(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)));
+ couponUpdate(coupon(hash(data, Util.DEFAULT_UPDATE_SEED)));
}
/**
@@ -376,7 +376,7 @@ public void update(final byte[] data) {
*/
public void update(final char[] data) {
if ((data == null) || (data.length == 0)) { return; }
- couponUpdate(coupon(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)));
+ couponUpdate(coupon(hash(data, Util.DEFAULT_UPDATE_SEED)));
}
/**
@@ -387,7 +387,7 @@ public void update(final char[] data) {
*/
public void update(final int[] data) {
if ((data == null) || (data.length == 0)) { return; }
- couponUpdate(coupon(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)));
+ couponUpdate(coupon(hash(data, Util.DEFAULT_UPDATE_SEED)));
}
/**
@@ -398,7 +398,7 @@ public void update(final int[] data) {
*/
public void update(final long[] data) {
if ((data == null) || (data.length == 0)) { return; }
- couponUpdate(coupon(hash(data, ThetaUtil.DEFAULT_UPDATE_SEED)));
+ couponUpdate(coupon(hash(data, Util.DEFAULT_UPDATE_SEED)));
}
private static final int coupon(final long[] hash) {
diff --git a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java
index 1427f6279..47bad1c67 100644
--- a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java
+++ b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java
@@ -60,7 +60,7 @@ public interface DoublesSortedView extends SortedView {
* @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0].
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getCDF(double[] splitPoints, QuantileSearchCriteria searchCrit) {
+ default double[] getCDF(final double[] splitPoints, final QuantileSearchCriteria searchCrit) {
QuantilesUtil.checkDoublesSplitPointsOrder(splitPoints);
final int len = splitPoints.length + 1;
final double[] buckets = new double[len];
@@ -129,7 +129,7 @@ default double[] getCDF(double[] splitPoints, QuantileSearchCriteria searchCrit)
* @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0].
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getPMF(double[] splitPoints, QuantileSearchCriteria searchCrit) {
+ default double[] getPMF(final double[] splitPoints, final QuantileSearchCriteria searchCrit) {
final double[] buckets = getCDF(splitPoints, searchCrit);
final int len = buckets.length;
for (int i = len; i-- > 1; ) {
diff --git a/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java
index eec699d94..0667a6748 100644
--- a/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java
+++ b/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java
@@ -60,7 +60,7 @@ public interface FloatsSortedView extends SortedView {
* @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0].
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getCDF(float[] splitPoints, QuantileSearchCriteria searchCrit) {
+ default double[] getCDF(final float[] splitPoints, final QuantileSearchCriteria searchCrit) {
QuantilesUtil.checkFloatsSplitPointsOrder(splitPoints);
final int len = splitPoints.length + 1;
final double[] buckets = new double[len];
@@ -129,7 +129,7 @@ default double[] getCDF(float[] splitPoints, QuantileSearchCriteria searchCrit)
* @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0].
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getPMF(float[] splitPoints, QuantileSearchCriteria searchCrit) {
+ default double[] getPMF(final float[] splitPoints, final QuantileSearchCriteria searchCrit) {
final double[] buckets = getCDF(splitPoints, searchCrit);
final int len = buckets.length;
for (int i = len; i-- > 1; ) {
diff --git a/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java
index e7e3521c7..eaeceeb92 100644
--- a/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java
+++ b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java
@@ -60,7 +60,7 @@ public interface LongsSortedView extends SortedView {
* @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0].
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getCDF(long[] splitPoints, QuantileSearchCriteria searchCrit) {
+ default double[] getCDF(final long[] splitPoints, final QuantileSearchCriteria searchCrit) {
QuantilesUtil.checkLongsSplitPointsOrder(splitPoints);
final int len = splitPoints.length + 1;
final double[] buckets = new double[len];
@@ -129,7 +129,7 @@ default double[] getCDF(long[] splitPoints, QuantileSearchCriteria searchCrit) {
* @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0].
* @throws IllegalArgumentException if sketch is empty.
*/
- default double[] getPMF(long[] splitPoints, QuantileSearchCriteria searchCrit) {
+ default double[] getPMF(final long[] splitPoints, final QuantileSearchCriteria searchCrit) {
final double[] buckets = getCDF(splitPoints, searchCrit);
final int len = buckets.length;
for (int i = len; i-- > 1; ) {
diff --git a/src/main/java/org/apache/datasketches/quantilescommon/PartitioningFeature.java b/src/main/java/org/apache/datasketches/quantilescommon/PartitioningFeature.java
index 5672c2a02..82b293b3a 100644
--- a/src/main/java/org/apache/datasketches/quantilescommon/PartitioningFeature.java
+++ b/src/main/java/org/apache/datasketches/quantilescommon/PartitioningFeature.java
@@ -53,7 +53,7 @@ public interface PartitioningFeature This a stateless operation and has no impact on the internal state of this operator.
* Thus, this is not an accumulating update and does not interact with the {@link #setA(Sketch)},
* {@link #notB(Sketch)}, {@link #getResult(boolean)}, or
- * {@link #getResult(boolean, WritableMemory, boolean)} methods. If either argument is null an exception is thrown. This a stateless operation and has no impact on the internal state of this operator.
* Thus, this is not an accumulating update and does not interact with the {@link #setA(Sketch)},
* {@link #notB(Sketch)}, {@link #getResult(boolean)}, or
- * {@link #getResult(boolean, WritableMemory, boolean)} methods. If either argument is null an exception is thrown. The resulting sketch will not retain any link to the source Memory and all of its data will be
+ * The resulting sketch will not retain any link to the source MemorySegment and all of its data will be
* copied to the heap CompactSketch. This method assumes that the sketch image was created with the correct hash seed, so it is not checked.
@@ -71,65 +74,63 @@ public abstract class CompactSketch extends Sketch {
* However, Serial Version 1 sketch images do not have a seedHash field,
* so the resulting heapified CompactSketch will be given the hash of the DEFAULT_UPDATE_SEED. The resulting sketch will not retain any link to the source Memory and all of its data will be
+ * The resulting sketch will not retain any link to the source MemorySegment and all of its data will be
* copied to the heap CompactSketch. This method checks if the given expectedSeed was used to create the source Memory image.
+ * This method checks if the given expectedSeed was used to create the source MemorySegment image.
* However, SerialVersion 1 sketch images cannot be checked as they don't have a seedHash field,
* so the resulting heapified CompactSketch will be given the hash of the expectedSeed. Wrapping any subclass of this class that is empty or contains only a single item will
* result in heapified forms of empty and single item sketch respectively.
- * This is actually faster and consumes less overall memory.
This method assumes that the sketch image was created with the correct hash seed, so it is not checked. * However, Serial Version 1 sketch images do not have a seedHash field, * so the resulting on-heap CompactSketch will be given the hash of the DEFAULT_UPDATE_SEED.
* - * @param srcMem an image of a Sketch. - * See Memory. - * @return a CompactSketch backed by the given Memory except as above. + * @param srcSeg an image of a Sketch. + * @return a CompactSketch backed by the given MemorySegment except as above. */ - public static CompactSketch wrap(final Memory srcMem) { - return wrap(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED, false); + public static CompactSketch wrap(final MemorySegment srcSeg) { + return wrap(srcSeg, Util.DEFAULT_UPDATE_SEED, false); } /** - * Wrap takes the sketch image in the given Memory and refers to it directly. + * Wrap takes the sketch image in the given MemorySegment and refers to it directly. * There is no data copying onto the java heap. * The wrap operation enables fast read-only merging and access to all the public read-only API. * @@ -166,44 +166,43 @@ public static CompactSketch wrap(final Memory srcMem) { * *Wrapping any subclass of this class that is empty or contains only a single item will * result in heapified forms of empty and single item sketch respectively. - * This is actually faster and consumes less overall memory.
+ * This is actually faster and consumes less overall space. * - *This method checks if the given expectedSeed was used to create the source Memory image. + *
This method checks if the given expectedSeed was used to create the source MemorySegment image. * However, SerialVersion 1 sketches cannot be checked as they don't have a seedHash field, * so the resulting heapified CompactSketch will be given the hash of the expectedSeed.
* - * @param srcMem an image of a Sketch that was created using the given expectedSeed. - * See Memory - * @param expectedSeed the seed used to validate the given Memory image. + * @param srcSeg an image of a Sketch that was created using the given expectedSeed. + * @param expectedSeed the seed used to validate the given MemorySegment image. * See Update Hash Seed. - * @return a CompactSketch backed by the given Memory except as above. + * @return a CompactSketch backed by the given MemorySegment except as above. */ - public static CompactSketch wrap(final Memory srcMem, final long expectedSeed) { - return wrap(srcMem, expectedSeed, true); + public static CompactSketch wrap(final MemorySegment srcSeg, final long expectedSeed) { + return wrap(srcSeg, expectedSeed, true); } - private static CompactSketch wrap(final Memory srcMem, final long seed, final boolean enforceSeed) { - final int serVer = extractSerVer(srcMem); - final int familyID = extractFamilyID(srcMem); + private static CompactSketch wrap(final MemorySegment srcSeg, final long seed, final boolean enforceSeed) { + final int serVer = extractSerVer(srcSeg); + final int familyID = extractFamilyID(srcSeg); final Family family = Family.idToFamily(familyID); if (family != Family.COMPACT) { throw new IllegalArgumentException("Corrupted: " + family + " is not Compact!"); } - final short seedHash = ThetaUtil.computeSeedHash(seed); + final short seedHash = Util.computeSeedHash(seed); if (serVer == 4) { - return DirectCompactCompressedSketch.wrapInstance(srcMem, - enforceSeed ? seedHash : (short) extractSeedHash(srcMem)); + return DirectCompactCompressedSketch.wrapInstance(srcSeg, + enforceSeed ? 
seedHash : (short) extractSeedHash(srcSeg)); } else if (serVer == 3) { - if (PreambleUtil.isEmptyFlag(srcMem)) { - return EmptyCompactSketch.getHeapInstance(srcMem); + if (PreambleUtil.isEmptyFlag(srcSeg)) { + return EmptyCompactSketch.getHeapInstance(srcSeg); } - if (otherCheckForSingleItem(srcMem)) { - return SingleItemSketch.heapify(srcMem, enforceSeed ? seedHash : (short) extractSeedHash(srcMem)); + if (otherCheckForSingleItem(srcSeg)) { + return SingleItemSketch.heapify(srcSeg, enforceSeed ? seedHash : (short) extractSeedHash(srcSeg)); } //not empty & not singleItem - final int flags = extractFlags(srcMem); + final int flags = extractFlags(srcSeg); final boolean compactFlag = (flags & COMPACT_FLAG_MASK) > 0; if (!compactFlag) { throw new SketchesArgumentException( @@ -214,22 +213,22 @@ else if (serVer == 3) { throw new SketchesArgumentException( "Corrupted: COMPACT family sketch image must have Read-Only flag set"); } - return DirectCompactSketch.wrapInstance(srcMem, - enforceSeed ? seedHash : (short) extractSeedHash(srcMem)); + return DirectCompactSketch.wrapInstance(srcSeg, + enforceSeed ? seedHash : (short) extractSeedHash(srcSeg)); } //end of serVer 3 else if (serVer == 1) { - return ForwardCompatibility.heapify1to3(srcMem, seedHash); + return ForwardCompatibility.heapify1to3(srcSeg, seedHash); } else if (serVer == 2) { - return ForwardCompatibility.heapify2to3(srcMem, - enforceSeed ? seedHash : (short) extractSeedHash(srcMem)); + return ForwardCompatibility.heapify2to3(srcSeg, + enforceSeed ? seedHash : (short) extractSeedHash(srcSeg)); } throw new SketchesArgumentException( "Corrupted: Serialization Version " + serVer + " not recognized."); } /** - * Wrap takes the sketch image in the given Memory and refers to it directly. + * Wrap takes the sketch image in the given MemorySegment and refers to it directly. * There is no data copying onto the java heap. * The wrap operation enables fast read-only merging and access to all the public read-only API. 
* @@ -240,23 +239,22 @@ else if (serVer == 2) { * *Wrapping any subclass of this class that is empty or contains only a single item will * result in heapified forms of empty and single item sketch respectively. - * This is actually faster and consumes less overall memory.
+ * This is actually faster and consumes less overall space. * - *This method checks if the DEFAULT_UPDATE_SEED was used to create the source Memory image. + *
This method checks if the DEFAULT_UPDATE_SEED was used to create the source MemorySegment image. * Note that SerialVersion 1 sketches cannot be checked as they don't have a seedHash field, * so the resulting heapified CompactSketch will be given the hash of DEFAULT_UPDATE_SEED.
* * @param bytes a byte array image of a Sketch that was created using the DEFAULT_UPDATE_SEED. - * See Memory * - * @return a CompactSketch backed by the given Memory except as above. + * @return a CompactSketch backed by the given MemorySegment except as above. */ public static CompactSketch wrap(final byte[] bytes) { - return wrap(bytes, ThetaUtil.DEFAULT_UPDATE_SEED, false); + return wrap(bytes, Util.DEFAULT_UPDATE_SEED, false); } /** - * Wrap takes the sketch image in the given Memory and refers to it directly. + * Wrap takes the sketch image in the given MemorySegment and refers to it directly. * There is no data copying onto the java heap. * The wrap operation enables fast read-only merging and access to all the public read-only API. * @@ -267,17 +265,16 @@ public static CompactSketch wrap(final byte[] bytes) { * *Wrapping any subclass of this class that is empty or contains only a single item will * result in heapified forms of empty and single item sketch respectively. - * This is actually faster and consumes less overall memory.
+ * This is actually faster and consumes less overall space. * - *This method checks if the given expectedSeed was used to create the source Memory image. + *
This method checks if the given expectedSeed was used to create the source MemorySegment image. * Note that SerialVersion 1 sketches cannot be checked as they don't have a seedHash field, * so the resulting heapified CompactSketch will be given the hash of the expectedSeed.
* * @param bytes a byte array image of a Sketch that was created using the given expectedSeed. - * See Memory - * @param expectedSeed the seed used to validate the given Memory image. + * @param expectedSeed the seed used to validate the given MemorySegment image. * See Update Hash Seed. - * @return a CompactSketch backed by the given Memory except as above. + * @return a CompactSketch backed by the given MemorySegment except as above. */ public static CompactSketch wrap(final byte[] bytes, final long expectedSeed) { return wrap(bytes, expectedSeed, true); @@ -290,17 +287,17 @@ private static CompactSketch wrap(final byte[] bytes, final long seed, final boo if (family != Family.COMPACT) { throw new IllegalArgumentException("Corrupted: " + family + " is not Compact!"); } - final short seedHash = ThetaUtil.computeSeedHash(seed); + final short seedHash = Util.computeSeedHash(seed); if (serVer == 4) { return WrappedCompactCompressedSketch.wrapInstance(bytes, seedHash); } else if (serVer == 3) { final int flags = bytes[FLAGS_BYTE]; if ((flags & EMPTY_FLAG_MASK) > 0) { - return EmptyCompactSketch.getHeapInstance(Memory.wrap(bytes)); + return EmptyCompactSketch.getHeapInstance(MemorySegment.ofArray(bytes)); } final int preLongs = bytes[PREAMBLE_LONGS_BYTE]; if (otherCheckForSingleItem(preLongs, serVer, familyId, flags)) { - return SingleItemSketch.heapify(Memory.wrap(bytes), enforceSeed ? seedHash : getShortLE(bytes, SEED_HASH_SHORT)); + return SingleItemSketch.heapify(MemorySegment.ofArray(bytes), enforceSeed ? seedHash : getShortLE(bytes, SEED_HASH_SHORT)); } //not empty & not singleItem final boolean compactFlag = (flags & COMPACT_FLAG_MASK) > 0; @@ -316,9 +313,9 @@ private static CompactSketch wrap(final byte[] bytes, final long seed, final boo return WrappedCompactSketch.wrapInstance(bytes, enforceSeed ? 
seedHash : getShortLE(bytes, SEED_HASH_SHORT)); } else if (serVer == 1) { - return ForwardCompatibility.heapify1to3(Memory.wrap(bytes), seedHash); + return ForwardCompatibility.heapify1to3(MemorySegment.ofArray(bytes), seedHash); } else if (serVer == 2) { - return ForwardCompatibility.heapify2to3(Memory.wrap(bytes), + return ForwardCompatibility.heapify2to3(MemorySegment.ofArray(bytes), enforceSeed ? seedHash : getShortLE(bytes, SEED_HASH_SHORT)); } throw new SketchesArgumentException( @@ -328,7 +325,7 @@ private static CompactSketch wrap(final byte[] bytes, final long seed, final boo //Sketch Overrides @Override - public abstract CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem); + public abstract CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg); @Override public int getCompactBytes() { @@ -345,11 +342,26 @@ public Family getFamily() { return Family.COMPACT; } + @Override + public boolean hasMemorySegment() { + return (this instanceof DirectCompactSketch && ((DirectCompactSketch)this).hasMemorySegment()); + } + @Override public boolean isCompact() { return true; } + @Override + public boolean isDirect() { + return (this instanceof DirectCompactSketch && ((DirectCompactSketch)this).isDirect()); + } + + @Override + public boolean isSameResource(final MemorySegment that) { + return (this instanceof DirectCompactSketch && ((DirectCompactSketch)this).isSameResource(that)); + } + @Override public double getEstimate() { return Sketch.estimate(getThetaLong(), getRetainedEntries()); @@ -390,23 +402,23 @@ private byte[] toByteArrayV4() { final int sizeBytes = preambleLongs * Long.BYTES + numEntriesBytes + wholeBytesToHoldBits(compressedBits); final byte[] bytes = new byte[sizeBytes]; - final WritableMemory mem = WritableMemory.writableWrap(bytes); + final MemorySegment wseg = MemorySegment.ofArray(bytes); int offsetBytes = 0; - mem.putByte(offsetBytes++, (byte) preambleLongs); - mem.putByte(offsetBytes++, (byte) 4); // 
to do: add constant - mem.putByte(offsetBytes++, (byte) Family.COMPACT.getID()); - mem.putByte(offsetBytes++, (byte) entryBits); - mem.putByte(offsetBytes++, (byte) numEntriesBytes); - mem.putByte(offsetBytes++, (byte) (COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | ORDERED_FLAG_MASK)); - mem.putShort(offsetBytes, getSeedHash()); + wseg.set(JAVA_BYTE, offsetBytes++, (byte) preambleLongs); + wseg.set(JAVA_BYTE, offsetBytes++, (byte) 4); // to do: add constant + wseg.set(JAVA_BYTE, offsetBytes++, (byte) Family.COMPACT.getID()); + wseg.set(JAVA_BYTE, offsetBytes++, (byte) entryBits); + wseg.set(JAVA_BYTE, offsetBytes++, (byte) numEntriesBytes); + wseg.set(JAVA_BYTE, offsetBytes++, (byte) (COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | ORDERED_FLAG_MASK)); + wseg.set(JAVA_SHORT_UNALIGNED, offsetBytes, getSeedHash()); offsetBytes += Short.BYTES; if (isEstimationMode()) { - mem.putLong(offsetBytes, getThetaLong()); + wseg.set(JAVA_LONG_UNALIGNED, offsetBytes, getThetaLong()); offsetBytes += Long.BYTES; } int numEntries = getRetainedEntries(); for (int i = 0; i < numEntriesBytes; i++) { - mem.putByte(offsetBytes++, (byte) (numEntries & 0xff)); + wseg.set(JAVA_BYTE, offsetBytes++, (byte) (numEntries & 0xff)); numEntries >>>= 8; } long previous = 0; @@ -434,32 +446,32 @@ private byte[] toByteArrayV4() { return bytes; } - private static CompactSketch heapifyV4(final Memory srcMem, final long seed, final boolean enforceSeed) { - final int preLongs = extractPreLongs(srcMem); - final int entryBits = extractEntryBitsV4(srcMem); - final int numEntriesBytes = extractNumEntriesBytesV4(srcMem); - final short seedHash = (short) extractSeedHash(srcMem); - if (enforceSeed) { PreambleUtil.checkMemorySeedHash(srcMem, seed); } + private static CompactSketch heapifyV4(final MemorySegment srcSeg, final long seed, final boolean enforceSeed) { + final int preLongs = extractPreLongs(srcSeg); + final int entryBits = extractEntryBitsV4(srcSeg); + final int numEntriesBytes = 
extractNumEntriesBytesV4(srcSeg); + final short seedHash = (short) extractSeedHash(srcSeg); + if (enforceSeed) { PreambleUtil.checkSegmentSeedHash(srcSeg, seed); } int offsetBytes = 8; long theta = Long.MAX_VALUE; if (preLongs > 1) { - theta = extractThetaLongV4(srcMem); + theta = extractThetaLongV4(srcSeg); offsetBytes += Long.BYTES; } int numEntries = 0; for (int i = 0; i < numEntriesBytes; i++) { - numEntries |= Byte.toUnsignedInt(srcMem.getByte(offsetBytes++)) << (i << 3); + numEntries |= Byte.toUnsignedInt(srcSeg.get(JAVA_BYTE, offsetBytes++)) << (i << 3); } final long[] entries = new long[numEntries]; final byte[] bytes = new byte[entryBits]; // temporary buffer for unpacking int i; for (i = 0; i + 7 < numEntries; i += 8) { - srcMem.getByteArray(offsetBytes, bytes, 0, entryBits); + MemorySegment.copy(srcSeg, JAVA_BYTE, offsetBytes, bytes, 0, entryBits); BitPacking.unpackBitsBlock8(entries, i, bytes, 0, entryBits); offsetBytes += entryBits; } if (i < numEntries) { - srcMem.getByteArray(offsetBytes, bytes, 0, wholeBytesToHoldBits((numEntries - i) * entryBits)); + MemorySegment.copy(srcSeg, JAVA_BYTE, offsetBytes, bytes, 0, wholeBytesToHoldBits((numEntries - i) * entryBits)); int offsetBits = 0; offsetBytes = 0; for (; i < numEntries; i++) { diff --git a/src/main/java/org/apache/datasketches/theta/ConcurrentBackgroundThetaPropagation.java b/src/main/java/org/apache/datasketches/theta/ConcurrentBackgroundThetaPropagation.java index 9ae9c9c57..c3d8bfaa5 100644 --- a/src/main/java/org/apache/datasketches/theta/ConcurrentBackgroundThetaPropagation.java +++ b/src/main/java/org/apache/datasketches/theta/ConcurrentBackgroundThetaPropagation.java @@ -29,7 +29,7 @@ * * @author eshcar */ -class ConcurrentBackgroundThetaPropagation implements Runnable { +final class ConcurrentBackgroundThetaPropagation implements Runnable { // Shared sketch to absorb the data private final ConcurrentSharedThetaSketch sharedThetaSketch; diff --git 
a/src/main/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketch.java index dbdedebd5..d151ffdf5 100644 --- a/src/main/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketch.java @@ -19,15 +19,16 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static org.apache.datasketches.theta.PreambleUtil.THETA_LONG; +import java.lang.foreign.MemorySegment; import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SuppressFBWarnings; -import org.apache.datasketches.memory.WritableMemory; /** * A concurrent shared sketch that is based on DirectQuickSelectSketch. @@ -63,18 +64,18 @@ final class ConcurrentDirectQuickSelectSketch extends DirectQuickSelectSketch private volatile long epoch_; /** - * Construct a new sketch instance and initialize the given Memory as its backing store. + * Construct a new sketch instance and initialize the given MemorySegment as its backing store. * * @param lgNomLongs See lgNomLongs. * @param seed See Update Hash Seed. * @param maxConcurrencyError the max error value including error induced by concurrency. - * @param dstMem the given Memory object destination. It cannot be null. + * @param dstSeg the given MemorySegment object destination. It cannot be null. 
*/ ConcurrentDirectQuickSelectSketch(final int lgNomLongs, final long seed, - final double maxConcurrencyError, final WritableMemory dstMem) { + final double maxConcurrencyError, final MemorySegment dstSeg) { super(lgNomLongs, seed, 1.0F, //p ResizeFactor.X1, //rf, - null, dstMem, false); //unionGadget + dstSeg, false); //unionGadget volatileThetaLong_ = Long.MAX_VALUE; volatileEstimate_ = 0; @@ -86,11 +87,10 @@ final class ConcurrentDirectQuickSelectSketch extends DirectQuickSelectSketch } ConcurrentDirectQuickSelectSketch(final UpdateSketch sketch, final long seed, - final double maxConcurrencyError, final WritableMemory dstMem) { + final double maxConcurrencyError, final MemorySegment dstSeg) { super(sketch.getLgNomLongs(), seed, 1.0F, //p ResizeFactor.X1, //rf, - null, //mem Req Svr - dstMem, + dstSeg, false); //unionGadget exactLimit_ = ConcurrentSharedThetaSketch.computeExactLimit(1L << getLgNomLongs(), @@ -101,7 +101,7 @@ final class ConcurrentDirectQuickSelectSketch extends DirectQuickSelectSketch for (final long hashIn : sketch.getCache()) { propagate(hashIn); } - wmem_.putLong(THETA_LONG, sketch.getThetaLong()); + wseg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, sketch.getThetaLong()); updateVolatileTheta(); updateEstimationSnapshot(); } diff --git a/src/main/java/org/apache/datasketches/theta/ConcurrentHeapThetaBuffer.java b/src/main/java/org/apache/datasketches/theta/ConcurrentHeapThetaBuffer.java index e7b3ddaac..d3b706aa8 100644 --- a/src/main/java/org/apache/datasketches/theta/ConcurrentHeapThetaBuffer.java +++ b/src/main/java/org/apache/datasketches/theta/ConcurrentHeapThetaBuffer.java @@ -23,6 +23,7 @@ import static org.apache.datasketches.theta.UpdateReturnState.ConcurrentPropagated; import static org.apache.datasketches.theta.UpdateReturnState.RejectedOverTheta; +import java.lang.foreign.MemorySegment; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.datasketches.common.ResizeFactor; @@ -148,8 +149,8 @@ public double 
getUpperBound(final int numStdDev) { } @Override - public boolean hasMemory() { - return shared.hasMemory(); + public boolean hasMemorySegment() { + return shared.hasMemorySegment(); } @Override @@ -167,6 +168,11 @@ public boolean isEstimationMode() { return shared.isEstimationMode(); } + @Override + public boolean isSameResource(final MemorySegment that) { + return shared.isSameResource(that); + } + //End of proxies @Override diff --git a/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java b/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java index cdc843f8b..3dda0c31f 100644 --- a/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java +++ b/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java @@ -19,10 +19,10 @@ package org.apache.datasketches.theta; +import java.lang.foreign.MemorySegment; import java.util.concurrent.atomic.AtomicBoolean; -import org.apache.datasketches.common.MemoryStatus; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.common.MemorySegmentStatus; /** * An internal interface to define the API of a concurrent shared theta sketch. 
@@ -31,12 +31,12 @@ * * @author eshcar */ -interface ConcurrentSharedThetaSketch extends MemoryStatus { +interface ConcurrentSharedThetaSketch extends MemorySegmentStatus { long NOT_SINGLE_HASH = -1L; double MIN_ERROR = 0.0000001; - static long computeExactLimit(long k, double error) { + static long computeExactLimit(final long k, final double error) { return 2 * Math.min(k, (long) Math.ceil(1.0 / Math.pow(Math.max(error,MIN_ERROR), 2.0))); } @@ -150,7 +150,7 @@ boolean propagate(final AtomicBoolean localPropagationInProgress, final Sketch s CompactSketch compact(); - CompactSketch compact(boolean ordered, WritableMemory wmem); + CompactSketch compact(boolean ordered, MemorySegment wseg); UpdateSketch rebuild(); diff --git a/src/main/java/org/apache/datasketches/theta/DirectCompactCompressedSketch.java b/src/main/java/org/apache/datasketches/theta/DirectCompactCompressedSketch.java index 64c0fafd4..2bf154215 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectCompactCompressedSketch.java +++ b/src/main/java/org/apache/datasketches/theta/DirectCompactCompressedSketch.java @@ -19,6 +19,7 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; import static org.apache.datasketches.theta.PreambleUtil.extractEntryBitsV4; import static org.apache.datasketches.theta.PreambleUtil.extractNumEntriesBytesV4; import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs; @@ -26,57 +27,57 @@ import static org.apache.datasketches.theta.PreambleUtil.extractThetaLongV4; import static org.apache.datasketches.theta.PreambleUtil.wholeBytesToHoldBits; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.Util; /** * An off-heap (Direct), compact, compressed, read-only sketch. It is not empty, not a single item and ordered. 
* *This sketch can only be associated with a Serialization Version 4 format binary image.
* - *This implementation uses data in a given Memory that is owned and managed by the caller. - * This Memory can be off-heap, which if managed properly will greatly reduce the need for + *
This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.
*/ -class DirectCompactCompressedSketch extends DirectCompactSketch { +final class DirectCompactCompressedSketch extends DirectCompactSketch { /** - * Construct this sketch with the given memory. - * @param mem Read-only Memory object. + * Construct this sketch with the given MemorySegment. + * @param seg Read-only MemorySegment object. */ - DirectCompactCompressedSketch(final Memory mem) { - super(mem); + DirectCompactCompressedSketch(final MemorySegment seg) { + super(seg); } /** - * Wraps the given Memory, which must be a SerVer 4 compressed CompactSketch image. - * Must check the validity of the Memory before calling. - * @param srcMem See Memory + * Wraps the given MemorySegment, which must be a SerVer 4 compressed CompactSketch image. + * Must check the validity of the MemorySegment before calling. + * @param srcSeg The source MemorySegment * @param seedHash The update seedHash. * See Seed Hash. * @return this sketch */ - static DirectCompactCompressedSketch wrapInstance(final Memory srcMem, final short seedHash) { - ThetaUtil.checkSeedHashes((short) extractSeedHash(srcMem), seedHash); - return new DirectCompactCompressedSketch(srcMem); + static DirectCompactCompressedSketch wrapInstance(final MemorySegment srcSeg, final short seedHash) { + Util.checkSeedHashes((short) extractSeedHash(srcSeg), seedHash); + return new DirectCompactCompressedSketch(srcSeg); } //Sketch Overrides @Override - public CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem) { - if (dstMem != null) { - mem_.copyTo(0, dstMem, 0, getCurrentBytes()); - return new DirectCompactSketch(dstMem); + public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) { + if (dstSeg != null) { + MemorySegment.copy(seg_, 0, dstSeg, 0, getCurrentBytes()); + return new DirectCompactSketch(dstSeg); } - return CompactSketch.heapify(mem_); + return CompactSketch.heapify(seg_); } @Override public int getCurrentBytes() { - final int preLongs = extractPreLongs(mem_); - 
final int entryBits = extractEntryBitsV4(mem_); - final int numEntriesBytes = extractNumEntriesBytesV4(mem_); + final int preLongs = extractPreLongs(seg_); + final int entryBits = extractEntryBitsV4(seg_); + final int numEntriesBytes = extractNumEntriesBytesV4(seg_); return preLongs * Long.BYTES + numEntriesBytes + wholeBytesToHoldBits(getRetainedEntries() * entryBits); } @@ -88,20 +89,20 @@ public int getRetainedEntries(final boolean valid) { //compact is always valid // number of entries is stored using variable length encoding // most significant bytes with all zeros are not stored // one byte in the preamble has the number of non-zero bytes used - final int preLongs = extractPreLongs(mem_); // if > 1 then the second long has theta - final int numEntriesBytes = extractNumEntriesBytesV4(mem_); + final int preLongs = extractPreLongs(seg_); // if > 1 then the second long has theta + final int numEntriesBytes = extractNumEntriesBytesV4(seg_); int offsetBytes = preLongs > 1 ? START_PACKED_DATA_ESTIMATION_MODE : START_PACKED_DATA_EXACT_MODE; int numEntries = 0; for (int i = 0; i < numEntriesBytes; i++) { - numEntries |= Byte.toUnsignedInt(mem_.getByte(offsetBytes++)) << (i << 3); + numEntries |= Byte.toUnsignedInt(seg_.get(JAVA_BYTE, offsetBytes++)) << (i << 3); } return numEntries; } @Override public long getThetaLong() { - final int preLongs = extractPreLongs(mem_); - return (preLongs > 1) ? extractThetaLongV4(mem_) : Long.MAX_VALUE; + final int preLongs = extractPreLongs(seg_); + return (preLongs > 1) ? extractThetaLongV4(seg_) : Long.MAX_VALUE; } @Override @@ -116,11 +117,11 @@ public boolean isOrdered() { @Override public HashIterator iterator() { - return new MemoryCompactCompressedHashIterator( - mem_, - (extractPreLongs(mem_) > 1 ? START_PACKED_DATA_ESTIMATION_MODE : START_PACKED_DATA_EXACT_MODE) - + extractNumEntriesBytesV4(mem_), - extractEntryBitsV4(mem_), + return new MemorySegmentCompactCompressedHashIterator( + seg_, + (extractPreLongs(seg_) > 1 ? 
START_PACKED_DATA_ESTIMATION_MODE : START_PACKED_DATA_EXACT_MODE) + + extractNumEntriesBytesV4(seg_), + extractEntryBitsV4(seg_), getRetainedEntries() ); } diff --git a/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java b/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java index 15b03311b..693329110 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java @@ -19,8 +19,10 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static org.apache.datasketches.theta.CompactOperations.checkIllegalCurCountAndEmpty; -import static org.apache.datasketches.theta.CompactOperations.memoryToCompact; +import static org.apache.datasketches.theta.CompactOperations.segmentToCompact; import static org.apache.datasketches.theta.PreambleUtil.ORDERED_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.extractCurCount; import static org.apache.datasketches.theta.PreambleUtil.extractFlags; @@ -29,9 +31,9 @@ import static org.apache.datasketches.theta.PreambleUtil.extractThetaLong; import static org.apache.datasketches.theta.SingleItemSketch.otherCheckForSingleItem; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.Util; /** * An off-heap (Direct), compact, read-only sketch. The internal hash array can be either ordered @@ -39,78 +41,78 @@ * *This sketch can only be associated with a Serialization Version 3 format binary image.
* - *This implementation uses data in a given Memory that is owned and managed by the caller. - * This Memory can be off-heap, which if managed properly will greatly reduce the need for + *
This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.
* * @author Lee Rhodes */ class DirectCompactSketch extends CompactSketch { - final Memory mem_; + final MemorySegment seg_; /** - * Construct this sketch with the given memory. - * @param mem Read-only Memory object with the order bit properly set. + * Construct this sketch with the given MemorySegment. + * @param seg Read-only MemorySegment object with the order bit properly set. */ - DirectCompactSketch(final Memory mem) { - mem_ = mem; + DirectCompactSketch(final MemorySegment seg) { + seg_ = seg; } /** - * Wraps the given Memory, which must be a SerVer 3, CompactSketch image. - * Must check the validity of the Memory before calling. The order bit must be set properly. - * @param srcMem See Memory + * Wraps the given MemorySegment, which must be a SerVer 3, CompactSketch image. + * Must check the validity of the MemorySegment before calling. The order bit must be set properly. + * @param srcSeg the given MemorySegment * @param seedHash The update seedHash. * See Seed Hash. * @return this sketch */ - static DirectCompactSketch wrapInstance(final Memory srcMem, final short seedHash) { - ThetaUtil.checkSeedHashes((short) extractSeedHash(srcMem), seedHash); - return new DirectCompactSketch(srcMem); + static DirectCompactSketch wrapInstance(final MemorySegment srcSeg, final short seedHash) { + Util.checkSeedHashes((short) extractSeedHash(srcSeg), seedHash); + return new DirectCompactSketch(srcSeg); } //Sketch Overrides @Override - public CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem) { - return memoryToCompact(mem_, dstOrdered, dstMem); + public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) { + return segmentToCompact(seg_, dstOrdered, dstSeg); } @Override public int getCurrentBytes() { - if (otherCheckForSingleItem(mem_)) { return 16; } - final int preLongs = extractPreLongs(mem_); - final int curCount = (preLongs == 1) ? 
0 : extractCurCount(mem_); + if (otherCheckForSingleItem(seg_)) { return 16; } + final int preLongs = extractPreLongs(seg_); + final int curCount = (preLongs == 1) ? 0 : extractCurCount(seg_); return (preLongs + curCount) << 3; } @Override public int getRetainedEntries(final boolean valid) { //compact is always valid - if (otherCheckForSingleItem(mem_)) { return 1; } - final int preLongs = extractPreLongs(mem_); - final int curCount = (preLongs == 1) ? 0 : extractCurCount(mem_); + if (otherCheckForSingleItem(seg_)) { return 1; } + final int preLongs = extractPreLongs(seg_); + final int curCount = (preLongs == 1) ? 0 : extractCurCount(seg_); return curCount; } @Override public long getThetaLong() { - final int preLongs = extractPreLongs(mem_); - return (preLongs > 2) ? extractThetaLong(mem_) : Long.MAX_VALUE; + final int preLongs = extractPreLongs(seg_); + return (preLongs > 2) ? extractThetaLong(seg_) : Long.MAX_VALUE; } @Override - public boolean hasMemory() { - return mem_ != null; + public boolean hasMemorySegment() { + return seg_ != null && seg_.scope().isAlive(); } @Override public boolean isDirect() { - return hasMemory() ? mem_.isDirect() : false; + return hasMemorySegment() && seg_.isNative(); } @Override public boolean isEmpty() { - final boolean emptyFlag = PreambleUtil.isEmptyFlag(mem_); + final boolean emptyFlag = PreambleUtil.isEmptyFlag(seg_); final long thetaLong = getThetaLong(); final int curCount = getRetainedEntries(true); return emptyFlag || ((curCount == 0) && (thetaLong == Long.MAX_VALUE)); @@ -118,17 +120,18 @@ public boolean isEmpty() { @Override public boolean isOrdered() { - return (extractFlags(mem_) & ORDERED_FLAG_MASK) > 0; + return (extractFlags(seg_) & ORDERED_FLAG_MASK) > 0; } @Override - public boolean isSameResource(final Memory that) { - return hasMemory() ? 
mem_.isSameResource(that) : false; + public boolean isSameResource(final MemorySegment that) { + return hasMemorySegment() && Util.isSameResource(seg_, that); + } @Override public HashIterator iterator() { - return new MemoryHashIterator(mem_, getRetainedEntries(true), getThetaLong()); + return new MemorySegmentHashIterator(seg_, getRetainedEntries(true), getThetaLong()); } @Override @@ -136,7 +139,7 @@ public byte[] toByteArray() { checkIllegalCurCountAndEmpty(isEmpty(), getRetainedEntries()); final int outBytes = getCurrentBytes(); final byte[] byteArrOut = new byte[outBytes]; - mem_.getByteArray(0, byteArrOut, 0, outBytes); + MemorySegment.copy(seg_, JAVA_BYTE, 0, byteArrOut, 0, outBytes); return byteArrOut; } @@ -144,12 +147,12 @@ public byte[] toByteArray() { @Override long[] getCache() { - if (otherCheckForSingleItem(mem_)) { return new long[] { mem_.getLong(8) }; } - final int preLongs = extractPreLongs(mem_); - final int curCount = (preLongs == 1) ? 0 : extractCurCount(mem_); + if (otherCheckForSingleItem(seg_)) { return new long[] { seg_.get(JAVA_LONG_UNALIGNED, 8) }; } + final int preLongs = extractPreLongs(seg_); + final int curCount = (preLongs == 1) ? 
0 : extractCurCount(seg_); if (curCount > 0) { final long[] cache = new long[curCount]; - mem_.getLongArray(preLongs << 3, cache, 0, curCount); + MemorySegment.copy(seg_, JAVA_LONG_UNALIGNED, preLongs << 3, cache, 0, curCount); return cache; } return new long[0]; @@ -157,21 +160,21 @@ long[] getCache() { @Override int getCompactPreambleLongs() { - return extractPreLongs(mem_); + return extractPreLongs(seg_); } @Override int getCurrentPreambleLongs() { - return extractPreLongs(mem_); + return extractPreLongs(seg_); } @Override - Memory getMemory() { - return mem_; + MemorySegment getMemorySegment() { + return seg_; } @Override short getSeedHash() { - return (short) extractSeedHash(mem_); + return (short) extractSeedHash(seg_); } } diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java index af073a5ee..7356d4d5f 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java @@ -19,6 +19,10 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; import static org.apache.datasketches.theta.PreambleUtil.EMPTY_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.FLAGS_BYTE; @@ -30,7 +34,7 @@ import static org.apache.datasketches.theta.PreambleUtil.extractLgArrLongs; import static org.apache.datasketches.theta.PreambleUtil.extractLgNomLongs; import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs; -import static org.apache.datasketches.theta.PreambleUtil.getMemBytes; +import static 
org.apache.datasketches.theta.PreambleUtil.getSegBytes; import static org.apache.datasketches.theta.PreambleUtil.insertCurCount; import static org.apache.datasketches.theta.PreambleUtil.insertFamilyID; import static org.apache.datasketches.theta.PreambleUtil.insertFlags; @@ -53,12 +57,12 @@ import static org.apache.datasketches.theta.UpdateReturnState.RejectedDuplicate; import static org.apache.datasketches.theta.UpdateReturnState.RejectedOverTheta; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.MemoryRequestServer; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon.HashOperations; import org.apache.datasketches.thetacommon.ThetaUtil; @@ -66,35 +70,31 @@ * The default Theta Sketch using the QuickSelect algorithm. * This subclass implements methods, which affect the state (update, rebuild, reset) * - *This implementation uses data in a given Memory that is owned and managed by the caller. - * This Memory can be off-heap, which if managed properly will greatly reduce the need for + *
This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.
* * @author Lee Rhodes * @author Kevin Lang */ class DirectQuickSelectSketch extends DirectQuickSelectSketchR { - MemoryRequestServer memReqSvr_ = null; //never serialized private DirectQuickSelectSketch( final long seed, - final WritableMemory wmem) { - super(seed, wmem); + final MemorySegment wseg) { + super(seed, wseg); } /** - * Construct a new sketch instance and initialize the given Memory as its backing store. + * Construct a new sketch instance and initialize the given MemorySegment as its backing store. * * @param lgNomLongs See lgNomLongs. * @param seed See Update Hash Seed. * @param p * See Sampling Probability, p - * @param rf Currently internally fixed at 2. Unless dstMem is not configured with a valid - * MemoryRequest, in which case the rf is effectively 1, which is no resizing at all and the - * dstMem must be large enough for a full sketch. + * @param rf Resize Factor * See Resize Factor - * @param memReqSvr the given MemoryRequestServer - * @param dstMem the given Memory object destination. It cannot be null. + * @param dstSeg the given MemorySegment object destination. It cannot be null. * It will be cleared prior to use. * @param unionGadget true if this sketch is implementing the Union gadget function. * Otherwise, it is behaving as a normal QuickSelectSketch. @@ -104,19 +104,17 @@ private DirectQuickSelectSketch( final long seed, final float p, final ResizeFactor rf, - final MemoryRequestServer memReqSvr, - final WritableMemory dstMem, + final MemorySegment dstSeg, final boolean unionGadget) { this( - checkMemSize(lgNomLongs, rf, dstMem, unionGadget), + checkSegSize(lgNomLongs, rf, dstSeg, unionGadget), //SpotBugs CT_CONSTRUCTOR_THROW is false positive. 
//this construction scheme is compliant with SEI CERT Oracle Coding Standard for Java / OBJ11-J lgNomLongs, seed, p, rf, - memReqSvr, - dstMem, + dstSeg, unionGadget); } @@ -126,10 +124,9 @@ private DirectQuickSelectSketch( final long seed, final float p, final ResizeFactor rf, - final MemoryRequestServer memReqSvr, - final WritableMemory dstMem, + final MemorySegment dstSeg, final boolean unionGadget) { - super(seed, dstMem); + super(seed, dstSeg); //Choose family, preambleLongs final Family family; final int preambleLongs; @@ -148,86 +145,83 @@ private DirectQuickSelectSketch( //@formatter:off //Build preamble - insertPreLongs(dstMem, preambleLongs); //byte 0 - insertLgResizeFactor(dstMem, lgRF); //byte 0 - insertSerVer(dstMem, SER_VER); //byte 1 - insertFamilyID(dstMem, family.getID()); //byte 2 - insertLgNomLongs(dstMem, lgNomLongs); //byte 3 - insertLgArrLongs(dstMem, lgArrLongs); //byte 4 + insertPreLongs(dstSeg, preambleLongs); //byte 0 + insertLgResizeFactor(dstSeg, lgRF); //byte 0 + insertSerVer(dstSeg, SER_VER); //byte 1 + insertFamilyID(dstSeg, family.getID()); //byte 2 + insertLgNomLongs(dstSeg, lgNomLongs); //byte 3 + insertLgArrLongs(dstSeg, lgArrLongs); //byte 4 //flags: bigEndian = readOnly = compact = ordered = false; empty = true : 00100 = 4 - insertFlags(dstMem, EMPTY_FLAG_MASK); //byte 5 - insertSeedHash(dstMem, ThetaUtil.computeSeedHash(seed)); //bytes 6,7 - insertCurCount(dstMem, 0); //bytes 8-11 - insertP(dstMem, p); //bytes 12-15 + insertFlags(dstSeg, EMPTY_FLAG_MASK); //byte 5 + insertSeedHash(dstSeg, Util.computeSeedHash(seed)); //bytes 6,7 + insertCurCount(dstSeg, 0); //bytes 8-11 + insertP(dstSeg, p); //bytes 12-15 final long thetaLong = (long)(p * LONG_MAX_VALUE_AS_DOUBLE); - insertThetaLong(dstMem, thetaLong); //bytes 16-23 + insertThetaLong(dstSeg, thetaLong); //bytes 16-23 if (unionGadget) { - insertUnionThetaLong(dstMem, thetaLong); + insertUnionThetaLong(dstSeg, thetaLong); } //@formatter:on //clear hash table area - 
dstMem.clear(preambleLongs << 3, 8 << lgArrLongs); + dstSeg.asSlice(preambleLongs << 3, Long.BYTES << lgArrLongs).fill((byte)0); hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); - memReqSvr_ = memReqSvr; } - private static final boolean checkMemSize( - final int lgNomLongs, final ResizeFactor rf, final Memory dstMem, final boolean unionGadget) { + private static final boolean checkSegSize( + final int lgNomLongs, final ResizeFactor rf, final MemorySegment dstSeg, final boolean unionGadget) { final int preambleLongs = (unionGadget) ? Family.UNION.getMinPreLongs() : Family.QUICKSELECT.getMinPreLongs(); final int lgRF = rf.lg(); final int lgArrLongs = (lgRF == 0) ? lgNomLongs + 1 : ThetaUtil.MIN_LG_ARR_LONGS; - final int minReqBytes = getMemBytes(lgArrLongs, preambleLongs); - final long curMemCapBytes = dstMem.getCapacity(); - if (curMemCapBytes < minReqBytes) { + final int minReqBytes = getSegBytes(lgArrLongs, preambleLongs); + final long curSegCapBytes = dstSeg.byteSize(); + if (curSegCapBytes < minReqBytes) { throw new SketchesArgumentException( - "Memory capacity is too small: " + curMemCapBytes + " < " + minReqBytes); + "MemorySegment capacity is too small: " + curSegCapBytes + " < " + minReqBytes); } return true; } /** - * Wrap a sketch around the given source Memory containing sketch data that originated from + * Wrap a sketch around the given source MemorySegment containing sketch data that originated from * this sketch. - * @param srcMem See Memory - * The given Memory object must be in hash table form and not read only. + * @param srcSeg The given MemorySegment object must be in hash table form and not read only. 
* @param seed See Update Hash Seed * @return instance of this sketch */ - static DirectQuickSelectSketch writableWrap(final WritableMemory srcMem, final long seed) { - final int preambleLongs = extractPreLongs(srcMem); //byte 0 - final int lgNomLongs = extractLgNomLongs(srcMem); //byte 3 - final int lgArrLongs = extractLgArrLongs(srcMem); //byte 4 + static DirectQuickSelectSketch writableWrap(final MemorySegment srcSeg, final long seed) { + final int preambleLongs = extractPreLongs(srcSeg); //byte 0 + final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3 + final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4 - UpdateSketch.checkUnionQuickSelectFamily(srcMem, preambleLongs, lgNomLongs); - checkMemIntegrity(srcMem, seed, preambleLongs, lgNomLongs, lgArrLongs); + UpdateSketch.checkUnionQuickSelectFamily(srcSeg, preambleLongs, lgNomLongs); + checkSegIntegrity(srcSeg, seed, preambleLongs, lgNomLongs, lgArrLongs); - if (isResizeFactorIncorrect(srcMem, lgNomLongs, lgArrLongs)) { + if (isResizeFactorIncorrect(srcSeg, lgNomLongs, lgArrLongs)) { //If incorrect it sets it to X2 which always works. - insertLgResizeFactor(srcMem, ResizeFactor.X2.lg()); + insertLgResizeFactor(srcSeg, ResizeFactor.X2.lg()); } final DirectQuickSelectSketch dqss = - new DirectQuickSelectSketch(seed, srcMem); + new DirectQuickSelectSketch(seed, srcSeg); dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); return dqss; } /** - * Fast-wrap a sketch around the given source Memory containing sketch data that originated from - * this sketch. This does NO validity checking of the given Memory. - * @param srcMem See Memory - * The given Memory object must be in hash table form and not read only. + * Fast-wrap a sketch around the given source MemorySegment containing sketch data that originated from + * this sketch. This does NO validity checking of the given MemorySegment. + * @param srcSeg The given MemorySegment must be in hash table form and not read only. 
* @param seed See Update Hash Seed * @return instance of this sketch */ - static DirectQuickSelectSketch fastWritableWrap(final WritableMemory srcMem, final long seed) { - final int lgNomLongs = extractLgNomLongs(srcMem); //byte 3 - final int lgArrLongs = extractLgArrLongs(srcMem); //byte 4 + static DirectQuickSelectSketch fastWritableWrap(final MemorySegment srcSeg, final long seed) { + final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3 + final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4 final DirectQuickSelectSketch dqss = - new DirectQuickSelectSketch(seed, srcMem); + new DirectQuickSelectSketch(seed, srcSeg); dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); return dqss; } @@ -239,9 +233,9 @@ static DirectQuickSelectSketch fastWritableWrap(final WritableMemory srcMem, fin @Override public UpdateSketch rebuild() { final int lgNomLongs = getLgNomLongs(); - final int preambleLongs = wmem_.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; + final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; if (getRetainedEntries(true) > (1 << lgNomLongs)) { - quickSelectAndRebuild(wmem_, preambleLongs, lgNomLongs); + quickSelectAndRebuild(wseg_, preambleLongs, lgNomLongs); } return this; } @@ -253,15 +247,15 @@ public void reset() { //lgArrLongs stays the same //thetaLongs resets to p final int arrLongs = 1 << getLgArrLongs(); - final int preambleLongs = wmem_.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; + final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; final int preBytes = preambleLongs << 3; - wmem_.clear(preBytes, arrLongs * 8L); //clear data array + wseg_.asSlice(preBytes, arrLongs * 8L).fill((byte)0); //flags: bigEndian = readOnly = compact = ordered = false; empty = true. 
- wmem_.putByte(FLAGS_BYTE, (byte) EMPTY_FLAG_MASK); - wmem_.putInt(RETAINED_ENTRIES_INT, 0); - final float p = wmem_.getFloat(P_FLOAT); + wseg_.set(JAVA_BYTE, FLAGS_BYTE, (byte) EMPTY_FLAG_MASK); + wseg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, 0); + final float p = wseg_.get(JAVA_FLOAT_UNALIGNED, P_FLOAT); final long thetaLong = (long) (p * LONG_MAX_VALUE_AS_DOUBLE); - wmem_.putLong(THETA_LONG, thetaLong); + wseg_.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong); } //restricted methods @@ -270,7 +264,7 @@ public void reset() { UpdateReturnState hashUpdate(final long hash) { HashOperations.checkHashCorruption(hash); - wmem_.putByte(FLAGS_BYTE, (byte) (wmem_.getByte(FLAGS_BYTE) & ~EMPTY_FLAG_MASK)); + wseg_.set(JAVA_BYTE, FLAGS_BYTE, (byte) (wseg_.get(JAVA_BYTE, FLAGS_BYTE) & ~EMPTY_FLAG_MASK)); final long thetaLong = getThetaLong(); final int lgNomLongs = getLgNomLongs(); //The over-theta test @@ -279,64 +273,54 @@ UpdateReturnState hashUpdate(final long hash) { } final int lgArrLongs = getLgArrLongs(); - final int preambleLongs = wmem_.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; + final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; //The duplicate test final int index = - HashOperations.hashSearchOrInsertMemory(wmem_, lgArrLongs, hash, preambleLongs << 3); + HashOperations.hashSearchOrInsertMemorySegment(wseg_, lgArrLongs, hash, preambleLongs << 3); if (index >= 0) { return RejectedDuplicate; //Duplicate, not inserted } //insertion occurred, increment curCount final int curCount = getRetainedEntries(true) + 1; - wmem_.putInt(RETAINED_ENTRIES_INT, curCount); //update curCount + wseg_.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, curCount); //update curCount if (isOutOfSpace(curCount)) { //we need to do something, we are out of space if (lgArrLongs > lgNomLongs) { //at full size, rebuild //Assumes no dirty values, changes thetaLong, curCount_ - assert (lgArrLongs == (lgNomLongs + 1)) - : "lgArr: " + lgArrLongs + ", lgNom: " + lgNomLongs; + 
assert (lgArrLongs == (lgNomLongs + 1)) : "lgArr: " + lgArrLongs + ", lgNom: " + lgNomLongs; //rebuild, refresh curCount based on # values in the hashtable. - quickSelectAndRebuild(wmem_, preambleLongs, lgNomLongs); + quickSelectAndRebuild(wseg_, preambleLongs, lgNomLongs); return InsertedCountIncrementedRebuilt; } //end of rebuild, exit - else { //Not at full size, resize. Should not get here if lgRF = 0 and memCap is too small. + else { //Not at full size, resize. Should not get here if lgRF = 0 and segCap is too small. final int lgRF = getLgRF(); - final int actLgRF = actLgResizeFactor(wmem_.getCapacity(), lgArrLongs, preambleLongs, lgRF); + final int actLgRF = actLgResizeFactor(wseg_.byteSize(), lgArrLongs, preambleLongs, lgRF); int tgtLgArrLongs = Math.min(lgArrLongs + actLgRF, lgNomLongs + 1); - if (actLgRF > 0) { //Expand in current Memory + if (actLgRF > 0) { //Expand in current MemorySegment //lgArrLongs will change; thetaLong, curCount will not - resize(wmem_, preambleLongs, lgArrLongs, tgtLgArrLongs); + resize(wseg_, preambleLongs, lgArrLongs, tgtLgArrLongs); hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, tgtLgArrLongs); return InsertedCountIncrementedResized; - } //end of Expand in current memory, exit. + } //end of Expand in current MemorySegment, exit. else { - //Request more memory, then resize. lgArrLongs will change; thetaLong, curCount will not + //Request more space, then resize. lgArrLongs will change; thetaLong, curCount will not final int preBytes = preambleLongs << 3; tgtLgArrLongs = Math.min(lgArrLongs + lgRF, lgNomLongs + 1); final int tgtArrBytes = 8 << tgtLgArrLongs; final int reqBytes = tgtArrBytes + preBytes; + final MemorySegment newDstSeg = MemorySegment.ofArray(new byte[reqBytes]); - memReqSvr_ = (memReqSvr_ == null) ? wmem_.getMemoryRequestServer() : memReqSvr_; - - if (memReqSvr_ == null) { //in case the MRS is not enabled or null. 
- throw new SketchesArgumentException("Out of Memory, MemoryRequestServer is null, cannot expand."); - } - - final WritableMemory newDstMem = memReqSvr_.request(wmem_, reqBytes); - - moveAndResize(wmem_, preambleLongs, lgArrLongs, newDstMem, tgtLgArrLongs, thetaLong); - - memReqSvr_.requestClose(wmem_); + moveAndResize(wseg_, preambleLongs, lgArrLongs, newDstSeg, tgtLgArrLongs, thetaLong); + wseg_ = newDstSeg; - wmem_ = newDstMem; hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, tgtLgArrLongs); return InsertedCountIncrementedResized; - } //end of Request more memory to resize + } //end of Request more space to resize } //end of resize } //end of isOutOfSpace return InsertedCountIncremented; diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java index fb2aed2a5..2c25ee618 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java +++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java @@ -19,6 +19,10 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static org.apache.datasketches.theta.CompactOperations.checkIllegalCurCountAndEmpty; import static org.apache.datasketches.theta.CompactOperations.computeCompactPreLongs; import static org.apache.datasketches.theta.CompactOperations.correctThetaOnCompact; @@ -37,20 +41,21 @@ import static org.apache.datasketches.theta.PreambleUtil.extractThetaLong; import static org.apache.datasketches.theta.PreambleUtil.insertThetaLong; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import 
org.apache.datasketches.common.SketchesReadOnlyException; import org.apache.datasketches.common.SuppressFBWarnings; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon.ThetaUtil; /** * The default Theta Sketch using the QuickSelect algorithm. * This is the read-only implementation with non-functional methods, which affect the state. * - *This implementation uses data in a given Memory that is owned and managed by the caller. - * This Memory can be off-heap, which if managed properly will greatly reduce the need for + *
This implementation uses data in a given MemorySegment that is owned and managed by the caller. + * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.
* * @author Lee Rhodes @@ -60,50 +65,48 @@ class DirectQuickSelectSketchR extends UpdateSketch { static final double DQS_RESIZE_THRESHOLD = 15.0 / 16.0; //tuned for space final long seed_; //provided, kept only on heap, never serialized. int hashTableThreshold_; //computed, kept only on heap, never serialized. - WritableMemory wmem_; //A WritableMemory for child class, but no write methods here + MemorySegment wseg_; //A MemorySegment for child class, but no write methods here //only called by DirectQuickSelectSketch and below - DirectQuickSelectSketchR(final long seed, final WritableMemory wmem) { + DirectQuickSelectSketchR(final long seed, final MemorySegment wseg) { seed_ = seed; - wmem_ = wmem; + wseg_ = wseg; } /** - * Wrap a sketch around the given source Memory containing sketch data that originated from + * Wrap a sketch around the given source MemorySegment containing sketch data that originated from * this sketch. - * @param srcMem See Memory - * The given Memory object must be in hash table form and not read only. + * @param srcSeg the source MemorySegment. + * The given MemorySegment object must be in hash table form and not read only. 
* @param seed See Update Hash Seed * @return instance of this sketch */ - static DirectQuickSelectSketchR readOnlyWrap(final Memory srcMem, final long seed) { - final int preambleLongs = extractPreLongs(srcMem); //byte 0 - final int lgNomLongs = extractLgNomLongs(srcMem); //byte 3 - final int lgArrLongs = extractLgArrLongs(srcMem); //byte 4 + static DirectQuickSelectSketchR readOnlyWrap(final MemorySegment srcSeg, final long seed) { + final int preambleLongs = extractPreLongs(srcSeg); //byte 0 + final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3 + final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4 - UpdateSketch.checkUnionQuickSelectFamily(srcMem, preambleLongs, lgNomLongs); - checkMemIntegrity(srcMem, seed, preambleLongs, lgNomLongs, lgArrLongs); + UpdateSketch.checkUnionQuickSelectFamily(srcSeg, preambleLongs, lgNomLongs); + checkSegIntegrity(srcSeg, seed, preambleLongs, lgNomLongs, lgArrLongs); final DirectQuickSelectSketchR dqssr = - new DirectQuickSelectSketchR(seed, (WritableMemory) srcMem); + new DirectQuickSelectSketchR(seed, srcSeg); dqssr.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); return dqssr; } /** - * Fast-wrap a sketch around the given source Memory containing sketch data that originated from - * this sketch. This does NO validity checking of the given Memory. - * @param srcMem See Memory - * The given Memory object must be in hash table form and not read only. + * Fast-wrap a sketch around the given source MemorySegment containing sketch data that originated from + * this sketch. This does NO validity checking of the given MemorySegment. + * @param srcSeg The given MemorySegment object must be in hash table form and not read only. 
* @param seed See Update Hash Seed * @return instance of this sketch */ - static DirectQuickSelectSketchR fastReadOnlyWrap(final Memory srcMem, final long seed) { - final int lgNomLongs = srcMem.getByte(LG_NOM_LONGS_BYTE) & 0XFF; - final int lgArrLongs = srcMem.getByte(LG_ARR_LONGS_BYTE) & 0XFF; + static DirectQuickSelectSketchR fastReadOnlyWrap(final MemorySegment srcSeg, final long seed) { + final int lgNomLongs = srcSeg.get(JAVA_BYTE, LG_NOM_LONGS_BYTE) & 0XFF; + final int lgArrLongs = srcSeg.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF; - final DirectQuickSelectSketchR dqss = - new DirectQuickSelectSketchR(seed, (WritableMemory) srcMem); + final DirectQuickSelectSketchR dqss = new DirectQuickSelectSketchR(seed, srcSeg); dqss.hashTableThreshold_ = getOffHeapHashTableThreshold(lgNomLongs, lgArrLongs); return dqss; } @@ -113,70 +116,70 @@ static DirectQuickSelectSketchR fastReadOnlyWrap(final Memory srcMem, final long @Override public int getCurrentBytes() { //not compact - final byte lgArrLongs = wmem_.getByte(LG_ARR_LONGS_BYTE); - final int preLongs = wmem_.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; + final byte lgArrLongs = wseg_.get(JAVA_BYTE, LG_ARR_LONGS_BYTE); + final int preLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; final int lengthBytes = (preLongs + (1 << lgArrLongs)) << 3; return lengthBytes; } @Override public double getEstimate() { - final int curCount = extractCurCount(wmem_); - final long thetaLong = extractThetaLong(wmem_); + final int curCount = extractCurCount(wseg_); + final long thetaLong = extractThetaLong(wseg_); return Sketch.estimate(thetaLong, curCount); } @Override public Family getFamily() { - final int familyID = wmem_.getByte(FAMILY_BYTE) & 0XFF; + final int familyID = wseg_.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF; return Family.idToFamily(familyID); } @Override public int getRetainedEntries(final boolean valid) { //always valid - return wmem_.getInt(RETAINED_ENTRIES_INT); + return wseg_.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT); } 
@Override public long getThetaLong() { - return isEmpty() ? Long.MAX_VALUE : wmem_.getLong(THETA_LONG); + return isEmpty() ? Long.MAX_VALUE : wseg_.get(JAVA_LONG_UNALIGNED, THETA_LONG); } @Override - public boolean hasMemory() { - return wmem_ != null; + public boolean hasMemorySegment() { + return wseg_ != null && wseg_.scope().isAlive(); } @Override public boolean isDirect() { - return hasMemory() ? wmem_.isDirect() : false; + return hasMemorySegment() && wseg_.isNative(); } @Override public boolean isEmpty() { - return PreambleUtil.isEmptyFlag(wmem_); + return PreambleUtil.isEmptyFlag(wseg_); } @Override - public boolean isSameResource(final Memory that) { - return hasMemory() ? wmem_.isSameResource(that) : false; + public boolean isSameResource(final MemorySegment that) { + return hasMemorySegment() && Util.isSameResource(wseg_, that); } @Override public HashIterator iterator() { - return new MemoryHashIterator(wmem_, 1 << getLgArrLongs(), getThetaLong()); + return new MemorySegmentHashIterator(wseg_, 1 << getLgArrLongs(), getThetaLong()); } @Override - public byte[] toByteArray() { //MY_FAMILY is stored in wmem_ - checkIllegalCurCountAndEmpty(isEmpty(), extractCurCount(wmem_)); + public byte[] toByteArray() { //MY_FAMILY is stored in wseg_ + checkIllegalCurCountAndEmpty(isEmpty(), extractCurCount(wseg_)); final int lengthBytes = getCurrentBytes(); final byte[] byteArray = new byte[lengthBytes]; - final WritableMemory mem = WritableMemory.writableWrap(byteArray); - wmem_.copyTo(0, mem, 0, lengthBytes); + final MemorySegment seg = MemorySegment.ofArray(byteArray); + MemorySegment.copy(wseg_, 0, seg, 0, lengthBytes); final long thetaLong = - correctThetaOnCompact(isEmpty(), extractCurCount(wmem_), extractThetaLong(wmem_)); - insertThetaLong(wmem_, thetaLong); + correctThetaOnCompact(isEmpty(), extractCurCount(wseg_), extractThetaLong(wseg_)); + insertThetaLong(wseg_, thetaLong); return byteArray; } @@ -184,12 +187,12 @@ public byte[] toByteArray() { //MY_FAMILY 
is stored in wmem_ @Override public final int getLgNomLongs() { - return PreambleUtil.extractLgNomLongs(wmem_); + return PreambleUtil.extractLgNomLongs(wseg_); } @Override float getP() { - return wmem_.getFloat(P_FLOAT); + return wseg_.get(JAVA_FLOAT_UNALIGNED, P_FLOAT); } @Override @@ -216,11 +219,10 @@ public void reset() { @Override long[] getCache() { - final long lgArrLongs = wmem_.getByte(LG_ARR_LONGS_BYTE) & 0XFF; - final int preambleLongs = wmem_.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; + final long lgArrLongs = wseg_.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF; + final int preambleLongs = wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; final long[] cacheArr = new long[1 << lgArrLongs]; - final WritableMemory mem = WritableMemory.writableWrap(cacheArr); - wmem_.copyTo(preambleLongs << 3, mem, 0, 8 << lgArrLongs); + MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, preambleLongs << 3, cacheArr, 0, 1 << lgArrLongs); return cacheArr; } @@ -231,17 +233,17 @@ int getCompactPreambleLongs() { @Override int getCurrentPreambleLongs() { - return PreambleUtil.extractPreLongs(wmem_); + return PreambleUtil.extractPreLongs(wseg_); } @Override - WritableMemory getMemory() { - return wmem_; + MemorySegment getMemorySegment() { + return wseg_; } @Override short getSeedHash() { - return (short) PreambleUtil.extractSeedHash(wmem_); + return (short) PreambleUtil.extractSeedHash(wseg_); } @Override @@ -256,11 +258,11 @@ boolean isOutOfSpace(final int numEntries) { @Override int getLgArrLongs() { - return wmem_.getByte(LG_ARR_LONGS_BYTE) & 0XFF; + return wseg_.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF; } int getLgRF() { //only Direct needs this - return (wmem_.getByte(PREAMBLE_LONGS_BYTE) >>> LG_RESIZE_FACTOR_BIT) & 0X3; + return (wseg_.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) >>> LG_RESIZE_FACTOR_BIT) & 0X3; } @Override diff --git a/src/main/java/org/apache/datasketches/theta/EmptyCompactSketch.java b/src/main/java/org/apache/datasketches/theta/EmptyCompactSketch.java index 
8f6e4972a..45a17d40d 100644 --- a/src/main/java/org/apache/datasketches/theta/EmptyCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta/EmptyCompactSketch.java @@ -19,9 +19,12 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; + +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; /** * Singleton empty CompactSketch. @@ -46,14 +49,14 @@ static synchronized EmptyCompactSketch getInstance() { } //This should be a heapify - static synchronized EmptyCompactSketch getHeapInstance(final Memory srcMem) { - final long pre0 = srcMem.getLong(0); + static synchronized EmptyCompactSketch getHeapInstance(final MemorySegment srcSeg) { + final long pre0 = srcSeg.get(JAVA_LONG_UNALIGNED, 0); if (testCandidatePre0(pre0)) { return EMPTY_COMPACT_SKETCH; } final long maskedPre0 = pre0 & EMPTY_SKETCH_MASK; - throw new SketchesArgumentException("Input Memory does not match required Preamble. " - + "Memory Pre0: " + Long.toHexString(maskedPre0) + throw new SketchesArgumentException("Input MemorySegment does not match required Preamble. " + + "MemorySegment Pre0: " + Long.toHexString(maskedPre0) + ", required Pre0: " + Long.toHexString(EMPTY_SKETCH_TEST)); } @@ -61,10 +64,11 @@ static synchronized EmptyCompactSketch getHeapInstance(final Memory srcMem) { // This returns with ordered flag = true independent of dstOrdered. // This is required for fast detection. // The hashSeed is ignored and set == 0. 
- public CompactSketch compact(final boolean dstOrdered, final WritableMemory wmem) { - if (wmem == null) { return EmptyCompactSketch.getInstance(); } - wmem.putByteArray(0, EMPTY_COMPACT_SKETCH_ARR, 0, 8); - return new DirectCompactSketch(wmem); + public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstWSeg) { + if (dstWSeg == null) { return EmptyCompactSketch.getInstance(); } + //dstWSeg.putByteArray(0, EMPTY_COMPACT_SKETCH_ARR, 0, 8); + MemorySegment.copy(EMPTY_COMPACT_SKETCH_ARR, 0, dstWSeg, JAVA_BYTE, 0, 8); + return new DirectCompactSketch(dstWSeg); } //static @@ -131,11 +135,6 @@ int getCurrentPreambleLongs() { return 1; } - @Override - Memory getMemory() { - return null; - } - @Override short getSeedHash() { return 0; diff --git a/src/main/java/org/apache/datasketches/theta/ForwardCompatibility.java b/src/main/java/org/apache/datasketches/theta/ForwardCompatibility.java index 16172d247..723a8b651 100644 --- a/src/main/java/org/apache/datasketches/theta/ForwardCompatibility.java +++ b/src/main/java/org/apache/datasketches/theta/ForwardCompatibility.java @@ -19,13 +19,15 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static org.apache.datasketches.theta.PreambleUtil.extractCurCount; import static org.apache.datasketches.theta.PreambleUtil.extractFamilyID; import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs; import static org.apache.datasketches.theta.PreambleUtil.extractThetaLong; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; /** * Used to convert older serialization versions 1 and 2 to version 3. The Serialization @@ -36,13 +38,15 @@ */ final class ForwardCompatibility { + private ForwardCompatibility() { } + /** * Convert a serialization version (SerVer) 1 sketch (~Feb 2014) to a SerVer 3 sketch. 
* Note: SerVer 1 sketches always have (metadata) preamble-longs of 3 and are always stored * in a compact ordered form, but with 3 different sketch types. All SerVer 1 sketches will * be converted to a SerVer 3 sketches. There is no concept of p-sampling, no empty bit. * - * @param srcMem the image of a SerVer 1 sketch + * @param srcSeg the image of a SerVer 1 sketch * * @param seedHash See Seed Hash. * The seedHash that matches the seedHash of the original seed used to construct the sketch. @@ -50,35 +54,35 @@ final class ForwardCompatibility { * MUST be derived from the actual seed that was used when the SerVer 1 sketches were built. * @return a SerVer 3 {@link CompactSketch}. */ - static final CompactSketch heapify1to3(final Memory srcMem, final short seedHash) { - final int memCap = (int) srcMem.getCapacity(); - final int preLongs = extractPreLongs(srcMem); //always 3 for serVer 1 + static final CompactSketch heapify1to3(final MemorySegment srcSeg, final short seedHash) { + final int segCap = (int) srcSeg.byteSize(); + final int preLongs = extractPreLongs(srcSeg); //always 3 for serVer 1 if (preLongs != 3) { throw new SketchesArgumentException("PreLongs must be 3 for SerVer 1: " + preLongs); } - final int familyId = extractFamilyID(srcMem); //1,2,3 + final int familyId = extractFamilyID(srcSeg); //1,2,3 if ((familyId < 1) || (familyId > 3)) { throw new SketchesArgumentException("Family ID (Sketch Type) must be 1 to 3: " + familyId); } - final int curCount = extractCurCount(srcMem); - final long thetaLong = extractThetaLong(srcMem); + final int curCount = extractCurCount(srcSeg); + final long thetaLong = extractThetaLong(srcSeg); final boolean empty = (curCount == 0) && (thetaLong == Long.MAX_VALUE); - if (empty || (memCap <= 24)) { //return empty + if (empty || (segCap <= 24)) { //return empty return EmptyCompactSketch.getInstance(); } final int reqCap = (curCount + preLongs) << 3; - validateInputSize(reqCap, memCap); + validateInputSize(reqCap, segCap); if 
((thetaLong == Long.MAX_VALUE) && (curCount == 1)) { - final long hash = srcMem.getLong(preLongs << 3); + final long hash = srcSeg.get(JAVA_LONG_UNALIGNED, preLongs << 3); return new SingleItemSketch(hash, seedHash); } //theta < 1.0 and/or curCount > 1 final long[] compactOrderedCache = new long[curCount]; - srcMem.getLongArray(preLongs << 3, compactOrderedCache, 0, curCount); + MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preLongs << 3, compactOrderedCache, 0, curCount); return new HeapCompactSketch(compactOrderedCache, false, seedHash, curCount, thetaLong, true); } @@ -86,15 +90,15 @@ static final CompactSketch heapify1to3(final Memory srcMem, final short seedHash * Convert a serialization version (SerVer) 2 sketch to a SerVer 3 HeapCompactOrderedSketch. * Note: SerVer 2 sketches can have metadata-longs of 1,2 or 3 and are always stored * in a compact ordered form (not as a hash table), but with 4 different sketch types. - * @param srcMem the image of a SerVer 2 sketch + * @param srcSeg the image of a SerVer 2 sketch * @param seedHash See Seed Hash. 
* The seed used for building the sketch image in srcMem * @return a SerVer 3 HeapCompactOrderedSketch */ - static final CompactSketch heapify2to3(final Memory srcMem, final short seedHash) { - final int memCap = (int) srcMem.getCapacity(); - final int preLongs = extractPreLongs(srcMem); //1,2 or 3 - final int familyId = extractFamilyID(srcMem); //1,2,3,4 + static final CompactSketch heapify2to3(final MemorySegment srcSeg, final short seedHash) { + final int segCap = (int) srcSeg.byteSize(); + final int preLongs = extractPreLongs(srcSeg); //1,2 or 3 + final int familyId = extractFamilyID(srcSeg); //1,2,3,4 if ((familyId < 1) || (familyId > 4)) { throw new SketchesArgumentException("Family (Sketch Type) must be 1 to 4: " + familyId); } @@ -103,58 +107,59 @@ static final CompactSketch heapify2to3(final Memory srcMem, final short seedHash long thetaLong = Long.MAX_VALUE; if (preLongs == 1) { reqBytesIn = 8; - validateInputSize(reqBytesIn, memCap); + validateInputSize(reqBytesIn, segCap); return EmptyCompactSketch.getInstance(); } if (preLongs == 2) { //includes pre0 + count, no theta (== 1.0) reqBytesIn = preLongs << 3; - validateInputSize(reqBytesIn, memCap); - curCount = extractCurCount(srcMem); + validateInputSize(reqBytesIn, segCap); + curCount = extractCurCount(srcSeg); if (curCount == 0) { return EmptyCompactSketch.getInstance(); } if (curCount == 1) { reqBytesIn = (preLongs + 1) << 3; - validateInputSize(reqBytesIn, memCap); - final long hash = srcMem.getLong(preLongs << 3); + validateInputSize(reqBytesIn, segCap); + final long hash = srcSeg.get(JAVA_LONG_UNALIGNED, preLongs << 3); return new SingleItemSketch(hash, seedHash); } //curCount > 1 reqBytesIn = (curCount + preLongs) << 3; - validateInputSize(reqBytesIn, memCap); + validateInputSize(reqBytesIn, segCap); final long[] compactOrderedCache = new long[curCount]; - srcMem.getLongArray(preLongs << 3, compactOrderedCache, 0, curCount); + MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preLongs << 3, 
compactOrderedCache, 0, curCount); return new HeapCompactSketch(compactOrderedCache, false, seedHash, curCount, thetaLong,true); } if (preLongs == 3) { //pre0 + count + theta reqBytesIn = (preLongs) << 3; // - validateInputSize(reqBytesIn, memCap); - curCount = extractCurCount(srcMem); - thetaLong = extractThetaLong(srcMem); + validateInputSize(reqBytesIn, segCap); + curCount = extractCurCount(srcSeg); + thetaLong = extractThetaLong(srcSeg); if ((curCount == 0) && (thetaLong == Long.MAX_VALUE)) { return EmptyCompactSketch.getInstance(); } if ((curCount == 1) && (thetaLong == Long.MAX_VALUE)) { reqBytesIn = (preLongs + 1) << 3; - validateInputSize(reqBytesIn, memCap); - final long hash = srcMem.getLong(preLongs << 3); + validateInputSize(reqBytesIn, segCap); + final long hash = srcSeg.get(JAVA_LONG_UNALIGNED, preLongs << 3); return new SingleItemSketch(hash, seedHash); } //curCount > 1 and/or theta < 1.0 reqBytesIn = (curCount + preLongs) << 3; - validateInputSize(reqBytesIn, memCap); + validateInputSize(reqBytesIn, segCap); final long[] compactOrderedCache = new long[curCount]; - srcMem.getLongArray(preLongs << 3, compactOrderedCache, 0, curCount); + //srcSeg.getLongArray(preLongs << 3, compactOrderedCache, 0, curCount); + MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preLongs << 3, compactOrderedCache, 0, curCount); return new HeapCompactSketch(compactOrderedCache, false, seedHash, curCount, thetaLong, true); } throw new SketchesArgumentException("PreLongs must be 1,2, or 3: " + preLongs); } - private static final void validateInputSize(final int reqBytesIn, final int memCap) { - if (reqBytesIn > memCap) { + private static final void validateInputSize(final int reqBytesIn, final int segCap) { + if (reqBytesIn > segCap) { throw new SketchesArgumentException( - "Input Memory or byte[] size is too small: Required Bytes: " + reqBytesIn - + ", bytesIn: " + memCap); + "Input MemorySegment or byte[] size is too small: Required Bytes: " + reqBytesIn + + ", bytesIn: " + 
segCap); } } diff --git a/src/main/java/org/apache/datasketches/theta/HeapAlphaSketch.java b/src/main/java/org/apache/datasketches/theta/HeapAlphaSketch.java index 3ee7c4880..b4b7848c5 100644 --- a/src/main/java/org/apache/datasketches/theta/HeapAlphaSketch.java +++ b/src/main/java/org/apache/datasketches/theta/HeapAlphaSketch.java @@ -22,6 +22,7 @@ import static java.lang.Math.max; import static java.lang.Math.min; import static java.lang.Math.sqrt; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; import static org.apache.datasketches.common.Util.checkBounds; import static org.apache.datasketches.theta.PreambleUtil.extractCurCount; @@ -38,13 +39,12 @@ import static org.apache.datasketches.theta.UpdateReturnState.RejectedOverTheta; import static org.apache.datasketches.thetacommon.HashOperations.STRIDE_MASK; +import java.lang.foreign.MemorySegment; import java.util.Objects; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.HashOperations; import org.apache.datasketches.thetacommon.ThetaUtil; @@ -113,44 +113,43 @@ static HeapAlphaSketch newHeapInstance(final int lgNomLongs, final long seed, fi } /** - * Heapify a sketch from a Memory object containing sketch data. - * @param srcMem The source Memory object. - * See Memory + * Heapify a sketch from a MemorySegment object containing sketch data. + * @param srcSeg The source MemorySegment object. * It must have a size of at least 24 bytes. - * @param expectedSeed the seed used to validate the given Memory image. + * @param expectedSeed the seed used to validate the given MemorySegment image. 
* See seed * @return instance of this sketch */ - static HeapAlphaSketch heapifyInstance(final Memory srcMem, final long expectedSeed) { - Objects.requireNonNull(srcMem, "Source Memory must not be null"); - checkBounds(0, 24, srcMem.getCapacity()); - final int preambleLongs = extractPreLongs(srcMem); //byte 0 - final int lgNomLongs = extractLgNomLongs(srcMem); //byte 3 - final int lgArrLongs = extractLgArrLongs(srcMem); //byte 4 + static HeapAlphaSketch heapifyInstance(final MemorySegment srcSeg, final long expectedSeed) { + Objects.requireNonNull(srcSeg, "Source MemorySegment must not be null"); + checkBounds(0, 24, srcSeg.byteSize()); + final int preambleLongs = extractPreLongs(srcSeg); //byte 0 + final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3 + final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4 - checkAlphaFamily(srcMem, preambleLongs, lgNomLongs); - checkMemIntegrity(srcMem, expectedSeed, preambleLongs, lgNomLongs, lgArrLongs); + checkAlphaFamily(srcSeg, preambleLongs, lgNomLongs); + checkSegIntegrity(srcSeg, expectedSeed, preambleLongs, lgNomLongs, lgArrLongs); - final float p = extractP(srcMem); //bytes 12-15 - final int memlgRF = extractLgResizeFactor(srcMem); //byte 0 - ResizeFactor memRF = ResizeFactor.getRF(memlgRF); + final float p = extractP(srcSeg); //bytes 12-15 + final int seglgRF = extractLgResizeFactor(srcSeg); //byte 0 + ResizeFactor segRF = ResizeFactor.getRF(seglgRF); final double nomLongs = (1L << lgNomLongs); final double alpha = nomLongs / (nomLongs + 1.0); final long split1 = (long) (((p * (alpha + 1.0)) / 2.0) * LONG_MAX_VALUE_AS_DOUBLE); - if (isResizeFactorIncorrect(srcMem, lgNomLongs, lgArrLongs)) { - memRF = ResizeFactor.X2; //X2 always works. + if (isResizeFactorIncorrect(srcSeg, lgNomLongs, lgArrLongs)) { + segRF = ResizeFactor.X2; //X2 always works. 
} - final HeapAlphaSketch has = new HeapAlphaSketch(lgNomLongs, expectedSeed, p, memRF, alpha, split1); + final HeapAlphaSketch has = new HeapAlphaSketch(lgNomLongs, expectedSeed, p, segRF, alpha, split1); has.lgArrLongs_ = lgArrLongs; has.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs); - has.curCount_ = extractCurCount(srcMem); - has.thetaLong_ = extractThetaLong(srcMem); - has.empty_ = PreambleUtil.isEmptyFlag(srcMem); + has.curCount_ = extractCurCount(srcSeg); + has.thetaLong_ = extractThetaLong(srcSeg); + has.empty_ = PreambleUtil.isEmptyFlag(srcSeg); has.cache_ = new long[1 << lgArrLongs]; - srcMem.getLongArray(preambleLongs << 3, has.cache_, 0, 1 << lgArrLongs); //read in as hash table + MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preambleLongs << 3, has.cache_, 0, 1 << lgArrLongs); //read in as hash table return has; } @@ -294,11 +293,6 @@ int getCurrentPreambleLongs() { return Family.ALPHA.getMinPreLongs(); } - @Override - WritableMemory getMemory() { - return null; - } - @Override long[] getCache() { return cache_; @@ -576,9 +570,9 @@ private static final int setHashTableThreshold(final int lgNomLongs, final int l return (int) Math.floor(fraction * (1 << lgArrLongs)); } - static void checkAlphaFamily(final Memory mem, final int preambleLongs, final int lgNomLongs) { + static void checkAlphaFamily(final MemorySegment seg, final int preambleLongs, final int lgNomLongs) { //Check Family - final int familyID = extractFamilyID(mem); //byte 2 + final int familyID = extractFamilyID(seg); //byte 2 final Family family = Family.idToFamily(familyID); if (family.equals(Family.ALPHA)) { if (preambleLongs != Family.ALPHA.getMinPreLongs()) { diff --git a/src/main/java/org/apache/datasketches/theta/HeapCompactHashIterator.java b/src/main/java/org/apache/datasketches/theta/HeapCompactHashIterator.java index d5a6289c7..e8e5d8305 100644 --- a/src/main/java/org/apache/datasketches/theta/HeapCompactHashIterator.java +++ 
b/src/main/java/org/apache/datasketches/theta/HeapCompactHashIterator.java @@ -19,7 +19,7 @@ package org.apache.datasketches.theta; -class HeapCompactHashIterator implements HashIterator { +final class HeapCompactHashIterator implements HashIterator { private long[] cache; private int index; diff --git a/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java b/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java index 2572ce5d5..6b5708901 100644 --- a/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java @@ -24,22 +24,21 @@ import static org.apache.datasketches.theta.CompactOperations.computeCompactPreLongs; import static org.apache.datasketches.theta.CompactOperations.correctThetaOnCompact; import static org.apache.datasketches.theta.CompactOperations.isSingleItem; -import static org.apache.datasketches.theta.CompactOperations.loadCompactMemory; +import static org.apache.datasketches.theta.CompactOperations.loadCompactMemorySegment; import static org.apache.datasketches.theta.PreambleUtil.COMPACT_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.EMPTY_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.ORDERED_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.READ_ONLY_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.SINGLEITEM_FLAG_MASK; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; +import java.lang.foreign.MemorySegment; /** * Parent class of the Heap Compact Sketches. 
* * @author Lee Rhodes */ -class HeapCompactSketch extends CompactSketch { +final class HeapCompactSketch extends CompactSketch { private final long thetaLong_; //computed private final int curCount_; private final int preLongs_; //computed @@ -76,10 +75,10 @@ class HeapCompactSketch extends CompactSketch { //Sketch @Override - public CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem) { - if (dstMem == null && (dstOrdered == false || this.ordered_ == dstOrdered)) { return this; } + public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) { + if (dstSeg == null && (dstOrdered == false || this.ordered_ == dstOrdered)) { return this; } return componentsToCompact(getThetaLong(), getRetainedEntries(true), getSeedHash(), isEmpty(), - true, ordered_, dstOrdered, dstMem, getCache().clone()); + true, ordered_, dstOrdered, dstSeg, getCache().clone()); } @Override @@ -129,30 +128,25 @@ int getCompactPreambleLongs() { return preLongs_; } - @Override - Memory getMemory() { - return null; - } - @Override short getSeedHash() { return seedHash_; } - //use of Memory is convenient. The byteArray and Memory are loaded simultaneously. + //use of a MemorySegment is convenient. The byteArray and MemorySegment are loaded simultaneously. @Override public byte[] toByteArray() { final int bytes = getCurrentBytes(); final byte[] byteArray = new byte[bytes]; - final WritableMemory dstMem = WritableMemory.writableWrap(byteArray); + final MemorySegment dstSeg = MemorySegment.ofArray(byteArray); final int emptyBit = isEmpty() ? EMPTY_FLAG_MASK : 0; final int orderedBit = ordered_ ? ORDERED_FLAG_MASK : 0; final int singleItemBit = singleItem_ ? 
SINGLEITEM_FLAG_MASK : 0; final byte flags = (byte) (emptyBit | READ_ONLY_FLAG_MASK | COMPACT_FLAG_MASK | orderedBit | singleItemBit); final int preLongs = getCompactPreambleLongs(); - loadCompactMemory(getCache(), getSeedHash(), getRetainedEntries(true), getThetaLong(), - dstMem, flags, preLongs); + loadCompactMemorySegment(getCache(), getSeedHash(), getRetainedEntries(true), getThetaLong(), + dstSeg, flags, preLongs); return byteArray; } diff --git a/src/main/java/org/apache/datasketches/theta/HeapHashIterator.java b/src/main/java/org/apache/datasketches/theta/HeapHashIterator.java index 9562da7d0..d689b912e 100644 --- a/src/main/java/org/apache/datasketches/theta/HeapHashIterator.java +++ b/src/main/java/org/apache/datasketches/theta/HeapHashIterator.java @@ -22,7 +22,7 @@ /** * @author Lee Rhodes */ -class HeapHashIterator implements HashIterator { +final class HeapHashIterator implements HashIterator { private long[] cache; private long thetaLong; private int index; diff --git a/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java index b9d4dc9e1..46e469004 100644 --- a/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java @@ -21,6 +21,8 @@ import static java.lang.Math.max; import static java.lang.Math.min; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static org.apache.datasketches.common.QuickSelect.selectExcludingZeros; import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; import static org.apache.datasketches.theta.PreambleUtil.extractCurCount; import static org.apache.datasketches.theta.PreambleUtil.extractFamilyID; @@ -35,12 +37,11 @@ import static org.apache.datasketches.theta.UpdateReturnState.InsertedCountIncrementedResized; import static org.apache.datasketches.theta.UpdateReturnState.RejectedDuplicate; import static 
org.apache.datasketches.theta.UpdateReturnState.RejectedOverTheta; -import static org.apache.datasketches.thetacommon.QuickSelect.selectExcludingZeros; + +import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.HashOperations; import org.apache.datasketches.thetacommon.ThetaUtil; @@ -100,40 +101,39 @@ private HeapQuickSelectSketch(final int lgNomLongs, final long seed, final float } /** - * Heapify a sketch from a Memory UpdateSketch or Union object + * Heapify a sketch from a MemorySegment UpdateSketch or Union object * containing sketch data. - * @param srcMem The source Memory object. - * See Memory + * @param srcSeg The source MemorySegment object. * @param seed See seed * @return instance of this sketch */ - static HeapQuickSelectSketch heapifyInstance(final Memory srcMem, final long seed) { - final int preambleLongs = extractPreLongs(srcMem); //byte 0 - final int lgNomLongs = extractLgNomLongs(srcMem); //byte 3 - final int lgArrLongs = extractLgArrLongs(srcMem); //byte 4 - - checkUnionQuickSelectFamily(srcMem, preambleLongs, lgNomLongs); - checkMemIntegrity(srcMem, seed, preambleLongs, lgNomLongs, lgArrLongs); - - final float p = extractP(srcMem); //bytes 12-15 - final int memlgRF = extractLgResizeFactor(srcMem); //byte 0 - ResizeFactor memRF = ResizeFactor.getRF(memlgRF); - final int familyID = extractFamilyID(srcMem); + static HeapQuickSelectSketch heapifyInstance(final MemorySegment srcSeg, final long seed) { + final int preambleLongs = extractPreLongs(srcSeg); //byte 0 + final int lgNomLongs = extractLgNomLongs(srcSeg); //byte 3 + final int lgArrLongs = extractLgArrLongs(srcSeg); //byte 4 + + checkUnionQuickSelectFamily(srcSeg, preambleLongs, lgNomLongs); + checkSegIntegrity(srcSeg, seed, preambleLongs, lgNomLongs, lgArrLongs); + + 
final float p = extractP(srcSeg); //bytes 12-15 + final int seglgRF = extractLgResizeFactor(srcSeg); //byte 0 + ResizeFactor segRF = ResizeFactor.getRF(seglgRF); + final int familyID = extractFamilyID(srcSeg); final Family family = Family.idToFamily(familyID); - if (isResizeFactorIncorrect(srcMem, lgNomLongs, lgArrLongs)) { - memRF = ResizeFactor.X2; //X2 always works. + if (isResizeFactorIncorrect(srcSeg, lgNomLongs, lgArrLongs)) { + segRF = ResizeFactor.X2; //X2 always works. } - final HeapQuickSelectSketch hqss = new HeapQuickSelectSketch(lgNomLongs, seed, p, memRF, + final HeapQuickSelectSketch hqss = new HeapQuickSelectSketch(lgNomLongs, seed, p, segRF, preambleLongs, family); hqss.lgArrLongs_ = lgArrLongs; hqss.hashTableThreshold_ = getHashTableThreshold(lgNomLongs, lgArrLongs); - hqss.curCount_ = extractCurCount(srcMem); - hqss.thetaLong_ = extractThetaLong(srcMem); - hqss.empty_ = PreambleUtil.isEmptyFlag(srcMem); + hqss.curCount_ = extractCurCount(srcSeg); + hqss.thetaLong_ = extractThetaLong(srcSeg); + hqss.empty_ = PreambleUtil.isEmptyFlag(srcSeg); hqss.cache_ = new long[1 << lgArrLongs]; - srcMem.getLongArray(preambleLongs << 3, hqss.cache_, 0, 1 << lgArrLongs); //read in as hash table + MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preambleLongs << 3, hqss.cache_, 0, 1 << lgArrLongs); //read in as hash table return hqss; } @@ -230,11 +230,6 @@ int getLgArrLongs() { return lgArrLongs_; } - @Override - WritableMemory getMemory() { - return null; - } - @Override UpdateReturnState hashUpdate(final long hash) { HashOperations.checkHashCorruption(hash); diff --git a/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java b/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java index 49734a9e8..87e1892b8 100644 --- a/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java +++ b/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java @@ -19,6 +19,7 @@ package org.apache.datasketches.theta; +import static 
java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static org.apache.datasketches.theta.CompactOperations.checkIllegalCurCountAndEmpty; import static org.apache.datasketches.theta.CompactOperations.correctThetaOnCompact; import static org.apache.datasketches.theta.PreambleUtil.EMPTY_FLAG_MASK; @@ -35,8 +36,10 @@ import static org.apache.datasketches.theta.PreambleUtil.insertSerVer; import static org.apache.datasketches.theta.PreambleUtil.insertThetaLong; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon.ThetaUtil; /** @@ -92,7 +95,7 @@ long getSeed() { @Override short getSeedHash() { - return ThetaUtil.computeSeedHash(getSeed()); + return Util.computeSeedHash(getSeed()); } //Used by HeapAlphaSketch and HeapQuickSelectSketch / Theta UpdateSketch @@ -102,33 +105,35 @@ byte[] toByteArray(final int preLongs, final byte familyID) { final int preBytes = (preLongs << 3) & 0X3F; //24 bytes final int dataBytes = getCurrentDataLongs() << 3; final byte[] byteArrOut = new byte[preBytes + dataBytes]; - final WritableMemory memOut = WritableMemory.writableWrap(byteArrOut); + + final MemorySegment segOut = MemorySegment.ofArray(byteArrOut); //preamble first 8 bytes. Note: only compact can be reduced to 8 bytes. 
final int lgRf = getResizeFactor().lg() & 0x3; - insertPreLongs(memOut, preLongs); //byte 0 low 6 bits - insertLgResizeFactor(memOut, lgRf); //byte 0 high 2 bits - insertSerVer(memOut, SER_VER); //byte 1 - insertFamilyID(memOut, familyID); //byte 2 - insertLgNomLongs(memOut, getLgNomLongs()); //byte 3 - insertLgArrLongs(memOut, getLgArrLongs()); //byte 4 - insertSeedHash(memOut, getSeedHash()); //bytes 6 & 7 - - insertCurCount(memOut, this.getRetainedEntries(true)); - insertP(memOut, getP()); + insertPreLongs(segOut, preLongs); //byte 0 low 6 bits + insertLgResizeFactor(segOut, lgRf); //byte 0 high 2 bits + insertSerVer(segOut, SER_VER); //byte 1 + insertFamilyID(segOut, familyID); //byte 2 + insertLgNomLongs(segOut, getLgNomLongs()); //byte 3 + insertLgArrLongs(segOut, getLgArrLongs()); //byte 4 + insertSeedHash(segOut, getSeedHash()); //bytes 6 & 7 + + insertCurCount(segOut, this.getRetainedEntries(true)); + insertP(segOut, getP()); final long thetaLong = correctThetaOnCompact(isEmpty(), getRetainedEntries(true), getThetaLong()); - insertThetaLong(memOut, thetaLong); + insertThetaLong(segOut, thetaLong); //Flags: BigEnd=0, ReadOnly=0, Empty=X, compact=0, ordered=0 final byte flags = isEmpty() ? 
(byte) EMPTY_FLAG_MASK : 0; - insertFlags(memOut, flags); + insertFlags(segOut, flags); //Data final int arrLongs = 1 << getLgArrLongs(); final long[] cache = getCache(); - memOut.putLongArray(preBytes, cache, 0, arrLongs); //load byteArrOut + //segOut.putLongArray(preBytes, cache, 0, arrLongs); //load byteArrOut + MemorySegment.copy(cache, 0, segOut, JAVA_LONG_UNALIGNED, preBytes, arrLongs); return byteArrOut; } diff --git a/src/main/java/org/apache/datasketches/theta/Intersection.java b/src/main/java/org/apache/datasketches/theta/Intersection.java index 111a9ce55..a31dc3ef9 100644 --- a/src/main/java/org/apache/datasketches/theta/Intersection.java +++ b/src/main/java/org/apache/datasketches/theta/Intersection.java @@ -19,21 +19,23 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; import static org.apache.datasketches.common.Util.floorPowerOf2; import static org.apache.datasketches.theta.PreambleUtil.EMPTY_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.SER_VER; +import static org.apache.datasketches.theta.PreambleUtil.SER_VER_BYTE; import static org.apache.datasketches.theta.PreambleUtil.extractCurCount; import static org.apache.datasketches.theta.PreambleUtil.extractFamilyID; import static org.apache.datasketches.theta.PreambleUtil.extractFlags; import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs; import static org.apache.datasketches.theta.PreambleUtil.extractSerVer; +import java.lang.foreign.MemorySegment; import java.util.Arrays; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon.ThetaUtil; /** @@ -61,7 +63,7 @@ public CompactSketch getResult() { } /** - * Gets the result of this operation as a CompactSketch in the given 
dstMem. + * Gets the result of this operation as a CompactSketch in the given dstSeg. * This does not disturb the underlying data structure of this intersection. * The {@link #intersect(Sketch)} method must have been called at least once, otherwise an * exception will be thrown. This is because a virgin Intersection object represents the @@ -77,13 +79,12 @@ public CompactSketch getResult() { * @param dstOrdered * See Destination Ordered * - * @param dstMem - * See Destination Memory. + * @param dstSeg the destination MemorySegment. * - * @return the result of this operation as a CompactSketch stored in the given dstMem, + * @return the result of this operation as a CompactSketch stored in the given dstSeg, * which can be either on or off-heap.. */ - public abstract CompactSketch getResult(boolean dstOrdered, WritableMemory dstMem); + public abstract CompactSketch getResult(boolean dstOrdered, MemorySegment dstSeg); /** * Returns true if there is a valid intersection result available @@ -131,32 +132,57 @@ public CompactSketch intersect(final Sketch a, final Sketch b) { * @param b The second sketch argument * @param dstOrdered * See Destination Ordered. - * @param dstMem - * See Destination Memory. + * @param dstSeg the destination MemorySegment. * @return the result as a CompactSketch. */ public abstract CompactSketch intersect(Sketch a, Sketch b, boolean dstOrdered, - WritableMemory dstMem); + MemorySegment dstSeg); + + /** + * Factory: Wrap an Intersection target around the given source MemorySegment containing intersection data. + * This method assumes the Default Update Seed. + * If the given source MemorySegment is read-only, the returned object will also be read-only. + * @param srcSeg The source MemorySegment image. 
+ * @return an Intersection that wraps a source MemorySegment that contains an Intersection image + */ + public static Intersection wrap(final MemorySegment srcSeg) { + return wrap(srcSeg, Util.DEFAULT_UPDATE_SEED); + } + + /** + * Factory: Wrap an Intersection target around the given source MemorySegment containing intersection data. + * If the given source MemorySegment is read-only, the returned object will also be read-only. + * @param srcSeg The source MemorySegment image. + * @param expectedSeed See seed + * @return an Intersection that wraps a source MemorySegment that contains an Intersection image + */ + public static Intersection wrap(final MemorySegment srcSeg, final long expectedSeed) { + final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE); + if (serVer != 3) { + throw new SketchesArgumentException("SerVer must be 3: " + serVer); + } + return IntersectionImpl.wrapInstance(srcSeg, expectedSeed, srcSeg.isReadOnly() ); + } // Restricted /** - * Returns the maximum lgArrLongs given the capacity of the Memory. - * @param dstMem the given Memory - * @return the maximum lgArrLongs given the capacity of the Memory + * Returns the maximum lgArrLongs given the capacity of the MemorySegment. 
+ * @param dstSeg the given MemorySegment + * @return the maximum lgArrLongs given the capacity of the MemorySegment */ - protected static int getMaxLgArrLongs(final Memory dstMem) { + protected static int getMaxLgArrLongs(final MemorySegment dstSeg) { final int preBytes = CONST_PREAMBLE_LONGS << 3; - final long cap = dstMem.getCapacity(); + final long cap = dstSeg.byteSize(); return Integer.numberOfTrailingZeros(floorPowerOf2((int)(cap - preBytes)) >>> 3); } - protected static void checkMinSizeMemory(final Memory mem) { + protected static void checkMinSizeMemorySegment(final MemorySegment seg) { final int minBytes = (CONST_PREAMBLE_LONGS << 3) + (8 << ThetaUtil.MIN_LG_ARR_LONGS);//280 - final long cap = mem.getCapacity(); + final long cap = seg.byteSize(); if (cap < minBytes) { throw new SketchesArgumentException( - "Memory must be at least " + minBytes + " bytes. Actual capacity: " + cap); + "MemorySegment must be at least " + minBytes + " bytes. Actual capacity: " + cap); } } @@ -191,19 +217,19 @@ static final long[] compactCachePart(final long[] srcCache, final int lgArrLongs return cacheOut; } - protected static void memChecks(final Memory srcMem) { + protected static void segChecks(final MemorySegment srcSeg) { //Get Preamble //Note: Intersection does not use lgNomLongs (or k), per se. 
//seedHash loaded and checked in private constructor - final int preLongs = extractPreLongs(srcMem); - final int serVer = extractSerVer(srcMem); - final int famID = extractFamilyID(srcMem); - final boolean empty = (extractFlags(srcMem) & EMPTY_FLAG_MASK) > 0; - final int curCount = extractCurCount(srcMem); + final int preLongs = extractPreLongs(srcSeg); + final int serVer = extractSerVer(srcSeg); + final int famID = extractFamilyID(srcSeg); + final boolean empty = (extractFlags(srcSeg) & EMPTY_FLAG_MASK) > 0; + final int curCount = extractCurCount(srcSeg); //Checks if (preLongs != CONST_PREAMBLE_LONGS) { throw new SketchesArgumentException( - "Memory PreambleLongs must equal " + CONST_PREAMBLE_LONGS + ": " + preLongs); + "MemorySegment PreambleLongs must equal " + CONST_PREAMBLE_LONGS + ": " + preLongs); } if (serVer != SER_VER) { throw new SketchesArgumentException("Serialization Version must equal " + SER_VER); @@ -212,7 +238,7 @@ protected static void memChecks(final Memory srcMem) { if (empty) { if (curCount != 0) { throw new SketchesArgumentException( - "srcMem empty state inconsistent with curCount: " + empty + "," + curCount); + "srcSeg empty state inconsistent with curCount: " + empty + "," + curCount); } //empty = true AND curCount_ = 0: OK } //else empty = false, curCount could be anything diff --git a/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java b/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java index 772480fea..6bf922be0 100644 --- a/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java +++ b/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java @@ -20,6 +20,13 @@ package org.apache.datasketches.theta; import static java.lang.Math.min; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import 
static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; +import static org.apache.datasketches.common.Util.clearBits; +import static org.apache.datasketches.common.Util.setBits; import static org.apache.datasketches.theta.PreambleUtil.EMPTY_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.FAMILY_BYTE; import static org.apache.datasketches.theta.PreambleUtil.FLAGS_BYTE; @@ -47,36 +54,36 @@ import static org.apache.datasketches.theta.PreambleUtil.setEmpty; import static org.apache.datasketches.thetacommon.HashOperations.continueCondition; import static org.apache.datasketches.thetacommon.HashOperations.hashInsertOnly; -import static org.apache.datasketches.thetacommon.HashOperations.hashInsertOnlyMemory; +import static org.apache.datasketches.thetacommon.HashOperations.hashInsertOnlyMemorySegment; import static org.apache.datasketches.thetacommon.HashOperations.hashSearch; import static org.apache.datasketches.thetacommon.HashOperations.minLgHashTableSize; +import java.lang.foreign.MemorySegment; import java.util.Arrays; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesReadOnlyException; import org.apache.datasketches.common.SketchesStateException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon.ThetaUtil; /** * Intersection operation for Theta Sketches. * - *This implementation uses data either on-heap or off-heap in a given Memory + *
This implementation uses data either on-heap or off-heap in a given MemorySegment * that is owned and managed by the caller. - * The off-heap Memory, which if managed properly, will greatly reduce the need for + * The off-heap MemorySegment, which if managed properly, will greatly reduce the need for * the JVM to perform garbage collection.
* * @author Lee Rhodes * @author Kevin Lang */ -class IntersectionImpl extends Intersection { +final class IntersectionImpl extends Intersection { protected final short seedHash_; protected final boolean readOnly_; //True if this sketch is to be treated as read only - protected final WritableMemory wmem_; - protected final int maxLgArrLongs_; //only used with WritableMemory, not serialized + protected final MemorySegment wseg_; + protected final int maxLgArrLongs_; //only used with MemorySegment, not serialized //Note: Intersection does not use lgNomLongs or k, per se. protected int lgArrLongs_; //current size of hash table @@ -87,30 +94,30 @@ class IntersectionImpl extends Intersection { /** * Constructor: Sets the class finals and computes, sets and checks the seedHash. - * @param wmem Can be either a Source(e.g. wrap) or Destination (new Direct) WritableMemory. + * @param wseg Can be either a Source(e.g. wrap) or Destination (new offHeap) MemorySegment. * @param seed Used to validate incoming sketch arguments. - * @param dstMemFlag The given memory is a Destination (new Direct) WritableMemory. - * @param readOnly True if memory is to be treated as read only. + * @param dstSegFlag The given MemorySegment is a Destination (new offHeap) MemorySegment. + * @param readOnly True if MemorySegment is to be treated as read only. */ - protected IntersectionImpl(final WritableMemory wmem, final long seed, final boolean dstMemFlag, + protected IntersectionImpl(final MemorySegment wseg, final long seed, final boolean dstSegFlag, final boolean readOnly) { readOnly_ = readOnly; - if (wmem != null) { - wmem_ = wmem; - if (dstMemFlag) { //DstMem: compute & store seedHash, no seedhash checking - checkMinSizeMemory(wmem); - maxLgArrLongs_ = !readOnly ? 
getMaxLgArrLongs(wmem) : 0; //Only Off Heap - seedHash_ = ThetaUtil.computeSeedHash(seed); - wmem_.putShort(SEED_HASH_SHORT, seedHash_); - } else { //SrcMem:gets and stores the seedHash, checks mem_seedHash against the seed - seedHash_ = wmem_.getShort(SEED_HASH_SHORT); - ThetaUtil.checkSeedHashes(seedHash_, ThetaUtil.computeSeedHash(seed)); //check for seed hash conflict + if (wseg != null) { + wseg_ = wseg; + if (dstSegFlag) { //DstSeg: compute & store seedHash, no seedHash checking + checkMinSizeMemorySegment(wseg); + maxLgArrLongs_ = !readOnly ? getMaxLgArrLongs(wseg) : 0; //Only Off Heap + seedHash_ = Util.computeSeedHash(seed); + wseg_.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, seedHash_); + } else { //SrcSeg:gets and stores the seedHash, checks seg_seedHash against the seed + seedHash_ = wseg_.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT); + Util.checkSeedHashes(seedHash_, Util.computeSeedHash(seed)); //check for seed hash conflict maxLgArrLongs_ = 0; } } else { //compute & store seedHash - wmem_ = null; + wseg_ = null; maxLgArrLongs_ = 0; - seedHash_ = ThetaUtil.computeSeedHash(seed); + seedHash_ = Util.computeSeedHash(seed); } } @@ -122,105 +129,101 @@ protected IntersectionImpl(final WritableMemory wmem, final long seed, final boo * @return a new IntersectionImpl on the Java heap */ static IntersectionImpl initNewHeapInstance(final long seed) { - final boolean dstMemFlag = false; + final boolean dstSegFlag = false; final boolean readOnly = false; - final IntersectionImpl impl = new IntersectionImpl(null, seed, dstMemFlag, readOnly); + final IntersectionImpl impl = new IntersectionImpl(null, seed, dstSegFlag, readOnly); impl.hardReset(); return impl; } /** - * Factory: Construct a new Intersection target direct to the given destination Memory. + * Factory: Construct a new Intersection target direct to the given destination MemorySegment. * Called by SetOperationBuilder, test. 
* * @param seed See Seed - * @param dstMem destination Memory - * See Memory + * @param dstSeg destination MemorySegment * @return a new IntersectionImpl that may be off-heap */ - static IntersectionImpl initNewDirectInstance(final long seed, final WritableMemory dstMem) { + static IntersectionImpl initNewDirectInstance(final long seed, final MemorySegment dstSeg) { //Load Preamble //Pre0 - dstMem.clear(0, CONST_PREAMBLE_LONGS << 3); - insertPreLongs(dstMem, CONST_PREAMBLE_LONGS); //RF not used = 0 - insertSerVer(dstMem, SER_VER); - insertFamilyID(dstMem, Family.INTERSECTION.getID()); + dstSeg.asSlice(0, CONST_PREAMBLE_LONGS << 3).fill((byte)0); + insertPreLongs(dstSeg, CONST_PREAMBLE_LONGS); //RF not used = 0 + insertSerVer(dstSeg, SER_VER); + insertFamilyID(dstSeg, Family.INTERSECTION.getID()); //lgNomLongs not used by Intersection //lgArrLongs set by hardReset //flags are already 0: bigEndian = readOnly = compact = ordered = empty = false; //seedHash loaded and checked in IntersectionImpl constructor //Pre1 //CurCount set by hardReset - insertP(dstMem, (float) 1.0); //not used by intersection + insertP(dstSeg, (float) 1.0); //not used by intersection //Pre2 //thetaLong set by hardReset //Initialize - final boolean dstMemFlag = true; + final boolean dstSegFlag = true; final boolean readOnly = false; - final IntersectionImpl impl = new IntersectionImpl(dstMem, seed, dstMemFlag, readOnly); + final IntersectionImpl impl = new IntersectionImpl(dstSeg, seed, dstSegFlag, readOnly); impl.hardReset(); return impl; } /** - * Factory: Heapify an intersection target from a Memory image containing data. - * @param srcMem The source Memory object. - * See Memory + * Factory: Heapify an intersection target from a MemorySegment image containing data. + * @param srcSeg The source MemorySegment object. 
* @param seed See seed * @return a IntersectionImpl instance on the Java heap */ - static IntersectionImpl heapifyInstance(final Memory srcMem, final long seed) { - final boolean dstMemFlag = false; + static IntersectionImpl heapifyInstance(final MemorySegment srcSeg, final long seed) { + final boolean dstSegFlag = false; final boolean readOnly = false; - final IntersectionImpl impl = new IntersectionImpl(null, seed, dstMemFlag, readOnly); - memChecks(srcMem); + final IntersectionImpl impl = new IntersectionImpl(null, seed, dstSegFlag, readOnly); + segChecks(srcSeg); //Initialize - impl.lgArrLongs_ = extractLgArrLongs(srcMem); - impl.curCount_ = extractCurCount(srcMem); - impl.thetaLong_ = extractThetaLong(srcMem); - impl.empty_ = (extractFlags(srcMem) & EMPTY_FLAG_MASK) > 0; + impl.lgArrLongs_ = extractLgArrLongs(srcSeg); + impl.curCount_ = extractCurCount(srcSeg); + impl.thetaLong_ = extractThetaLong(srcSeg); + impl.empty_ = (extractFlags(srcSeg) & EMPTY_FLAG_MASK) > 0; if (!impl.empty_) { if (impl.curCount_ > 0) { impl.hashTable_ = new long[1 << impl.lgArrLongs_]; - srcMem.getLongArray(CONST_PREAMBLE_LONGS << 3, impl.hashTable_, 0, 1 << impl.lgArrLongs_); + MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, impl.hashTable_, 0, 1 << impl.lgArrLongs_); } } return impl; } /** - * Factory: Wrap an Intersection target around the given source WritableMemory containing - * intersection data. - * @param srcMem The source WritableMemory image. - * See Memory + * Factory: Wrap an Intersection target around the given source MemorySegment containing intersection data. + * If the given source MemorySegment is read-only, the returned object will also be read-only. + * @param srcSeg The source MemorySegment image. 
* @param seed See seed - * @param readOnly True if memory is to be treated as read only - * @return a IntersectionImpl that wraps a source WritableMemory that contains an Intersection image + * @param readOnly True if MemorySegment is to be treated as read only + * @return a IntersectionImpl that wraps a source MemorySegment that contains an Intersection image */ static IntersectionImpl wrapInstance( - final WritableMemory srcMem, + final MemorySegment srcSeg, final long seed, final boolean readOnly) { - final boolean dstMemFlag = false; - final IntersectionImpl impl = new IntersectionImpl(srcMem, seed, dstMemFlag, readOnly); - memChecks(srcMem); - impl.lgArrLongs_ = extractLgArrLongs(srcMem); - impl.curCount_ = extractCurCount(srcMem); - impl.thetaLong_ = extractThetaLong(srcMem); - impl.empty_ = (extractFlags(srcMem) & EMPTY_FLAG_MASK) > 0; + final boolean dstSegFlag = false; + final IntersectionImpl impl = new IntersectionImpl(srcSeg, seed, dstSegFlag, readOnly); + segChecks(srcSeg); + impl.lgArrLongs_ = extractLgArrLongs(srcSeg); + impl.curCount_ = extractCurCount(srcSeg); + impl.thetaLong_ = extractThetaLong(srcSeg); + impl.empty_ = (extractFlags(srcSeg) & EMPTY_FLAG_MASK) > 0; return impl; } @Override - public CompactSketch intersect(final Sketch a, final Sketch b, final boolean dstOrdered, - final WritableMemory dstMem) { - if (wmem_ != null && readOnly_) { throw new SketchesReadOnlyException(); } + public CompactSketch intersect(final Sketch a, final Sketch b, final boolean dstOrdered, final MemorySegment dstSeg) { + if (wseg_ != null && readOnly_) { throw new SketchesReadOnlyException(); } hardReset(); intersect(a); intersect(b); - final CompactSketch csk = getResult(dstOrdered, dstMem); + final CompactSketch csk = getResult(dstOrdered, dstSeg); hardReset(); return csk; } @@ -230,20 +233,20 @@ public void intersect(final Sketch sketchIn) { if (sketchIn == null) { throw new SketchesArgumentException("Intersection argument must not be null."); } - if (wmem_ 
!= null && readOnly_) { throw new SketchesReadOnlyException(); } + if (wseg_ != null && readOnly_) { throw new SketchesReadOnlyException(); } if (empty_ || sketchIn.isEmpty()) { //empty rule //Because of the def of null above and the Empty Rule (which is OR), empty_ must be true. //Whatever the current internal state, we make our local empty. resetToEmpty(); return; } - ThetaUtil.checkSeedHashes(seedHash_, sketchIn.getSeedHash()); + Util.checkSeedHashes(seedHash_, sketchIn.getSeedHash()); //Set minTheta thetaLong_ = min(thetaLong_, sketchIn.getThetaLong()); //Theta rule empty_ = false; - if (wmem_ != null) { - insertThetaLong(wmem_, thetaLong_); - clearEmpty(wmem_); //false + if (wseg_ != null) { + insertThetaLong(wseg_, thetaLong_); + clearEmpty(wseg_); //false } // The truth table for the following state machine. MinTheta is set above. @@ -261,8 +264,8 @@ public void intersect(final Sketch sketchIn) { //states 1,2,3,6 if (curCount_ == 0 || sketchInEntries == 0) { curCount_ = 0; - if (wmem_ != null) { insertCurCount(wmem_, 0); } - hashTable_ = null; //No need for a HT. Don't bother clearing mem if valid + if (wseg_ != null) { insertCurCount(wseg_, 0); } + hashTable_ = null; //No need for a HT. 
Don't bother clearing seg if valid } //end of states 1,2,3,6 // state 5 @@ -272,17 +275,17 @@ else if (curCount_ < 0 && sketchInEntries > 0) { final int priorLgArrLongs = lgArrLongs_; //prior only used in error message lgArrLongs_ = requiredLgArrLongs; - if (wmem_ != null) { //Off heap, check if current dstMem is large enough - insertCurCount(wmem_, curCount_); - insertLgArrLongs(wmem_, lgArrLongs_); + if (wseg_ != null) { //Off heap, check if current dstSeg is large enough + insertCurCount(wseg_, curCount_); + insertLgArrLongs(wseg_, lgArrLongs_); if (requiredLgArrLongs <= maxLgArrLongs_) { - wmem_.clear(CONST_PREAMBLE_LONGS << 3, 8 << lgArrLongs_); //clear only what required + wseg_.asSlice(CONST_PREAMBLE_LONGS << 3, 8 << lgArrLongs_).fill((byte)0); } - else { //not enough space in dstMem + else { //not enough space in dstSeg final int requiredBytes = (8 << requiredLgArrLongs) + 24; final int givenBytes = (8 << priorLgArrLongs) + 24; throw new SketchesArgumentException( - "Insufficient internal Memory space: " + requiredBytes + " > " + givenBytes); + "Insufficient internal MemorySegment space: " + requiredBytes + " > " + givenBytes); } } else { //On the heap, allocate a HT @@ -303,7 +306,10 @@ else if (curCount_ > 0 && sketchInEntries > 0) { } @Override - public CompactSketch getResult(final boolean dstOrdered, final WritableMemory dstMem) { + MemorySegment getMemorySegment() { return wseg_; } + + @Override + public CompactSketch getResult(final boolean dstOrdered, final MemorySegment dstSeg) { if (curCount_ < 0) { throw new SketchesStateException( "Calling getResult() with no intervening intersections would represent the infinite set, " @@ -314,17 +320,17 @@ public CompactSketch getResult(final boolean dstOrdered, final WritableMemory ds if (curCount_ == 0) { compactCache = new long[0]; srcCompact = true; - srcOrdered = false; //hashTable, even tho empty + srcOrdered = false; //hashTable, even though empty return CompactOperations.componentsToCompact( 
thetaLong_, curCount_, seedHash_, empty_, srcCompact, srcOrdered, dstOrdered, - dstMem, compactCache); + dstSeg, compactCache); } //else curCount > 0 final long[] hashTable; - if (wmem_ != null) { + if (wseg_ != null) { final int htLen = 1 << lgArrLongs_; hashTable = new long[htLen]; - wmem_.getLongArray(CONST_PREAMBLE_LONGS << 3, hashTable, 0, htLen); + MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, hashTable, 0, htLen); } else { hashTable = hashTable_; } @@ -333,27 +339,27 @@ public CompactSketch getResult(final boolean dstOrdered, final WritableMemory ds srcOrdered = dstOrdered; return CompactOperations.componentsToCompact( thetaLong_, curCount_, seedHash_, empty_, srcCompact, srcOrdered, dstOrdered, - dstMem, compactCache); + dstSeg, compactCache); } @Override - public boolean hasMemory() { - return wmem_ != null; + public boolean hasMemorySegment() { + return wseg_ != null && wseg_.scope().isAlive(); } @Override public boolean hasResult() { - return hasMemory() ? wmem_.getInt(RETAINED_ENTRIES_INT) >= 0 : curCount_ >= 0; + return hasMemorySegment() ? wseg_.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT) >= 0 : curCount_ >= 0; } @Override public boolean isDirect() { - return hasMemory() ? wmem_.isDirect() : false; + return hasMemorySegment() && wseg_.isNative(); } @Override - public boolean isSameResource(final Memory that) { - return hasMemory() ? wmem_.isSameResource(that) : false; + public boolean isSameResource(final MemorySegment that) { + return hasMemorySegment() && Util.isSameResource(wseg_, that); } @Override @@ -366,28 +372,28 @@ public byte[] toByteArray() { final int preBytes = CONST_PREAMBLE_LONGS << 3; final int dataBytes = curCount_ > 0 ? 
8 << lgArrLongs_ : 0; final byte[] byteArrOut = new byte[preBytes + dataBytes]; - if (wmem_ != null) { - wmem_.getByteArray(0, byteArrOut, 0, preBytes + dataBytes); + if (wseg_ != null) { + MemorySegment.copy(wseg_, JAVA_BYTE, 0, byteArrOut, 0, preBytes + dataBytes); } else { - final WritableMemory memOut = WritableMemory.writableWrap(byteArrOut); + final MemorySegment segOut = MemorySegment.ofArray(byteArrOut); //preamble - memOut.putByte(PREAMBLE_LONGS_BYTE, (byte) CONST_PREAMBLE_LONGS); //RF not used = 0 - memOut.putByte(SER_VER_BYTE, (byte) SER_VER); - memOut.putByte(FAMILY_BYTE, (byte) Family.INTERSECTION.getID()); - memOut.putByte(LG_NOM_LONGS_BYTE, (byte) 0); //not used - memOut.putByte(LG_ARR_LONGS_BYTE, (byte) lgArrLongs_); - if (empty_) { memOut.setBits(FLAGS_BYTE, (byte) EMPTY_FLAG_MASK); } - else { memOut.clearBits(FLAGS_BYTE, (byte) EMPTY_FLAG_MASK); } - memOut.putShort(SEED_HASH_SHORT, seedHash_); - memOut.putInt(RETAINED_ENTRIES_INT, curCount_); - memOut.putFloat(P_FLOAT, (float) 1.0); - memOut.putLong(THETA_LONG, thetaLong_); + segOut.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) CONST_PREAMBLE_LONGS); //RF not used = 0 + segOut.set(JAVA_BYTE, SER_VER_BYTE, (byte) SER_VER); + segOut.set(JAVA_BYTE, FAMILY_BYTE, (byte) Family.INTERSECTION.getID()); + segOut.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte) 0); //not used + segOut.set(JAVA_BYTE, LG_ARR_LONGS_BYTE, (byte) lgArrLongs_); + if (empty_) { setBits(segOut, FLAGS_BYTE, (byte) EMPTY_FLAG_MASK); } + else { clearBits(segOut, FLAGS_BYTE, (byte) EMPTY_FLAG_MASK); } + segOut.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, seedHash_); + segOut.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, curCount_); + segOut.set(JAVA_FLOAT_UNALIGNED, P_FLOAT, (float) 1.0); + segOut.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong_); //data if (curCount_ > 0) { - memOut.putLongArray(preBytes, hashTable_, 0, 1 << lgArrLongs_); + MemorySegment.copy(hashTable_, 0, segOut, JAVA_LONG_UNALIGNED, preBytes, 1 << lgArrLongs_); } } return 
byteArrOut; @@ -411,13 +417,13 @@ boolean isEmpty() { @Override long[] getCache() { - if (wmem_ == null) { + if (wseg_ == null) { return hashTable_ != null ? hashTable_ : new long[0]; } - //Direct + //offHeap final int arrLongs = 1 << lgArrLongs_; final long[] outArr = new long[arrLongs]; - wmem_.getLongArray(CONST_PREAMBLE_LONGS << 3, outArr, 0, arrLongs); + MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, outArr, 0, arrLongs); return outArr; } @@ -435,10 +441,10 @@ private void performIntersect(final Sketch sketchIn) { // curCount and input data are nonzero, match against HT assert curCount_ > 0 && !empty_; final long[] hashTable; - if (wmem_ != null) { + if (wseg_ != null) { final int htLen = 1 << lgArrLongs_; hashTable = new long[htLen]; - wmem_.getLongArray(CONST_PREAMBLE_LONGS << 3, hashTable, 0, htLen); + MemorySegment.copy(wseg_, JAVA_LONG_UNALIGNED, CONST_PREAMBLE_LONGS << 3, hashTable, 0, htLen); } else { hashTable = hashTable_; } @@ -462,10 +468,10 @@ private void performIntersect(final Sketch sketchIn) { //reduce effective array size to minimum curCount_ = matchSetCount; lgArrLongs_ = minLgHashTableSize(matchSetCount, ThetaUtil.REBUILD_THRESHOLD); - if (wmem_ != null) { - insertCurCount(wmem_, matchSetCount); - insertLgArrLongs(wmem_, lgArrLongs_); - wmem_.clear(CONST_PREAMBLE_LONGS << 3, 8 << lgArrLongs_); //clear for rebuild + if (wseg_ != null) { + insertCurCount(wseg_, matchSetCount); + insertLgArrLongs(wseg_, lgArrLongs_); + wseg_.asSlice(CONST_PREAMBLE_LONGS << 3, 8 << lgArrLongs_).fill((byte)0); //clear for rebuild } else { Arrays.fill(hashTable_, 0, 1 << lgArrLongs_, 0L); //clear for rebuild } @@ -482,14 +488,14 @@ private void performIntersect(final Sketch sketchIn) { private void moveDataToTgt(final long[] arr, final int count) { final int arrLongsIn = arr.length; int tmpCnt = 0; - if (wmem_ != null) { //Off Heap puts directly into mem + if (wseg_ != null) { //Off Heap puts directly into mem final int preBytes = 
CONST_PREAMBLE_LONGS << 3; final int lgArrLongs = lgArrLongs_; final long thetaLong = thetaLong_; for (int i = 0; i < arrLongsIn; i++ ) { final long hashIn = arr[i]; if (continueCondition(thetaLong, hashIn)) { continue; } - hashInsertOnlyMemory(wmem_, lgArrLongs, hashIn, preBytes); + hashInsertOnlyMemorySegment(wseg_, lgArrLongs, hashIn, preBytes); tmpCnt++; } } else { //On Heap. Assumes HT exists and is large enough @@ -506,7 +512,7 @@ private void moveDataToTgt(final long[] arr, final int count) { private void moveDataToTgt(final Sketch sketch) { final int count = sketch.getRetainedEntries(); int tmpCnt = 0; - if (wmem_ != null) { //Off Heap puts directly into mem + if (wseg_ != null) { //Off Heap puts directly into mem final int preBytes = CONST_PREAMBLE_LONGS << 3; final int lgArrLongs = lgArrLongs_; final long thetaLong = thetaLong_; @@ -514,7 +520,7 @@ private void moveDataToTgt(final Sketch sketch) { while (it.next()) { final long hash = it.get(); if (continueCondition(thetaLong, hash)) { continue; } - hashInsertOnlyMemory(wmem_, lgArrLongs, hash, preBytes); + hashInsertOnlyMemorySegment(wseg_, lgArrLongs, hash, preBytes); tmpCnt++; } } else { //On Heap. 
Assumes HT exists and is large enough @@ -531,9 +537,9 @@ private void moveDataToTgt(final Sketch sketch) { private void hardReset() { resetCommon(); - if (wmem_ != null) { - insertCurCount(wmem_, -1); //Universal Set - clearEmpty(wmem_); //false + if (wseg_ != null) { + insertCurCount(wseg_, -1); //Universal Set + clearEmpty(wseg_); //false } curCount_ = -1; //Universal Set empty_ = false; @@ -541,20 +547,20 @@ private void hardReset() { private void resetToEmpty() { resetCommon(); - if (wmem_ != null) { - insertCurCount(wmem_, 0); - setEmpty(wmem_); //true + if (wseg_ != null) { + insertCurCount(wseg_, 0); + setEmpty(wseg_); //true } curCount_ = 0; empty_ = true; } private void resetCommon() { - if (wmem_ != null) { + if (wseg_ != null) { if (readOnly_) { throw new SketchesReadOnlyException(); } - wmem_.clear(CONST_PREAMBLE_LONGS << 3, 8 << ThetaUtil.MIN_LG_ARR_LONGS); - insertLgArrLongs(wmem_, ThetaUtil.MIN_LG_ARR_LONGS); - insertThetaLong(wmem_, Long.MAX_VALUE); + wseg_.asSlice(CONST_PREAMBLE_LONGS << 3, 8 << ThetaUtil.MIN_LG_ARR_LONGS).fill((byte)0); + insertLgArrLongs(wseg_, ThetaUtil.MIN_LG_ARR_LONGS); + insertThetaLong(wseg_, Long.MAX_VALUE); } lgArrLongs_ = ThetaUtil.MIN_LG_ARR_LONGS; thetaLong_ = Long.MAX_VALUE; diff --git a/src/main/java/org/apache/datasketches/theta/JaccardSimilarity.java b/src/main/java/org/apache/datasketches/theta/JaccardSimilarity.java index 1f9e710c0..a058f649a 100644 --- a/src/main/java/org/apache/datasketches/theta/JaccardSimilarity.java +++ b/src/main/java/org/apache/datasketches/theta/JaccardSimilarity.java @@ -37,6 +37,8 @@ public final class JaccardSimilarity { private static final double[] ZEROS = {0.0, 0.0, 0.0}; // LB, Estimate, UB private static final double[] ONES = {1.0, 1.0, 1.0}; + private JaccardSimilarity() { } + /** * Computes the Jaccard similarity index with upper and lower bounds. 
The Jaccard similarity index * J(A,B) = (A ^ B)/(A U B) is used to measure how similar the two sketches are to each diff --git a/src/main/java/org/apache/datasketches/theta/MemoryCompactCompressedHashIterator.java b/src/main/java/org/apache/datasketches/theta/MemorySegmentCompactCompressedHashIterator.java similarity index 72% rename from src/main/java/org/apache/datasketches/theta/MemoryCompactCompressedHashIterator.java rename to src/main/java/org/apache/datasketches/theta/MemorySegmentCompactCompressedHashIterator.java index d5f37de96..031cee687 100644 --- a/src/main/java/org/apache/datasketches/theta/MemoryCompactCompressedHashIterator.java +++ b/src/main/java/org/apache/datasketches/theta/MemorySegmentCompactCompressedHashIterator.java @@ -19,15 +19,19 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; import static org.apache.datasketches.theta.PreambleUtil.wholeBytesToHoldBits; -import org.apache.datasketches.memory.Memory; +import java.lang.foreign.MemorySegment; + +import org.apache.datasketches.common.MemorySegmentStatus; +import org.apache.datasketches.common.Util; /* * This is to uncompress serial version 4 sketch incrementally */ -class MemoryCompactCompressedHashIterator implements HashIterator { - private Memory mem; +final class MemorySegmentCompactCompressedHashIterator implements HashIterator, MemorySegmentStatus { + private MemorySegment seg; private int offset; private int entryBits; private int numEntries; @@ -39,13 +43,12 @@ class MemoryCompactCompressedHashIterator implements HashIterator { private boolean isBlockMode; private boolean isFirstUnpack1; - MemoryCompactCompressedHashIterator( - final Memory mem, + MemorySegmentCompactCompressedHashIterator( + final MemorySegment srcSeg, final int offset, final int entryBits, - final int numEntries - ) { - this.mem = mem; + final int numEntries) { + this.seg = srcSeg; this.offset = offset; this.entryBits = entryBits; this.numEntries = numEntries; @@ 
-63,6 +66,21 @@ public long get() { return buffer[index & 7]; } + @Override + public boolean hasMemorySegment() { + return seg != null && seg.scope().isAlive(); + } + + @Override + public boolean isDirect() { + return hasMemorySegment() && seg.isNative(); + } + + @Override + public boolean isSameResource(final MemorySegment that) { + return hasMemorySegment() && Util.isSameResource(seg, that); + } + @Override public boolean next() { if (++index == numEntries) { return false; } @@ -83,7 +101,7 @@ public boolean next() { private void unpack1() { if (isFirstUnpack1) { - mem.getByteArray(offset, bytes, 0, wholeBytesToHoldBits((numEntries - index) * entryBits)); + MemorySegment.copy(seg, JAVA_BYTE, offset, bytes, 0, wholeBytesToHoldBits((numEntries - index) * entryBits)); offset = 0; isFirstUnpack1 = false; } @@ -96,7 +114,7 @@ private void unpack1() { } private void unpack8() { - mem.getByteArray(offset, bytes, 0, entryBits); + MemorySegment.copy(seg, JAVA_BYTE, offset, bytes, 0, entryBits); BitPacking.unpackBitsBlock8(buffer, 0, bytes, 0, entryBits); offset += entryBits; for (int i = 0; i < 8; i++) { diff --git a/src/main/java/org/apache/datasketches/theta/MemoryHashIterator.java b/src/main/java/org/apache/datasketches/theta/MemorySegmentHashIterator.java similarity index 74% rename from src/main/java/org/apache/datasketches/theta/MemoryHashIterator.java rename to src/main/java/org/apache/datasketches/theta/MemorySegmentHashIterator.java index 926d0ad9e..548c79ef3 100644 --- a/src/main/java/org/apache/datasketches/theta/MemoryHashIterator.java +++ b/src/main/java/org/apache/datasketches/theta/MemorySegmentHashIterator.java @@ -19,24 +19,26 @@ package org.apache.datasketches.theta; -import org.apache.datasketches.memory.Memory; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; + +import java.lang.foreign.MemorySegment; /** * @author Lee Rhodes */ -class MemoryHashIterator implements HashIterator { - private Memory mem; +final class 
MemorySegmentHashIterator implements HashIterator { + private MemorySegment seg; private int arrLongs; private long thetaLong; private long offsetBytes; private int index; private long hash; - MemoryHashIterator(final Memory mem, final int arrLongs, final long thetaLong) { - this.mem = mem; + MemorySegmentHashIterator(final MemorySegment srcSeg, final int arrLongs, final long thetaLong) { + this.seg = srcSeg; this.arrLongs = arrLongs; this.thetaLong = thetaLong; - offsetBytes = PreambleUtil.extractPreLongs(mem) << 3; + offsetBytes = PreambleUtil.extractPreLongs(srcSeg) << 3; index = -1; hash = 0; } @@ -49,7 +51,7 @@ public long get() { @Override public boolean next() { while (++index < arrLongs) { - hash = mem.getLong(offsetBytes + (index << 3)); + hash = seg.get(JAVA_LONG_UNALIGNED, offsetBytes + (index << 3)); if ((hash != 0) && (hash < thetaLong)) { return true; } diff --git a/src/main/java/org/apache/datasketches/theta/PreambleUtil.java b/src/main/java/org/apache/datasketches/theta/PreambleUtil.java index ec0bc1268..ed368bd8b 100644 --- a/src/main/java/org/apache/datasketches/theta/PreambleUtil.java +++ b/src/main/java/org/apache/datasketches/theta/PreambleUtil.java @@ -19,18 +19,21 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static java.lang.foreign.ValueLayout.JAVA_SHORT_UNALIGNED; import static org.apache.datasketches.common.Util.LS; import static org.apache.datasketches.common.Util.zeroPad; +import java.lang.foreign.MemorySegment; import java.nio.ByteOrder; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import 
org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; //@formatter:off @@ -198,14 +201,13 @@ private PreambleUtil() {} (ByteOrder.nativeOrder() == ByteOrder.BIG_ENDIAN); /** - * Computes the number of bytes required for a non-full sized sketch in hash-table form. - * This can be used to compute current storage size for heap sketches, or current off-heap memory - * required for off-heap (direct) sketches. This does not apply for compact sketches. + * Computes the number of bytes required for an updatable sketch using a hash-table cache. + * This does not apply for compact sketches. * @param lgArrLongs log2(current hash-table size) * @param preambleLongs current preamble size * @return the size in bytes */ - static final int getMemBytes(final int lgArrLongs, final int preambleLongs) { + static final int getSegBytes(final int lgArrLongs, final int preambleLongs) { return (8 << lgArrLongs) + (preambleLongs << 3); } @@ -219,31 +221,31 @@ static final int getMemBytes(final int lgArrLongs, final int preambleLongs) { * @return the summary preamble string. */ static String preambleToString(final byte[] byteArr) { - final Memory mem = Memory.wrap(byteArr); - return preambleToString(mem); + final MemorySegment seg = MemorySegment.ofArray(byteArr); + return preambleToString(seg); } /** - * Returns a human readable string summary of the preamble state of the given Memory. - * Note: other than making sure that the given Memory size is large + * Returns a human readable string summary of the preamble state of the given MemorySegment. + * Note: other than making sure that the given MemorySegment size is large * enough for just the preamble, this does not do much value checking of the contents of the * preamble as this is primarily a tool for debugging the preamble visually. * - * @param mem the given Memory. + * @param seg the given MemorySegment. * @return the summary preamble string. 
*/ - static String preambleToString(final Memory mem) { - final int preLongs = getAndCheckPreLongs(mem); - final int rfId = extractLgResizeFactor(mem); + static String preambleToString(final MemorySegment seg) { + final int preLongs = getAndCheckPreLongs(seg); + final int rfId = extractLgResizeFactor(seg); final ResizeFactor rf = ResizeFactor.getRF(rfId); - final int serVer = extractSerVer(mem); - final int familyId = extractFamilyID(mem); + final int serVer = extractSerVer(seg); + final int familyId = extractFamilyID(seg); final Family family = Family.idToFamily(familyId); - final int lgNomLongs = extractLgNomLongs(mem); - final int lgArrLongs = extractLgArrLongs(mem); + final int lgNomLongs = extractLgNomLongs(seg); + final int lgArrLongs = extractLgArrLongs(seg); //Flags - final int flags = extractFlags(mem); + final int flags = extractFlags(seg); final String flagsStr = (flags) + ", 0x" + (Integer.toHexString(flags)) + ", " + zeroPad(Integer.toBinaryString(flags), 8); final String nativeOrder = ByteOrder.nativeOrder().toString(); @@ -254,7 +256,7 @@ static String preambleToString(final Memory mem) { final boolean ordered = (flags & ORDERED_FLAG_MASK) > 0; final boolean singleItem = (flags & SINGLEITEM_FLAG_MASK) > 0; //!empty && (preLongs == 1); - final int seedHash = extractSeedHash(mem); + final int seedHash = extractSeedHash(seg); //assumes preLongs == 1; empty or singleItem int curCount = singleItem ? 
1 : 0; @@ -263,20 +265,20 @@ static String preambleToString(final Memory mem) { long thetaULong = thetaLong; //preLongs 1, 2 or 3 if (preLongs == 2) { //exact (non-estimating) CompactSketch - curCount = extractCurCount(mem); - p = extractP(mem); + curCount = extractCurCount(seg); + p = extractP(seg); } else if (preLongs == 3) { //Update Sketch - curCount = extractCurCount(mem); - p = extractP(mem); - thetaLong = extractThetaLong(mem); + curCount = extractCurCount(seg); + p = extractP(seg); + thetaLong = extractThetaLong(seg); thetaULong = thetaLong; } else if (preLongs == 4) { //Union - curCount = extractCurCount(mem); - p = extractP(mem); - thetaLong = extractThetaLong(mem); - thetaULong = extractUnionThetaLong(mem); + curCount = extractCurCount(seg); + p = extractP(seg); + thetaLong = extractThetaLong(seg); + thetaULong = extractUnionThetaLong(seg); } //else the same as an empty sketch or singleItem @@ -340,171 +342,171 @@ else if (preLongs == 3) { sb.append( "Preamble Bytes : ").append(preLongs * 8).append(LS); sb.append( "Data Bytes : ").append(curCount * 8).append(LS); sb.append( "TOTAL Sketch Bytes : ").append((preLongs + curCount) * 8).append(LS); - sb.append( "TOTAL Capacity Bytes : ").append(mem.getCapacity()).append(LS); + sb.append( "TOTAL Capacity Bytes : ").append(seg.byteSize()).append(LS); sb.append("### END SKETCH PREAMBLE SUMMARY").append(LS); return sb.toString(); } //@formatter:on - static int extractPreLongs(final Memory mem) { - return mem.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; + static int extractPreLongs(final MemorySegment seg) { + return seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; } - static int extractLgResizeFactor(final Memory mem) { - return (mem.getByte(PREAMBLE_LONGS_BYTE) >>> LG_RESIZE_FACTOR_BIT) & 0X3; + static int extractLgResizeFactor(final MemorySegment seg) { + return (seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) >>> LG_RESIZE_FACTOR_BIT) & 0X3; } - static int extractLgResizeRatioV1(final Memory mem) { - return 
mem.getByte(LG_RESIZE_RATIO_BYTE_V1) & 0X3; + static int extractLgResizeRatioV1(final MemorySegment seg) { + return seg.get(JAVA_BYTE, LG_RESIZE_RATIO_BYTE_V1) & 0X3; } - static int extractSerVer(final Memory mem) { - return mem.getByte(SER_VER_BYTE) & 0XFF; + static int extractSerVer(final MemorySegment seg) { + return seg.get(JAVA_BYTE, SER_VER_BYTE) & 0XFF; } - static int extractFamilyID(final Memory mem) { - return mem.getByte(FAMILY_BYTE) & 0XFF; + static int extractFamilyID(final MemorySegment seg) { + return seg.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF; } - static int extractLgNomLongs(final Memory mem) { - return mem.getByte(LG_NOM_LONGS_BYTE) & 0XFF; + static int extractLgNomLongs(final MemorySegment seg) { + return seg.get(JAVA_BYTE, LG_NOM_LONGS_BYTE) & 0XFF; } - static int extractLgArrLongs(final Memory mem) { - return mem.getByte(LG_ARR_LONGS_BYTE) & 0XFF; + static int extractLgArrLongs(final MemorySegment seg) { + return seg.get(JAVA_BYTE, LG_ARR_LONGS_BYTE) & 0XFF; } - static int extractFlags(final Memory mem) { - return mem.getByte(FLAGS_BYTE) & 0XFF; + static int extractFlags(final MemorySegment seg) { + return seg.get(JAVA_BYTE, FLAGS_BYTE) & 0XFF; } - static int extractFlagsV1(final Memory mem) { - return mem.getByte(FLAGS_BYTE_V1) & 0XFF; + static int extractFlagsV1(final MemorySegment seg) { + return seg.get(JAVA_BYTE, FLAGS_BYTE_V1) & 0XFF; } - static int extractSeedHash(final Memory mem) { - return mem.getShort(SEED_HASH_SHORT) & 0XFFFF; + static int extractSeedHash(final MemorySegment seg) { + return seg.get(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT) & 0XFFFF; } - static int extractCurCount(final Memory mem) { - return mem.getInt(RETAINED_ENTRIES_INT); + static int extractCurCount(final MemorySegment seg) { + return seg.get(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT); } - static float extractP(final Memory mem) { - return mem.getFloat(P_FLOAT); + static float extractP(final MemorySegment seg) { + return seg.get(JAVA_FLOAT_UNALIGNED, P_FLOAT); } - static 
long extractThetaLong(final Memory mem) { - return mem.getLong(THETA_LONG); + static long extractThetaLong(final MemorySegment seg) { + return seg.get(JAVA_LONG_UNALIGNED, THETA_LONG); } - static long extractUnionThetaLong(final Memory mem) { - return mem.getLong(UNION_THETA_LONG); + static long extractUnionThetaLong(final MemorySegment seg) { + return seg.get(JAVA_LONG_UNALIGNED, UNION_THETA_LONG); } - static int extractEntryBitsV4(final Memory mem) { - return mem.getByte(ENTRY_BITS_BYTE_V4) & 0XFF; + static int extractEntryBitsV4(final MemorySegment seg) { + return seg.get(JAVA_BYTE, ENTRY_BITS_BYTE_V4) & 0XFF; } - static int extractNumEntriesBytesV4(final Memory mem) { - return mem.getByte(NUM_ENTRIES_BYTES_BYTE_V4) & 0XFF; + static int extractNumEntriesBytesV4(final MemorySegment seg) { + return seg.get(JAVA_BYTE, NUM_ENTRIES_BYTES_BYTE_V4) & 0XFF; } - static long extractThetaLongV4(final Memory mem) { - return mem.getLong(THETA_LONG_V4); + static long extractThetaLongV4(final MemorySegment seg) { + return seg.get(JAVA_LONG_UNALIGNED, THETA_LONG_V4); } /** * Sets PreLongs in the low 6 bits and sets LgRF in the upper 2 bits = 0. - * @param wmem the target WritableMemory + * @param seg the target MemorySegment * @param preLongs the given number of preamble longs */ - static void insertPreLongs(final WritableMemory wmem, final int preLongs) { - wmem.putByte(PREAMBLE_LONGS_BYTE, (byte) (preLongs & 0X3F)); + static void insertPreLongs(final MemorySegment seg, final int preLongs) { + seg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, (byte) (preLongs & 0X3F)); } /** * Sets the top 2 lgRF bits and does not affect the lower 6 bits (PreLongs). * To work properly, this should be called after insertPreLongs(). 
- * @param wmem the target WritableMemory + * @param seg the target MemorySegment * @param rf the given lgRF bits */ - static void insertLgResizeFactor(final WritableMemory wmem, final int rf) { - final int curByte = wmem.getByte(PREAMBLE_LONGS_BYTE) & 0xFF; + static void insertLgResizeFactor(final MemorySegment seg, final int rf) { + final int curByte = seg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0xFF; final int shift = LG_RESIZE_FACTOR_BIT; // shift in bits final int mask = 3; final byte newByte = (byte) (((rf & mask) << shift) | (~(mask << shift) & curByte)); - wmem.putByte(PREAMBLE_LONGS_BYTE, newByte); + seg.set(JAVA_BYTE, PREAMBLE_LONGS_BYTE, newByte); } - static void insertSerVer(final WritableMemory wmem, final int serVer) { - wmem.putByte(SER_VER_BYTE, (byte) serVer); + static void insertSerVer(final MemorySegment seg, final int serVer) { + seg.set(JAVA_BYTE, SER_VER_BYTE, (byte) serVer); } - static void insertFamilyID(final WritableMemory wmem, final int famId) { - wmem.putByte(FAMILY_BYTE, (byte) famId); + static void insertFamilyID(final MemorySegment seg, final int famId) { + seg.set(JAVA_BYTE, FAMILY_BYTE, (byte) famId); } - static void insertLgNomLongs(final WritableMemory wmem, final int lgNomLongs) { - wmem.putByte(LG_NOM_LONGS_BYTE, (byte) lgNomLongs); + static void insertLgNomLongs(final MemorySegment seg, final int lgNomLongs) { + seg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte) lgNomLongs); } - static void insertLgArrLongs(final WritableMemory wmem, final int lgArrLongs) { - wmem.putByte(LG_ARR_LONGS_BYTE, (byte) lgArrLongs); + static void insertLgArrLongs(final MemorySegment seg, final int lgArrLongs) { + seg.set(JAVA_BYTE, LG_ARR_LONGS_BYTE, (byte) lgArrLongs); } - static void insertFlags(final WritableMemory wmem, final int flags) { - wmem.putByte(FLAGS_BYTE, (byte) flags); + static void insertFlags(final MemorySegment seg, final int flags) { + seg.set(JAVA_BYTE, FLAGS_BYTE, (byte) flags); } - static void insertSeedHash(final WritableMemory wmem, 
final int seedHash) { - wmem.putShort(SEED_HASH_SHORT, (short) seedHash); + static void insertSeedHash(final MemorySegment seg, final int seedHash) { + seg.set(JAVA_SHORT_UNALIGNED, SEED_HASH_SHORT, (short) seedHash); } - static void insertCurCount(final WritableMemory wmem, final int curCount) { - wmem.putInt(RETAINED_ENTRIES_INT, curCount); + static void insertCurCount(final MemorySegment seg, final int curCount) { + seg.set(JAVA_INT_UNALIGNED, RETAINED_ENTRIES_INT, curCount); } - static void insertP(final WritableMemory wmem, final float p) { - wmem.putFloat(P_FLOAT, p); + static void insertP(final MemorySegment seg, final float p) { + seg.set(JAVA_FLOAT_UNALIGNED, P_FLOAT, p); } - static void insertThetaLong(final WritableMemory wmem, final long thetaLong) { - wmem.putLong(THETA_LONG, thetaLong); + static void insertThetaLong(final MemorySegment seg, final long thetaLong) { + seg.set(JAVA_LONG_UNALIGNED, THETA_LONG, thetaLong); } - static void insertUnionThetaLong(final WritableMemory wmem, final long unionThetaLong) { - wmem.putLong(UNION_THETA_LONG, unionThetaLong); + static void insertUnionThetaLong(final MemorySegment seg, final long unionThetaLong) { + seg.set(JAVA_LONG_UNALIGNED, UNION_THETA_LONG, unionThetaLong); } - static void setEmpty(final WritableMemory wmem) { - int flags = wmem.getByte(FLAGS_BYTE) & 0XFF; + static void setEmpty(final MemorySegment seg) { + int flags = seg.get(JAVA_BYTE, FLAGS_BYTE) & 0XFF; flags |= EMPTY_FLAG_MASK; - wmem.putByte(FLAGS_BYTE, (byte) flags); + seg.set(JAVA_BYTE, FLAGS_BYTE, (byte) flags); } - static void clearEmpty(final WritableMemory wmem) { - int flags = wmem.getByte(FLAGS_BYTE) & 0XFF; + static void clearEmpty(final MemorySegment seg) { + int flags = seg.get(JAVA_BYTE, FLAGS_BYTE) & 0XFF; flags &= ~EMPTY_FLAG_MASK; - wmem.putByte(FLAGS_BYTE, (byte) flags); + seg.set(JAVA_BYTE, FLAGS_BYTE, (byte) flags); } - static boolean isEmptyFlag(final Memory mem) { - return ((extractFlags(mem) & EMPTY_FLAG_MASK) > 0); + 
static boolean isEmptyFlag(final MemorySegment seg) { + return ((extractFlags(seg) & EMPTY_FLAG_MASK) > 0); } /** - * Checks Memory for capacity to hold the preamble and returns the extracted preLongs. - * @param mem the given Memory + * Checks MemorySegment for capacity to hold the preamble and returns the extracted preLongs. + * @param seg the given MemorySegment * @return the extracted prelongs value. */ - static int getAndCheckPreLongs(final Memory mem) { - final long cap = mem.getCapacity(); + static int getAndCheckPreLongs(final MemorySegment seg) { + final long cap = seg.byteSize(); if (cap < 8) { throwNotBigEnough(cap, 8); } - final int preLongs = extractPreLongs(mem); + final int preLongs = extractPreLongs(seg); final int required = Math.max(preLongs << 3, 8); if (cap < required) { throwNotBigEnough(cap, required); @@ -512,15 +514,15 @@ static int getAndCheckPreLongs(final Memory mem) { return preLongs; } - static final short checkMemorySeedHash(final Memory mem, final long seed) { - final short seedHashMem = (short) extractSeedHash(mem); - ThetaUtil.checkSeedHashes(seedHashMem, ThetaUtil.computeSeedHash(seed)); //throws if bad seedHash - return seedHashMem; + static final short checkSegmentSeedHash(final MemorySegment seg, final long seed) { + final short seedHashSeg = (short) extractSeedHash(seg); + Util.checkSeedHashes(seedHashSeg, Util.computeSeedHash(seed)); //throws if bad seedHash + return seedHashSeg; } private static void throwNotBigEnough(final long cap, final int required) { throw new SketchesArgumentException( - "Possible Corruption: Size of byte array or Memory not large enough: Size: " + cap + "Possible Corruption: Size of byte array or MemorySegment not large enough: Size: " + cap + ", Required: " + required); } diff --git a/src/main/java/org/apache/datasketches/theta/Rebuilder.java b/src/main/java/org/apache/datasketches/theta/Rebuilder.java index b6e3de342..54b1e190b 100644 --- a/src/main/java/org/apache/datasketches/theta/Rebuilder.java 
+++ b/src/main/java/org/apache/datasketches/theta/Rebuilder.java @@ -19,6 +19,9 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static org.apache.datasketches.common.QuickSelect.selectExcludingZeros; import static org.apache.datasketches.theta.PreambleUtil.LG_ARR_LONGS_BYTE; import static org.apache.datasketches.theta.PreambleUtil.extractCurCount; import static org.apache.datasketches.theta.PreambleUtil.extractLgArrLongs; @@ -26,16 +29,19 @@ import static org.apache.datasketches.theta.PreambleUtil.insertCurCount; import static org.apache.datasketches.theta.PreambleUtil.insertLgArrLongs; import static org.apache.datasketches.theta.PreambleUtil.insertThetaLong; -import static org.apache.datasketches.thetacommon.QuickSelect.selectExcludingZeros; + +import java.lang.foreign.MemorySegment; import org.apache.datasketches.common.Util; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.HashOperations; /** - * Handles common resize, rebuild and move operations. - * The Memory based operations assume a specific data structure that is unique to the theta sketches. + * This class performs resize, rebuild and move operations where the input and output are Theta sketch images in MemorySegments. + * + *NOTE: These operations copy data from the input MemorySegment into local arrays, perform the required operations on the + * arrays, and then copy the result to the destination MemorySegment. Attempting to perform these operations directly on the + * MemorySegments would be slower due to MemorySegment internal checks. Meanwhile, the bulk copies performed by the MemorySegments are + * vectorized at the machine level and are quite fast. Measurements reveal that this is a good tradeoff.
* * @author Lee Rhodes */ @@ -44,73 +50,63 @@ final class Rebuilder { private Rebuilder() {} /** - * Rebuild the hashTable in the given Memory at its current size. Changes theta and thus count. - * This assumes a Memory preamble of standard form with correct values of curCount and thetaLong. + * Rebuild the hashTable in the given MemorySegment at its current size. Changes theta and thus count. + * This assumes a MemorySegment preamble of standard form with correct values of curCount and thetaLong. * ThetaLong and curCount will change. - * Afterwards, caller must update local class members curCount and thetaLong from Memory. + * Afterwards, caller must update local class members curCount and thetaLong from MemorySegment. * - * @param mem the Memory the given Memory + * @param seg the given MemorySegment * @param preambleLongs size of preamble in longs * @param lgNomLongs the log_base2 of k, the configuration parameter of the sketch */ - static final void quickSelectAndRebuild(final WritableMemory mem, final int preambleLongs, - final int lgNomLongs) { - //Note: This copies the Memory data onto the heap and then at the end copies the result - // back to Memory. Even if we tried to do this directly into Memory it would require pre-clearing, - // and the internal loops would be slower. The bulk copies are performed at a low level and - // are quite fast. Measurements reveal that we are not paying much of a penalty. 
- - //Pull data into tmp arr for QS algo - final int lgArrLongs = extractLgArrLongs(mem); - final int curCount = extractCurCount(mem); + static final void quickSelectAndRebuild(final MemorySegment seg, final int preambleLongs, final int lgNomLongs) { + + //Copy data from input segment into local buffer array for QS algorithm + final int lgArrLongs = extractLgArrLongs(seg); final int arrLongs = 1 << lgArrLongs; final long[] tmpArr = new long[arrLongs]; final int preBytes = preambleLongs << 3; - mem.getLongArray(preBytes, tmpArr, 0, arrLongs); //copy mem data to tmpArr + MemorySegment.copy(seg, JAVA_LONG_UNALIGNED, preBytes, tmpArr, 0, arrLongs); //Do the QuickSelect on a tmp arr to create new thetaLong final int pivot = (1 << lgNomLongs) + 1; // (K+1) pivot for QS - final long newThetaLong = selectExcludingZeros(tmpArr, curCount, pivot); - insertThetaLong(mem, newThetaLong); //UPDATE thetalong + final long newThetaLong = selectExcludingZeros(tmpArr, extractCurCount(seg), pivot); + insertThetaLong(seg, newThetaLong); //UPDATE thetaLong //Rebuild to clean up dirty data, update count final long[] tgtArr = new long[arrLongs]; final int newCurCount = HashOperations.hashArrayInsert(tmpArr, tgtArr, lgArrLongs, newThetaLong); - insertCurCount(mem, newCurCount); //UPDATE curCount + insertCurCount(seg, newCurCount); //UPDATE curCount - //put the rebuilt array back into memory - mem.putLongArray(preBytes, tgtArr, 0, arrLongs); + //put the rebuilt array back into MemorySegment + MemorySegment.copy(tgtArr, 0, seg, JAVA_LONG_UNALIGNED, preBytes, arrLongs); } /** - * Moves me (the entire updatable sketch) to a new larger Memory location and rebuilds the hash table. - * This assumes a Memory preamble of standard form with the correct value of thetaLong. - * Afterwards, the caller must update the local Memory reference, lgArrLongs - * and hashTableThreshold from the dstMemory and free the source Memory. 
+ * Moves me (the entire updatable sketch) to a new larger MemorySegment location and rebuilds the hash table. + * This assumes a MemorySegment preamble of standard form with the correct value of thetaLong. + * Afterwards, the caller must update the local MemorySegment reference, lgArrLongs + * and hashTableThreshold from the destination MemorySegment and free the source MemorySegment. * - * @param srcMem the source Memory + * @param srcSeg the source MemorySegment * @param preambleLongs size of preamble in longs * @param srcLgArrLongs size (log_base2) of source hash table - * @param dstMem the destination Memory, which may be garbage + * @param dstSeg the destination MemorySegment, which may be garbage * @param dstLgArrLongs the destination hash table target size * @param thetaLong theta as a long */ - static final void moveAndResize(final Memory srcMem, final int preambleLongs, - final int srcLgArrLongs, final WritableMemory dstMem, final int dstLgArrLongs, final long thetaLong) { - //Note: This copies the Memory data onto the heap and then at the end copies the result - // back to Memory. Even if we tried to do this directly into Memory it would require pre-clearing, - // and the internal loops would be slower. The bulk copies are performed at a low level and - // are quite fast. Measurements reveal that we are not paying much of a penalty. 
- - //Move Preamble to destination memory + static final void moveAndResize(final MemorySegment srcSeg, final int preambleLongs, + final int srcLgArrLongs, final MemorySegment dstSeg, final int dstLgArrLongs, final long thetaLong) { + + //Move Preamble to destination MemorySegment final int preBytes = preambleLongs << 3; - srcMem.copyTo(0, dstMem, 0, preBytes); //copy the preamble + MemorySegment.copy(srcSeg, 0, dstSeg, 0, preBytes); - //Bulk copy source to on-heap buffer + //Bulk copy source Hash Table to local buffer array final int srcHTLen = 1 << srcLgArrLongs; final long[] srcHTArr = new long[srcHTLen]; - srcMem.getLongArray(preBytes, srcHTArr, 0, srcHTLen); + MemorySegment.copy(srcSeg, JAVA_LONG_UNALIGNED, preBytes, srcHTArr, 0, srcHTLen); //Create destination buffer final int dstHTLen = 1 << dstLgArrLongs; @@ -119,51 +115,51 @@ static final void moveAndResize(final Memory srcMem, final int preambleLongs, //Rebuild hash table in destination buffer HashOperations.hashArrayInsert(srcHTArr, dstHTArr, dstLgArrLongs, thetaLong); - //Bulk copy to destination memory - dstMem.putLongArray(preBytes, dstHTArr, 0, dstHTLen); - dstMem.putByte(LG_ARR_LONGS_BYTE, (byte)dstLgArrLongs); //update in dstMem + //Bulk copy to destination MemorySegment + MemorySegment.copy(dstHTArr, 0, dstSeg, JAVA_LONG_UNALIGNED, preBytes, dstHTLen); + dstSeg.set(JAVA_BYTE, LG_ARR_LONGS_BYTE, (byte)dstLgArrLongs); //update lgArrLongs in dstSeg } /** - * Resizes existing hash array into a larger one within a single Memory assuming enough space. - * This assumes a Memory preamble of standard form with the correct value of thetaLong. - * The Memory lgArrLongs will change. - * Afterwards, the caller must update local copies of lgArrLongs and hashTableThreshold from - * Memory. + * Resizes existing hash array into a larger one within a single MemorySegment, assuming enough space. + * This assumes a preamble of standard form with the correct value of thetaLong. + * The lgArrLongs will change. 
+ * Afterwards, the caller must update the caller's local copies of lgArrLongs and hashTableThreshold + * from the returned MemorySegment. * - * @param mem the Memory + * @param seg the source and destination MemorySegment * @param preambleLongs the size of the preamble in longs * @param srcLgArrLongs the size of the source hash table * @param tgtLgArrLongs the LgArrLongs value for the new hash table */ - static final void resize(final WritableMemory mem, final int preambleLongs, + static final void resize(final MemorySegment seg, final int preambleLongs, final int srcLgArrLongs, final int tgtLgArrLongs) { - //Note: This copies the Memory data onto the heap and then at the end copies the result - // back to Memory. Even if we tried to do this directly into Memory it would require pre-clearing, - // and the internal loops would be slower. The bulk copies are performed at a low level and - // are quite fast. Measurements reveal that we are not paying much of a penalty. //Preamble stays in place final int preBytes = preambleLongs << 3; + //Bulk copy source to on-heap buffer final int srcHTLen = 1 << srcLgArrLongs; //current value final long[] srcHTArr = new long[srcHTLen]; //on-heap src buffer - mem.getLongArray(preBytes, srcHTArr, 0, srcHTLen); + //seg.getLongArray(preBytes, srcHTArr, 0, srcHTLen); + MemorySegment.copy(seg, JAVA_LONG_UNALIGNED, preBytes, srcHTArr, 0, srcHTLen); + //Create destination on-heap buffer final int dstHTLen = 1 << tgtLgArrLongs; final long[] dstHTArr = new long[dstHTLen]; //on-heap dst buffer + //Rebuild hash table in destination buffer - final long thetaLong = extractThetaLong(mem); - HashOperations.hashArrayInsert(srcHTArr, dstHTArr, tgtLgArrLongs, thetaLong); - //Bulk copy to destination memory - mem.putLongArray(preBytes, dstHTArr, 0, dstHTLen); //put it back, no need to clear - insertLgArrLongs(mem, tgtLgArrLongs); //update in mem + HashOperations.hashArrayInsert(srcHTArr, dstHTArr, tgtLgArrLongs, extractThetaLong(seg)); + + //Bulk 
copy to destination segment + MemorySegment.copy(dstHTArr, 0, seg, JAVA_LONG_UNALIGNED, preBytes, dstHTLen); + insertLgArrLongs(seg, tgtLgArrLongs); //update in seg } /** * Returns the actual log2 Resize Factor that can be used to grow the hash table. This will be * an integer value between zero and the given lgRF, inclusive; - * @param capBytes the current memory capacity in bytes + * @param capBytes the current MemorySegment capacity in bytes * @param lgArrLongs the current lg hash table size in longs * @param preLongs the current preamble size in longs * @param lgRF the configured lg Resize Factor diff --git a/src/main/java/org/apache/datasketches/theta/SetOperation.java b/src/main/java/org/apache/datasketches/theta/SetOperation.java index c198dceb7..126a9298a 100644 --- a/src/main/java/org/apache/datasketches/theta/SetOperation.java +++ b/src/main/java/org/apache/datasketches/theta/SetOperation.java @@ -19,24 +19,25 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; import static org.apache.datasketches.common.Family.idToFamily; import static org.apache.datasketches.common.Util.ceilingPowerOf2; import static org.apache.datasketches.theta.PreambleUtil.FAMILY_BYTE; import static org.apache.datasketches.theta.PreambleUtil.SER_VER_BYTE; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Family; -import org.apache.datasketches.common.MemoryStatus; +import org.apache.datasketches.common.MemorySegmentStatus; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.common.Util; /** * The parent API for all Set Operations * * @author Lee Rhodes */ -public abstract class SetOperation implements MemoryStatus { +public abstract class SetOperation implements MemorySegmentStatus { static final int 
CONST_PREAMBLE_LONGS = 3; /** @@ -54,45 +55,43 @@ public static final SetOperationBuilder builder() { } /** - * Heapify takes the SetOperations image in Memory and instantiates an on-heap + * Heapify takes the SetOperations image in MemorySegment and instantiates an on-heap * SetOperation using the * Default Update Seed. - * The resulting SetOperation will not retain any link to the source Memory. + * The resulting SetOperation will not retain any link to the source MemorySegment. * *Note: Only certain set operators during stateful operations can be serialized and thus * heapified.
* - * @param srcMem an image of a SetOperation where the image seed hash matches the default seed hash. - * See Memory - * @return a Heap-based SetOperation from the given Memory + * @param srcSeg an image of a SetOperation where the image seed hash matches the default seed hash. + * @return a Heap-based SetOperation from the given MemorySegment */ - public static SetOperation heapify(final Memory srcMem) { - return heapify(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + public static SetOperation heapify(final MemorySegment srcSeg) { + return heapify(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** - * Heapify takes the SetOperation image in Memory and instantiates an on-heap + * Heapify takes the SetOperation image in MemorySegment and instantiates an on-heap * SetOperation using the given expectedSeed. - * The resulting SetOperation will not retain any link to the source Memory. + * The resulting SetOperation will not retain any link to the source MemorySegment. * *Note: Only certain set operators during stateful operations can be serialized and thus * heapified.
* - * @param srcMem an image of a SetOperation where the hash of the given expectedSeed matches the image seed hash. - * See Memory - * @param expectedSeed the seed used to validate the given Memory image. + * @param srcSeg an image of a SetOperation where the hash of the given expectedSeed matches the image seed hash. + * @param expectedSeed the seed used to validate the given MemorySegment image. * See Update Hash Seed. - * @return a Heap-based SetOperation from the given Memory + * @return a Heap-based SetOperation from the given MemorySegment */ - public static SetOperation heapify(final Memory srcMem, final long expectedSeed) { - final byte famID = srcMem.getByte(FAMILY_BYTE); + public static SetOperation heapify(final MemorySegment srcSeg, final long expectedSeed) { + final byte famID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE); final Family family = idToFamily(famID); switch (family) { case UNION : { - return UnionImpl.heapifyInstance(srcMem, expectedSeed); + return UnionImpl.heapifyInstance(srcSeg, expectedSeed); } case INTERSECTION : { - return IntersectionImpl.heapifyInstance(srcMem, expectedSeed); + return IntersectionImpl.heapifyInstance(srcSeg, expectedSeed); } default: { throw new SketchesArgumentException("SetOperation cannot heapify family: " @@ -102,104 +101,51 @@ public static SetOperation heapify(final Memory srcMem, final long expectedSeed) } /** - * Wrap takes the SetOperation image in Memory and refers to it directly. + * Wrap takes the SetOperation image in MemorySegment and refers to it directly. * There is no data copying onto the java heap. - * This method assumes the - * Default Update Seed. + * This method assumes the Default Update Seed. + * If the given source MemorySegment is read-only, the returned object will also be read-only. * - *Note: Only certain set operators during stateful operations can be serialized and thus - * wrapped.
+ *Note: Only certain set operators during stateful operations can be serialized and thus wrapped.
* - * @param srcMem an image of a SetOperation where the image seed hash matches the default seed hash. - * See Memory - * @return a SetOperation backed by the given Memory + * @param srcSeg an image of a SetOperation where the image seed hash matches the default seed hash. + * @return a SetOperation backed by the given MemorySegment */ - public static SetOperation wrap(final Memory srcMem) { - return wrap(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + public static SetOperation wrap(final MemorySegment srcSeg) { + return wrap(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** - * Wrap takes the SetOperation image in Memory and refers to it directly. + * Wrap takes the SetOperation image in MemorySegment and refers to it directly. * There is no data copying onto the java heap. + * If the given source MemorySegment is read-only, the returned object will also be read-only. * - *Note: Only certain set operators during stateful operations can be serialized and thus - * wrapped.
+ *Note: Only certain set operators during stateful operations can be serialized and thus wrapped.
* - * @param srcMem an image of a SetOperation where the hash of the given expectedSeed matches the image seed hash. - * See Memory - * @param expectedSeed the seed used to validate the given Memory image. + * @param srcSeg an image of a SetOperation where the hash of the given expectedSeed matches the image seed hash. + * @param expectedSeed the seed used to validate the given MemorySegment image. * See Update Hash Seed. - * @return a SetOperation backed by the given Memory + * @return a SetOperation backed by the given MemorySegment */ - public static SetOperation wrap(final Memory srcMem, final long expectedSeed) { - final byte famID = srcMem.getByte(FAMILY_BYTE); + public static SetOperation wrap(final MemorySegment srcSeg, final long expectedSeed) { + final byte famID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE); final Family family = idToFamily(famID); - final int serVer = srcMem.getByte(SER_VER_BYTE); + final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE); if (serVer != 3) { throw new SketchesArgumentException("SerVer must be 3: " + serVer); } switch (family) { case UNION : { - return UnionImpl.wrapInstance(srcMem, expectedSeed); + return UnionImpl.wrapInstance(srcSeg, expectedSeed); } case INTERSECTION : { - return IntersectionImpl.wrapInstance((WritableMemory)srcMem, expectedSeed, true); + return IntersectionImpl.wrapInstance(srcSeg, expectedSeed, srcSeg.isReadOnly() ); } default: throw new SketchesArgumentException("SetOperation cannot wrap family: " + family.toString()); } } - /** - * Wrap takes the SetOperation image in Memory and refers to it directly. - * There is no data copying onto the java heap. - * This method assumes the - * Default Update Seed. - * - *Note: Only certain set operators during stateful operations can be serialized and thus - * wrapped.
- * - * @param srcMem an image of a SetOperation where the image seed hash matches the default seed hash. - * See Memory - * @return a SetOperation backed by the given Memory - */ - public static SetOperation wrap(final WritableMemory srcMem) { - return wrap(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); - } - - /** - * Wrap takes the SetOperation image in Memory and refers to it directly. - * There is no data copying onto the java heap. - * - *Note: Only certain set operators during stateful operations can be serialized and thus - * wrapped.
- * - * @param srcMem an image of a SetOperation where the hash of the given expectedSeed matches the image seed hash. - * See Memory - * @param expectedSeed the seed used to validate the given Memory image. - * See Update Hash Seed. - * @return a SetOperation backed by the given Memory - */ - public static SetOperation wrap(final WritableMemory srcMem, final long expectedSeed) { - final byte famID = srcMem.getByte(FAMILY_BYTE); - final Family family = idToFamily(famID); - final int serVer = srcMem.getByte(SER_VER_BYTE); - if (serVer != 3) { - throw new SketchesArgumentException("SerVer must be 3: " + serVer); - } - switch (family) { - case UNION : { - return UnionImpl.wrapInstance(srcMem, expectedSeed); - } - case INTERSECTION : { - return IntersectionImpl.wrapInstance(srcMem, expectedSeed, false); - } - default: - throw new SketchesArgumentException("SetOperation cannot wrap family: " - + family.toString()); - } - } - /** * Returns the maximum required storage bytes given a nomEntries parameter for Union operations * @param nomEntries Nominal Entries @@ -251,6 +197,12 @@ public static int getMaxAnotBResultBytes(final int nomEntries) { */ abstract long[] getCache(); + /** + * Returns the backing MemorySegment object if it exists, otherwise null. + * @return the backing MemorySegment object if it exists, otherwise null. + */ + MemorySegment getMemorySegment() { return null; } + /** * Gets the current count of retained entries. * This is only useful during stateful operations. @@ -274,6 +226,12 @@ public static int getMaxAnotBResultBytes(final int nomEntries) { */ abstract long getThetaLong(); + @Override + public abstract boolean hasMemorySegment(); + + @Override + public abstract boolean isDirect(); + /** * Returns true if this set operator is empty. * Only useful during stateful operations. 
@@ -282,4 +240,7 @@ public static int getMaxAnotBResultBytes(final int nomEntries) { */ abstract boolean isEmpty(); + @Override + public abstract boolean isSameResource(final MemorySegment seg); + } diff --git a/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java b/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java index 4a35cf67d..d4af63681 100644 --- a/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java +++ b/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java @@ -23,12 +23,12 @@ import static org.apache.datasketches.common.Util.TAB; import static org.apache.datasketches.common.Util.ceilingPowerOf2; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.DefaultMemoryRequestServer; -import org.apache.datasketches.memory.MemoryRequestServer; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon.ThetaUtil; /** @@ -36,30 +36,28 @@ * * @author Lee Rhodes */ -public class SetOperationBuilder { +public final class SetOperationBuilder { private int bLgNomLongs; private long bSeed; private ResizeFactor bRF; private float bP; - private MemoryRequestServer bMemReqSvr; /** * Constructor for building a new SetOperation. The default configuration is *The resulting sketch will not retain any link to the source Memory.
+ *The resulting sketch will not retain any link to the source MemorySegment.
* *For Update Sketches this method checks if the * Default Update Seed
- * was used to create the source Memory image. + * was used to create the source MemorySegment image. * *For Compact Sketches this method assumes that the sketch image was created with the * correct hash seed, so it is not checked.
* - * @param srcMem an image of a Sketch. - * See Memory. + * @param srcSeg an image of a Sketch. + * * @return a Sketch on the heap. */ - public static Sketch heapify(final Memory srcMem) { - final byte familyID = srcMem.getByte(FAMILY_BYTE); + public static Sketch heapify(final MemorySegment srcSeg) { + final byte familyID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE); final Family family = idToFamily(familyID); if (family == Family.COMPACT) { - return CompactSketch.heapify(srcMem); + return CompactSketch.heapify(srcSeg); } - return heapifyUpdateFromMemory(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + return heapifyUpdateFromMemorySegment(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** - * Heapify takes the sketch image in Memory and instantiates an on-heap Sketch. + * Heapify takes the sketch image in MemorySegment and instantiates an on-heap Sketch. * - *The resulting sketch will not retain any link to the source Memory.
+ *The resulting sketch will not retain any link to the source MemorySegment.
* *For Update and Compact Sketches this method checks if the given expectedSeed was used to - * create the source Memory image. However, SerialVersion 1 sketches cannot be checked.
+ * create the source MemorySegment image. However, SerialVersion 1 sketches cannot be checked. * - * @param srcMem an image of a Sketch that was created using the given expectedSeed. - * See Memory. - * @param expectedSeed the seed used to validate the given Memory image. + * @param srcSeg an image of a Sketch that was created using the given expectedSeed. + * @param expectedSeed the seed used to validate the given MemorySegment image. * See Update Hash Seed. * Compact sketches store a 16-bit hash of the seed, but not the seed itself. * @return a Sketch on the heap. */ - public static Sketch heapify(final Memory srcMem, final long expectedSeed) { - final byte familyID = srcMem.getByte(FAMILY_BYTE); + public static Sketch heapify(final MemorySegment srcSeg, final long expectedSeed) { + final byte familyID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE); final Family family = idToFamily(familyID); if (family == Family.COMPACT) { - return CompactSketch.heapify(srcMem, expectedSeed); + return CompactSketch.heapify(srcSeg, expectedSeed); } - return heapifyUpdateFromMemory(srcMem, expectedSeed); + return heapifyUpdateFromMemorySegment(srcSeg, expectedSeed); } /** - * Wrap takes the sketch image in the given Memory and refers to it directly. + * Wrap takes the sketch image in the given MemorySegment and refers to it directly. * There is no data copying onto the java heap. * The wrap operation enables fast read-only merging and access to all the public read-only API. * @@ -114,41 +114,40 @@ public static Sketch heapify(final Memory srcMem, final long expectedSeed) { * *Wrapping any subclass of this class that is empty or contains only a single item will * result in on-heap equivalent forms of empty and single item sketch respectively. - * This is actually faster and consumes less overall memory.
+ * This is actually faster and consumes less overall space. * *For Update Sketches this method checks if the * Default Update Seed
- * was used to create the source Memory image. + * was used to create the source MemorySegment image. * *For Compact Sketches this method assumes that the sketch image was created with the * correct hash seed, so it is not checked.
* - * @param srcMem an image of a Sketch. - * See Memory. - * @return a Sketch backed by the given Memory - */ - public static Sketch wrap(final Memory srcMem) { - final int preLongs = srcMem.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; - final int serVer = srcMem.getByte(SER_VER_BYTE) & 0XFF; - final int familyID = srcMem.getByte(FAMILY_BYTE) & 0XFF; + * @param srcSeg an image of a Sketch. + * @return a Sketch backed by the given MemorySegment + */ + public static Sketch wrap(final MemorySegment srcSeg) { + final int preLongs = srcSeg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; + final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE) & 0XFF; + final int familyID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF; final Family family = Family.idToFamily(familyID); if (family == Family.QUICKSELECT) { if (serVer == 3 && preLongs == 3) { - return DirectQuickSelectSketchR.readOnlyWrap(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + return DirectQuickSelectSketchR.readOnlyWrap(srcSeg, Util.DEFAULT_UPDATE_SEED); } else { throw new SketchesArgumentException( "Corrupted: " + family + " family image: must have SerVer = 3 and preLongs = 3"); } } if (family == Family.COMPACT) { - return CompactSketch.wrap(srcMem); + return CompactSketch.wrap(srcSeg); } throw new SketchesArgumentException( "Cannot wrap family: " + family + " as a Sketch"); } /** - * Wrap takes the sketch image in the given Memory and refers to it directly. + * Wrap takes the sketch image in the given MemorySegment and refers to it directly. * There is no data copying onto the java heap. * The wrap operation enables fast read-only merging and access to all the public read-only API. * @@ -160,32 +159,31 @@ public static Sketch wrap(final Memory srcMem) { * *Wrapping any subclass of this class that is empty or contains only a single item will * result in on-heap equivalent forms of empty and single item sketch respectively. - * This is actually faster and consumes less overall memory.
+ * This is actually faster and consumes less overall space. * *For Update and Compact Sketches this method checks if the given expectedSeed was used to - * create the source Memory image. However, SerialVersion 1 sketches cannot be checked.
+ * create the source MemorySegment image. However, SerialVersion 1 sketches cannot be checked. * - * @param srcMem an image of a Sketch. - * See Memory - * @param expectedSeed the seed used to validate the given Memory image. + * @param srcSeg a MemorySegment with an image of a Sketch. + * @param expectedSeed the seed used to validate the given MemorySegment image. * See Update Hash Seed. - * @return a UpdateSketch backed by the given Memory except as above. + * @return a UpdateSketch backed by the given MemorySegment except as above. */ - public static Sketch wrap(final Memory srcMem, final long expectedSeed) { - final int preLongs = srcMem.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; - final int serVer = srcMem.getByte(SER_VER_BYTE) & 0XFF; - final int familyID = srcMem.getByte(FAMILY_BYTE) & 0XFF; + public static Sketch wrap(final MemorySegment srcSeg, final long expectedSeed) { + final int preLongs = srcSeg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; + final int serVer = srcSeg.get(JAVA_BYTE, SER_VER_BYTE) & 0XFF; + final int familyID = srcSeg.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF; final Family family = Family.idToFamily(familyID); if (family == Family.QUICKSELECT) { if (serVer == 3 && preLongs == 3) { - return DirectQuickSelectSketchR.readOnlyWrap(srcMem, expectedSeed); + return DirectQuickSelectSketchR.readOnlyWrap(srcSeg, expectedSeed); } else { throw new SketchesArgumentException( "Corrupted: " + family + " family image: must have SerVer = 3 and preLongs = 3"); } } if (family == Family.COMPACT) { - return CompactSketch.wrap(srcMem, expectedSeed); + return CompactSketch.wrap(srcSeg, expectedSeed); } throw new SketchesArgumentException( "Cannot wrap family: " + family + " as a Sketch"); @@ -198,7 +196,7 @@ public static Sketch wrap(final Memory srcMem, final long expectedSeed) { * *If this.isCompact() == true this method returns this, * otherwise, this method is equivalent to - * {@link #compact(boolean, WritableMemory) compact(true, null)}. 
+ * {@link #compact(boolean, MemorySegment) compact(true, null)}. * *
A CompactSketch is always immutable.
* @@ -223,9 +221,9 @@ public CompactSketch compact() { *A CompactSketch is always immutable.
* *A new CompactSketch object is created:
- *Nulls and empty sketches are ignored.
* - * @param mem Memory image of sketch to be merged + * @param seg MemorySegment image of sketch to be merged */ - public abstract void union(Memory mem); + public abstract void union(MemorySegment seg); /** * Update this union with the given long data item. diff --git a/src/main/java/org/apache/datasketches/theta/UnionImpl.java b/src/main/java/org/apache/datasketches/theta/UnionImpl.java index 8c5b2f8f0..387ee3455 100644 --- a/src/main/java/org/apache/datasketches/theta/UnionImpl.java +++ b/src/main/java/org/apache/datasketches/theta/UnionImpl.java @@ -20,22 +20,22 @@ package org.apache.datasketches.theta; import static java.lang.Math.min; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; +import static org.apache.datasketches.common.QuickSelect.selectExcludingZeros; import static org.apache.datasketches.theta.PreambleUtil.UNION_THETA_LONG; import static org.apache.datasketches.theta.PreambleUtil.clearEmpty; import static org.apache.datasketches.theta.PreambleUtil.extractFamilyID; import static org.apache.datasketches.theta.PreambleUtil.extractUnionThetaLong; import static org.apache.datasketches.theta.PreambleUtil.insertUnionThetaLong; -import static org.apache.datasketches.thetacommon.QuickSelect.selectExcludingZeros; +import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; +import java.util.Objects; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.MemoryRequestServer; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon.HashOperations; -import org.apache.datasketches.thetacommon.ThetaUtil; /** * Shared code for the HeapUnion and DirectUnion implementations. 
@@ -61,7 +61,7 @@ final class UnionImpl extends Union { private UnionImpl(final UpdateSketch gadget, final long seed) { gadget_ = gadget; - expectedSeedHash_ = ThetaUtil.computeSeedHash(seed); + expectedSeedHash_ = Util.computeSeedHash(seed); } /** @@ -88,15 +88,14 @@ static UnionImpl initNewHeapInstance( } /** - * Construct a new Direct Union in the off-heap destination Memory. + * Construct a new Direct Union in the destination MemorySegment. * Called by SetOperationBuilder. * * @param lgNomLongs See lgNomLongs. * @param seed See seed * @param p See Sampling Probability, p * @param rf See Resize Factor - * @param memReqSvr a given instance of a MemoryRequestServer - * @param dstMem the given Memory object destination. It will be cleared prior to use. + * @param dstSeg the given MemorySegment object destination. It will be cleared prior to use. * @return this class */ static UnionImpl initNewDirectInstance( @@ -104,10 +103,9 @@ static UnionImpl initNewDirectInstance( final long seed, final float p, final ResizeFactor rf, - final MemoryRequestServer memReqSvr, - final WritableMemory dstMem) { + final MemorySegment dstSeg) { final UpdateSketch gadget = //create with UNION family - new DirectQuickSelectSketch(lgNomLongs, seed, p, rf, memReqSvr, dstMem, true); + new DirectQuickSelectSketch(lgNomLongs, seed, p, rf, dstSeg, true); final UnionImpl unionImpl = new UnionImpl(gadget, seed); unionImpl.unionThetaLong_ = gadget.getThetaLong(); unionImpl.unionEmpty_ = gadget.isEmpty(); @@ -115,92 +113,58 @@ static UnionImpl initNewDirectInstance( } /** - * Heapify a Union from a Memory Union object containing data. + * Heapify a Union from a MemorySegment Union object containing data. * Called by SetOperation. - * @param srcMem The source Memory Union object. - * See Memory - * @param expectedSeed the seed used to validate the given Memory image. + * @param srcSeg The source MemorySegment Union object. 
+ * @param expectedSeed the seed used to validate the given MemorySegment image. * See seed * @return this class */ - static UnionImpl heapifyInstance(final Memory srcMem, final long expectedSeed) { - Family.UNION.checkFamilyID(extractFamilyID(srcMem)); - final UpdateSketch gadget = HeapQuickSelectSketch.heapifyInstance(srcMem, expectedSeed); + static UnionImpl heapifyInstance(final MemorySegment srcSeg, final long expectedSeed) { + final MemorySegment srcSegRO = srcSeg.asReadOnly(); + Family.UNION.checkFamilyID(extractFamilyID(srcSegRO)); + final UpdateSketch gadget = HeapQuickSelectSketch.heapifyInstance(srcSegRO, expectedSeed); final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed); - unionImpl.unionThetaLong_ = extractUnionThetaLong(srcMem); - unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcMem); + unionImpl.unionThetaLong_ = extractUnionThetaLong(srcSegRO); + unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcSegRO); return unionImpl; } /** - * Fast-wrap a Union object around a Union Memory object containing data. - * This does NO validity checking of the given Memory. - * @param srcMem The source Memory object. - * See Memory - * @param expectedSeed the seed used to validate the given Memory image. + * Fast-wrap a Union object around a Union MemorySegment object containing data. + * This does NO validity checking of the given MemorySegment. + * @param srcSeg The source MemorySegment object. + * @param expectedSeed the seed used to validate the given MemorySegment image. * See seed * @return this class */ - static UnionImpl fastWrap(final Memory srcMem, final long expectedSeed) { - Family.UNION.checkFamilyID(extractFamilyID(srcMem)); - final UpdateSketch gadget = DirectQuickSelectSketchR.fastReadOnlyWrap(srcMem, expectedSeed); + static UnionImpl fastWrapInstance(final MemorySegment srcSeg, final long expectedSeed) { + Family.UNION.checkFamilyID(extractFamilyID(srcSeg)); + final UpdateSketch gadget = srcSeg.isReadOnly() + ? 
DirectQuickSelectSketchR.fastReadOnlyWrap(srcSeg, expectedSeed) + : DirectQuickSelectSketch.fastWritableWrap(srcSeg, expectedSeed); final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed); - unionImpl.unionThetaLong_ = extractUnionThetaLong(srcMem); - unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcMem); + unionImpl.unionThetaLong_ = extractUnionThetaLong(srcSeg); + unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcSeg); return unionImpl; } /** - * Fast-wrap a Union object around a Union WritableMemory object containing data. - * This does NO validity checking of the given Memory. - * @param srcMem The source Memory object. - * See Memory - * @param expectedSeed the seed used to validate the given Memory image. - * See seed - * @return this class - */ - static UnionImpl fastWrap(final WritableMemory srcMem, final long expectedSeed) { - Family.UNION.checkFamilyID(extractFamilyID(srcMem)); - final UpdateSketch gadget = DirectQuickSelectSketch.fastWritableWrap(srcMem, expectedSeed); - final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed); - unionImpl.unionThetaLong_ = extractUnionThetaLong(srcMem); - unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcMem); - return unionImpl; - } - - /** - * Wrap a Union object around a Union Memory object containing data. + * Wrap a Union object around a Union MemorySegment object containing data. * Called by SetOperation. - * @param srcMem The source Memory object. - * See Memory - * @param expectedSeed the seed used to validate the given Memory image. + * @param srcSeg The source MemorySegment object. + * @param expectedSeed the seed used to validate the given MemorySegment image. 
* See seed * @return this class */ - static UnionImpl wrapInstance(final Memory srcMem, final long expectedSeed) { - Family.UNION.checkFamilyID(extractFamilyID(srcMem)); - final UpdateSketch gadget = DirectQuickSelectSketchR.readOnlyWrap(srcMem, expectedSeed); + static UnionImpl wrapInstance(final MemorySegment srcSeg, final long expectedSeed) { + Family.UNION.checkFamilyID(extractFamilyID(srcSeg)); + final UpdateSketch gadget = srcSeg.isReadOnly() + ? DirectQuickSelectSketchR.readOnlyWrap(srcSeg, expectedSeed) + : DirectQuickSelectSketch.writableWrap(srcSeg, expectedSeed); final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed); - unionImpl.unionThetaLong_ = extractUnionThetaLong(srcMem); - unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcMem); - return unionImpl; - } - - /** - * Wrap a Union object around a Union WritableMemory object containing data. - * Called by SetOperation. - * @param srcMem The source Memory object. - * See Memory - * @param expectedSeed the seed used to validate the given Memory image. - * See seed - * @return this class - */ - static UnionImpl wrapInstance(final WritableMemory srcMem, final long expectedSeed) { - Family.UNION.checkFamilyID(extractFamilyID(srcMem)); - final UpdateSketch gadget = DirectQuickSelectSketch.writableWrap(srcMem, expectedSeed); - final UnionImpl unionImpl = new UnionImpl(gadget, expectedSeed); - unionImpl.unionThetaLong_ = extractUnionThetaLong(srcMem); - unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcMem); + unionImpl.unionThetaLong_ = extractUnionThetaLong(srcSeg); + unionImpl.unionEmpty_ = PreambleUtil.isEmptyFlag(srcSeg); return unionImpl; } @@ -215,17 +179,22 @@ public int getMaxUnionBytes() { return (16 << lgK) + (Family.UNION.getMaxPreLongs() << 3); } + @Override + MemorySegment getMemorySegment() { + return hasMemorySegment() ? 
gadget_.getMemorySegment() : null; + } + @Override public CompactSketch getResult() { return getResult(true, null); } @Override - public CompactSketch getResult(final boolean dstOrdered, final WritableMemory dstMem) { + public CompactSketch getResult(final boolean dstOrdered, final MemorySegment dstSeg) { final int gadgetCurCount = gadget_.getRetainedEntries(true); final int k = 1 << gadget_.getLgNomLongs(); final long[] gadgetCacheCopy = - gadget_.hasMemory() ? gadget_.getCache() : gadget_.getCache().clone(); + gadget_.hasMemorySegment() ? gadget_.getCache() : gadget_.getCache().clone(); //Pull back to k final long curGadgetThetaLong = gadget_.getThetaLong(); @@ -233,8 +202,9 @@ public CompactSketch getResult(final boolean dstOrdered, final WritableMemory ds ? selectExcludingZeros(gadgetCacheCopy, gadgetCurCount, k + 1) : curGadgetThetaLong; //Finalize Theta and curCount - final long unionThetaLong = gadget_.hasMemory() - ? gadget_.getMemory().getLong(UNION_THETA_LONG) : unionThetaLong_; + final long unionThetaLong = gadget_.hasMemorySegment() + ? gadget_.getMemorySegment().get(JAVA_LONG_UNALIGNED, UNION_THETA_LONG) + : unionThetaLong_; final long minThetaLong = min(min(curGadgetThetaLong, adjGadgetThetaLong), unionThetaLong); final int curCountOut = minThetaLong < curGadgetThetaLong @@ -247,25 +217,22 @@ public CompactSketch getResult(final boolean dstOrdered, final WritableMemory ds final boolean empty = gadget_.isEmpty() && unionEmpty_; final short seedHash = gadget_.getSeedHash(); return CompactOperations.componentsToCompact( - minThetaLong, curCountOut, seedHash, empty, true, dstOrdered, dstOrdered, dstMem, compactCacheOut); + minThetaLong, curCountOut, seedHash, empty, true, dstOrdered, dstOrdered, dstSeg, compactCacheOut); } @Override - public boolean hasMemory() { - return gadget_ instanceof DirectQuickSelectSketchR - ? 
gadget_.hasMemory() : false; + public boolean hasMemorySegment() { + return gadget_.hasMemorySegment(); } @Override public boolean isDirect() { - return gadget_ instanceof DirectQuickSelectSketchR - ? gadget_.isDirect() : false; + return gadget_.isDirect(); } @Override - public boolean isSameResource(final Memory that) { - return gadget_ instanceof DirectQuickSelectSketchR - ? gadget_.isSameResource(that) : false; + public boolean isSameResource(final MemorySegment that) { + return gadget_.isSameResource(that); } @Override @@ -278,10 +245,10 @@ public void reset() { @Override public byte[] toByteArray() { final byte[] gadgetByteArr = gadget_.toByteArray(); - final WritableMemory mem = WritableMemory.writableWrap(gadgetByteArr); - insertUnionThetaLong(mem, unionThetaLong_); + final MemorySegment seg = MemorySegment.ofArray(gadgetByteArr); + insertUnionThetaLong(seg, unionThetaLong_); if (gadget_.isEmpty() != unionEmpty_) { - clearEmpty(mem); + clearEmpty(seg); unionEmpty_ = false; } return gadgetByteArr; @@ -289,11 +256,11 @@ public byte[] toByteArray() { @Override //Stateless Union public CompactSketch union(final Sketch sketchA, final Sketch sketchB, final boolean dstOrdered, - final WritableMemory dstMem) { + final MemorySegment dstSeg) { reset(); union(sketchA); union(sketchB); - final CompactSketch csk = getResult(dstOrdered, dstMem); + final CompactSketch csk = getResult(dstOrdered, dstSeg); reset(); return csk; } @@ -307,12 +274,12 @@ public void union(final Sketch sketchIn) { return; } //sketchIn is valid and not empty - ThetaUtil.checkSeedHashes(expectedSeedHash_, sketchIn.getSeedHash()); + Util.checkSeedHashes(expectedSeedHash_, sketchIn.getSeedHash()); if (sketchIn instanceof SingleItemSketch) { gadget_.hashUpdate(sketchIn.getCache()[0]); return; } - Sketch.checkSketchAndMemoryFlags(sketchIn); + Sketch.checkSketchAndMemorySegmentFlags(sketchIn); unionThetaLong_ = min(min(unionThetaLong_, sketchIn.getThetaLong()), gadget_.getThetaLong()); //Theta rule 
unionEmpty_ = false; @@ -327,18 +294,17 @@ public void union(final Sketch sketchIn) { } } unionThetaLong_ = min(unionThetaLong_, gadget_.getThetaLong()); //Theta rule with gadget - if (gadget_.hasMemory()) { - final WritableMemory wmem = (WritableMemory)gadget_.getMemory(); - PreambleUtil.insertUnionThetaLong(wmem, unionThetaLong_); - PreambleUtil.clearEmpty(wmem); + if (gadget_.hasMemorySegment()) { + final MemorySegment wseg = gadget_.getMemorySegment(); + PreambleUtil.insertUnionThetaLong(wseg, unionThetaLong_); + PreambleUtil.clearEmpty(wseg); } } @Override - public void union(final Memory skMem) { - if (skMem != null) { - union(Sketch.wrap(skMem)); - } + public void union(final MemorySegment seg) { + Objects.requireNonNull(seg, "MemorySegment must be non-null"); + union(Sketch.wrap(seg.asReadOnly())); } @Override diff --git a/src/main/java/org/apache/datasketches/theta/UpdateSketch.java b/src/main/java/org/apache/datasketches/theta/UpdateSketch.java index cb6854b02..30d3b6f49 100644 --- a/src/main/java/org/apache/datasketches/theta/UpdateSketch.java +++ b/src/main/java/org/apache/datasketches/theta/UpdateSketch.java @@ -19,6 +19,7 @@ package org.apache.datasketches.theta; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.datasketches.common.Util.LONG_MAX_VALUE_AS_DOUBLE; import static org.apache.datasketches.common.Util.checkBounds; @@ -32,24 +33,24 @@ import static org.apache.datasketches.theta.PreambleUtil.READ_ONLY_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.SER_VER; import static org.apache.datasketches.theta.PreambleUtil.SER_VER_BYTE; -import static org.apache.datasketches.theta.PreambleUtil.checkMemorySeedHash; +import static org.apache.datasketches.theta.PreambleUtil.checkSegmentSeedHash; import static org.apache.datasketches.theta.PreambleUtil.extractFamilyID; import static org.apache.datasketches.theta.PreambleUtil.extractFlags; import static 
org.apache.datasketches.theta.PreambleUtil.extractLgResizeFactor; import static org.apache.datasketches.theta.PreambleUtil.extractP; import static org.apache.datasketches.theta.PreambleUtil.extractSerVer; import static org.apache.datasketches.theta.PreambleUtil.extractThetaLong; -import static org.apache.datasketches.theta.PreambleUtil.getMemBytes; +import static org.apache.datasketches.theta.PreambleUtil.getSegBytes; import static org.apache.datasketches.theta.UpdateReturnState.RejectedNullOrEmpty; +import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; import java.util.Objects; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon.ThetaUtil; /** @@ -64,47 +65,45 @@ public abstract class UpdateSketch extends Sketch { UpdateSketch() {} /** - * Wrap takes the sketch image in Memory and refers to it directly. There is no data copying onto + * Wrap takes the writable sketch image in MemorySegment and refers to it directly. There is no data copying onto * the java heap. Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have - * been explicitly stored as direct objects can be wrapped. This method assumes the - * {@link org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_UPDATE_SEED}. + * been explicitly stored as writable, direct objects can be wrapped. This method assumes the + * {@link org.apache.datasketches.common.Util#DEFAULT_UPDATE_SEED}. * Default Update Seed. - * @param srcMem an image of a Sketch where the image seed hash matches the default seed hash. + * @param srcWSeg an image of a writable sketch where the image seed hash matches the default seed hash. * It must have a size of at least 24 bytes. 
- * See Memory - * @return a Sketch backed by the given Memory + * @return an UpdateSketch backed by the given MemorySegment */ - public static UpdateSketch wrap(final WritableMemory srcMem) { - return wrap(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + public static UpdateSketch wrap(final MemorySegment srcWSeg) { + return wrap(srcWSeg, Util.DEFAULT_UPDATE_SEED); } /** - * Wrap takes the sketch image in Memory and refers to it directly. There is no data copying onto + * Wrap takes the sketch image in MemorySegment and refers to it directly. There is no data copying onto * the java heap. Only "Direct" Serialization Version 3 (i.e, OpenSource) sketches that have - * been explicitly stored as direct objects can be wrapped. + * been explicitly stored as writable direct objects can be wrapped. * An attempt to "wrap" earlier version sketches will result in a "heapified", normal * Java Heap version of the sketch where all data will be copied to the heap. - * @param srcMem an image of a Sketch where the image seed hash matches the given seed hash. + * @param srcWSeg an image of a writable sketch where the image seed hash matches the given seed hash. * It must have a size of at least 24 bytes. - * See Memory - * @param expectedSeed the seed used to validate the given Memory image. + * @param expectedSeed the seed used to validate the given MemorySegment image. * See Update Hash Seed. * Compact sketches store a 16-bit hash of the seed, but not the seed itself. 
- * @return a UpdateSketch backed by the given Memory + * @return a UpdateSketch backed by the given MemorySegment */ - public static UpdateSketch wrap(final WritableMemory srcMem, final long expectedSeed) { - Objects.requireNonNull(srcMem, "Source Memory must not be null"); - checkBounds(0, 24, srcMem.getCapacity()); //need min 24 bytes - final int preLongs = srcMem.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; - final int serVer = srcMem.getByte(SER_VER_BYTE) & 0XFF; - final int familyID = srcMem.getByte(FAMILY_BYTE) & 0XFF; + public static UpdateSketch wrap(final MemorySegment srcWSeg, final long expectedSeed) { + Objects.requireNonNull(srcWSeg, "Source MemorySeg e t must not be null"); + checkBounds(0, 24, srcWSeg.byteSize()); //need min 24 bytes + final int preLongs = srcWSeg.get(JAVA_BYTE, PREAMBLE_LONGS_BYTE) & 0X3F; + final int serVer = srcWSeg.get(JAVA_BYTE, SER_VER_BYTE) & 0XFF; + final int familyID = srcWSeg.get(JAVA_BYTE, FAMILY_BYTE) & 0XFF; final Family family = Family.idToFamily(familyID); if (family != Family.QUICKSELECT) { throw new SketchesArgumentException( "A " + family + " sketch cannot be wrapped as an UpdateSketch."); } if ((serVer == 3) && (preLongs == 3)) { - return DirectQuickSelectSketch.writableWrap(srcMem, expectedSeed); + return DirectQuickSelectSketch.writableWrap(srcWSeg, expectedSeed); } else { throw new SketchesArgumentException( "Corrupted: An UpdateSketch image must have SerVer = 3 and preLongs = 3"); @@ -112,40 +111,40 @@ public static UpdateSketch wrap(final WritableMemory srcMem, final long expected } /** - * Instantiates an on-heap UpdateSketch from Memory. This method assumes the - * {@link org.apache.datasketches.thetacommon.ThetaUtil#DEFAULT_UPDATE_SEED}. - * @param srcMem See Memory + * Instantiates an on-heap UpdateSketch from a MemorySegment. This method assumes the + * {@link org.apache.datasketches.common.Util#DEFAULT_UPDATE_SEED}. + * @param srcSeg the given MemorySegment with a sketch image. 
* It must have a size of at least 24 bytes. * @return an UpdateSketch */ - public static UpdateSketch heapify(final Memory srcMem) { - return heapify(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED); + public static UpdateSketch heapify(final MemorySegment srcSeg) { + return heapify(srcSeg, Util.DEFAULT_UPDATE_SEED); } /** - * Instantiates an on-heap UpdateSketch from Memory. - * @param srcMem See Memory + * Instantiates an on-heap UpdateSketch from a MemorySegment. + * @param srcSeg the given MemorySegment. * It must have a size of at least 24 bytes. - * @param expectedSeed the seed used to validate the given Memory image. + * @param expectedSeed the seed used to validate the given MemorySegment image. * See Update Hash Seed. * @return an UpdateSketch */ - public static UpdateSketch heapify(final Memory srcMem, final long expectedSeed) { - Objects.requireNonNull(srcMem, "Source Memory must not be null"); - checkBounds(0, 24, srcMem.getCapacity()); //need min 24 bytes - final Family family = Family.idToFamily(srcMem.getByte(FAMILY_BYTE)); + public static UpdateSketch heapify(final MemorySegment srcSeg, final long expectedSeed) { + Objects.requireNonNull(srcSeg, "Source MemorySegment must not be null"); + checkBounds(0, 24, srcSeg.byteSize()); //need min 24 bytes + final Family family = Family.idToFamily(srcSeg.get(JAVA_BYTE, FAMILY_BYTE)); if (family.equals(Family.ALPHA)) { - return HeapAlphaSketch.heapifyInstance(srcMem, expectedSeed); + return HeapAlphaSketch.heapifyInstance(srcSeg, expectedSeed); } - return HeapQuickSelectSketch.heapifyInstance(srcMem, expectedSeed); + return HeapQuickSelectSketch.heapifyInstance(srcSeg, expectedSeed); } //Sketch interface @Override - public CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem) { + public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstWSeg) { return componentsToCompact(getThetaLong(), getRetainedEntries(true), getSeedHash(), isEmpty(), - false, false, dstOrdered, dstMem, 
getCache()); + false, false, dstOrdered, dstWSeg, getCache()); } @Override @@ -160,16 +159,31 @@ int getCurrentDataLongs() { return 1 << getLgArrLongs(); } + @Override + public boolean hasMemorySegment() { + return (this instanceof DirectQuickSelectSketchR && ((DirectQuickSelectSketchR)this).hasMemorySegment()); + } + @Override public boolean isCompact() { return false; } + @Override + public boolean isDirect() { + return (this instanceof DirectQuickSelectSketchR && ((DirectQuickSelectSketchR)this).isDirect()); + } + @Override public boolean isOrdered() { return false; } + @Override + public boolean isSameResource(final MemorySegment that) { + return (this instanceof DirectQuickSelectSketchR && ((DirectQuickSelectSketchR)this).isSameResource(that)); + } + //UpdateSketch interface /** @@ -378,10 +392,10 @@ public UpdateReturnState update(final long[] data) { */ abstract boolean isOutOfSpace(int numEntries); - static void checkUnionQuickSelectFamily(final Memory mem, final int preambleLongs, + static void checkUnionQuickSelectFamily(final MemorySegment seg, final int preambleLongs, final int lgNomLongs) { //Check Family - final int familyID = extractFamilyID(mem); //byte 2 + final int familyID = extractFamilyID(seg); //byte 2 final Family family = Family.idToFamily(familyID); if (family.equals(Family.UNION)) { if (preambleLongs != Family.UNION.getMinPreLongs()) { @@ -402,45 +416,45 @@ else if (family.equals(Family.QUICKSELECT)) { //Check lgNomLongs if (lgNomLongs < ThetaUtil.MIN_LG_NOM_LONGS) { throw new SketchesArgumentException( - "Possible corruption: Current Memory lgNomLongs < min required size: " + "Possible corruption: Current MemorySegment lgNomLongs < min required size: " + lgNomLongs + " < " + ThetaUtil.MIN_LG_NOM_LONGS); } } - static void checkMemIntegrity(final Memory srcMem, final long expectedSeed, final int preambleLongs, + static void checkSegIntegrity(final MemorySegment srcSeg, final long expectedSeed, final int preambleLongs, final int lgNomLongs, 
final int lgArrLongs) { //Check SerVer - final int serVer = extractSerVer(srcMem); //byte 1 + final int serVer = extractSerVer(srcSeg); //byte 1 if (serVer != SER_VER) { throw new SketchesArgumentException( "Possible corruption: Invalid Serialization Version: " + serVer); } //Check flags - final int flags = extractFlags(srcMem); //byte 5 + final int flags = extractFlags(srcSeg); //byte 5 final int flagsMask = ORDERED_FLAG_MASK | COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK | BIG_ENDIAN_FLAG_MASK; if ((flags & flagsMask) > 0) { throw new SketchesArgumentException( - "Possible corruption: Input srcMem cannot be: big-endian, compact, ordered, or read-only"); + "Possible corruption: Input srcSeg cannot be: big-endian, compact, ordered, nor read-only"); } //Check seed hashes - final short seedHash = checkMemorySeedHash(srcMem, expectedSeed); //byte 6,7 - ThetaUtil.checkSeedHashes(seedHash, ThetaUtil.computeSeedHash(expectedSeed)); + final short seedHash = checkSegmentSeedHash(srcSeg, expectedSeed); //byte 6,7 + Util.checkSeedHashes(seedHash, Util.computeSeedHash(expectedSeed)); - //Check mem capacity, lgArrLongs - final long curCapBytes = srcMem.getCapacity(); - final int minReqBytes = getMemBytes(lgArrLongs, preambleLongs); + //Check seg capacity, lgArrLongs + final long curCapBytes = srcSeg.byteSize(); + final int minReqBytes = getSegBytes(lgArrLongs, preambleLongs); if (curCapBytes < minReqBytes) { throw new SketchesArgumentException( - "Possible corruption: Current Memory size < min required size: " + "Possible corruption: Current MemorySegment size < min required size: " + curCapBytes + " < " + minReqBytes); } //check Theta, p - final float p = extractP(srcMem); //bytes 12-15 - final long thetaLong = extractThetaLong(srcMem); //bytes 16-23 + final float p = extractP(srcSeg); //bytes 12-15 + final long thetaLong = extractThetaLong(srcSeg); //bytes 16-23 final double theta = thetaLong / LONG_MAX_VALUE_AS_DOUBLE; //if (lgArrLongs <= lgNomLongs) the sketch is still 
resizing, thus theta cannot be < p. if ((lgArrLongs <= lgNomLongs) && (theta < p) ) { @@ -451,19 +465,19 @@ static void checkMemIntegrity(final Memory srcMem, final long expectedSeed, fina } /** - * This checks to see if the memory RF factor was set correctly as early versions may not + * This checks to see if the MemorySegment RF factor was set correctly as early versions may not * have set it. - * @param srcMem the source memory + * @param srcSeg the source MemorySegment * @param lgNomLongs the current lgNomLongs * @param lgArrLongs the current lgArrLongs - * @return true if the the memory RF factor is incorrect and the caller can either + * @return true if the the MemorySegment RF factor is incorrect and the caller can either * correct it or throw an error. */ - static boolean isResizeFactorIncorrect(final Memory srcMem, final int lgNomLongs, + static boolean isResizeFactorIncorrect(final MemorySegment srcSeg, final int lgNomLongs, final int lgArrLongs) { final int lgT = lgNomLongs + 1; final int lgA = lgArrLongs; - final int lgR = extractLgResizeFactor(srcMem); + final int lgR = extractLgResizeFactor(srcSeg); if (lgR == 0) { return lgA != lgT; } return !(((lgT - lgA) % lgR) == 0); } diff --git a/src/main/java/org/apache/datasketches/theta/UpdateSketchBuilder.java b/src/main/java/org/apache/datasketches/theta/UpdateSketchBuilder.java index eec91e881..834778f87 100644 --- a/src/main/java/org/apache/datasketches/theta/UpdateSketchBuilder.java +++ b/src/main/java/org/apache/datasketches/theta/UpdateSketchBuilder.java @@ -23,14 +23,14 @@ import static org.apache.datasketches.common.Util.TAB; import static org.apache.datasketches.common.Util.ceilingPowerOf2; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; import 
org.apache.datasketches.common.SuppressFBWarnings; -import org.apache.datasketches.memory.DefaultMemoryRequestServer; -import org.apache.datasketches.memory.MemoryRequestServer; -import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.common.Util; import org.apache.datasketches.thetacommon.ThetaUtil; /** @@ -38,13 +38,12 @@ * * @author Lee Rhodes */ -public class UpdateSketchBuilder { +public final class UpdateSketchBuilder { private int bLgNomLongs; private long bSeed; private ResizeFactor bRF; private Family bFam; private float bP; - private MemoryRequestServer bMemReqSvr; //Fields for concurrent theta sketch private int bNumPoolThreads; @@ -57,30 +56,28 @@ public class UpdateSketchBuilder { * Constructor for building a new UpdateSketch. The default configuration is *The parameters unique to the shared concurrent sketch are: *
Key parameters that are in common with other Theta sketches: *
The parameters unique to the shared concurrent sketch are: @@ -450,23 +429,23 @@ public UpdateSketch buildShared(final WritableMemory dstMem) { *
Key parameters that are in common with other Theta sketches: *
This sketch can only be associated with a Serialization Version 4 format binary image.
*/ -class WrappedCompactCompressedSketch extends WrappedCompactSketch { - +final class WrappedCompactCompressedSketch extends WrappedCompactSketch { + /** * Construct this sketch with the given bytes. * @param bytes containing serialized compact compressed sketch. @@ -50,7 +50,7 @@ class WrappedCompactCompressedSketch extends WrappedCompactSketch { * @return this sketch */ static WrappedCompactCompressedSketch wrapInstance(final byte[] bytes, final short seedHash) { - ThetaUtil.checkSeedHashes(ByteArrayUtil.getShortLE(bytes, PreambleUtil.SEED_HASH_SHORT), seedHash); + Util.checkSeedHashes(ByteArrayUtil.getShortLE(bytes, PreambleUtil.SEED_HASH_SHORT), seedHash); return new WrappedCompactCompressedSketch(bytes); } @@ -66,7 +66,7 @@ public int getCurrentBytes() { private static final int START_PACKED_DATA_EXACT_MODE = 8; private static final int START_PACKED_DATA_ESTIMATION_MODE = 16; - + @Override public int getRetainedEntries(final boolean valid) { //compact is always valid // number of entries is stored using variable length encoding diff --git a/src/main/java/org/apache/datasketches/theta/WrappedCompactSketch.java b/src/main/java/org/apache/datasketches/theta/WrappedCompactSketch.java index 519857d21..a5b67363f 100644 --- a/src/main/java/org/apache/datasketches/theta/WrappedCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta/WrappedCompactSketch.java @@ -22,20 +22,19 @@ import static org.apache.datasketches.common.ByteArrayUtil.getIntLE; import static org.apache.datasketches.common.ByteArrayUtil.getLongLE; import static org.apache.datasketches.common.ByteArrayUtil.getShortLE; -import static org.apache.datasketches.theta.CompactOperations.memoryToCompact; +import static org.apache.datasketches.theta.CompactOperations.segmentToCompact; import static org.apache.datasketches.theta.PreambleUtil.EMPTY_FLAG_MASK; -import static org.apache.datasketches.theta.PreambleUtil.ORDERED_FLAG_MASK; import static 
org.apache.datasketches.theta.PreambleUtil.FLAGS_BYTE; -import static org.apache.datasketches.theta.PreambleUtil.RETAINED_ENTRIES_INT; +import static org.apache.datasketches.theta.PreambleUtil.ORDERED_FLAG_MASK; import static org.apache.datasketches.theta.PreambleUtil.PREAMBLE_LONGS_BYTE; -import static org.apache.datasketches.theta.PreambleUtil.THETA_LONG; +import static org.apache.datasketches.theta.PreambleUtil.RETAINED_ENTRIES_INT; import static org.apache.datasketches.theta.PreambleUtil.SEED_HASH_SHORT; +import static org.apache.datasketches.theta.PreambleUtil.THETA_LONG; +import java.lang.foreign.MemorySegment; import java.util.Arrays; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.thetacommon.ThetaUtil; +import org.apache.datasketches.common.Util; /** * Wrapper around a serialized compact read-only sketch. It is not empty, not a single item. @@ -54,22 +53,22 @@ class WrappedCompactSketch extends CompactSketch { } /** - * Wraps the given Memory, which must be a SerVer 3 CompactSketch image. + * Wraps the given byteArray, which must be a SerVer 3 CompactSketch image. * @param bytes representation of serialized compressed compact sketch. * @param seedHash The update seedHash. * See Seed Hash. 
* @return this sketch */ static WrappedCompactSketch wrapInstance(final byte[] bytes, final short seedHash) { - ThetaUtil.checkSeedHashes(getShortLE(bytes, SEED_HASH_SHORT), seedHash); + Util.checkSeedHashes(getShortLE(bytes, SEED_HASH_SHORT), seedHash); return new WrappedCompactSketch(bytes); } //Sketch Overrides @Override - public CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem) { - return memoryToCompact(Memory.wrap(bytes_), dstOrdered, dstMem); + public CompactSketch compact(final boolean dstOrdered, final MemorySegment dstSeg) { + return segmentToCompact(MemorySegment.ofArray(bytes_), dstOrdered, dstSeg); } @Override @@ -91,16 +90,6 @@ public long getThetaLong() { return (preLongs > 2) ? getLongLE(bytes_, THETA_LONG) : Long.MAX_VALUE; } - @Override - public boolean hasMemory() { - return false; - } - - @Override - public boolean isDirect() { - return false; - } - @Override public boolean isEmpty() { return (bytes_[FLAGS_BYTE] & EMPTY_FLAG_MASK) > 0; @@ -111,11 +100,6 @@ public boolean isOrdered() { return (bytes_[FLAGS_BYTE] & ORDERED_FLAG_MASK) > 0; } - @Override - public boolean isSameResource(final Memory that) { - return false; - } - @Override public HashIterator iterator() { return new BytesCompactHashIterator( @@ -153,11 +137,6 @@ int getCurrentPreambleLongs() { return bytes_[PREAMBLE_LONGS_BYTE]; } - @Override - Memory getMemory() { - return null; - } - @Override short getSeedHash() { return getShortLE(bytes_, SEED_HASH_SHORT); diff --git a/src/main/java/org/apache/datasketches/thetacommon/HashOperations.java b/src/main/java/org/apache/datasketches/thetacommon/HashOperations.java index f6b22cb50..7044551f0 100644 --- a/src/main/java/org/apache/datasketches/thetacommon/HashOperations.java +++ b/src/main/java/org/apache/datasketches/thetacommon/HashOperations.java @@ -19,13 +19,14 @@ package org.apache.datasketches.thetacommon; +import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED; import static java.lang.Math.max; 
import static org.apache.datasketches.common.Util.ceilingPowerOf2; +import java.lang.foreign.MemorySegment; + import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; /** * Helper class for the common hash table methods. @@ -183,22 +184,22 @@ public static int hashArrayInsert(final long[] srcArr, final long[] hashTable, return count; } - //With Memory or WritableMemory + //With MemorySegment /** - * This is a classical Knuth-style Open Addressing, Double Hash (OADH) search scheme for Memory. - * Returns the index if found, -1 if not found. + * This is a classical Knuth-style Open Addressing, Double Hash (OADH) search scheme for MemorySegment. + * Returns the index if found, -1 if not found. The input MemorySegment may be read only. * - * @param mem The Memory containing the hash table to search. + * @param seg The MemorySegment containing the hash table to search. * The hash table portion must be a power of 2 in size. * @param lgArrLongs The log_base2(hashTable.length). * See lgArrLongs. * @param hash The hash value to search for. Must not be zero. - * @param memOffsetBytes offset in the memory where the hashTable starts + * @param segOffsetBytes offset in the MemorySegment where the hashTable starts * @return Current probe index if found, -1 if not found. 
*/ - public static int hashSearchMemory(final Memory mem, final int lgArrLongs, final long hash, - final int memOffsetBytes) { + public static int hashSearchMemorySegment(final MemorySegment seg, final int lgArrLongs, final long hash, + final int segOffsetBytes) { if (hash == 0) { throw new SketchesArgumentException("Given hash must not be zero: " + hash); } @@ -207,8 +208,8 @@ public static int hashSearchMemory(final Memory mem, final int lgArrLongs, final int curProbe = (int) (hash & arrayMask); final int loopIndex = curProbe; do { - final int curProbeOffsetBytes = (curProbe << 3) + memOffsetBytes; - final long curArrayHash = mem.getLong(curProbeOffsetBytes); + final int curProbeOffsetBytes = (curProbe << 3) + segOffsetBytes; + final long curArrayHash = seg.get(JAVA_LONG_UNALIGNED, curProbeOffsetBytes); if (curArrayHash == EMPTY) { return -1; } else if (curArrayHash == hash) { return curProbe; } curProbe = (curProbe + stride) & arrayMask; @@ -217,21 +218,21 @@ public static int hashSearchMemory(final Memory mem, final int lgArrLongs, final } /** - * This is a classical Knuth-style Open Addressing, Double Hash (OADH) insert scheme for Memory. + * This is a classical Knuth-style Open Addressing, Double Hash (OADH) insert scheme for MemorySegment. * This method assumes that the input hash is not a duplicate. * Useful for rebuilding tables to avoid unnecessary comparisons. * Returns the index of insertion, which is always positive or zero. * Throws an exception if table has no empty slot. * - * @param wmem The WritableMemory that contains the hashTable to insert into. + * @param wseg The writable MemorySegment that contains the hashTable to insert into. * The size of the hashTable portion must be a power of 2. * @param lgArrLongs The log_base2(hashTable.length. * See lgArrLongs. * @param hash value that must not be zero and will be inserted into the array into an empty slot. 
- * @param memOffsetBytes offset in the WritableMemory where the hashTable starts + * @param memOffsetBytes offset in the writable MemorySegment where the hashTable starts * @return index of insertion. Always positive or zero. */ - public static int hashInsertOnlyMemory(final WritableMemory wmem, final int lgArrLongs, + public static int hashInsertOnlyMemorySegment(final MemorySegment wseg, final int lgArrLongs, final long hash, final int memOffsetBytes) { final int arrayMask = (1 << lgArrLongs) - 1; // current Size -1 final int stride = getStride(hash, lgArrLongs); @@ -240,9 +241,9 @@ public static int hashInsertOnlyMemory(final WritableMemory wmem, final int lgAr final int loopIndex = curProbe; do { final int curProbeOffsetBytes = (curProbe << 3) + memOffsetBytes; - final long curArrayHash = wmem.getLong(curProbeOffsetBytes); + final long curArrayHash = wseg.get(JAVA_LONG_UNALIGNED, curProbeOffsetBytes); if (curArrayHash == EMPTY) { - wmem.putLong(curProbeOffsetBytes, hash); + wseg.set(JAVA_LONG_UNALIGNED, curProbeOffsetBytes, hash); return curProbe; } curProbe = (curProbe + stride) & arrayMask; @@ -252,19 +253,19 @@ public static int hashInsertOnlyMemory(final WritableMemory wmem, final int lgAr /** * This is a classical Knuth-style Open Addressing, Double Hash insert scheme, but inserts - * values directly into a Memory. + * values directly into a writable MemorySegment. * Returns index ≥ 0 if found (duplicate); < 0 if inserted, inserted at -(index + 1). * Throws an exception if the value is not found and table has no empty slot. * - * @param wmem The WritableMemory that contains the hashTable to insert into. + * @param wseg The writable MemorySegment that contains the hashTable to insert into. * @param lgArrLongs The log_base2(hashTable.length). * See lgArrLongs. * @param hash The hash value to be potentially inserted into an empty slot only if it is not * a duplicate of any other hash value in the table. It must not be zero. 
- * @param memOffsetBytes offset in the WritableMemory where the hash array starts + * @param memOffsetBytes offset in the writable MemorySegment where the hash array starts * @return index ≥ 0 if found (duplicate); < 0 if inserted, inserted at -(index + 1). */ - public static int hashSearchOrInsertMemory(final WritableMemory wmem, final int lgArrLongs, + public static int hashSearchOrInsertMemorySegment(final MemorySegment wseg, final int lgArrLongs, final long hash, final int memOffsetBytes) { final int arrayMask = (1 << lgArrLongs) - 1; // current Size -1 final int stride = getStride(hash, lgArrLongs); @@ -273,9 +274,9 @@ public static int hashSearchOrInsertMemory(final WritableMemory wmem, final int final int loopIndex = curProbe; do { final int curProbeOffsetBytes = (curProbe << 3) + memOffsetBytes; - final long curArrayHash = wmem.getLong(curProbeOffsetBytes); + final long curArrayHash = wseg.get(JAVA_LONG_UNALIGNED, curProbeOffsetBytes); if (curArrayHash == EMPTY) { - wmem.putLong(curProbeOffsetBytes, hash); + wseg.set(JAVA_LONG_UNALIGNED, curProbeOffsetBytes, hash); return ~curProbe; } else if (curArrayHash == hash) { return curProbe; } // curArrayHash is a duplicate // curArrayHash is not a duplicate and not zero, continue searching diff --git a/src/main/java/org/apache/datasketches/thetacommon/ThetaUtil.java b/src/main/java/org/apache/datasketches/thetacommon/ThetaUtil.java index be209ece1..4012cb412 100644 --- a/src/main/java/org/apache/datasketches/thetacommon/ThetaUtil.java +++ b/src/main/java/org/apache/datasketches/thetacommon/ThetaUtil.java @@ -19,8 +19,6 @@ package org.apache.datasketches.thetacommon; -import static org.apache.datasketches.hash.MurmurHash3.hash; - import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; @@ -55,27 +53,6 @@ public final class ThetaUtil { * See Default Nominal Entries */ public static final int DEFAULT_NOMINAL_ENTRIES = 4096; - /** - * The seed 9001 used in the 
sketch update methods is a prime number that - * was chosen very early on in experimental testing. Choosing a seed is somewhat arbitrary, and - * the author cannot prove that this particular seed is somehow superior to other seeds. There - * was some early Internet discussion that a seed of 0 did not produce as clean avalanche diagrams - * as non-zero seeds, but this may have been more related to the MurmurHash2 release, which did - * have some issues. As far as the author can determine, MurmurHash3 does not have these problems. - * - *In order to perform set operations on two sketches it is critical that the same hash - * function and seed are identical for both sketches, otherwise the assumed 1:1 relationship - * between the original source key value and the hashed bit string would be violated. Once - * you have developed a history of stored sketches you are stuck with it. - * - *
WARNING: This seed is used internally by library sketches in different - * packages and thus must be declared public. However, this seed value must not be used by library - * users with the MurmurHash3 function. It should be viewed as existing for exclusive, private - * use by the library. - * - *
See Default Update Seed
- */
- public static final long DEFAULT_UPDATE_SEED = 9001L;
private ThetaUtil() {}
@@ -84,39 +61,6 @@ private ThetaUtil() {}
*/
public static final int MIN_LG_ARR_LONGS = 5;
- /**
- * Check if the two seed hashes are equal. If not, throw an SketchesArgumentException.
- * @param seedHashA the seedHash A
- * @param seedHashB the seedHash B
- * @return seedHashA if they are equal
- */
- public static short checkSeedHashes(final short seedHashA, final short seedHashB) {
- if (seedHashA != seedHashB) {
- throw new SketchesArgumentException(
- "Incompatible Seed Hashes. " + Integer.toHexString(seedHashA & 0XFFFF)
- + ", " + Integer.toHexString(seedHashB & 0XFFFF));
- }
- return seedHashA;
- }
-
- /**
- * Computes and checks the 16-bit seed hash from the given long seed.
- * The seed hash may not be zero in order to maintain compatibility with older serialized
- * versions that did not have this concept.
- * @param seed See Update Hash Seed
- * @return the seed hash.
- */
- public static short computeSeedHash(final long seed) {
- final long[] seedArr = {seed};
- final short seedHash = (short)(hash(seedArr, 0L)[0] & 0xFFFFL);
- if (seedHash == 0) {
- throw new SketchesArgumentException(
- "The given seed: " + seed + " produced a seedHash of zero. "
- + "You must choose a different seed.");
- }
- return seedHash;
- }
-
/**
* Gets the smallest allowed exponent of 2 that it is a sub-multiple of the target by zero,
* one or more resize factors.
diff --git a/src/main/java/org/apache/datasketches/tuple/AnotB.java b/src/main/java/org/apache/datasketches/tuple/AnotB.java
index 908e8f6be..fed710bdd 100644
--- a/src/main/java/org/apache/datasketches/tuple/AnotB.java
+++ b/src/main/java/org/apache/datasketches/tuple/AnotB.java
@@ -31,9 +31,9 @@
import org.apache.datasketches.common.SketchesStateException;
import org.apache.datasketches.common.SuppressFBWarnings;
import org.apache.datasketches.thetacommon.SetOperationCornerCases;
+import org.apache.datasketches.thetacommon.ThetaUtil;
import org.apache.datasketches.thetacommon.SetOperationCornerCases.AnotbAction;
import org.apache.datasketches.thetacommon.SetOperationCornerCases.CornerCase;
-import org.apache.datasketches.thetacommon.ThetaUtil;
/**
* Computes a set difference, A-AND-NOT-B, of two generic tuple sketches.
diff --git a/src/main/java/org/apache/datasketches/tuple/CompactSketch.java b/src/main/java/org/apache/datasketches/tuple/CompactSketch.java
index ec58a5e76..20eac81fb 100644
--- a/src/main/java/org/apache/datasketches/tuple/CompactSketch.java
+++ b/src/main/java/org/apache/datasketches/tuple/CompactSketch.java
@@ -19,15 +19,18 @@
package org.apache.datasketches.tuple;
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
import static org.apache.datasketches.thetacommon.HashOperations.count;
+import java.lang.foreign.MemorySegment;
import java.lang.reflect.Array;
import java.nio.ByteOrder;
import org.apache.datasketches.common.ByteArrayUtil;
import org.apache.datasketches.common.Family;
import org.apache.datasketches.common.SketchesArgumentException;
-import org.apache.datasketches.memory.Memory;
/**
* CompactSketches are never created directly. They are created as a result of
@@ -69,24 +72,24 @@ private enum Flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDER
/**
* This is to create an instance of a CompactSketch given a serialized form
*
- * @param mem Memory object with serialized CompactSketch
+ * @param seg MemorySegment object with serialized CompactSketch
* @param deserializer the SummaryDeserializer
*/
- CompactSketch(final Memory mem, final SummaryDeserializer Nulls and empty sketches are ignored. This implementation uses data in a given Memory that is owned and managed by the caller.
- * This Memory can be off-heap, which if managed properly will greatly reduce the need for
+ * This implementation uses data in a given MemorySegment that is owned and managed by the caller.
+ * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for
* the JVM to perform garbage collection. This implementation uses data in a given Memory that is owned and managed by the caller.
- * This Memory can be off-heap, which if managed properly will greatly reduce the need for
+ * This implementation uses data in a given MemorySegment that is owned and managed by the caller.
+ * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for
* the JVM to perform garbage collection. This implementation uses data in a given Memory that is owned and managed by the caller.
- * This Memory can be off-heap, which if managed properly will greatly reduce the need for
+ * This implementation uses data in a given MemorySegment that is owned and managed by the caller.
+ * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for
* the JVM to perform garbage collection. This implementation uses data in a given Memory that is owned and managed by the caller.
- * This Memory can be off-heap, which if managed properly will greatly reduce the need for
+ * This implementation uses data in a given MemorySegment that is owned and managed by the caller.
+ * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for
* the JVM to perform garbage collection. This implementation uses data in a given Memory that is owned and managed by the caller.
- * This Memory can be off-heap, which if managed properly will greatly reduce the need for
+ * This implementation uses data in a given MemorySegment that is owned and managed by the caller.
+ * This MemorySegment can be off-heap, which if managed properly will greatly reduce the need for
* the JVM to perform garbage collection. V1 dates from roughly Aug 2014 to about May 2015.
@@ -75,36 +82,37 @@ public class BackwardConversions {
* deserializer) {
+ CompactSketch(final MemorySegment seg, final SummaryDeserializer deserializer) {
super(Long.MAX_VALUE, true, null);
int offset = 0;
- final byte preambleLongs = mem.getByte(offset++);
- final byte version = mem.getByte(offset++);
- final byte familyId = mem.getByte(offset++);
+ final byte preambleLongs = seg.get(JAVA_BYTE, offset++);
+ final byte version = seg.get(JAVA_BYTE, offset++);
+ final byte familyId = seg.get(JAVA_BYTE, offset++);
SerializerDeserializer.validateFamily(familyId, preambleLongs);
if (version > serialVersionUID) {
throw new SketchesArgumentException(
"Unsupported serial version. Expected: " + serialVersionUID + " or lower, actual: " + version);
}
SerializerDeserializer
- .validateType(mem.getByte(offset++), SerializerDeserializer.SketchType.CompactSketch);
+ .validateType(seg.get(JAVA_BYTE, offset++), SerializerDeserializer.SketchType.CompactSketch);
if (version <= serialVersionUIDLegacy) { // legacy serial format
- final byte flags = mem.getByte(offset++);
+ final byte flags = seg.get(JAVA_BYTE, offset++);
final boolean isBigEndian = (flags & 1 << FlagsLegacy.IS_BIG_ENDIAN.ordinal()) > 0;
if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) {
throw new SketchesArgumentException("Byte order mismatch");
@@ -94,7 +97,7 @@ private enum Flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDER
empty_ = (flags & 1 << FlagsLegacy.IS_EMPTY.ordinal()) > 0;
final boolean isThetaIncluded = (flags & 1 << FlagsLegacy.IS_THETA_INCLUDED.ordinal()) > 0;
if (isThetaIncluded) {
- thetaLong_ = mem.getLong(offset);
+ thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, offset);
offset += Long.BYTES;
} else {
thetaLong_ = Long.MAX_VALUE;
@@ -103,9 +106,9 @@ private enum Flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDER
if (hasEntries) {
int classNameLength = 0;
if (version == serialVersionWithSummaryClassNameUID) {
- classNameLength = mem.getByte(offset++);
+ classNameLength = seg.get(JAVA_BYTE, offset++);
}
- final int count = mem.getInt(offset);
+ final int count = seg.get(JAVA_INT_UNALIGNED, offset);
offset += Integer.BYTES;
if (version == serialVersionWithSummaryClassNameUID) {
offset += classNameLength;
@@ -113,11 +116,11 @@ private enum Flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDER
hashArr_ = new long[count];
for (int i = 0; i < count; i++) {
- hashArr_[i] = mem.getLong(offset);
+ hashArr_[i] = seg.get(JAVA_LONG_UNALIGNED, offset);
offset += Long.BYTES;
}
for (int i = 0; i < count; i++) {
- offset += readSummary(mem, offset, i, count, deserializer);
+ offset += readSummary(seg, offset, i, count, deserializer);
}
} else {
hashArr_ = new long[0];
@@ -125,7 +128,7 @@ private enum Flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDER
}
} else { // current serial format
offset++; //skip unused byte
- final byte flags = mem.getByte(offset++);
+ final byte flags = seg.get(JAVA_BYTE, offset++);
offset += 2; //skip 2 unused bytes
empty_ = (flags & 1 << Flags.IS_EMPTY.ordinal()) > 0;
thetaLong_ = Long.MAX_VALUE;
@@ -134,11 +137,11 @@ private enum Flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDER
if (preambleLongs == 1) {
count = 1;
} else {
- count = mem.getInt(offset);
+ count = seg.get(JAVA_INT_UNALIGNED, offset);
offset += Integer.BYTES;
offset += 4; // unused
if (preambleLongs > 2) {
- thetaLong_ = mem.getLong(offset);
+ thetaLong_ = seg.get(JAVA_LONG_UNALIGNED, offset);
offset += Long.BYTES;
}
}
@@ -146,18 +149,18 @@ private enum Flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDER
hashArr_ = new long[count];
for (int i = 0; i < count; i++) {
- hashArr_[i] = mem.getLong(offset);
+ hashArr_[i] = seg.get(JAVA_LONG_UNALIGNED, offset);
offset += Long.BYTES;
- offset += readSummary(mem, offset, i, count, deserializer);
+ offset += readSummary(seg, offset, i, count, deserializer);
}
}
}
@SuppressWarnings({"unchecked"})
- private int readSummary(final Memory mem, final int offset, final int i, final int count,
+ private int readSummary(final MemorySegment seg, final int offset, final int i, final int count,
final SummaryDeserializer deserializer) {
- final Memory memRegion = mem.region(offset, mem.getCapacity() - offset);
- final DeserializeResult result = deserializer.heapifySummary(memRegion);
+ final MemorySegment segRegion = seg.asSlice(offset, seg.byteSize() - offset);
+ final DeserializeResult result = deserializer.heapifySummary(segRegion);
final S summary = result.getObject();
final Class summaryType = (Class) result.getObject().getClass();
if (summaryArr_ == null) {
diff --git a/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java
index 4b9afb1a1..708486df1 100644
--- a/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java
+++ b/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java
@@ -19,22 +19,26 @@
package org.apache.datasketches.tuple;
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_FLOAT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_INT_UNALIGNED;
+import static java.lang.foreign.ValueLayout.JAVA_LONG_UNALIGNED;
import static org.apache.datasketches.common.Util.ceilingPowerOf2;
import static org.apache.datasketches.common.Util.checkBounds;
import static org.apache.datasketches.common.Util.exactLog2OfLong;
import static org.apache.datasketches.thetacommon.HashOperations.count;
+import java.lang.foreign.MemorySegment;
import java.lang.reflect.Array;
import java.nio.ByteOrder;
import java.util.Objects;
import org.apache.datasketches.common.ByteArrayUtil;
import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.QuickSelect;
import org.apache.datasketches.common.ResizeFactor;
import org.apache.datasketches.common.SketchesArgumentException;
-import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.thetacommon.HashOperations;
-import org.apache.datasketches.thetacommon.QuickSelect;
import org.apache.datasketches.thetacommon.ThetaUtil;
/**
@@ -167,7 +171,7 @@ private QuickSelectSketch(
/**
* This is to create an instance of a QuickSelectSketch given a serialized form
- * @param mem Memory object with serialized QuickSelectSketch
+ * @param seg MemorySegment object with serialized QuickSelectSketch
* @param deserializer the SummaryDeserializer
* @param summaryFactory the SummaryFactory
* @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated.
@@ -176,26 +180,26 @@ private QuickSelectSketch(
*/
@Deprecated
QuickSelectSketch(
- final Memory mem,
+ final MemorySegment seg,
final SummaryDeserializer deserializer,
final SummaryFactory summaryFactory) {
- this(new Validate<>(), mem, deserializer, summaryFactory);
+ this(new Validate<>(), seg, deserializer, summaryFactory);
}
/*
* This private constructor is used to protect against "Finalizer attacks".
* The private static inner class Validate performs validation and deserialization
- * from the input Memory and may throw exceptions. In order to protect against the attack, we must
+ * from the input MemorySegment and may throw exceptions. In order to protect against the attack, we must
* perform this validation prior to the constructor's super reaches the Object class.
* Making QuickSelectSketch final won't work here because UpdatableSketch is a subclass.
* Using an empty final finalizer() is not recommended and is deprecated as of Java9.
*/
private QuickSelectSketch(
final Validate val,
- final Memory mem,
+ final MemorySegment seg,
final SummaryDeserializer deserializer,
final SummaryFactory summaryFactory) {
- super(val.validate(mem, deserializer), val.myEmpty, summaryFactory);
+ super(val.validate(seg, deserializer), val.myEmpty, summaryFactory);
nomEntries_ = val.myNomEntries;
lgResizeFactor_ = val.myLgResizeFactor;
samplingProbability_ = val.mySamplingProbability;
@@ -222,43 +226,43 @@ private static final class Validate {
@SuppressWarnings("unchecked")
long validate(
- final Memory mem,
+ final MemorySegment seg,
final SummaryDeserializer> deserializer) {
- Objects.requireNonNull(mem, "SourceMemory must not be null.");
+ Objects.requireNonNull(seg, "Source MemorySegment must not be null.");
Objects.requireNonNull(deserializer, "Deserializer must not be null.");
- checkBounds(0, 8, mem.getCapacity());
+ checkBounds(0, 8, seg.byteSize());
int offset = 0;
- final byte preambleLongs = mem.getByte(offset++); //byte 0 PreLongs
- final byte version = mem.getByte(offset++); //byte 1 SerVer
- final byte familyId = mem.getByte(offset++); //byte 2 FamID
+ final byte preambleLongs = seg.get(JAVA_BYTE, offset++); //byte 0 PreLongs
+ final byte version = seg.get(JAVA_BYTE, offset++); //byte 1 SerVer
+ final byte familyId = seg.get(JAVA_BYTE, offset++); //byte 2 FamID
SerializerDeserializer.validateFamily(familyId, preambleLongs);
if (version > serialVersionUID) {
throw new SketchesArgumentException(
"Unsupported serial version. Expected: " + serialVersionUID + " or lower, actual: "
+ version);
}
- SerializerDeserializer.validateType(mem.getByte(offset++), //byte 3
+ SerializerDeserializer.validateType(seg.get(JAVA_BYTE, offset++), //byte 3
SerializerDeserializer.SketchType.QuickSelectSketch);
- final byte flags = mem.getByte(offset++); //byte 4
+ final byte flags = seg.get(JAVA_BYTE, offset++); //byte 4
final boolean isBigEndian = (flags & 1 << Flags.IS_BIG_ENDIAN.ordinal()) > 0;
if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) {
throw new SketchesArgumentException("Endian byte order mismatch");
}
- myNomEntries = 1 << mem.getByte(offset++); //byte 5
- myLgCurrentCapacity = mem.getByte(offset++); //byte 6
- myLgResizeFactor = mem.getByte(offset++); //byte 7
+ myNomEntries = 1 << seg.get(JAVA_BYTE, offset++); //byte 5
+ myLgCurrentCapacity = seg.get(JAVA_BYTE, offset++); //byte 6
+ myLgResizeFactor = seg.get(JAVA_BYTE, offset++); //byte 7
- checkBounds(0, preambleLongs * 8L, mem.getCapacity());
+ checkBounds(0, preambleLongs * 8L, seg.byteSize());
final boolean isInSamplingMode = (flags & 1 << Flags.IS_IN_SAMPLING_MODE.ordinal()) > 0;
- mySamplingProbability = isInSamplingMode ? mem.getFloat(offset) : 1f; //bytes 8 - 11
+ mySamplingProbability = isInSamplingMode ? seg.get(JAVA_FLOAT_UNALIGNED, offset) : 1f; //bytes 8 - 11
if (isInSamplingMode) {
offset += Float.BYTES;
}
final boolean isThetaIncluded = (flags & 1 << Flags.IS_THETA_INCLUDED.ordinal()) > 0;
if (isThetaIncluded) {
- myThetaLong = mem.getLong(offset);
+ myThetaLong = seg.get(JAVA_LONG_UNALIGNED, offset);
offset += Long.BYTES;
} else {
myThetaLong = (long) (Long.MAX_VALUE * (double) mySamplingProbability);
@@ -267,16 +271,16 @@ long validate(
int count = 0;
final boolean hasEntries = (flags & (1 << Flags.HAS_ENTRIES.ordinal())) > 0;
if (hasEntries) {
- count = mem.getInt(offset);
+ count = seg.get(JAVA_INT_UNALIGNED, offset);
offset += Integer.BYTES;
}
final int currentCapacity = 1 << myLgCurrentCapacity;
myHashTable = new long[currentCapacity];
for (int i = 0; i < count; i++) {
- final long hash = mem.getLong(offset);
+ final long hash = seg.get(JAVA_LONG_UNALIGNED, offset);
offset += Long.BYTES;
- final Memory memRegion = mem.region(offset, mem.getCapacity() - offset);
- final DeserializeResult> summaryResult = deserializer.heapifySummary(memRegion);
+ final MemorySegment segRegion = seg.asSlice(offset, seg.byteSize() - offset);
+ final DeserializeResult> summaryResult = deserializer.heapifySummary(segRegion);
final S summary = (S) summaryResult.getObject();
offset += summaryResult.getSize();
//in-place equivalent to insert(hash, summary):
diff --git a/src/main/java/org/apache/datasketches/tuple/SerializerDeserializer.java b/src/main/java/org/apache/datasketches/tuple/SerializerDeserializer.java
index a30d47edf..2ca7c29df 100644
--- a/src/main/java/org/apache/datasketches/tuple/SerializerDeserializer.java
+++ b/src/main/java/org/apache/datasketches/tuple/SerializerDeserializer.java
@@ -19,9 +19,12 @@
package org.apache.datasketches.tuple;
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+
+import java.lang.foreign.MemorySegment;
+
import org.apache.datasketches.common.Family;
import org.apache.datasketches.common.SketchesArgumentException;
-import org.apache.datasketches.memory.Memory;
/**
* Multipurpose serializer-deserializer for a collection of sketches defined by the enum.
@@ -77,12 +80,12 @@ public static void validateType(final byte sketchTypeByte, final SketchType expe
}
/**
- * Gets the sketch type byte from the given Memory image
- * @param mem the given Memory image
+ * Gets the SketchType encoded in the type byte of the given MemorySegment image
+ * @param seg the given MemorySegment image
* @return the SketchType
*/
- public static SketchType getSketchType(final Memory mem) {
- final byte sketchTypeByte = mem.getByte(TYPE_BYTE_OFFSET);
+ public static SketchType getSketchType(final MemorySegment seg) {
+ final byte sketchTypeByte = seg.get(JAVA_BYTE, TYPE_BYTE_OFFSET);
return getSketchType(sketchTypeByte);
}
diff --git a/src/main/java/org/apache/datasketches/tuple/Sketch.java b/src/main/java/org/apache/datasketches/tuple/Sketch.java
index f0003bf3c..8fd81f78c 100644
--- a/src/main/java/org/apache/datasketches/tuple/Sketch.java
+++ b/src/main/java/org/apache/datasketches/tuple/Sketch.java
@@ -24,7 +24,7 @@
import org.apache.datasketches.thetacommon.BinomialBoundsN;
/**
- * This is an equivalent to org.apache.datasketches.theta.Sketch with
+ * This is an equivalent to org.apache.datasketches.theta.Sketch with
* addition of a user-defined Summary object associated with every unique entry
* in the sketch.
* @param Type of Summary
diff --git a/src/main/java/org/apache/datasketches/tuple/Sketches.java b/src/main/java/org/apache/datasketches/tuple/Sketches.java
index fba8ab604..6eef7f88d 100644
--- a/src/main/java/org/apache/datasketches/tuple/Sketches.java
+++ b/src/main/java/org/apache/datasketches/tuple/Sketches.java
@@ -19,7 +19,7 @@
package org.apache.datasketches.tuple;
-import org.apache.datasketches.memory.Memory;
+import java.lang.foreign.MemorySegment;
/**
* Convenient static methods to instantiate generic tuple sketches.
@@ -37,36 +37,36 @@ public static Sketch createEmptySketch() {
}
/**
- * Instantiate a Sketch from a given Memory.
+ * Instantiate a Sketch from a given MemorySegment.
* @param Type of Summary
- * @param mem Memory object representing a Sketch
+ * @param seg MemorySegment object representing a Sketch
* @param deserializer instance of SummaryDeserializer
- * @return Sketch created from its Memory representation
+ * @return Sketch created from its MemorySegment representation
*/
public static Sketch heapifySketch(
- final Memory mem,
+ final MemorySegment seg,
final SummaryDeserializer deserializer) {
- final SerializerDeserializer.SketchType sketchType = SerializerDeserializer.getSketchType(mem);
+ final SerializerDeserializer.SketchType sketchType = SerializerDeserializer.getSketchType(seg);
if (sketchType == SerializerDeserializer.SketchType.QuickSelectSketch) {
- return new QuickSelectSketch<>(mem, deserializer, null);
+ return new QuickSelectSketch<>(seg, deserializer, null);
}
- return new CompactSketch<>(mem, deserializer);
+ return new CompactSketch<>(seg, deserializer);
}
/**
- * Instantiate UpdatableSketch from a given Memory
+ * Instantiate UpdatableSketch from a given MemorySegment
* @param Type of update value
* @param Type of Summary
- * @param mem Memory object representing a Sketch
+ * @param seg MemorySegment object representing a Sketch
* @param deserializer instance of SummaryDeserializer
* @param summaryFactory instance of SummaryFactory
- * @return Sketch created from its Memory representation
+ * @return Sketch created from its MemorySegment representation
*/
public static > UpdatableSketch heapifyUpdatableSketch(
- final Memory mem,
+ final MemorySegment seg,
final SummaryDeserializer deserializer,
final SummaryFactory summaryFactory) {
- return new UpdatableSketch<>(mem, deserializer, summaryFactory);
+ return new UpdatableSketch<>(seg, deserializer, summaryFactory);
}
}
diff --git a/src/main/java/org/apache/datasketches/tuple/SummaryDeserializer.java b/src/main/java/org/apache/datasketches/tuple/SummaryDeserializer.java
index 8edbc3318..6393019b1 100644
--- a/src/main/java/org/apache/datasketches/tuple/SummaryDeserializer.java
+++ b/src/main/java/org/apache/datasketches/tuple/SummaryDeserializer.java
@@ -19,7 +19,7 @@
package org.apache.datasketches.tuple;
-import org.apache.datasketches.memory.Memory;
+import java.lang.foreign.MemorySegment;
/**
* Interface for deserializing user-defined Summary
@@ -29,14 +29,14 @@ public interface SummaryDeserializer {
/**
* This is to create an instance of a Summary given a serialized representation.
- * The user may assume that the start of the given Memory is the correct place to start
+ * The user may assume that the start of the given MemorySegment is the correct place to start
* deserializing. However, the user must be able to determine the number of bytes required to
- * deserialize the summary as the capacity of the given Memory may
+ * deserialize the summary as the capacity of the given MemorySegment may
* include multiple such summaries and may be much larger than required for a single summary.
- * @param mem Memory object with serialized representation of a Summary
- * @return DeserializedResult object, which contains a Summary object and number of bytes read
- * from the Memory
+ * @param seg MemorySegment object with serialized representation of a Summary
+ * @return DeserializeResult object, which contains a Summary object and number of bytes read
+ * from the MemorySegment
*/
- public DeserializeResult heapifySummary(Memory mem);
+ public DeserializeResult heapifySummary(MemorySegment seg);
}
diff --git a/src/main/java/org/apache/datasketches/tuple/Union.java b/src/main/java/org/apache/datasketches/tuple/Union.java
index acefa2ab5..f67626d1b 100644
--- a/src/main/java/org/apache/datasketches/tuple/Union.java
+++ b/src/main/java/org/apache/datasketches/tuple/Union.java
@@ -21,8 +21,8 @@
import static java.lang.Math.min;
+import org.apache.datasketches.common.QuickSelect;
import org.apache.datasketches.common.SketchesArgumentException;
-import org.apache.datasketches.thetacommon.QuickSelect;
import org.apache.datasketches.thetacommon.ThetaUtil;
/**
diff --git a/src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java b/src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java
index 36743618b..b706ca11e 100644
--- a/src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java
+++ b/src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java
@@ -19,11 +19,12 @@
package org.apache.datasketches.tuple;
+import static org.apache.datasketches.common.Util.DEFAULT_UPDATE_SEED;
+
+import java.lang.foreign.MemorySegment;
import java.nio.ByteBuffer;
import org.apache.datasketches.hash.MurmurHash3;
-import org.apache.datasketches.memory.Memory;
-import org.apache.datasketches.thetacommon.ThetaUtil;
/**
* An extension of QuickSelectSketch<S>, which can be updated with many types of keys.
@@ -63,7 +64,7 @@ public UpdatableSketch(final int nomEntries, final int lgResizeFactor,
/**
* This is to create an instance of a sketch given a serialized form
- * @param srcMem Memory object with data of a serialized UpdatableSketch
+ * @param srcSeg MemorySegment object with data of a serialized UpdatableSketch
* @param deserializer instance of SummaryDeserializer
* @param summaryFactory instance of SummaryFactory
* @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated.
@@ -72,10 +73,10 @@ public UpdatableSketch(final int nomEntries, final int lgResizeFactor,
*/
@Deprecated
public UpdatableSketch(
- final Memory srcMem,
+ final MemorySegment srcSeg,
final SummaryDeserializer deserializer,
final SummaryFactory summaryFactory) {
- super(srcMem, deserializer, summaryFactory);
+ super(srcSeg, deserializer, summaryFactory);
}
/**
@@ -136,7 +137,7 @@ public void update(final String key, final U value) {
*/
public void update(final byte[] key, final U value) {
if ((key == null) || (key.length == 0)) { return; }
- insertOrIgnore(MurmurHash3.hash(key, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1, value);
+ insertOrIgnore(MurmurHash3.hash(key, DEFAULT_UPDATE_SEED)[0] >>> 1, value);
}
/**
@@ -148,7 +149,7 @@ public void update(final byte[] key, final U value) {
*/
public void update(final ByteBuffer buffer, final U value) {
if (buffer == null || buffer.hasRemaining() == false) { return; }
- insertOrIgnore(MurmurHash3.hash(buffer, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1, value);
+ insertOrIgnore(MurmurHash3.hash(buffer, DEFAULT_UPDATE_SEED)[0] >>> 1, value);
}
/**
@@ -160,7 +161,7 @@ public void update(final ByteBuffer buffer, final U value) {
*/
public void update(final int[] key, final U value) {
if ((key == null) || (key.length == 0)) { return; }
- insertOrIgnore(MurmurHash3.hash(key, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1, value);
+ insertOrIgnore(MurmurHash3.hash(key, DEFAULT_UPDATE_SEED)[0] >>> 1, value);
}
/**
@@ -172,7 +173,7 @@ public void update(final int[] key, final U value) {
*/
public void update(final long[] key, final U value) {
if ((key == null) || (key.length == 0)) { return; }
- insertOrIgnore(MurmurHash3.hash(key, ThetaUtil.DEFAULT_UPDATE_SEED)[0] >>> 1, value);
+ insertOrIgnore(MurmurHash3.hash(key, DEFAULT_UPDATE_SEED)[0] >>> 1, value);
}
void insertOrIgnore(final long hash, final U value) {
diff --git a/src/main/java/org/apache/datasketches/tuple/Util.java b/src/main/java/org/apache/datasketches/tuple/Util.java
index 92193ca56..46f069724 100644
--- a/src/main/java/org/apache/datasketches/tuple/Util.java
+++ b/src/main/java/org/apache/datasketches/tuple/Util.java
@@ -21,13 +21,11 @@
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.datasketches.common.Util.ceilingPowerOf2;
-import static org.apache.datasketches.hash.MurmurHash3.hash;
-import static org.apache.datasketches.memory.XxHash.hashCharArr;
-import static org.apache.datasketches.memory.XxHash.hashString;
+import static org.apache.datasketches.hash.XxHash.hashCharArr;
+import static org.apache.datasketches.hash.XxHash.hashString;
import java.lang.reflect.Array;
-import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.thetacommon.ThetaUtil;
/**
@@ -58,36 +56,6 @@ public static final byte[] stringToByteArray(final String value) {
return value.getBytes(UTF_8);
}
- /**
- * Computes and checks the 16-bit seed hash from the given long seed.
- * The seed hash may not be zero in order to maintain compatibility with older serialized
- * versions that did not have this concept.
- * @param seed See Update Hash Seed
- * @return the seed hash.
- */
- public static short computeSeedHash(final long seed) {
- final long[] seedArr = {seed};
- final short seedHash = (short)((hash(seedArr, 0L)[0]) & 0xFFFFL);
- if (seedHash == 0) {
- throw new SketchesArgumentException(
- "The given seed: " + seed + " produced a seedHash of zero. "
- + "You must choose a different seed.");
- }
- return seedHash;
- }
-
- /**
- * Checks the two given seed hashes. If they are not equal, this method throws an Exception.
- * @param seedHashA given seed hash A
- * @param seedHashB given seed hash B
- */
- public static final void checkSeedHashes(final short seedHashA, final short seedHashB) {
- if (seedHashA != seedHashB) {
- throw new SketchesArgumentException("Incompatible Seed Hashes. " + seedHashA + ", "
- + seedHashB);
- }
- }
-
/**
* Gets the starting capacity of a new sketch given the Nominal Entries and the log Resize Factor.
* @param nomEntries the given Nominal Entries
diff --git a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSketch.java b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSketch.java
index 1bb9edeca..d51451cf5 100644
--- a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSketch.java
+++ b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSketch.java
@@ -19,8 +19,9 @@
package org.apache.datasketches.tuple.adouble;
+import java.lang.foreign.MemorySegment;
+
import org.apache.datasketches.common.ResizeFactor;
-import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.tuple.UpdatableSketch;
/**
@@ -59,17 +60,17 @@ public DoubleSketch(final int lgK, final int lgResizeFactor, final float samplin
}
/**
- * Constructs this sketch from a Memory image, which must be from an DoubleSketch, and
+ * Constructs this sketch from a MemorySegment image, which must be from a DoubleSketch, and
* usually with data.
- * @param mem the given Memory
+ * @param seg the given MemorySegment
* @param mode The DoubleSummary mode to be used
* @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated.
* This capability will be removed in a future release.
* Heapifying a CompactSketch is not deprecated.
*/
@Deprecated
- public DoubleSketch(final Memory mem, final DoubleSummary.Mode mode) {
- super(mem, new DoubleSummaryDeserializer(), new DoubleSummaryFactory(mode));
+ public DoubleSketch(final MemorySegment seg, final DoubleSummary.Mode mode) {
+ super(seg, new DoubleSummaryDeserializer(), new DoubleSummaryFactory(mode));
}
@Override
diff --git a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java
index f678fba06..e7268ffef 100644
--- a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java
+++ b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java
@@ -19,8 +19,12 @@
package org.apache.datasketches.tuple.adouble;
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.lang.foreign.ValueLayout.JAVA_DOUBLE_UNALIGNED;
+
+import java.lang.foreign.MemorySegment;
+
import org.apache.datasketches.common.ByteArrayUtil;
-import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.tuple.DeserializeResult;
import org.apache.datasketches.tuple.UpdatableSummary;
@@ -146,13 +150,13 @@ public byte[] toByteArray() {
/**
* Creates an instance of the DoubleSummary given a serialized representation
- * @param mem Memory object with serialized DoubleSummary
+ * @param seg MemorySegment object with serialized DoubleSummary
* @return DeserializedResult object, which contains a DoubleSummary object and number of bytes
- * read from the Memory
+ * read from the MemorySegment
*/
- public static DeserializeResult
V2 is short-lived and dates from roughly Mid May 2015 to about June 1st, 2015.
@@ -179,54 +187,54 @@ public static Memory convertSerVer3toSerVer1(final CompactSketch skV3) {
*
* @param skV3 a SerVer3, ordered CompactSketch
* @param seed used for checking the seed hash (if one exists).
- * @return a SerVer2 SetSketch as Memory object.
+ * @return a SerVer2 SetSketch as a MemorySegment object.
*/
- public static Memory convertSerVer3toSerVer2(final CompactSketch skV3, final long seed) {
- final short seedHash = ThetaUtil.computeSeedHash(seed);
- WritableMemory wmem = null;
+ public static MemorySegment convertSerVer3toSerVer2(final CompactSketch skV3, final long seed) {
+ final short seedHash = Util.computeSeedHash(seed);
+ MemorySegment wseg = null;
if (skV3 instanceof EmptyCompactSketch) {
- wmem = WritableMemory.allocate(8);
- wmem.putByte(0, (byte) 1); //preLongs
- wmem.putByte(1, (byte) 2); //SerVer
- wmem.putByte(2, (byte) 3); //SetSketch
+ wseg = MemorySegment.ofArray(new long[1]);
+ wseg.set(JAVA_BYTE, 0, (byte) 1); //preLongs
+ wseg.set(JAVA_BYTE, 1, (byte) 2); //SerVer
+ wseg.set(JAVA_BYTE, 2, (byte) 3); //SetSketch
final byte flags = (byte) 0xE; //NoRebuild, Empty, ReadOnly, LE
- wmem.putByte(5, flags);
- wmem.putShort(6, seedHash);
- return wmem;
+ wseg.set(JAVA_BYTE, 5, flags);
+ wseg.set(JAVA_SHORT_UNALIGNED, 6, seedHash);
+ return wseg;
}
if (skV3 instanceof SingleItemSketch) {
final SingleItemSketch sis = (SingleItemSketch) skV3;
- wmem = WritableMemory.allocate(24);
- wmem.putByte(0, (byte) 2); //preLongs
- wmem.putByte(1, (byte) 2); //SerVer
- wmem.putByte(2, (byte) 3); //SetSketch
+ wseg = MemorySegment.ofArray(new long[3]);
+ wseg.set(JAVA_BYTE, 0, (byte) 2); //preLongs
+ wseg.set(JAVA_BYTE, 1, (byte) 2); //SerVer
+ wseg.set(JAVA_BYTE, 2, (byte) 3); //SetSketch
final byte flags = (byte) 0xA; //NoRebuild, notEmpty, ReadOnly, LE
- wmem.putByte(5, flags);
- wmem.putShort(6, seedHash);
- wmem.putInt(8, 1);
+ wseg.set(JAVA_BYTE, 5, flags);
+ wseg.set(JAVA_SHORT_UNALIGNED, 6, seedHash);
+ wseg.set(JAVA_INT_UNALIGNED, 8, 1);
final long[] arr = sis.getCache();
- wmem.putLong(16, arr[0]);
- return wmem;
+ wseg.set(JAVA_LONG_UNALIGNED, 16, arr[0]);
+ return wseg;
}
//General CompactSketch
final int preLongs = skV3.getCompactPreambleLongs();
final int entries = skV3.getRetainedEntries(true);
final boolean unordered = !(skV3.isOrdered());
final byte flags = (byte) (0xA | (unordered ? 16 : 0)); //Unordered, NoRebuild, notEmpty, ReadOnly, LE
- wmem = WritableMemory.allocate((preLongs + entries) << 3);
- wmem.putByte(0, (byte) preLongs); //preLongs
- wmem.putByte(1, (byte) 2); //SerVer
- wmem.putByte(2, (byte) 3); //SetSketch
+ wseg = MemorySegment.ofArray(new byte[(preLongs + entries) << 3]);
+ wseg.set(JAVA_BYTE, 0, (byte) preLongs); //preLongs
+ wseg.set(JAVA_BYTE, 1, (byte) 2); //SerVer
+ wseg.set(JAVA_BYTE, 2, (byte) 3); //SetSketch
- wmem.putByte(5, flags);
- wmem.putShort(6, seedHash);
- wmem.putInt(8, entries);
+ wseg.set(JAVA_BYTE, 5, flags);
+ wseg.set(JAVA_SHORT_UNALIGNED, 6, seedHash);
+ wseg.set(JAVA_INT_UNALIGNED, 8, entries);
if (preLongs == 3) {
- wmem.putLong(16, skV3.getThetaLong());
+ wseg.set(JAVA_LONG_UNALIGNED, 16, skV3.getThetaLong());
}
final long[] arr = skV3.getCache();
- wmem.putLongArray(preLongs * 8L, arr, 0, entries);
- return wmem;
+ MemorySegment.copy(arr, 0, wseg, JAVA_LONG_UNALIGNED, preLongs << 3, entries);
+ return wseg;
}
}
diff --git a/src/test/java/org/apache/datasketches/theta/BitPackingTest.java b/src/test/java/org/apache/datasketches/theta/BitPackingTest.java
index c155bef77..35de3e342 100644
--- a/src/test/java/org/apache/datasketches/theta/BitPackingTest.java
+++ b/src/test/java/org/apache/datasketches/theta/BitPackingTest.java
@@ -22,6 +22,7 @@
import static org.testng.Assert.assertEquals;
import org.apache.datasketches.common.Util;
+import org.apache.datasketches.theta.BitPacking;
import org.testng.annotations.Test;
public class BitPackingTest {
diff --git a/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java b/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java
index 906c1914b..8541ed6bc 100644
--- a/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java
+++ b/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java
@@ -26,10 +26,21 @@
import static org.testng.Assert.assertNull;
import static org.testng.Assert.assertTrue;
+import java.lang.foreign.MemorySegment;
import org.apache.datasketches.common.Family;
import org.apache.datasketches.common.SketchesArgumentException;
-import org.apache.datasketches.memory.Memory;
-import org.apache.datasketches.memory.WritableMemory;
+import org.apache.datasketches.theta.CompactSketch;
+import org.apache.datasketches.theta.DirectCompactSketch;
+import org.apache.datasketches.theta.EmptyCompactSketch;
+import org.apache.datasketches.theta.HashIterator;
+import org.apache.datasketches.theta.HeapCompactSketch;
+import org.apache.datasketches.theta.Intersection;
+import org.apache.datasketches.theta.SingleItemSketch;
+import org.apache.datasketches.theta.Sketch;
+import org.apache.datasketches.theta.Sketches;
+import org.apache.datasketches.theta.UpdateSketch;
+import org.apache.datasketches.theta.WrappedCompactCompressedSketch;
+import org.apache.datasketches.theta.WrappedCompactSketch;
import org.testng.annotations.Test;
import java.lang.foreign.Arena;
@@ -60,36 +71,36 @@ public void checkHeapifyWrap(int k, int u, boolean ordered) {
usk.update(i);
}
- /****ON HEAP MEMORY -- HEAPIFY****/
+ /****ON HEAP MemorySegment -- HEAPIFY****/
CompactSketch refSk = usk.compact(ordered, null);
byte[] barr = refSk.toByteArray();
- Memory srcMem = Memory.wrap(barr);
- CompactSketch testSk = (CompactSketch) Sketch.heapify(srcMem);
+ MemorySegment srcSeg = MemorySegment.ofArray(barr);
+ CompactSketch testSk = (CompactSketch) Sketch.heapify(srcSeg);
checkByRange(refSk, testSk, u, ordered);
/**Via byte[]**/
byte[] byteArray = refSk.toByteArray();
- Memory heapROMem = Memory.wrap(byteArray);
- testSk = (CompactSketch)Sketch.heapify(heapROMem);
+ MemorySegment heapROSeg = MemorySegment.ofArray(byteArray).asReadOnly();
+ testSk = (CompactSketch)Sketch.heapify(heapROSeg);
checkByRange(refSk, testSk, u, ordered);
- /****OFF HEAP MEMORY -- WRAP****/
- //Prepare Memory for direct
+ /****OFF HEAP MemorySegment -- WRAP****/
+ //Prepare MemorySegment for direct
int bytes = usk.getCompactBytes(); //for Compact
try (Arena arena = Arena.ofConfined()) {
- WritableMemory directMem = WritableMemory.allocateDirect(bytes, arena);
+ MemorySegment directSeg = arena.allocate(bytes);
/**Via CompactSketch.compact**/
- refSk = usk.compact(ordered, directMem);
- testSk = (CompactSketch)Sketch.wrap(directMem);
+ refSk = usk.compact(ordered, directSeg);
+ testSk = (CompactSketch)Sketch.wrap(directSeg);
checkByRange(refSk, testSk, u, ordered);
/**Via CompactSketch.compact**/
- testSk = (CompactSketch)Sketch.wrap(directMem);
+ testSk = (CompactSketch)Sketch.wrap(directSeg);
checkByRange(refSk, testSk, u, ordered);
} catch (final Exception e) {
throw new RuntimeException(e);
@@ -111,9 +122,9 @@ private static void checkEmptySketch(Sketch testSk) {
assertTrue(testSk instanceof EmptyCompactSketch);
assertTrue(testSk.isEmpty());
assertTrue(testSk.isOrdered());
- assertNull(testSk.getMemory());
+ assertNull(testSk.getMemorySegment());
assertFalse(testSk.isDirect());
- assertFalse(testSk.hasMemory());
+ assertFalse(testSk.hasMemorySegment());
assertEquals(testSk.getSeedHash(), 0);
assertEquals(testSk.getRetainedEntries(true), 0);
assertEquals(testSk.getEstimate(), 0.0, 0.0);
@@ -129,9 +140,9 @@ private static void checkSingleItemSketch(Sketch testSk, Sketch refSk) {
assertTrue(testSk instanceof SingleItemSketch);
assertFalse(testSk.isEmpty());
assertTrue(testSk.isOrdered());
- assertNull(testSk.getMemory());
+ assertNull(testSk.getMemorySegment());
assertFalse(testSk.isDirect());
- assertFalse(testSk.hasMemory());
+ assertFalse(testSk.hasMemorySegment());
assertEquals(testSk.getSeedHash(), refSk.getSeedHash());
assertEquals(testSk.getRetainedEntries(true), 1);
assertEquals(testSk.getEstimate(), 1.0, 0.0);
@@ -147,9 +158,9 @@ private static void checkOtherCompactSketch(Sketch testSk, Sketch refSk, boolean
assertFalse(testSk.isEmpty());
assertNotNull(testSk.iterator());
assertEquals(testSk.isOrdered(), ordered);
- if (refSk.hasMemory()) {
- assertTrue(testSk.hasMemory());
- assertNotNull(testSk.getMemory());
+ if (refSk.hasMemorySegment()) {
+ assertTrue(testSk.hasMemorySegment());
+ assertNotNull(testSk.getMemorySegment());
if (ordered) {
assertTrue(testSk.isOrdered());
} else {
@@ -161,7 +172,7 @@ private static void checkOtherCompactSketch(Sketch testSk, Sketch refSk, boolean
assertFalse(testSk.isDirect());
}
} else {
- assertFalse(testSk.hasMemory());
+ assertFalse(testSk.hasMemorySegment());
assertTrue(testSk instanceof HeapCompactSketch);
}
assertEquals(testSk.getSeedHash(), refSk.getSeedHash());
@@ -178,14 +189,14 @@ public void checkDirectSingleItemSketch() {
UpdateSketch sk = Sketches.updateSketchBuilder().build();
sk.update(1);
int bytes = sk.getCompactBytes();
- WritableMemory wmem = WritableMemory.allocate(bytes);
- sk.compact(true, wmem);
- Sketch csk2 = Sketch.heapify(wmem);
+ MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]);
+ sk.compact(true, wseg);
+ Sketch csk2 = Sketch.heapify(wseg);
assertTrue(csk2 instanceof SingleItemSketch);
}
@Test(expectedExceptions = SketchesArgumentException.class)
- public void checkMemTooSmall() {
+ public void checkSegTooSmall() {
int k = 512;
int u = k;
boolean ordered = false;
@@ -196,12 +207,12 @@ public void checkMemTooSmall() {
int bytes = usk.getCompactBytes();
byte[] byteArray = new byte[bytes -8]; //too small
- WritableMemory mem = WritableMemory.writableWrap(byteArray);
- usk.compact(ordered, mem);
+ MemorySegment seg = MemorySegment.ofArray(byteArray);
+ usk.compact(ordered, seg);
}
@Test(expectedExceptions = SketchesArgumentException.class)
- public void checkMemTooSmallOrdered() {
+ public void checkSegTooSmallOrdered() {
int k = 512;
int u = k;
boolean ordered = true;
@@ -212,8 +223,8 @@ public void checkMemTooSmallOrdered() {
int bytes = usk.getCompactBytes();
byte[] byteArray = new byte[bytes -8]; //too small
- WritableMemory mem = WritableMemory.writableWrap(byteArray);
- usk.compact(ordered, mem);
+ MemorySegment seg = MemorySegment.ofArray(byteArray);
+ usk.compact(ordered, seg);
}
@Test
@@ -230,38 +241,38 @@ public void checkCompactCachePart() {
private static final boolean COMPACT = true;
private static final boolean EMPTY = true;
private static final boolean DIRECT = true;
- private static final boolean MEMORY = true;
+ private static final boolean SEGMENT = true;
private static final boolean ORDERED = true;
private static final boolean ESTIMATION = true;
@Test
/**
- * Empty, memory-based Compact sketches are always ordered
+ * Empty, segment-based Compact sketches are always ordered
*/
- public void checkEmptyMemoryCompactSketch() {
+ public void checkEmptyMemorySegmentCompactSketch() {
UpdateSketch sk = Sketches.updateSketchBuilder().build();
- WritableMemory wmem1 = WritableMemory.allocate(16);
- CompactSketch csk1 = sk.compact(false, wmem1); //the first parameter is ignored when empty
- State state1 = new State("DirectCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, MEMORY, ORDERED, !ESTIMATION);
+ MemorySegment wseg1 = MemorySegment.ofArray(new byte[16]);
+ CompactSketch csk1 = sk.compact(false, wseg1); //the first parameter is ignored when empty
+ State state1 = new State("DirectCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, SEGMENT, ORDERED, !ESTIMATION);
state1.check(csk1);
- WritableMemory wmem2 = WritableMemory.allocate(16);
- CompactSketch csk2 = sk.compact(false, wmem2);
+ MemorySegment wseg2 = MemorySegment.ofArray(new byte[16]);
+ CompactSketch csk2 = sk.compact(false, wseg2);
state1.check(csk2);
- assertNotEquals(csk1, csk2); //different object because memory is valid
+ assertNotEquals(csk1, csk2); //different object because MemorySegment is valid
assertFalse(csk1 == csk2);
- WritableMemory wmem3 = WritableMemory.allocate(16);
- CompactSketch csk3 = csk1.compact(false, wmem3);
+ MemorySegment wseg3 = MemorySegment.ofArray(new byte[16]);
+ CompactSketch csk3 = csk1.compact(false, wseg3);
state1.check(csk3);
- assertNotEquals(csk1, csk3); //different object because memory is valid
+ assertNotEquals(csk1, csk3); //different object because MemorySegment is valid
assertFalse(csk1 == csk3);
CompactSketch csk4 = csk1.compact(false, null);
- State state4 = new State("EmptyCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION);
+ State state4 = new State("EmptyCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, !SEGMENT, ORDERED, !ESTIMATION);
state4.check(csk4);
assertNotEquals(csk1, csk4); //different object because on heap
@@ -276,29 +287,29 @@ public void checkEmptyMemoryCompactSketch() {
@Test
/**
- * Single-Item, memory-based Compact sketches are always ordered:
+ * Single-Item, segment-based Compact sketches are always ordered:
*/
- public void checkSingleItemMemoryCompactSketch() {
+ public void checkSingleItemMemorySegmentCompactSketch() {
UpdateSketch sk = Sketches.updateSketchBuilder().build();
sk.update(1);
- WritableMemory wmem1 = WritableMemory.allocate(16);
- CompactSketch csk1 = sk.compact(false, wmem1); //the first parameter is ignored when single item
- State state1 = new State("DirectCompactSketch", 1, 16, COMPACT, !EMPTY, !DIRECT, MEMORY, ORDERED, !ESTIMATION);
+ MemorySegment wseg1 = MemorySegment.ofArray(new byte[16]);
+ CompactSketch csk1 = sk.compact(false, wseg1); //the first parameter is ignored when single item
+ State state1 = new State("DirectCompactSketch", 1, 16, COMPACT, !EMPTY, !DIRECT, SEGMENT, ORDERED, !ESTIMATION);
state1.check(csk1);
- WritableMemory wmem2 = WritableMemory.allocate(16);
- CompactSketch csk2 = sk.compact(false, wmem2); //the first parameter is ignored when single item
+ MemorySegment wseg2 = MemorySegment.ofArray(new byte[16]);
+ CompactSketch csk2 = sk.compact(false, wseg2); //the first parameter is ignored when single item
state1.check(csk2);
- assertNotEquals(csk1, csk2); //different object because memory is valid
+ assertNotEquals(csk1, csk2); //different object because segment is valid
assertFalse(csk1 == csk2);
- WritableMemory wmem3 = WritableMemory.allocate(16);
- CompactSketch csk3 = csk1.compact(false, wmem3);
+ MemorySegment wseg3 = MemorySegment.ofArray(new byte[16]);
+ CompactSketch csk3 = csk1.compact(false, wseg3);
state1.check(csk3);
- assertNotEquals(csk1, csk3); //different object because memory is valid
+ assertNotEquals(csk1, csk3); //different object because segment is valid
assertFalse(csk1 == csk3);
CompactSketch cskc = csk1.compact();
@@ -309,31 +320,31 @@ public void checkSingleItemMemoryCompactSketch() {
}
@Test
- public void checkMultipleItemMemoryCompactSketch() {
+ public void checkMultipleItemMemorySegmentCompactSketch() {
UpdateSketch sk = Sketches.updateSketchBuilder().build();
//This sequence is naturally out-of-order by the hash values.
sk.update(1);
sk.update(2);
sk.update(3);
- WritableMemory wmem1 = WritableMemory.allocate(50);
- CompactSketch csk1 = sk.compact(true, wmem1);
- State state1 = new State("DirectCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, MEMORY, ORDERED, !ESTIMATION);
+ MemorySegment wseg1 = MemorySegment.ofArray(new byte[50]);
+ CompactSketch csk1 = sk.compact(true, wseg1);
+ State state1 = new State("DirectCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, SEGMENT, ORDERED, !ESTIMATION);
state1.check(csk1);
- WritableMemory wmem2 = WritableMemory.allocate(50);
- CompactSketch csk2 = sk.compact(false, wmem2);
- State state2 = new State("DirectCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, MEMORY, !ORDERED, !ESTIMATION);
+ MemorySegment wseg2 = MemorySegment.ofArray(new byte[50]);
+ CompactSketch csk2 = sk.compact(false, wseg2);
+ State state2 = new State("DirectCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, SEGMENT, !ORDERED, !ESTIMATION);
state2.check(csk2);
- assertNotEquals(csk1, csk2); //different object because memory is valid
+ assertNotEquals(csk1, csk2); //different object because segment is valid
assertFalse(csk1 == csk2);
- WritableMemory wmem3 = WritableMemory.allocate(50);
- CompactSketch csk3 = csk1.compact(false, wmem3);
+ MemorySegment wseg3 = MemorySegment.ofArray(new byte[50]);
+ CompactSketch csk3 = csk1.compact(false, wseg3);
state2.check(csk3);
- assertNotEquals(csk1, csk3); //different object because memory is valid
+ assertNotEquals(csk1, csk3); //different object because segment is valid
assertFalse(csk1 == csk3);
CompactSketch cskc = csk1.compact();
@@ -352,7 +363,7 @@ public void checkEmptyHeapCompactSketch() {
UpdateSketch sk = Sketches.updateSketchBuilder().build();
CompactSketch csk1 = sk.compact(false, null); //the first parameter is ignored when empty
- State state1 = new State("EmptyCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION);
+ State state1 = new State("EmptyCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, !SEGMENT, ORDERED, !ESTIMATION);
state1.check(csk1);
CompactSketch csk2 = sk.compact(false, null); //the first parameter is ignored when empty
@@ -383,7 +394,7 @@ public void checkSingleItemHeapCompactSketch() {
sk.update(1);
CompactSketch csk1 = sk.compact(false, null); //the first parameter is ignored when single item
- State state1 = new State("SingleItemSketch", 1, 16, COMPACT, !EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION);
+ State state1 = new State("SingleItemSketch", 1, 16, COMPACT, !EMPTY, !DIRECT, !SEGMENT, ORDERED, !ESTIMATION);
state1.check(csk1);
CompactSketch csk2 = sk.compact(false, null); //the first parameter is ignored when single item
@@ -414,11 +425,11 @@ public void checkMultipleItemHeapCompactSketch() {
sk.update(3);
CompactSketch csk1 = sk.compact(true, null); //creates a new object
- State state1 = new State("HeapCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION);
+ State state1 = new State("HeapCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, !SEGMENT, ORDERED, !ESTIMATION);
state1.check(csk1);
CompactSketch csk2 = sk.compact(false, null); //creates a new object, unordered
- State state2 = new State("HeapCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, !MEMORY, !ORDERED, !ESTIMATION);
+ State state2 = new State("HeapCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, !SEGMENT, !ORDERED, !ESTIMATION);
state2.check(csk2);
assertNotEquals(csk1, csk2); //order is different and different objects
@@ -427,10 +438,10 @@ public void checkMultipleItemHeapCompactSketch() {
CompactSketch csk3 = csk1.compact(true, null);
state1.check(csk3);
- assertEquals(csk1, csk3); //the same object because wmem = null and csk1.ordered = dstOrdered
+ assertEquals(csk1, csk3); //the same object because wseg = null and csk1.ordered = dstOrdered
assertTrue(csk1 == csk3);
- assertNotEquals(csk2, csk3); //different object because wmem = null and csk2.ordered = false && dstOrdered = true
+ assertNotEquals(csk2, csk3); //different object because wseg = null and csk2.ordered = false && dstOrdered = true
assertFalse(csk2 == csk3);
CompactSketch cskc = csk1.compact();
@@ -445,19 +456,19 @@ public void checkHeapifySingleItemSketch() {
UpdateSketch sk = Sketches.updateSketchBuilder().build();
sk.update(1);
int bytes = Sketches.getMaxCompactSketchBytes(2); //1 more than needed
- WritableMemory wmem = WritableMemory.allocate(bytes);
- sk.compact(false, wmem);
- Sketch csk = Sketch.heapify(wmem);
+ MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]);
+ sk.compact(false, wseg);
+ Sketch csk = Sketch.heapify(wseg);
assertTrue(csk instanceof SingleItemSketch);
}
@Test
public void checkHeapifyEmptySketch() {
UpdateSketch sk = Sketches.updateSketchBuilder().build();
- WritableMemory wmem = WritableMemory.allocate(16); //empty, but extra bytes
- CompactSketch csk = sk.compact(false, wmem); //ignores order because it is empty
+ MemorySegment wseg = MemorySegment.ofArray(new byte[16]); //empty, but extra bytes
+ CompactSketch csk = sk.compact(false, wseg); //ignores order because it is empty
assertTrue(csk instanceof DirectCompactSketch);
- Sketch csk2 = Sketch.heapify(wmem);
+ Sketch csk2 = Sketch.heapify(wseg);
assertTrue(csk2 instanceof EmptyCompactSketch);
}
@@ -466,7 +477,7 @@ public void checkGetCache() {
UpdateSketch sk = Sketches.updateSketchBuilder().setP((float).5).build();
sk.update(7);
int bytes = sk.getCompactBytes();
- CompactSketch csk = sk.compact(true, WritableMemory.allocate(bytes));
+ CompactSketch csk = sk.compact(true, MemorySegment.ofArray(new byte[bytes]));
long[] cache = csk.getCache();
assertTrue(cache.length == 0);
}
@@ -484,12 +495,12 @@ public void checkHeapCompactSketchCompact() {
/**
* This is checking the empty, single, exact and estimating cases of an off-heap
* sketch to make sure they are being stored properly and to check the new capability
- * of calling compact(boolean, Memory) on an already compact sketch. This allows the
+ * of calling compact(boolean, MemorySegment) on an already compact sketch. This allows the
* user to be able to change the order and heap status of an already compact sketch.
*/
@Test
public void checkDirectCompactSketchCompact() {
- WritableMemory wmem1, wmem2;
+ MemorySegment wseg1, wseg2;
CompactSketch csk1, csk2;
int bytes;
int lgK = 6;
@@ -497,12 +508,12 @@ public void checkDirectCompactSketchCompact() {
//empty
UpdateSketch sk = Sketches.updateSketchBuilder().setLogNominalEntries(lgK).build();
bytes = sk.getCompactBytes(); //empty, 8 bytes
- wmem1 = WritableMemory.allocate(bytes);
- wmem2 = WritableMemory.allocate(bytes);
- csk1 = sk.compact(false, wmem1); //place into memory as unordered
+ wseg1 = MemorySegment.ofArray(new byte[bytes]);
+ wseg2 = MemorySegment.ofArray(new byte[bytes]);
+ csk1 = sk.compact(false, wseg1); //place into MemorySegment as unordered
assertTrue(csk1 instanceof DirectCompactSketch);
assertTrue(csk1.isOrdered()); //empty is always ordered
- csk2 = csk1.compact(false, wmem2); //set to unordered again
+ csk2 = csk1.compact(false, wseg2); //set to unordered again
assertTrue(csk2 instanceof DirectCompactSketch);
assertTrue(csk2.isOrdered()); //empty is always ordered
assertTrue(csk2.getSeedHash() == 0); //empty has no seed hash
@@ -511,12 +522,12 @@ public void checkDirectCompactSketchCompact() {
//single
sk.update(1);
bytes = sk.getCompactBytes(); //single, 16 bytes
- wmem1 = WritableMemory.allocate(bytes);
- wmem2 = WritableMemory.allocate(bytes);
- csk1 = sk.compact(false, wmem1); //place into memory as unordered
+ wseg1 = MemorySegment.ofArray(new byte[bytes]);
+ wseg2 = MemorySegment.ofArray(new byte[bytes]);
+ csk1 = sk.compact(false, wseg1); //place into MemorySegment as unordered
assertTrue(csk1 instanceof DirectCompactSketch);
assertTrue(csk1.isOrdered()); //single is always ordered
- csk2 = csk1.compact(false, wmem2); //set to unordered again
+ csk2 = csk1.compact(false, wseg2); //set to unordered again
assertTrue(csk2 instanceof DirectCompactSketch);
assertTrue(csk2.isOrdered()); //single is always ordered
assertTrue(csk2.getSeedHash() != 0); //has a seed hash
@@ -525,12 +536,12 @@ public void checkDirectCompactSketchCompact() {
//exact
sk.update(2);
bytes = sk.getCompactBytes(); //exact, 16 bytes preamble, 16 bytes data
- wmem1 = WritableMemory.allocate(bytes);
- wmem2 = WritableMemory.allocate(bytes);
- csk1 = sk.compact(false, wmem1); //place into memory as unordered
+ wseg1 = MemorySegment.ofArray(new byte[bytes]);
+ wseg2 = MemorySegment.ofArray(new byte[bytes]);
+ csk1 = sk.compact(false, wseg1); //place into MemorySegment as unordered
assertTrue(csk1 instanceof DirectCompactSketch);
assertFalse(csk1.isOrdered()); //should be unordered
- csk2 = csk1.compact(true, wmem2); //set to ordered
+ csk2 = csk1.compact(true, wseg2); //set to ordered
assertTrue(csk2 instanceof DirectCompactSketch);
assertTrue(csk2.isOrdered()); //should be ordered
assertTrue(csk2.getSeedHash() != 0); //has a seed hash
@@ -540,12 +551,12 @@ public void checkDirectCompactSketchCompact() {
int n = 1 << (lgK + 1);
for (int i = 2; i < n; i++) { sk.update(i); }
bytes = sk.getCompactBytes(); //24 bytes preamble + curCount * 8,
- wmem1 = WritableMemory.allocate(bytes);
- wmem2 = WritableMemory.allocate(bytes);
- csk1 = sk.compact(false, wmem1); //place into memory as unordered
+ wseg1 = MemorySegment.ofArray(new byte[bytes]);
+ wseg2 = MemorySegment.ofArray(new byte[bytes]);
+ csk1 = sk.compact(false, wseg1); //place into MemorySegment as unordered
assertTrue(csk1 instanceof DirectCompactSketch);
assertFalse(csk1.isOrdered()); //should be unordered
- csk2 = csk1.compact(true, wmem2); //set to ordered
+ csk2 = csk1.compact(true, wseg2); //set to ordered
assertTrue(csk2 instanceof DirectCompactSketch);
assertTrue(csk2.isOrdered()); //should be ordered
assertTrue(csk2.getSeedHash() != 0); //has a seed hash
@@ -561,7 +572,7 @@ public void serializeDeserializeHeapV4() {
}
CompactSketch cs1 = sk.compact();
byte[] bytes = cs1.toByteArrayCompressed();
- CompactSketch cs2 = CompactSketch.heapify(Memory.wrap(bytes));
+ CompactSketch cs2 = CompactSketch.heapify(MemorySegment.ofArray(bytes));
assertEquals(cs1.getRetainedEntries(), cs2.getRetainedEntries());
HashIterator it1 = cs1.iterator();
HashIterator it2 = cs2.iterator();
@@ -576,9 +587,9 @@ public void serializeDeserializeDirectV4() {
for (int i = 0; i < 10000; i++) {
sk.update(i);
}
- CompactSketch cs1 = sk.compact(true, WritableMemory.allocate(sk.getCompactBytes()));
+ CompactSketch cs1 = sk.compact(true, MemorySegment.ofArray(new byte[sk.getCompactBytes()]));
byte[] bytes = cs1.toByteArrayCompressed();
- CompactSketch cs2 = CompactSketch.wrap(Memory.wrap(bytes));
+ CompactSketch cs2 = CompactSketch.wrap(MemorySegment.ofArray(bytes));
assertEquals(cs1.getRetainedEntries(), cs2.getRetainedEntries());
HashIterator it1 = cs1.iterator();
HashIterator it2 = cs2.iterator();
@@ -630,19 +641,19 @@ private static class State {
boolean compact = false;
boolean empty = false;
boolean direct = false;
- boolean memory = false;
+ boolean hasSeg = false;
boolean ordered = false;
boolean estimation = false;
State(String classType, int count, int bytes, boolean compact, boolean empty, boolean direct,
- boolean memory, boolean ordered, boolean estimation) {
+ boolean hasSeg, boolean ordered, boolean estimation) {
this.classType = classType;
this.count = count;
this.bytes = bytes;
this.compact = compact;
this.empty = empty;
this.direct = direct;
- this.memory = memory;
+ this.hasSeg = hasSeg;
this.ordered = ordered;
this.estimation = estimation;
}
@@ -654,7 +665,7 @@ void check(CompactSketch csk) {
assertEquals(csk.isCompact(), compact, "Compact");
assertEquals(csk.isEmpty(), empty, "Empty");
assertEquals(csk.isDirect(), direct, "Direct");
- assertEquals(csk.hasMemory(), memory, "Memory");
+ assertEquals(csk.hasMemorySegment(), hasSeg, "MemorySegment");
assertEquals(csk.isOrdered(), ordered, "Ordered");
assertEquals(csk.isEstimationMode(), estimation, "Estimation");
}
diff --git a/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java
index 6d6af7047..ac08fd9e6 100644
--- a/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java
+++ b/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java
@@ -19,6 +19,7 @@
package org.apache.datasketches.theta;
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
import static org.apache.datasketches.theta.ConcurrentHeapQuickSelectSketchTest.waitForBgPropagationToComplete;
import static org.apache.datasketches.theta.PreambleUtil.FAMILY_BYTE;
import static org.apache.datasketches.theta.PreambleUtil.LG_NOM_LONGS_BYTE;
@@ -27,37 +28,46 @@
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
+import java.lang.foreign.MemorySegment;
+
import org.apache.datasketches.common.Family;
import org.apache.datasketches.common.SketchesArgumentException;
-import org.apache.datasketches.memory.Memory;
-import org.apache.datasketches.memory.WritableMemory;
+import org.apache.datasketches.common.Util;
+import org.apache.datasketches.theta.CompactSketch;
+import org.apache.datasketches.theta.ConcurrentDirectQuickSelectSketch;
+import org.apache.datasketches.theta.ConcurrentHeapThetaBuffer;
+import org.apache.datasketches.theta.ConcurrentSharedThetaSketch;
+import org.apache.datasketches.theta.DirectQuickSelectSketch;
+import org.apache.datasketches.theta.Sketch;
+import org.apache.datasketches.theta.Sketches;
+import org.apache.datasketches.theta.UpdateSketch;
+import org.apache.datasketches.theta.UpdateSketchBuilder;
import org.apache.datasketches.theta.ConcurrentHeapQuickSelectSketchTest.SharedLocal;
import org.apache.datasketches.thetacommon.HashOperations;
-import org.apache.datasketches.thetacommon.ThetaUtil;
import org.testng.annotations.Test;
/**
* @author eshcar
*/
public class ConcurrentDirectQuickSelectSketchTest {
- private static final long SEED = ThetaUtil.DEFAULT_UPDATE_SEED;
+ private static final long SEED = Util.DEFAULT_UPDATE_SEED;
@Test
public void checkDirectCompactConversion() {
int lgK = 9;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
assertTrue(sl.shared instanceof ConcurrentDirectQuickSelectSketch);
assertTrue(sl.shared.compact().isCompact());
}
@Test
- public void checkHeapifyMemoryEstimating() {
+ public void checkHeapifyMemorySegmentEstimating() {
int lgK = 9;
int k = 1 << lgK;
int u = 2*k;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared; //off-heap
UpdateSketch local = sl.local;
@@ -69,10 +79,10 @@ public void checkHeapifyMemoryEstimating() {
assertEquals(local.getClass().getSimpleName(), "ConcurrentHeapThetaBuffer");
//This sharedHeap is not linked to the concurrent local buffer
- UpdateSketch sharedHeap = Sketches.heapifyUpdateSketch(sl.wmem);
+ UpdateSketch sharedHeap = Sketches.heapifyUpdateSketch(sl.wseg);
assertEquals(sharedHeap.getClass().getSimpleName(), "HeapQuickSelectSketch");
- checkMemoryDirectProxyMethods(local, shared);
+ checkMemorySegmentDirectProxyMethods(local, shared);
checkOtherProxyMethods(local, shared);
checkOtherProxyMethods(local, sharedHeap);
@@ -90,8 +100,8 @@ public void checkHeapifyMemoryEstimating() {
public void checkHeapifyByteArrayExact() {
int lgK = 9;
int k = 1 << lgK;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared;
UpdateSketch local = sl.local;
@@ -99,13 +109,13 @@ public void checkHeapifyByteArrayExact() {
waitForBgPropagationToComplete(shared);
byte[] serArr = shared.toByteArray();
- Memory srcMem = Memory.wrap(serArr);
- Sketch recoveredShared = Sketch.heapify(srcMem);
+ MemorySegment srcSeg = MemorySegment.ofArray(serArr).asReadOnly();
+ Sketch recoveredShared = Sketch.heapify(srcSeg);
//reconstruct to Native/Direct
final int bytes = Sketch.getMaxUpdateSketchBytes(k);
- final WritableMemory wmem = WritableMemory.allocate(bytes);
- shared = sl.bldr.buildSharedFromSketch((UpdateSketch)recoveredShared, wmem);
+ final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]);
+ shared = sl.bldr.buildSharedFromSketch((UpdateSketch)recoveredShared, wseg);
UpdateSketch local2 = sl.bldr.buildLocal(shared);
assertEquals(local2.getEstimate(), k, 0.0);
@@ -127,8 +137,8 @@ public void checkHeapifyByteArrayEstimating() {
int k = 1 << lgK;
int u = 2*k;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared;
UpdateSketch local = sl.local;
@@ -141,13 +151,13 @@ public void checkHeapifyByteArrayEstimating() {
assertEquals(local.isEstimationMode(), true);
byte[] serArr = shared.toByteArray();
- Memory srcMem = Memory.wrap(serArr);
- Sketch recoveredShared = Sketch.heapify(srcMem);
+ MemorySegment srcSeg = MemorySegment.ofArray(serArr).asReadOnly();
+ Sketch recoveredShared = Sketch.heapify(srcSeg);
//reconstruct to Native/Direct
final int bytes = Sketch.getMaxUpdateSketchBytes(k);
- final WritableMemory wmem = WritableMemory.allocate(bytes);
- shared = sl.bldr.buildSharedFromSketch((UpdateSketch)recoveredShared, wmem);
+ final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]);
+ shared = sl.bldr.buildSharedFromSketch((UpdateSketch)recoveredShared, wseg);
UpdateSketch local2 = sl.bldr.buildLocal(shared);
assertEquals(local2.getEstimate(), uskEst);
@@ -159,14 +169,14 @@ public void checkHeapifyByteArrayEstimating() {
}
@Test
- public void checkWrapMemoryEst() {
+ public void checkWrapMemorySegmentEst() {
int lgK = 9;
int k = 1 << lgK;
int u = 2*k;
//boolean estimating = (u > k);
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared;
UpdateSketch local = sl.local;
@@ -178,7 +188,7 @@ public void checkWrapMemoryEst() {
double sk1ub = local.getUpperBound(2);
assertTrue(local.isEstimationMode());
- Sketch local2 = Sketch.wrap(sl.wmem);
+ Sketch local2 = Sketch.wrap(sl.wseg);
assertEquals(local2.getEstimate(), sk1est);
assertEquals(local2.getLowerBound(2), sk1lb);
@@ -193,14 +203,14 @@ public void checkDQStoCompactForms() {
int k = 1 << lgK;
int u = 4*k;
//boolean estimating = (u > k);
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared;
UpdateSketch local = sl.local;
assertEquals(local.getClass().getSimpleName(), "ConcurrentHeapThetaBuffer");
assertFalse(local.isDirect());
- assertTrue(local.hasMemory());
+ assertTrue(local.hasMemorySegment());
for (int i=0; i lgNomLongs +1.
@Test
- public void checkResizeInBigMem() {
+ public void checkResizeInBigSeg() {
int lgK = 14;
int u = 1 << 20;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, SEED, useMem, true, 8); //mem is 8X larger than needed
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, SEED, useSeg, true, 8); //seg is 8X larger than needed
UpdateSketch local = sl.local;
for (int i = 0; i < u; i++) { local.update(i); }
@@ -536,37 +546,37 @@ public void checkResizeInBigMem() {
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkConstructorKtooSmall() {
int lgK = 3;
- boolean useMem = true;
- new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ new SharedLocal(lgK, lgK, useSeg);
}
@Test(expectedExceptions = SketchesArgumentException.class)
- public void checkConstructorMemTooSmall() {
+ public void checkConstructorSegTooSmall() {
int lgK = 4;
int k = 1 << lgK;
- WritableMemory wmem = WritableMemory.allocate(k/2);
+ MemorySegment wseg = MemorySegment.ofArray(new byte[k/2]);
UpdateSketchBuilder bldr = new UpdateSketchBuilder();
bldr.setLogNominalEntries(lgK);
- bldr.buildShared(wmem);
+ bldr.buildShared(wseg);
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkHeapifyIllegalFamilyID_heapify() {
int lgK = 9;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
- sl.wmem.putByte(FAMILY_BYTE, (byte) 0); //corrupt the Family ID byte
- //try to heapify the corrupted mem
- Sketch.heapify(sl.wmem); //catch in Sketch.constructHeapSketch
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
+ sl.wseg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Family ID byte
+ //try to heapify the corrupted seg
+ Sketch.heapify(sl.wseg); //catch in Sketch.constructHeapSketch
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkBadLgNomLongs() {
int lgK = 4;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
- sl.wmem.putByte(LG_NOM_LONGS_BYTE, (byte) 3); //Corrupt LgNomLongs byte
- DirectQuickSelectSketch.writableWrap(sl.wmem, ThetaUtil.DEFAULT_UPDATE_SEED);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
+ sl.wseg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte) 3); //Corrupt LgNomLongs byte
+ DirectQuickSelectSketch.writableWrap(sl.wseg, Util.DEFAULT_UPDATE_SEED);
}
@Test
@@ -574,8 +584,8 @@ public void checkBackgroundPropagation() {
int lgK = 4;
int k = 1 << lgK;
int u = 10*k;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared;
UpdateSketch local = sl.local;
assertTrue(local.isEmpty());
@@ -612,8 +622,8 @@ public void checkBackgroundPropagation() {
public void checkBadSerVer() {
int lgK = 9;
int k = 1 << lgK;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared;
UpdateSketch local = sl.local;
assertTrue(local.isEmpty());
@@ -625,60 +635,60 @@ public void checkBadSerVer() {
assertEquals(local.getEstimate(), k, 0.0);
assertEquals(shared.getRetainedEntries(false), k);
- sl.wmem.putByte(SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte
- Sketch.wrap(sl.wmem);
+ sl.wseg.set(JAVA_BYTE, SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte
+ Sketch.wrap(sl.wseg);
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkWrapIllegalFamilyID_wrap() {
int lgK = 9;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
- sl.wmem.putByte(FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte
- //try to wrap the corrupted mem
- Sketch.wrap(sl.wmem); //catch in Sketch.constructDirectSketch
+ sl.wseg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte
+ //try to wrap the corrupted seg
+ Sketch.wrap(sl.wseg); //catch in Sketch.constructDirectSketch
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkWrapIllegalFamilyID_direct() {
int lgK = 9;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
- sl.wmem.putByte(FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte
- //try to wrap the corrupted mem
- DirectQuickSelectSketch.writableWrap(sl.wmem, ThetaUtil.DEFAULT_UPDATE_SEED);
+ sl.wseg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte
+ //try to wrap the corrupted seg
+ DirectQuickSelectSketch.writableWrap(sl.wseg, Util.DEFAULT_UPDATE_SEED);
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkHeapifySeedConflict() {
int lgK = 9;
long seed1 = 1021;
- long seed2 = ThetaUtil.DEFAULT_UPDATE_SEED;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, seed1, useMem, true, 1);
+ long seed2 = Util.DEFAULT_UPDATE_SEED;
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, seed1, useSeg, true, 1);
UpdateSketch shared = sl.shared;
- Memory srcMem = Memory.wrap(shared.toByteArray());
- Sketch.heapify(srcMem, seed2);
+ MemorySegment srcSeg = MemorySegment.ofArray(shared.toByteArray()).asReadOnly();
+ Sketch.heapify(srcSeg, seed2);
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkCorruptLgNomLongs() {
int lgK = 4;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
- sl.wmem.putByte(LG_NOM_LONGS_BYTE, (byte)2); //corrupt
- Sketch.heapify(sl.wmem, ThetaUtil.DEFAULT_UPDATE_SEED);
+ sl.wseg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte)2); //corrupt
+ Sketch.heapify(sl.wseg, Util.DEFAULT_UPDATE_SEED);
}
@Test(expectedExceptions = UnsupportedOperationException.class)
public void checkIllegalHashUpdate() {
int lgK = 4;
- boolean useMem = true;
- SharedLocal sl = new SharedLocal(lgK, lgK, useMem);
+ boolean useSeg = true;
+ SharedLocal sl = new SharedLocal(lgK, lgK, useSeg);
UpdateSketch shared = sl.shared;
shared.hashUpdate(1);
}
@@ -695,12 +705,14 @@ static void println(String s) {
//System.out.println(s); //disable here
}
- private static void checkMemoryDirectProxyMethods(Sketch local, Sketch shared) {
- assertEquals(local.hasMemory(), shared.hasMemory());
+ private static void checkMemorySegmentDirectProxyMethods(Sketch local, Sketch shared) {
+ assertEquals(
+ local.hasMemorySegment(),
+ shared.hasMemorySegment());
assertEquals(local.isDirect(), shared.isDirect());
}
- //Does not check hasMemory(), isDirect()
+ //Does not check hasMemorySegment(), isDirect()
private static void checkOtherProxyMethods(Sketch local, Sketch shared) {
assertEquals(local.getCompactBytes(), shared.getCompactBytes());
assertEquals(local.getCurrentBytes(), shared.getCurrentBytes());
diff --git a/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java
index 84ddcb80e..e8c517afd 100644
--- a/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java
+++ b/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java
@@ -19,6 +19,7 @@
package org.apache.datasketches.theta;
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
import static org.apache.datasketches.theta.PreambleUtil.FAMILY_BYTE;
import static org.apache.datasketches.theta.PreambleUtil.LG_NOM_LONGS_BYTE;
import static org.apache.datasketches.theta.PreambleUtil.SER_VER_BYTE;
@@ -27,13 +28,23 @@
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;
+import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import org.apache.datasketches.common.Family;
import org.apache.datasketches.common.SketchesArgumentException;
-import org.apache.datasketches.memory.Memory;
-import org.apache.datasketches.memory.WritableMemory;
-import org.apache.datasketches.thetacommon.ThetaUtil;
+import org.apache.datasketches.common.Util;
+import org.apache.datasketches.theta.CompactSketch;
+import org.apache.datasketches.theta.ConcurrentHeapQuickSelectSketch;
+import org.apache.datasketches.theta.ConcurrentHeapThetaBuffer;
+import org.apache.datasketches.theta.ConcurrentPropagationService;
+import org.apache.datasketches.theta.ConcurrentSharedThetaSketch;
+import org.apache.datasketches.theta.HeapQuickSelectSketch;
+import org.apache.datasketches.theta.PreambleUtil;
+import org.apache.datasketches.theta.Sketch;
+import org.apache.datasketches.theta.Sketches;
+import org.apache.datasketches.theta.UpdateSketch;
+import org.apache.datasketches.theta.UpdateSketchBuilder;
import org.testng.annotations.Test;
/**
@@ -63,12 +74,12 @@ public void checkBadSerVer() {
assertEquals(shared.getRetainedEntries(false), u);
byte[] serArr = shared.toByteArray();
- WritableMemory mem = WritableMemory.writableWrap(serArr);
- Sketch sk = Sketch.heapify(mem, sl.seed);
+ MemorySegment seg = MemorySegment.ofArray(serArr);
+ Sketch sk = Sketch.heapify(seg, sl.seed);
assertTrue(sk instanceof HeapQuickSelectSketch); //Intentional promotion to Parent
- mem.putByte(SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte
- Sketch.heapify(mem, sl.seed);
+ seg.set(JAVA_BYTE, SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte
+ Sketch.heapify(seg, sl.seed);
}
@Test
@@ -111,22 +122,22 @@ public void checkIllegalSketchID_UpdateSketch() {
assertEquals(local.getEstimate(), u, 0.0);
assertEquals(shared.getRetainedEntries(false), u);
byte[] byteArray = shared.toByteArray();
- WritableMemory mem = WritableMemory.writableWrap(byteArray);
- mem.putByte(FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte
+ MemorySegment seg = MemorySegment.ofArray(byteArray);
+ seg.set(JAVA_BYTE, FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte
- //try to heapify the corrupted mem
- Sketch.heapify(mem, sl.seed);
+ //try to heapify the corrupted seg
+ Sketch.heapify(seg, sl.seed);
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkHeapifySeedConflict() {
int lgK = 9;
long seed = 1021;
- long seed2 = ThetaUtil.DEFAULT_UPDATE_SEED;
+ long seed2 = Util.DEFAULT_UPDATE_SEED;
SharedLocal sl = new SharedLocal(lgK, lgK, seed);
byte[] byteArray = sl.shared.toByteArray();
- Memory srcMem = Memory.wrap(byteArray);
- Sketch.heapify(srcMem, seed2);
+ MemorySegment srcSeg = MemorySegment.ofArray(byteArray);
+ Sketch.heapify(srcSeg, seed2);
}
@Test(expectedExceptions = SketchesArgumentException.class)
@@ -134,9 +145,9 @@ public void checkHeapifyCorruptLgNomLongs() {
int lgK = 4;
SharedLocal sl = new SharedLocal(lgK);
byte[] serArr = sl.shared.toByteArray();
- WritableMemory srcMem = WritableMemory.writableWrap(serArr);
- srcMem.putByte(LG_NOM_LONGS_BYTE, (byte)2); //corrupt
- Sketch.heapify(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED);
+ MemorySegment srcSeg = MemorySegment.ofArray(serArr);
+ srcSeg.set(JAVA_BYTE, LG_NOM_LONGS_BYTE, (byte)2); //corrupt
+ Sketch.heapify(srcSeg, Util.DEFAULT_UPDATE_SEED);
}
@Test(expectedExceptions = UnsupportedOperationException.class)
@@ -161,13 +172,13 @@ public void checkHeapifyByteArrayExact() {
waitForBgPropagationToComplete(shared);
byte[] serArr = shared.toByteArray();
- Memory srcMem = Memory.wrap(serArr);
- Sketch recoveredShared = Sketches.heapifyUpdateSketch(srcMem);
+ MemorySegment srcSeg = MemorySegment.ofArray(serArr).asReadOnly();
+ Sketch recoveredShared = Sketches.heapifyUpdateSketch(srcSeg);
//reconstruct to Native/Direct
final int bytes = Sketch.getMaxUpdateSketchBytes(k);
- final WritableMemory wmem = WritableMemory.allocate(bytes);
- shared = sl.bldr.buildSharedFromSketch((UpdateSketch)recoveredShared, wmem);
+ final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]);
+ shared = sl.bldr.buildSharedFromSketch((UpdateSketch)recoveredShared, wseg);
UpdateSketch local2 = sl.bldr.buildLocal(shared);
assertEquals(local2.getEstimate(), u, 0.0);
@@ -200,12 +211,12 @@ public void checkHeapifyByteArrayEstimating() {
assertTrue(local.isEstimationMode());
byte[] serArr = shared.toByteArray();
- Memory srcMem = Memory.wrap(serArr);
- UpdateSketch recoveredShared = UpdateSketch.heapify(srcMem, sl.seed);
+ MemorySegment srcSeg = MemorySegment.ofArray(serArr).asReadOnly();
+ UpdateSketch recoveredShared = UpdateSketch.heapify(srcSeg, sl.seed);
final int bytes = Sketch.getMaxUpdateSketchBytes(k);
- final WritableMemory wmem = WritableMemory.allocate(bytes);
- shared = sl.bldr.buildSharedFromSketch(recoveredShared, wmem);
+ final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]);
+ shared = sl.bldr.buildSharedFromSketch(recoveredShared, wseg);
UpdateSketch local2 = sl.bldr.buildLocal(shared);
assertEquals(local2.getEstimate(), localEst);
assertEquals(local2.getLowerBound(2), localLB);
@@ -216,7 +227,7 @@ public void checkHeapifyByteArrayEstimating() {
}
@Test
- public void checkHeapifyMemoryEstimating() {
+ public void checkHeapifyMemorySegmentEstimating() {
int lgK = 9;
int k = 1 << lgK;
int u = 2*k; //thus estimating
@@ -235,16 +246,16 @@ public void checkHeapifyMemoryEstimating() {
double localUB = local.getUpperBound(2);
assertTrue(local.isEstimationMode());
assertFalse(local.isDirect());
- assertFalse(local.hasMemory());
+ assertFalse(local.hasMemorySegment());
byte[] serArr = shared.toByteArray();
- Memory srcMem = Memory.wrap(serArr);
- UpdateSketch recoveredShared = UpdateSketch.heapify(srcMem, ThetaUtil.DEFAULT_UPDATE_SEED);
+ MemorySegment srcSeg = MemorySegment.ofArray(serArr).asReadOnly();
+ UpdateSketch recoveredShared = UpdateSketch.heapify(srcSeg, Util.DEFAULT_UPDATE_SEED);
final int bytes = Sketch.getMaxUpdateSketchBytes(k);
- final WritableMemory wmem = WritableMemory.allocate(bytes);
- shared = sl.bldr.buildSharedFromSketch(recoveredShared, wmem);
+ final MemorySegment wseg = MemorySegment.ofArray(new byte[bytes]);
+ shared = sl.bldr.buildSharedFromSketch(recoveredShared, wseg);
UpdateSketch local2 = sl.bldr.buildLocal(shared);
assertEquals(local2.getEstimate(), localEst);
@@ -268,7 +279,7 @@ public void checkHQStoCompactForms() {
assertEquals(local.getClass().getSimpleName(), "ConcurrentHeapThetaBuffer");
assertFalse(local.isDirect());
- assertFalse(local.hasMemory());
+ assertFalse(local.hasMemorySegment());
for (int i=0; i k);
println("Est: "+est);
- final byte[] memArr3 = inter2.toByteArray();
- final WritableMemory srcMem2 = WritableMemory.writableWrap(memArr3);
- inter3 = Sketches.wrapIntersection(srcMem2);
+ final byte[] segArr3 = inter2.toByteArray();
+ final MemorySegment srcSeg2 = MemorySegment.ofArray(segArr3);
+ inter3 = Sketches.wrapIntersection(srcSeg2);
resultComp2 = inter3.getResult(false, null);
est2 = resultComp2.getEstimate();
println("Est2: "+est2);
@@ -660,15 +669,15 @@ public void checkWrap() {
@Test
public void checkDefaultMinSize() {
final int k = 32;
- final WritableMemory mem = WritableMemory.writableWrap(new byte[k*8 + PREBYTES]);
- IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, mem);
+ final MemorySegment seg = MemorySegment.ofArray(new byte[k*8 + PREBYTES]);
+ IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, seg);
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkExceptionMinSize() {
final int k = 16;
- final WritableMemory mem = WritableMemory.writableWrap(new byte[k*8 + PREBYTES]);
- IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, mem);
+ final MemorySegment seg = MemorySegment.ofArray(new byte[k*8 + PREBYTES]);
+ IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, seg);
}
@Test
@@ -676,11 +685,11 @@ public void checkGetResult() {
final int k = 1024;
final UpdateSketch sk = Sketches.updateSketchBuilder().build();
- final int memBytes = getMaxIntersectionBytes(k);
- final byte[] memArr = new byte[memBytes];
- final WritableMemory iMem = WritableMemory.writableWrap(memArr);
+ final int segBytes = getMaxIntersectionBytes(k);
+ final byte[] segArr = new byte[segBytes];
+ final MemorySegment iSeg = MemorySegment.ofArray(segArr);
- final Intersection inter = Sketches.setOperationBuilder().buildIntersection(iMem);
+ final Intersection inter = Sketches.setOperationBuilder().buildIntersection(iSeg);
inter.intersect(sk);
final CompactSketch csk = inter.getResult();
assertEquals(csk.getCompactBytes(), 8);
@@ -690,31 +699,31 @@ public void checkGetResult() {
public void checkFamily() {
//cheap trick
final int k = 16;
- final WritableMemory mem = WritableMemory.writableWrap(new byte[k*16 + PREBYTES]);
- final IntersectionImpl impl = IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, mem);
+ final MemorySegment seg = MemorySegment.ofArray(new byte[k*16 + PREBYTES]);
+ final IntersectionImpl impl = IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, seg);
assertEquals(impl.getFamily(), Family.INTERSECTION);
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkExceptions1() {
final int k = 16;
- final WritableMemory mem = WritableMemory.writableWrap(new byte[k*16 + PREBYTES]);
- IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, mem);
+ final MemorySegment seg = MemorySegment.ofArray(new byte[k*16 + PREBYTES]);
+ IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, seg);
//corrupt SerVer
- mem.putByte(PreambleUtil.SER_VER_BYTE, (byte) 2);
- IntersectionImpl.wrapInstance(mem, ThetaUtil.DEFAULT_UPDATE_SEED, false);
+ seg.set(JAVA_BYTE, PreambleUtil.SER_VER_BYTE, (byte) 2);
+ IntersectionImpl.wrapInstance(seg, Util.DEFAULT_UPDATE_SEED, false);
}
@Test(expectedExceptions = SketchesArgumentException.class)
public void checkExceptions2() {
final int k = 16;
- final WritableMemory mem = WritableMemory.writableWrap(new byte[k*16 + PREBYTES]);
- IntersectionImpl.initNewDirectInstance(ThetaUtil.DEFAULT_UPDATE_SEED, mem);
- //mem now has non-empty intersection
+ final MemorySegment seg = MemorySegment.ofArray(new byte[k*16 + PREBYTES]);
+ IntersectionImpl.initNewDirectInstance(Util.DEFAULT_UPDATE_SEED, seg);
+ //seg now has non-empty intersection
//corrupt empty and CurCount
- mem.setBits(PreambleUtil.FLAGS_BYTE, (byte) PreambleUtil.EMPTY_FLAG_MASK);
- mem.putInt(PreambleUtil.RETAINED_ENTRIES_INT, 2);
- IntersectionImpl.wrapInstance(mem, ThetaUtil.DEFAULT_UPDATE_SEED, false);
+ Util.setBits(seg, PreambleUtil.FLAGS_BYTE, (byte) PreambleUtil.EMPTY_FLAG_MASK);
+ seg.set(JAVA_INT_UNALIGNED, PreambleUtil.RETAINED_ENTRIES_INT, 2);
+ IntersectionImpl.wrapInstance(seg, Util.DEFAULT_UPDATE_SEED, false);
}
//Check Alex's bug intersecting 2 direct full sketches with only overlap of 2
@@ -722,26 +731,26 @@ public void checkExceptions2() {
@Test
public void checkOverlappedDirect() {
final int k = 1 << 4;
- final int memBytes = 2*k*16 +PREBYTES; //plenty of room
+ final int segBytes = 2*k*16 +PREBYTES; //plenty of room
final UpdateSketch sk1 = Sketches.updateSketchBuilder().setNominalEntries(k).build();
final UpdateSketch sk2 = Sketches.updateSketchBuilder().setNominalEntries(k).build();
for (int i=0; i