[5] MemorySegment Component, See - * JEP 454: Foreign Function & Memory API + * JEP 454: Foreign Function And Memory API * *
[6] MacBook Pro 2.3 GHz 8-Core Intel Core i9 * diff --git a/src/main/java/org/apache/datasketches/quantiles/DirectCompactDoublesSketch.java b/src/main/java/org/apache/datasketches/quantiles/DirectCompactDoublesSketch.java index 63a81e047..af0579eef 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DirectCompactDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/DirectCompactDoublesSketch.java @@ -250,7 +250,7 @@ static void checkDirectSegCapacity(final int k, final long n, final long segCapB /** * Checks a sketch's serial version and flags to see if the sketch can be wrapped as a * DirectCompactDoubleSketch. Throws an exception if the sketch is neither empty nor compact - * and ordered, unles the sketch uses serialization version 2. + * and ordered, unless the sketch uses serialization version 2. * @param serVer the serialization version * @param flags Flags from the sketch to evaluate */ diff --git a/src/main/java/org/apache/datasketches/quantiles/DirectUpdateDoublesSketch.java b/src/main/java/org/apache/datasketches/quantiles/DirectUpdateDoublesSketch.java index 1025aeb86..4976039d5 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DirectUpdateDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/DirectUpdateDoublesSketch.java @@ -384,7 +384,7 @@ static void checkDirectSegCapacity(final int k, final long n, final long segCapB static void checkCompact(final int serVer, final int flags) { final boolean compact = (serVer == 2) || ((flags & COMPACT_FLAG_MASK) > 0); if (compact) { - throw new SketchesArgumentException("Compact MemorySegment is not supported for Wrap Instance."); + throw new SketchesArgumentException("MemorySegment is in compact form and is not supported for this writableWrap Instance."); } } diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java b/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java index b8b0b7633..e14d5301e 100644 --- 
a/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java @@ -151,29 +151,63 @@ public static DoublesSketch heapify(final MemorySegment srcSeg) { } /** - * Wrap this sketch around the given updatable MemorySegment image of a DoublesSketch, compact or updatable. + * Wrap this sketch around the given MemorySegment image of a compact, read-only DoublesSketch. * - * @param srcSeg the given MemorySegment image of a DoublesSketch that may have data - * @return a sketch that wraps the given srcSeg in read-only mode. + * @param srcSeg the given MemorySegment image of a compact, read-only DoublesSketch. + * @return a compact, read-only sketch that wraps the given MemorySegment. */ public static DoublesSketch wrap(final MemorySegment srcSeg) { + if (!checkIsMemorySegmentCompact(srcSeg)) { + throw new SketchesArgumentException( + "MemorySegment sketch image must be in compact form. " + + "Use {@link #writableWrap(MemorySegment writableWrap(...)} for updatable sketches."); + } + return DirectCompactDoublesSketch.wrapInstance(srcSeg); + } + + /** + * Wrap this sketch around the given MemorySegment image of an updatable DoublesSketch. + * + *
The given MemorySegment must be writable and it must contain an UpdateDoublesSketch. + * The sketch will be updated and managed entirely within the MemorySegment. If the given source + * MemorySegment is created off-heap, then all the management of the sketch's internal data will be off-heap as well.
+ * + * NOTE: If, during updating, the sketch requires more capacity than the given size of the MemorySegment, the sketch + * will request more capacity using the {@link MemorySegmentRequest MemorySegmentRequest} interface. The default of this interface will + * return a new MemorySegment on the heap.
+ * + * @param srcSeg the given MemorySegment image of an UpdateDoublesSketch. + * @return an updatable sketch that wraps the given MemorySegment. + */ + public static DoublesSketch writableWrap(final MemorySegment srcSeg) { if (checkIsMemorySegmentCompact(srcSeg)) { - return DirectCompactDoublesSketch.wrapInstance(srcSeg); + throw new SketchesArgumentException( + "MemorySegment sketch image must be in updatable form. " + + "Use {@link #wrap(MemorySegment wrap(...)} for compact sketches."); } return DirectUpdateDoublesSketch.wrapInstance(srcSeg, null); } /** - * Wrap this sketch around the given updatable MemorySegment image of a DoublesSketch, compact or updatable. + * Wrap this sketch around the given MemorySegment image of an updatable DoublesSketch. + * + *The given MemorySegment must be writable and it must contain a UpdateDoublesSketch. + * The sketch will be updated and managed totally within the MemorySegment. If the given source + * MemorySegment is created off-heap, then all the management of the sketch's internal data will be off-heap as well.
+ * + * NOTE: If, during updating, the sketch requires more capacity than the given size of the MemorySegment, the sketch + * will request more capacity using the {@link MemorySegmentRequest MemorySegmentRequest} interface. The default of this interface will + * return a new MemorySegment on the heap. It is up to the user to extend this interface if more flexible + * handling of requests for more capacity is required.
* - * @param srcSeg the given MemorySegment image of a DoublesSketch that may have data. + * @param srcSeg the given MemorySegment image of a DoublesSketch. * @param mSegReq the MemorySegmentRequest used if the given MemorySegment needs to expand. * Otherwise, it can be null and the default MemorySegmentRequest will be used. - * @return a sketch that wraps the given srcSeg in read-only mode. + * @return a sketch that wraps the given MemorySegment. */ - public static DoublesSketch wrap(final MemorySegment srcSeg, final MemorySegmentRequest mSegReq) { + public static DoublesSketch writableWrap(final MemorySegment srcSeg, final MemorySegmentRequest mSegReq) { if (checkIsMemorySegmentCompact(srcSeg)) { - return DirectCompactDoublesSketch.wrapInstance(srcSeg); + throw new SketchesArgumentException("MemorySegment sketch image must be in updatable form."); } return DirectUpdateDoublesSketch.wrapInstance(srcSeg, mSegReq); } diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesUnionImpl.java b/src/main/java/org/apache/datasketches/quantiles/DoublesUnionImpl.java index 59c1021ea..0c7a4468f 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DoublesUnionImpl.java +++ b/src/main/java/org/apache/datasketches/quantiles/DoublesUnionImpl.java @@ -20,6 +20,7 @@ package org.apache.datasketches.quantiles; import static org.apache.datasketches.common.Util.LS; +import static org.apache.datasketches.quantiles.ClassicUtil.checkIsMemorySegmentCompact; import static org.apache.datasketches.quantiles.DoublesUtil.copyToHeap; import java.lang.foreign.MemorySegment; @@ -111,8 +112,8 @@ static DoublesUnionImpl heapifyInstance(final MemorySegment srcSeg) { } /** - * Returns an updatable Union object that wraps the data of the given MemorySegment - * image of a updatable DoublesSketch. The data of the Union will remain in the MemorySegment. + * Returns an Union object that wraps the data of the given MemorySegment image of a UpdateDoublesSketch. 
+ * The data of the Union will remain in the MemorySegment. * * @param srcSeg A MemorySegment image of an updatable DoublesSketch to be used as the data structure for the union and will be modified. * @param mSegReq the MemorySegmentRequest used if the given MemorySegment needs to expand. @@ -138,7 +139,12 @@ public void union(final DoublesSketch sketchIn) { @Override public void union(final MemorySegment seg) { Objects.requireNonNull(seg); - gadget_ = updateLogic(maxK_, gadget_, DoublesSketch.wrap(seg, null)); + if (checkIsMemorySegmentCompact(seg)) { + gadget_ = updateLogic(maxK_, gadget_, DoublesSketch.wrap(seg)); + } else { + gadget_ = updateLogic(maxK_, gadget_, DoublesSketch.writableWrap(seg, null)); + } + gadget_.doublesSV = null; } diff --git a/src/main/java/org/apache/datasketches/quantiles/UpdateDoublesSketch.java b/src/main/java/org/apache/datasketches/quantiles/UpdateDoublesSketch.java index cf3c8ac60..d511d8fa7 100644 --- a/src/main/java/org/apache/datasketches/quantiles/UpdateDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/UpdateDoublesSketch.java @@ -34,22 +34,40 @@ public abstract class UpdateDoublesSketch extends DoublesSketch { } /** - * Wrap this sketch around the given MemorySegment image of an UpdateDoublesSketch. + * Wrap a sketch around the given source MemorySegment containing sketch data that originated from this sketch. * - * @param srcSeg the given MemorySegment image of an UpdateDoublesSketch and must not be null. - * @return a sketch that wraps the given srcSeg + *The given MemorySegment must be writable and it must contain a UpdateDoublesSketch. + * The sketch will be updated and managed totally within the MemorySegment. If the given source + * MemorySegment is created off-heap, then all the management of the sketch's internal data will be off-heap as well.
+ * + * NOTE: If, during updating, the sketch requires more capacity than the given size of the MemorySegment, the sketch + * will request more capacity using the {@link MemorySegmentRequest MemorySegmentRequest} interface. The default of this interface will + * return a new MemorySegment on the heap.
+ * + * @param srcSeg a MemorySegment that contains sketch data. + * @return an instance of this sketch that wraps the given MemorySegment. */ public static UpdateDoublesSketch wrap(final MemorySegment srcSeg) { return DirectUpdateDoublesSketch.wrapInstance(srcSeg, null); } /** - * Wrap this sketch around the given MemorySegment image of an UpdateDoublesSketch. + * Wrap a sketch around the given source MemorySegment containing sketch data that originated from this sketch and including an + * optional, user defined {@link MemorySegmentRequest MemorySegmentRequest}. + * + *The given MemorySegment must be writable and it must contain a UpdateDoublesSketch. + * The sketch will be updated and managed totally within the MemorySegment. If the given source + * MemorySegment is created off-heap, then all the management of the sketch's internal data will be off-heap as well.
+ * + * NOTE: If, during updating, the sketch requires more capacity than the given size of the MemorySegment, the sketch + * will request more capacity using the {@link MemorySegmentRequest MemorySegmentRequest} interface. The default of this interface will + * return a new MemorySegment on the heap. It is up to the user to extend this interface if more flexible + * handling of requests for more capacity is required.
* - * @param srcSeg the given MemorySegment image of an UpdateDoublesSketch and must not be null. + * @param srcSeg a MemorySegment that contains sketch data. * @param mSegReq the MemorySegmentRequest used if the given MemorySegment needs to expand. * Otherwise, it can be null and the default MemorySegmentRequest will be used. - * @return a sketch that wraps the given srcSeg + * @return an instance of this sketch that wraps the given MemorySegment. */ public static UpdateDoublesSketch wrap(final MemorySegment srcSeg, final MemorySegmentRequest mSegReq) { return DirectUpdateDoublesSketch.wrapInstance(srcSeg, mSegReq); diff --git a/src/main/java/org/apache/datasketches/theta/CompactOperations.java b/src/main/java/org/apache/datasketches/theta/CompactOperations.java index 265357296..926600638 100644 --- a/src/main/java/org/apache/datasketches/theta/CompactOperations.java +++ b/src/main/java/org/apache/datasketches/theta/CompactOperations.java @@ -253,7 +253,7 @@ static MemorySegment loadCompactMemorySegment( } if (preLongs > 1) { insertCurCount(dstWSeg, curCount); - insertP(dstWSeg, (float) 1.0); + insertP(dstWSeg, (float) 0.0); //0.0 to be consistent with C++ } if (preLongs > 2) { insertThetaLong(dstWSeg, thetaLong); diff --git a/src/main/java/org/apache/datasketches/theta/PreambleUtil.java b/src/main/java/org/apache/datasketches/theta/PreambleUtil.java index 26e263852..b3451fcd1 100644 --- a/src/main/java/org/apache/datasketches/theta/PreambleUtil.java +++ b/src/main/java/org/apache/datasketches/theta/PreambleUtil.java @@ -109,6 +109,36 @@ * 3 ||----------------------Start of Compact Long Array----------------------------------| * * + *The compressed CompactSketch has 8 bytes of preamble in exact mode because Theta can + * be assumed to be 1.0. In estimating mode, the 2nd 8 bytes is Theta as a Long. The following + * table assumes estimating mode. 
In any case, the number of retained entries starts immediately + * after, followed immediately by the delta-encoded compressed byte array.
+ * Unique to this table: + *+ * Long || Start Byte Adr: + * Adr: + * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | + * 0 || Seed Hash | Flags | numEB | entBits| FamID | SerVer | PreLongs = 3 | + * + * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 | + * 1 ||------------------------------THETA_LONG-------------------------------------------| + * + * || | | | (20) | (19) | (18) | (17) | 16 | + * 2 ||----------------Retained Entries stored as 1 to 4 bytes----------------------------| + * + * || | | | | | | | | + * 3 ||------------------Delta encoded compressed byte array------------------------------| + *+ * *
The UpdateSketch and AlphaSketch require 24 bytes of preamble followed by a non-compact * array of longs representing a hash table.
* diff --git a/src/test/java/org/apache/datasketches/quantiles/DirectCompactDoublesSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/DirectCompactDoublesSketchTest.java index 946eca5ca..973b42846 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DirectCompactDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DirectCompactDoublesSketchTest.java @@ -97,7 +97,7 @@ public void wrapEmptyCompactSketch() { final CompactDoublesSketch s1 = DoublesSketch.builder().build().compact(); final MemorySegment seg = MemorySegment.ofBuffer(ByteBuffer.wrap(s1.toByteArray()).order(ByteOrder.nativeOrder())); - final DoublesSketch s2 = DoublesSketch.wrap(seg, MemorySegmentRequest.DEFAULT); + final DoublesSketch s2 = DoublesSketch.wrap(seg); assertTrue(s2.isEmpty()); assertEquals(s2.getN(), 0); assertTrue(Double.isNaN(s2.isEmpty() ? Double.NaN : s2.getMinItem())); diff --git a/src/test/java/org/apache/datasketches/quantiles/DirectUpdateDoublesSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/DirectUpdateDoublesSketchTest.java index 6ced470e6..17c7b0384 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DirectUpdateDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DirectUpdateDoublesSketchTest.java @@ -230,7 +230,7 @@ public void serializeDeserialize() { final byte[] arr2 = sketch2.toByteArray(false); assertEquals(arr2.length, sketch2.getSerializedSizeBytes()); - final DoublesSketch sketch3 = DoublesSketch.wrap(MemorySegment.ofArray(arr2), null); + final DoublesSketch sketch3 = DoublesSketch.writableWrap(MemorySegment.ofArray(arr2), null); assertEquals(sketch3.getMinItem(), 0.0); assertEquals(sketch3.getMaxItem(), 1999.0); assertEquals(sketch3.getQuantile(0.5), 1000.0, 10.0); diff --git a/src/test/java/org/apache/datasketches/quantiles/DoublesMiscTest.java b/src/test/java/org/apache/datasketches/quantiles/DoublesMiscTest.java index 049ed7da8..40b349d84 100644 --- 
a/src/test/java/org/apache/datasketches/quantiles/DoublesMiscTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DoublesMiscTest.java @@ -44,7 +44,7 @@ public void wrapAndUpdating() { Assert.assertEquals(bytes.length, curBytes); //convert to MemorySegment final MemorySegment seg = MemorySegment.ofArray(bytes); - final UpdateDoublesSketch sk2 = (UpdateDoublesSketch) DoublesSketch.wrap(seg, null); + final UpdateDoublesSketch sk2 = (UpdateDoublesSketch) DoublesSketch.writableWrap(seg, null); assertEquals(seg.byteSize(), curBytes); sk2.update(3); sk2.update(4); @@ -60,10 +60,8 @@ public void wrapCompactSketch() { final UpdateDoublesSketch s1 = DoublesSketch.builder().build(); s1.update(1); s1.update(2); - //MemorySegment seg = MemorySegment.ofArray(ByteBuffer.wrap(s1.compact().toByteArray()) - // .asReadOnlyBuffer().order(ByteOrder.nativeOrder()));); final MemorySegment seg = MemorySegment.ofArray(s1.compact().toByteArray()); - final DoublesSketch s2 = DoublesSketch.wrap(seg, null); // compact, so this is ok + final DoublesSketch s2 = DoublesSketch.wrap(seg); // compact, so this is ok assertEquals(s2.getMinItem(), 1.0); assertEquals(s2.getMaxItem(), 2.0); assertEquals(s2.getN(), 2); @@ -123,7 +121,7 @@ public void heapifyEmptyCompactSketch() { public void wrapEmptyUpdateSketch() { final UpdateDoublesSketch s1 = DoublesSketch.builder().build(); final MemorySegment seg = MemorySegment.ofArray(s1.toByteArray()).asReadOnly(); - final UpdateDoublesSketch s2 = (UpdateDoublesSketch) DoublesSketch.wrap(seg, null); + final UpdateDoublesSketch s2 = (UpdateDoublesSketch) DoublesSketch.writableWrap(seg, null); assertTrue(s2.isEmpty()); // ensure the various put calls fail @@ -189,7 +187,7 @@ public void wrapEmptyUpdateSketch() { public void wrapEmptyCompactSketch() { final UpdateDoublesSketch s1 = DoublesSketch.builder().build(); final MemorySegment seg = MemorySegment.ofArray(s1.compact().toByteArray()); - final DoublesSketch s2 = DoublesSketch.wrap(seg, null); // 
compact, so this is ok + final DoublesSketch s2 = DoublesSketch.wrap(seg); // compact, so this is ok Assert.assertTrue(s2.isEmpty()); } @@ -207,7 +205,7 @@ public void heapifyUnionFromSparse() { } @Test - public void heapifyUnionFromCompact() { + public void initializeUnionFromCompactSegment() { final UpdateDoublesSketch s1 = DoublesSketch.builder().build(); s1.update(1); s1.update(2); @@ -219,6 +217,19 @@ public void heapifyUnionFromCompact() { Assert.assertEquals(s2.getMaxItem(), 3.0); } + @Test + public void unionFromUpdatableSegment() { + final UpdateDoublesSketch s1 = DoublesSketch.builder().build(); + s1.update(1); + s1.update(2); + final MemorySegment seg = MemorySegment.ofArray(s1.toByteArray(false)); + final DoublesUnion u = DoublesUnion.wrap(seg); + u.update(3); + final DoublesSketch s2 = u.getResult(); + Assert.assertEquals(s2.getMinItem(), 1.0); + Assert.assertEquals(s2.getMaxItem(), 3.0); + } + @Test public void wrapUnionFromHeap() { final UpdateDoublesSketch s1 = DoublesSketch.builder().build(); @@ -239,7 +250,7 @@ public void wrapUnionFromCompact() { s1.update(1); s1.update(2); final MemorySegment seg = MemorySegment.ofArray(s1.toByteArray(true)); - DoublesUnion.wrap(seg, null); //not from compact + DoublesUnion.wrap(seg, null); //compact seg fail(); } diff --git a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java index 8d64c6bec..bcbb16cc2 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java @@ -43,7 +43,7 @@ public void heapToDirect() { for (int i = 0; i < 1000; i++) { heapSketch.update(i); } - final DoublesSketch directSketch = DoublesSketch.wrap(MemorySegment.ofArray(heapSketch.toByteArray(false)), null); + final DoublesSketch directSketch = DoublesSketch.writableWrap(MemorySegment.ofArray(heapSketch.toByteArray(false)), null); 
assertEquals(directSketch.getMinItem(), 0.0); assertEquals(directSketch.getMaxItem(), 999.0); diff --git a/src/test/java/org/apache/datasketches/quantiles/DoublesUnionImplTest.java b/src/test/java/org/apache/datasketches/quantiles/DoublesUnionImplTest.java index dd58f16e3..094b786d3 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DoublesUnionImplTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DoublesUnionImplTest.java @@ -371,7 +371,7 @@ public void checkUnionQuantiles() { union.union(sketch2); union.union(sketch1); final MemorySegment seg = MemorySegment.ofArray(union.getResult().toByteArray(true)); - final DoublesSketch result = DoublesSketch.wrap(seg, null); + final DoublesSketch result = DoublesSketch.wrap(seg); assertEquals(result.getN(), n1 + n2); assertEquals(result.getK(), k); diff --git a/src/test/java/org/apache/datasketches/quantiles/DoublesUtilTest.java b/src/test/java/org/apache/datasketches/quantiles/DoublesUtilTest.java index 0124b39cb..91f86f74f 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DoublesUtilTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DoublesUtilTest.java @@ -85,7 +85,7 @@ public void checkCopyToHeap() { // DirectCompactDoublesSketch final MemorySegment seg2 = MemorySegment.ofArray(hcds.toByteArray()); - final DirectCompactDoublesSketch dcds = (DirectCompactDoublesSketch) DoublesSketch.wrap(seg2, null); + final DirectCompactDoublesSketch dcds = (DirectCompactDoublesSketch) DoublesSketch.wrap(seg2); final HeapUpdateDoublesSketch target4 = DoublesUtil.copyToHeap(dcds); DoublesSketchTest.testSketchEquality(huds, dcds); DoublesSketchTest.testSketchEquality(dcds, target4); diff --git a/src/test/java/org/apache/datasketches/quantiles/QuantilesSketchCrossLanguageTest.java b/src/test/java/org/apache/datasketches/quantiles/QuantilesSketchCrossLanguageTest.java index 0a4ed3c15..20ceab5ca 100644 --- a/src/test/java/org/apache/datasketches/quantiles/QuantilesSketchCrossLanguageTest.java 
+++ b/src/test/java/org/apache/datasketches/quantiles/QuantilesSketchCrossLanguageTest.java @@ -92,7 +92,7 @@ public void checkDoublesSketch() throws IOException { final int[] nArr = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; for (final int n: nArr) { final byte[] byteArr = Files.readAllBytes(cppPath.resolve("quantiles_double_n" + n + "_cpp.sk")); - final DoublesSketch sk = DoublesSketch.wrap(MemorySegment.ofArray(byteArr), null); + final DoublesSketch sk = DoublesSketch.wrap(MemorySegment.ofArray(byteArr)); assertTrue(n == 0 ? sk.isEmpty() : !sk.isEmpty()); assertTrue(n > 128 ? sk.isEstimationMode() : !sk.isEstimationMode()); assertEquals(sk.getN(), n);