From 3e4eae07719491550c7195f2efe0f884225169c9 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 15 Aug 2025 15:27:10 -0700 Subject: [PATCH 01/11] Correcting javadocs --- .../quantiles/DirectCompactDoublesSketch.java | 2 +- .../quantiles/DirectUpdateDoublesSketch.java | 2 +- .../datasketches/quantiles/DoublesSketch.java | 48 +++++++++++++++---- .../quantiles/DoublesUnionImpl.java | 2 +- .../quantiles/UpdateDoublesSketch.java | 30 +++++++++--- 5 files changed, 66 insertions(+), 18 deletions(-) diff --git a/src/main/java/org/apache/datasketches/quantiles/DirectCompactDoublesSketch.java b/src/main/java/org/apache/datasketches/quantiles/DirectCompactDoublesSketch.java index 63a81e047..af0579eef 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DirectCompactDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/DirectCompactDoublesSketch.java @@ -250,7 +250,7 @@ static void checkDirectSegCapacity(final int k, final long n, final long segCapB /** * Checks a sketch's serial version and flags to see if the sketch can be wrapped as a * DirectCompactDoubleSketch. Throws an exception if the sketch is neither empty nor compact - * and ordered, unles the sketch uses serialization version 2. + * and ordered, unless the sketch uses serialization version 2. 
* @param serVer the serialization version * @param flags Flags from the sketch to evaluate */ diff --git a/src/main/java/org/apache/datasketches/quantiles/DirectUpdateDoublesSketch.java b/src/main/java/org/apache/datasketches/quantiles/DirectUpdateDoublesSketch.java index 1025aeb86..4976039d5 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DirectUpdateDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/DirectUpdateDoublesSketch.java @@ -384,7 +384,7 @@ static void checkDirectSegCapacity(final int k, final long n, final long segCapB static void checkCompact(final int serVer, final int flags) { final boolean compact = (serVer == 2) || ((flags & COMPACT_FLAG_MASK) > 0); if (compact) { - throw new SketchesArgumentException("Compact MemorySegment is not supported for Wrap Instance."); + throw new SketchesArgumentException("MemorySegment is in compact form and is not supported for this writableWrap Instance."); } } diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java b/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java index b8b0b7633..f39b6b69a 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java @@ -151,29 +151,59 @@ public static DoublesSketch heapify(final MemorySegment srcSeg) { } /** - * Wrap this sketch around the given updatable MemorySegment image of a DoublesSketch, compact or updatable. + * Wrap this sketch around the given MemorySegment image of a compact, read-only DoublesSketch. * - * @param srcSeg the given MemorySegment image of a DoublesSketch that may have data - * @return a sketch that wraps the given srcSeg in read-only mode. + * @param srcSeg the given MemorySegment image of a compact, read-only DoublesSketch. + * @return a compact, read-only sketch that wraps the given MemorySegment. 
*/ public static DoublesSketch wrap(final MemorySegment srcSeg) { + if (!checkIsMemorySegmentCompact(srcSeg)) { + throw new SketchesArgumentException("MemorySegment sketch image must be in compact form."); + } + return DirectCompactDoublesSketch.wrapInstance(srcSeg); + } + + /** + * Wrap this sketch around the given MemorySegment image of an updatable DoublesSketch. + * + *

The given MemorySegment must be writable and it must contain an UpdateDoublesSketch. + * The sketch will be updated and managed totally within the MemorySegment. If the given source + * MemorySegment is created off-heap, then all the management of the sketch's internal data will be off-heap as well.

+ * + *

NOTE: If, while being updated, the sketch requires more capacity than the given size of the MemorySegment, the sketch + * will request more capacity using the {@link MemorySegmentRequest MemorySegmentRequest} interface. The default of this interface will + * return a new MemorySegment on the heap.

+ * + * @param srcSeg the given MemorySegment image of an UpdateDoublesSketch. + * @return an updatable sketch that wraps the given MemorySegment. + */ + public static DoublesSketch writableWrap(final MemorySegment srcSeg) { if (checkIsMemorySegmentCompact(srcSeg)) { - return DirectCompactDoublesSketch.wrapInstance(srcSeg); + throw new SketchesArgumentException("MemorySegment sketch image must be in updatable form."); } return DirectUpdateDoublesSketch.wrapInstance(srcSeg, null); } /** - * Wrap this sketch around the given updatable MemorySegment image of a DoublesSketch, compact or updatable. + * Wrap this sketch around the given MemorySegment image of an updatable DoublesSketch. + * + *

The given MemorySegment must be writable and it must contain an UpdateDoublesSketch. + * The sketch will be updated and managed totally within the MemorySegment. If the given source + * MemorySegment is created off-heap, then all the management of the sketch's internal data will be off-heap as well.

+ * + *

NOTE: If, while being updated, the sketch requires more capacity than the given size of the MemorySegment, the sketch + * will request more capacity using the {@link MemorySegmentRequest MemorySegmentRequest} interface. The default of this interface will + * return a new MemorySegment on the heap. It is up to the user to optionally extend this interface if more flexible + * handling of requests for more capacity is required.

* - * @param srcSeg the given MemorySegment image of a DoublesSketch that may have data. + * @param srcSeg the given MemorySegment image of a DoublesSketch. * @param mSegReq the MemorySegmentRequest used if the given MemorySegment needs to expand. * Otherwise, it can be null and the default MemorySegmentRequest will be used. - * @return a sketch that wraps the given srcSeg in read-only mode. + * @return a sketch that wraps the given MemorySegment. */ - public static DoublesSketch wrap(final MemorySegment srcSeg, final MemorySegmentRequest mSegReq) { + public static DoublesSketch writableWrap(final MemorySegment srcSeg, final MemorySegmentRequest mSegReq) { if (checkIsMemorySegmentCompact(srcSeg)) { - return DirectCompactDoublesSketch.wrapInstance(srcSeg); + throw new SketchesArgumentException("MemorySegment sketch image must be in updatable form."); } return DirectUpdateDoublesSketch.wrapInstance(srcSeg, mSegReq); } diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesUnionImpl.java b/src/main/java/org/apache/datasketches/quantiles/DoublesUnionImpl.java index 59c1021ea..2f2c55bbf 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DoublesUnionImpl.java +++ b/src/main/java/org/apache/datasketches/quantiles/DoublesUnionImpl.java @@ -138,7 +138,7 @@ public void union(final DoublesSketch sketchIn) { @Override public void union(final MemorySegment seg) { Objects.requireNonNull(seg); - gadget_ = updateLogic(maxK_, gadget_, DoublesSketch.wrap(seg, null)); + gadget_ = updateLogic(maxK_, gadget_, DoublesSketch.writableWrap(seg, null)); gadget_.doublesSV = null; } diff --git a/src/main/java/org/apache/datasketches/quantiles/UpdateDoublesSketch.java b/src/main/java/org/apache/datasketches/quantiles/UpdateDoublesSketch.java index cf3c8ac60..d511d8fa7 100644 --- a/src/main/java/org/apache/datasketches/quantiles/UpdateDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/UpdateDoublesSketch.java @@ -34,22 +34,40 @@ public abstract class 
UpdateDoublesSketch extends DoublesSketch { } /** - * Wrap this sketch around the given MemorySegment image of an UpdateDoublesSketch. + * Wrap a sketch around the given source MemorySegment containing sketch data that originated from this sketch. * - * @param srcSeg the given MemorySegment image of an UpdateDoublesSketch and must not be null. - * @return a sketch that wraps the given srcSeg + *

The given MemorySegment must be writable and it must contain an UpdateDoublesSketch. + * The sketch will be updated and managed totally within the MemorySegment. If the given source + * MemorySegment is created off-heap, then all the management of the sketch's internal data will be off-heap as well.

+ * + *

NOTE: If, while being updated, the sketch requires more capacity than the given size of the MemorySegment, the sketch + * will request more capacity using the {@link MemorySegmentRequest MemorySegmentRequest} interface. The default of this interface will + * return a new MemorySegment on the heap.

+ * + * @param srcSeg a MemorySegment that contains sketch data. + * @return an instance of this sketch that wraps the given MemorySegment. */ public static UpdateDoublesSketch wrap(final MemorySegment srcSeg) { return DirectUpdateDoublesSketch.wrapInstance(srcSeg, null); } /** - * Wrap this sketch around the given MemorySegment image of an UpdateDoublesSketch. + * Wrap a sketch around the given source MemorySegment containing sketch data that originated from this sketch and including an + * optional, user defined {@link MemorySegmentRequest MemorySegmentRequest}. + * + *

The given MemorySegment must be writable and it must contain an UpdateDoublesSketch. + * The sketch will be updated and managed totally within the MemorySegment. If the given source + * MemorySegment is created off-heap, then all the management of the sketch's internal data will be off-heap as well.

+ * + *

NOTE: If, while being updated, the sketch requires more capacity than the given size of the MemorySegment, the sketch + * will request more capacity using the {@link MemorySegmentRequest MemorySegmentRequest} interface. The default of this interface will + * return a new MemorySegment on the heap. It is up to the user to optionally extend this interface if more flexible + * handling of requests for more capacity is required.

* - * @param srcSeg the given MemorySegment image of an UpdateDoublesSketch and must not be null. + * @param srcSeg a MemorySegment that contains sketch data. * @param mSegReq the MemorySegmentRequest used if the given MemorySegment needs to expand. * Otherwise, it can be null and the default MemorySegmentRequest will be used. - * @return a sketch that wraps the given srcSeg + * @return an instance of this sketch that wraps the given MemorySegment. */ public static UpdateDoublesSketch wrap(final MemorySegment srcSeg, final MemorySegmentRequest mSegReq) { return DirectUpdateDoublesSketch.wrapInstance(srcSeg, mSegReq); From 0795a6ea0b3bc63c63cfd1e764ced152429bf99d Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 15 Aug 2025 17:30:03 -0700 Subject: [PATCH 02/11] Improve & correct Javadocs Add some missing methods. --- .github/workflows/auto-jdk-matrix.yml | 2 +- .github/workflows/auto-os-matrix.yml | 2 +- .github/workflows/check_cpp_files.yml | 2 +- .github/workflows/codeql-analysis.yml | 2 +- .github/workflows/javadoc.yml | 2 +- .../quantiles/DoublesUnionImpl.java | 12 ++++++--- .../DirectCompactDoublesSketchTest.java | 2 +- .../DirectUpdateDoublesSketchTest.java | 2 +- .../quantiles/DoublesMiscTest.java | 27 +++++++++++++------ .../quantiles/DoublesSketchTest.java | 2 +- .../quantiles/DoublesUnionImplTest.java | 2 +- .../quantiles/DoublesUtilTest.java | 2 +- .../QuantilesSketchCrossLanguageTest.java | 2 +- 13 files changed, 39 insertions(+), 22 deletions(-) diff --git a/.github/workflows/auto-jdk-matrix.yml b/.github/workflows/auto-jdk-matrix.yml index a556d3e82..2ad97017c 100644 --- a/.github/workflows/auto-jdk-matrix.yml +++ b/.github/workflows/auto-jdk-matrix.yml @@ -20,7 +20,7 @@ jobs: strategy: fail-fast: false matrix: - jdk: [ 21 ] + jdk: [ 24 ] env: JDK_VERSION: ${{ matrix.jdk }} diff --git a/.github/workflows/auto-os-matrix.yml b/.github/workflows/auto-os-matrix.yml index 413b7a957..4aca36642 100644 --- a/.github/workflows/auto-os-matrix.yml +++ 
b/.github/workflows/auto-os-matrix.yml @@ -21,7 +21,7 @@ jobs: fail-fast: false matrix: - jdk: [ 21 ] + jdk: [ 24 ] os: [ windows-latest, ubuntu-latest, macos-latest ] include: - os: windows-latest diff --git a/.github/workflows/check_cpp_files.yml b/.github/workflows/check_cpp_files.yml index 778859d0d..de53004f6 100644 --- a/.github/workflows/check_cpp_files.yml +++ b/.github/workflows/check_cpp_files.yml @@ -27,7 +27,7 @@ jobs: - name: Setup Java uses: actions/setup-java@v4 with: - java-version: '21' + java-version: '24' distribution: 'temurin' - name: Configure C++ build diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index f3fde1de0..d934c4878 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -35,7 +35,7 @@ jobs: with: distribution: 'temurin' cache: 'maven' - java-version: '21' + java-version: '24' - name: Initialize CodeQL uses: github/codeql-action/init@v3 diff --git a/.github/workflows/javadoc.yml b/.github/workflows/javadoc.yml index 977c87b27..42ee0981c 100644 --- a/.github/workflows/javadoc.yml +++ b/.github/workflows/javadoc.yml @@ -16,7 +16,7 @@ jobs: - name: Setup Java uses: actions/setup-java@v4 with: - java-version: '21' + java-version: '24' distribution: 'temurin' - name: Echo Java Version diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesUnionImpl.java b/src/main/java/org/apache/datasketches/quantiles/DoublesUnionImpl.java index 2f2c55bbf..0c7a4468f 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DoublesUnionImpl.java +++ b/src/main/java/org/apache/datasketches/quantiles/DoublesUnionImpl.java @@ -20,6 +20,7 @@ package org.apache.datasketches.quantiles; import static org.apache.datasketches.common.Util.LS; +import static org.apache.datasketches.quantiles.ClassicUtil.checkIsMemorySegmentCompact; import static org.apache.datasketches.quantiles.DoublesUtil.copyToHeap; import java.lang.foreign.MemorySegment; @@ -111,8 +112,8 @@ 
static DoublesUnionImpl heapifyInstance(final MemorySegment srcSeg) { } /** - * Returns an updatable Union object that wraps the data of the given MemorySegment - * image of a updatable DoublesSketch. The data of the Union will remain in the MemorySegment. + * Returns an Union object that wraps the data of the given MemorySegment image of a UpdateDoublesSketch. + * The data of the Union will remain in the MemorySegment. * * @param srcSeg A MemorySegment image of an updatable DoublesSketch to be used as the data structure for the union and will be modified. * @param mSegReq the MemorySegmentRequest used if the given MemorySegment needs to expand. @@ -138,7 +139,12 @@ public void union(final DoublesSketch sketchIn) { @Override public void union(final MemorySegment seg) { Objects.requireNonNull(seg); - gadget_ = updateLogic(maxK_, gadget_, DoublesSketch.writableWrap(seg, null)); + if (checkIsMemorySegmentCompact(seg)) { + gadget_ = updateLogic(maxK_, gadget_, DoublesSketch.wrap(seg)); + } else { + gadget_ = updateLogic(maxK_, gadget_, DoublesSketch.writableWrap(seg, null)); + } + gadget_.doublesSV = null; } diff --git a/src/test/java/org/apache/datasketches/quantiles/DirectCompactDoublesSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/DirectCompactDoublesSketchTest.java index 946eca5ca..973b42846 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DirectCompactDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DirectCompactDoublesSketchTest.java @@ -97,7 +97,7 @@ public void wrapEmptyCompactSketch() { final CompactDoublesSketch s1 = DoublesSketch.builder().build().compact(); final MemorySegment seg = MemorySegment.ofBuffer(ByteBuffer.wrap(s1.toByteArray()).order(ByteOrder.nativeOrder())); - final DoublesSketch s2 = DoublesSketch.wrap(seg, MemorySegmentRequest.DEFAULT); + final DoublesSketch s2 = DoublesSketch.wrap(seg); assertTrue(s2.isEmpty()); assertEquals(s2.getN(), 0); assertTrue(Double.isNaN(s2.isEmpty() ? 
Double.NaN : s2.getMinItem())); diff --git a/src/test/java/org/apache/datasketches/quantiles/DirectUpdateDoublesSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/DirectUpdateDoublesSketchTest.java index 6ced470e6..17c7b0384 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DirectUpdateDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DirectUpdateDoublesSketchTest.java @@ -230,7 +230,7 @@ public void serializeDeserialize() { final byte[] arr2 = sketch2.toByteArray(false); assertEquals(arr2.length, sketch2.getSerializedSizeBytes()); - final DoublesSketch sketch3 = DoublesSketch.wrap(MemorySegment.ofArray(arr2), null); + final DoublesSketch sketch3 = DoublesSketch.writableWrap(MemorySegment.ofArray(arr2), null); assertEquals(sketch3.getMinItem(), 0.0); assertEquals(sketch3.getMaxItem(), 1999.0); assertEquals(sketch3.getQuantile(0.5), 1000.0, 10.0); diff --git a/src/test/java/org/apache/datasketches/quantiles/DoublesMiscTest.java b/src/test/java/org/apache/datasketches/quantiles/DoublesMiscTest.java index 049ed7da8..40b349d84 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DoublesMiscTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DoublesMiscTest.java @@ -44,7 +44,7 @@ public void wrapAndUpdating() { Assert.assertEquals(bytes.length, curBytes); //convert to MemorySegment final MemorySegment seg = MemorySegment.ofArray(bytes); - final UpdateDoublesSketch sk2 = (UpdateDoublesSketch) DoublesSketch.wrap(seg, null); + final UpdateDoublesSketch sk2 = (UpdateDoublesSketch) DoublesSketch.writableWrap(seg, null); assertEquals(seg.byteSize(), curBytes); sk2.update(3); sk2.update(4); @@ -60,10 +60,8 @@ public void wrapCompactSketch() { final UpdateDoublesSketch s1 = DoublesSketch.builder().build(); s1.update(1); s1.update(2); - //MemorySegment seg = MemorySegment.ofArray(ByteBuffer.wrap(s1.compact().toByteArray()) - // .asReadOnlyBuffer().order(ByteOrder.nativeOrder()));); final MemorySegment seg = 
MemorySegment.ofArray(s1.compact().toByteArray()); - final DoublesSketch s2 = DoublesSketch.wrap(seg, null); // compact, so this is ok + final DoublesSketch s2 = DoublesSketch.wrap(seg); // compact, so this is ok assertEquals(s2.getMinItem(), 1.0); assertEquals(s2.getMaxItem(), 2.0); assertEquals(s2.getN(), 2); @@ -123,7 +121,7 @@ public void heapifyEmptyCompactSketch() { public void wrapEmptyUpdateSketch() { final UpdateDoublesSketch s1 = DoublesSketch.builder().build(); final MemorySegment seg = MemorySegment.ofArray(s1.toByteArray()).asReadOnly(); - final UpdateDoublesSketch s2 = (UpdateDoublesSketch) DoublesSketch.wrap(seg, null); + final UpdateDoublesSketch s2 = (UpdateDoublesSketch) DoublesSketch.writableWrap(seg, null); assertTrue(s2.isEmpty()); // ensure the various put calls fail @@ -189,7 +187,7 @@ public void wrapEmptyUpdateSketch() { public void wrapEmptyCompactSketch() { final UpdateDoublesSketch s1 = DoublesSketch.builder().build(); final MemorySegment seg = MemorySegment.ofArray(s1.compact().toByteArray()); - final DoublesSketch s2 = DoublesSketch.wrap(seg, null); // compact, so this is ok + final DoublesSketch s2 = DoublesSketch.wrap(seg); // compact, so this is ok Assert.assertTrue(s2.isEmpty()); } @@ -207,7 +205,7 @@ public void heapifyUnionFromSparse() { } @Test - public void heapifyUnionFromCompact() { + public void initializeUnionFromCompactSegment() { final UpdateDoublesSketch s1 = DoublesSketch.builder().build(); s1.update(1); s1.update(2); @@ -219,6 +217,19 @@ public void heapifyUnionFromCompact() { Assert.assertEquals(s2.getMaxItem(), 3.0); } + @Test + public void unionFromUpdatableSegment() { + final UpdateDoublesSketch s1 = DoublesSketch.builder().build(); + s1.update(1); + s1.update(2); + final MemorySegment seg = MemorySegment.ofArray(s1.toByteArray(false)); + final DoublesUnion u = DoublesUnion.wrap(seg); + u.update(3); + final DoublesSketch s2 = u.getResult(); + Assert.assertEquals(s2.getMinItem(), 1.0); + 
Assert.assertEquals(s2.getMaxItem(), 3.0); + } + @Test public void wrapUnionFromHeap() { final UpdateDoublesSketch s1 = DoublesSketch.builder().build(); @@ -239,7 +250,7 @@ public void wrapUnionFromCompact() { s1.update(1); s1.update(2); final MemorySegment seg = MemorySegment.ofArray(s1.toByteArray(true)); - DoublesUnion.wrap(seg, null); //not from compact + DoublesUnion.wrap(seg, null); //compact seg fail(); } diff --git a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java index 8d64c6bec..bcbb16cc2 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java @@ -43,7 +43,7 @@ public void heapToDirect() { for (int i = 0; i < 1000; i++) { heapSketch.update(i); } - final DoublesSketch directSketch = DoublesSketch.wrap(MemorySegment.ofArray(heapSketch.toByteArray(false)), null); + final DoublesSketch directSketch = DoublesSketch.writableWrap(MemorySegment.ofArray(heapSketch.toByteArray(false)), null); assertEquals(directSketch.getMinItem(), 0.0); assertEquals(directSketch.getMaxItem(), 999.0); diff --git a/src/test/java/org/apache/datasketches/quantiles/DoublesUnionImplTest.java b/src/test/java/org/apache/datasketches/quantiles/DoublesUnionImplTest.java index dd58f16e3..094b786d3 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DoublesUnionImplTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DoublesUnionImplTest.java @@ -371,7 +371,7 @@ public void checkUnionQuantiles() { union.union(sketch2); union.union(sketch1); final MemorySegment seg = MemorySegment.ofArray(union.getResult().toByteArray(true)); - final DoublesSketch result = DoublesSketch.wrap(seg, null); + final DoublesSketch result = DoublesSketch.wrap(seg); assertEquals(result.getN(), n1 + n2); assertEquals(result.getK(), k); diff --git 
a/src/test/java/org/apache/datasketches/quantiles/DoublesUtilTest.java b/src/test/java/org/apache/datasketches/quantiles/DoublesUtilTest.java index 0124b39cb..91f86f74f 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DoublesUtilTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DoublesUtilTest.java @@ -85,7 +85,7 @@ public void checkCopyToHeap() { // DirectCompactDoublesSketch final MemorySegment seg2 = MemorySegment.ofArray(hcds.toByteArray()); - final DirectCompactDoublesSketch dcds = (DirectCompactDoublesSketch) DoublesSketch.wrap(seg2, null); + final DirectCompactDoublesSketch dcds = (DirectCompactDoublesSketch) DoublesSketch.wrap(seg2); final HeapUpdateDoublesSketch target4 = DoublesUtil.copyToHeap(dcds); DoublesSketchTest.testSketchEquality(huds, dcds); DoublesSketchTest.testSketchEquality(dcds, target4); diff --git a/src/test/java/org/apache/datasketches/quantiles/QuantilesSketchCrossLanguageTest.java b/src/test/java/org/apache/datasketches/quantiles/QuantilesSketchCrossLanguageTest.java index 0a4ed3c15..20ceab5ca 100644 --- a/src/test/java/org/apache/datasketches/quantiles/QuantilesSketchCrossLanguageTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/QuantilesSketchCrossLanguageTest.java @@ -92,7 +92,7 @@ public void checkDoublesSketch() throws IOException { final int[] nArr = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; for (final int n: nArr) { final byte[] byteArr = Files.readAllBytes(cppPath.resolve("quantiles_double_n" + n + "_cpp.sk")); - final DoublesSketch sk = DoublesSketch.wrap(MemorySegment.ofArray(byteArr), null); + final DoublesSketch sk = DoublesSketch.wrap(MemorySegment.ofArray(byteArr)); assertTrue(n == 0 ? sk.isEmpty() : !sk.isEmpty()); assertTrue(n > 128 ? 
sk.isEstimationMode() : !sk.isEstimationMode()); assertEquals(sk.getN(), n); From 6e378475afe80d3ebb06e805bbed85e81ad6faf3 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Sat, 16 Aug 2025 13:25:15 -0700 Subject: [PATCH 03/11] try fix POM error --- pom.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 8990e1eb0..e15eaa08e 100644 --- a/pom.xml +++ b/pom.xml @@ -232,9 +232,10 @@ under the License. true public all,-missing + From 31a31a7a8d36948c3e5485bd2d96a95379bff940 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Sat, 16 Aug 2025 13:33:03 -0700 Subject: [PATCH 04/11] Fix javadoc errors --- src/main/java/org/apache/datasketches/common/Util.java | 2 +- src/main/java/org/apache/datasketches/hll/package-info.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/apache/datasketches/common/Util.java b/src/main/java/org/apache/datasketches/common/Util.java index 9bed9a804..f210e83de 100644 --- a/src/main/java/org/apache/datasketches/common/Util.java +++ b/src/main/java/org/apache/datasketches/common/Util.java @@ -432,7 +432,7 @@ public static long floorPowerOf2(final long n) { } /** - * This is a long integer equivalent to Math.ceil(n / (double)(1 << k)) + * This is a long integer equivalent to Math.ceil(n / (double)(1 << k)) * where: 0 < k ≤ 6 and n is a non-negative long. * These limits are not checked for speed reasons. * @param n the input dividend as a positive long greater than zero. diff --git a/src/main/java/org/apache/datasketches/hll/package-info.java b/src/main/java/org/apache/datasketches/hll/package-info.java index 602594b10..114d4da96 100644 --- a/src/main/java/org/apache/datasketches/hll/package-info.java +++ b/src/main/java/org/apache/datasketches/hll/package-info.java @@ -147,7 +147,7 @@ * arXiv 1708.06839, August 22, 2017, Yahoo Research. * *

[5] MemorySegment Component, See - * JEP 454: Foreign Function & Memory API + * JEP 454: Foreign Function And Memory API * *

[6] MacBook Pro 2.3 GHz 8-Core Intel Core i9 * From 0b43a8083bef4d106523469fa0b60ed3bdfdf7dc Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Sat, 16 Aug 2025 14:28:52 -0700 Subject: [PATCH 05/11] try update javaoc.yml --- .github/workflows/javadoc.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/javadoc.yml b/.github/workflows/javadoc.yml index 42ee0981c..28bd3eb1e 100644 --- a/.github/workflows/javadoc.yml +++ b/.github/workflows/javadoc.yml @@ -5,6 +5,9 @@ on: # branches: main workflow_dispatch: +permissions: + contents: write + jobs: javadoc: runs-on: ubuntu-latest @@ -30,7 +33,7 @@ jobs: run: mvn javadoc:javadoc - name: Deploy JavaDoc - uses: JamesIves/github-pages-deploy-action@v4.6.8 + uses: JamesIves/github-pages-deploy-action@v4 with: token: ${{ secrets.GITHUB_TOKEN }} folder: target/reports/apidocs From 610c6ad2fb08e4b5a30d3c3915e93d0254aceaeb Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Sat, 16 Aug 2025 16:17:51 -0700 Subject: [PATCH 06/11] try again --- .github/workflows/javadoc.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/javadoc.yml b/.github/workflows/javadoc.yml index 28bd3eb1e..0d6837d75 100644 --- a/.github/workflows/javadoc.yml +++ b/.github/workflows/javadoc.yml @@ -33,7 +33,7 @@ jobs: run: mvn javadoc:javadoc - name: Deploy JavaDoc - uses: JamesIves/github-pages-deploy-action@v4 + uses: JamesIves/github-pages-deploy-action@881db5376404c5c8d621010bcbec0310b58d5e29 with: token: ${{ secrets.GITHUB_TOKEN }} folder: target/reports/apidocs From 644291bc2102f25e3357356f28006e74c8b61271 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Sat, 16 Aug 2025 16:24:24 -0700 Subject: [PATCH 07/11] Re-enable all GHA workflows --- .github/workflows/auto-jdk-matrix.yml | 12 ++++++------ .github/workflows/auto-os-matrix.yml | 14 +++++++------- .github/workflows/check_cpp_files.yml | 14 +++++++------- .github/workflows/codeql-analysis.yml | 14 +++++++------- 
.github/workflows/javadoc.yml | 4 ++-- 5 files changed, 29 insertions(+), 29 deletions(-) diff --git a/.github/workflows/auto-jdk-matrix.yml b/.github/workflows/auto-jdk-matrix.yml index 2ad97017c..f5ba78463 100644 --- a/.github/workflows/auto-jdk-matrix.yml +++ b/.github/workflows/auto-jdk-matrix.yml @@ -1,12 +1,12 @@ name: Auto JDK Matrix Test & Install on: -# push: -# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] -# pull_request: -# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] -# # The branches below must be a subset of the branches above -# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] + push: + branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] + pull_request: + paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] + # The branches below must be a subset of the branches above + branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] workflow_dispatch: env: diff --git a/.github/workflows/auto-os-matrix.yml b/.github/workflows/auto-os-matrix.yml index 4aca36642..abb9baec3 100644 --- a/.github/workflows/auto-os-matrix.yml +++ b/.github/workflows/auto-os-matrix.yml @@ -1,13 +1,13 @@ name: Auto OS Matrix Test & Install on: -# push: -# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] -# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] -# pull_request: -# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] -# # The branches below must be a subset of the branches above -# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] + push: + paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] + branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] + pull_request: + paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] + # The branches below 
must be a subset of the branches above + branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] workflow_dispatch: env: diff --git a/.github/workflows/check_cpp_files.yml b/.github/workflows/check_cpp_files.yml index de53004f6..e433dcb87 100644 --- a/.github/workflows/check_cpp_files.yml +++ b/.github/workflows/check_cpp_files.yml @@ -1,13 +1,13 @@ name: CPP SerDe Compatibility Test on: -# push: -# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] -# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] -# pull_request: -# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] -# # The branches below must be a subset of the branches above -# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] + push: + paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] + branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] + pull_request: + paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] + # The branches below must be a subset of the branches above + branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] workflow_dispatch: jobs: diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index d934c4878..d8114578a 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -1,13 +1,13 @@ name: "CodeQL" on: -# push: -# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] -# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] -# pull_request: -# paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] -# # The branches below must be a subset of the branches above -# branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] + push: + paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', 
'**/*.yml', '**/LICENSE', '**/NOTICE' ] + branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] + pull_request: + paths-ignore: [ '**/*.html', '**/*.md', '**/*.txt', '**/*.xml', '**/*.yaml', '**/*.yml', '**/LICENSE', '**/NOTICE' ] + # The branches below must be a subset of the branches above + branches: [ 'main', '[0-9]+.[0-9]+.[Xx]' ] workflow_dispatch: jobs: diff --git a/.github/workflows/javadoc.yml b/.github/workflows/javadoc.yml index 0d6837d75..2fef93616 100644 --- a/.github/workflows/javadoc.yml +++ b/.github/workflows/javadoc.yml @@ -1,8 +1,8 @@ name: JavaDoc on: -# push: -# branches: main + push: + branches: main workflow_dispatch: permissions: From dd45187cf8995fc9860bc35bd3a7083a69f8042f Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Sat, 16 Aug 2025 17:32:30 -0700 Subject: [PATCH 08/11] fixed problem in POM. --- pom.xml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pom.xml b/pom.xml index e15eaa08e..972d21449 100644 --- a/pom.xml +++ b/pom.xml @@ -232,10 +232,6 @@ under the License. true public all,-missing - From 8044090a2869696933912a88d0b9c1eaea1f9300 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Mon, 18 Aug 2025 10:36:43 -0700 Subject: [PATCH 09/11] Set P to zero for compact sketches. Added storage layout documentation for the compact compressed sketch. 
--- .../datasketches/theta/CompactOperations.java | 2 +- .../datasketches/theta/PreambleUtil.java | 30 +++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/apache/datasketches/theta/CompactOperations.java b/src/main/java/org/apache/datasketches/theta/CompactOperations.java index 265357296..926600638 100644 --- a/src/main/java/org/apache/datasketches/theta/CompactOperations.java +++ b/src/main/java/org/apache/datasketches/theta/CompactOperations.java @@ -253,7 +253,7 @@ static MemorySegment loadCompactMemorySegment( } if (preLongs > 1) { insertCurCount(dstWSeg, curCount); - insertP(dstWSeg, (float) 1.0); + insertP(dstWSeg, (float) 0.0); //0.0 to be consistent with C++ } if (preLongs > 2) { insertThetaLong(dstWSeg, thetaLong); diff --git a/src/main/java/org/apache/datasketches/theta/PreambleUtil.java b/src/main/java/org/apache/datasketches/theta/PreambleUtil.java index 26e263852..b3451fcd1 100644 --- a/src/main/java/org/apache/datasketches/theta/PreambleUtil.java +++ b/src/main/java/org/apache/datasketches/theta/PreambleUtil.java @@ -109,6 +109,36 @@ * 3 ||----------------------Start of Compact Long Array----------------------------------| * * + *

The compressed CompactSketch has 8 bytes of preamble in exact mode because Theta can + * be assumed to be 1.0. In estimating mode, the 2nd 8 bytes is Theta as a Long. The following + * table assumes estimating mode. In any case the number of retained entries starts immediately + * after, followed immediately by the delta encoded compressed byte array.

+ * Unique to this table: + *
  • Byte 3: entryBits (entBits): max number of bits for any one 64 bit hash not + * including leading zeros. A value in the range [1,63].
  • + *
  • Byte 4: numEntriesBytes (numEB): number of bytes required to hold the integer of number + * of retained entries not including leading zero bytes. A value in the range [1,4].
  • + *
  • The number of retained entries is stored starting at byte 16 (assuming estimating mode) + * and may extend through bytes 17, 18 and 19. In any case, the delta encoded compressed array + * starts immediately after and could start at byte 17, 18, 19 or 20.
  • + *
+ * + *
+ * Long || Start Byte Adr:
+ * Adr:
+ *      ||    7   |    6   |    5   |    4   |    3   |    2   |    1   |     0              |
+ *  0   ||    Seed Hash    | Flags  | numEB  | entBits| FamID  | SerVer |     PreLongs = 2   |
+ *
+ *      ||   15   |   14   |   13   |   12   |   11   |   10   |    9   |     8              |
+ *  1   ||------------------------------THETA_LONG-------------------------------------------|
+ *
+ *      ||        |        |        |  (20)  |  (19)  |  (18)  |  (17)  |    16              |
+ *  2   ||----------------Retained Entries stored as 1 to 4 bytes----------------------------|
+ *
+ *      ||        |        |        |        |        |        |        |                    |
+ *  3   ||------------------Delta encoded compressed byte array------------------------------|
+ *  
+ * *

The UpdateSketch and AlphaSketch require 24 bytes of preamble followed by a non-compact * array of longs representing a hash table.

* From 3f6d7209aa97e55b784979a2b897c213f12ab43d Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Wed, 20 Aug 2025 10:27:23 -0700 Subject: [PATCH 10/11] Improve error messages. --- .../org/apache/datasketches/quantiles/DoublesSketch.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java b/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java index f39b6b69a..a6daef9d7 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java @@ -158,7 +158,9 @@ public static DoublesSketch heapify(final MemorySegment srcSeg) { */ public static DoublesSketch wrap(final MemorySegment srcSeg) { if (!checkIsMemorySegmentCompact(srcSeg)) { - throw new SketchesArgumentException("MemorySegment sketch image must be in compact form."); + throw new SketchesArgumentException( + "MemorySegment sketch image must be in compact form. " + + "Use {@link #writableWrap(MemorySegment writableWrap(...)} for updatable sketches."); } return DirectCompactDoublesSketch.wrapInstance(srcSeg); } @@ -179,7 +181,9 @@ public static DoublesSketch wrap(final MemorySegment srcSeg) { */ public static DoublesSketch writableWrap(final MemorySegment srcSeg) { if (checkIsMemorySegmentCompact(srcSeg)) { - throw new SketchesArgumentException("MemorySegment sketch image must be in updatable form."); + throw new SketchesArgumentException( + "MemorySegment sketch image must be in updatable form. 
" + + "Use {@link #wrap(MemorySegment writableWrap(...)} for compact sketches."); } return DirectUpdateDoublesSketch.wrapInstance(srcSeg, null); } From c38514a1fa408026524e1c11ab59452628f4118b Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Wed, 20 Aug 2025 10:33:30 -0700 Subject: [PATCH 11/11] Fix typo --- .../org/apache/datasketches/quantiles/DoublesSketch.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java b/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java index a6daef9d7..e14d5301e 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java @@ -159,7 +159,7 @@ public static DoublesSketch heapify(final MemorySegment srcSeg) { public static DoublesSketch wrap(final MemorySegment srcSeg) { if (!checkIsMemorySegmentCompact(srcSeg)) { throw new SketchesArgumentException( - "MemorySegment sketch image must be in compact form. " + "MemorySegment sketch image must be in compact form. " + "Use {@link #writableWrap(MemorySegment writableWrap(...)} for updatable sketches."); } return DirectCompactDoublesSketch.wrapInstance(srcSeg); @@ -182,8 +182,8 @@ public static DoublesSketch wrap(final MemorySegment srcSeg) { public static DoublesSketch writableWrap(final MemorySegment srcSeg) { if (checkIsMemorySegmentCompact(srcSeg)) { throw new SketchesArgumentException( - "MemorySegment sketch image must be in updatable form. " - + "Use {@link #wrap(MemorySegment writableWrap(...)} for compact sketches."); + "MemorySegment sketch image must be in updatable form. " + + "Use {@link #wrap(MemorySegment wrap(...)} for compact sketches."); } return DirectUpdateDoublesSketch.wrapInstance(srcSeg, null); }