From 7fedb110f7e886db8e679805335e6950d741b498 Mon Sep 17 00:00:00 2001 From: Zhuo Wang Date: Thu, 8 Jan 2026 17:34:35 +0800 Subject: [PATCH] feat: Impl Transform::ToHumanString --- src/iceberg/partition_spec.cc | 5 +- src/iceberg/test/CMakeLists.txt | 1 + src/iceberg/test/location_provider_test.cc | 2 +- src/iceberg/test/meson.build | 1 + src/iceberg/test/partition_spec_test.cc | 5 +- .../test/transform_human_string_test.cc | 185 ++++++++++++++++++ src/iceberg/transform.cc | 45 +++++ src/iceberg/transform.h | 6 + 8 files changed, 243 insertions(+), 7 deletions(-) create mode 100644 src/iceberg/test/transform_human_string_test.cc diff --git a/src/iceberg/partition_spec.cc b/src/iceberg/partition_spec.cc index 9c38d0c53..c00eab7d2 100644 --- a/src/iceberg/partition_spec.cc +++ b/src/iceberg/partition_spec.cc @@ -111,9 +111,8 @@ Result PartitionSpec::PartitionPath(const PartitionValues& data) co if (i > 0) { ss << "/"; } - // TODO(zhuo.wang): transform for partition value, will be fixed after transform util - // is ready - std::string partition_value = value.get().ToString(); + ICEBERG_ASSIGN_OR_RAISE(auto partition_value, + fields_[i].transform()->ToHumanString(value)); ss << UrlEncoder::Encode(fields_[i].name()) << "=" << UrlEncoder::Encode(partition_value); } diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index 6124b6bcf..4f4516c7b 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -68,6 +68,7 @@ add_iceberg_test(schema_test schema_util_test.cc sort_field_test.cc sort_order_test.cc + transform_human_string_test.cc transform_test.cc type_test.cc) diff --git a/src/iceberg/test/location_provider_test.cc b/src/iceberg/test/location_provider_test.cc index b287ded76..c78eb588e 100644 --- a/src/iceberg/test/location_provider_test.cc +++ b/src/iceberg/test/location_provider_test.cc @@ -112,7 +112,7 @@ TEST_F(LocationProviderTest, ObjectStorageWithPartition) { std::vector parts = SplitString(location, '/'); 
ASSERT_GT(parts.size(), 2); - EXPECT_EQ("data%231=%22val%231%22", parts[parts.size() - 2]); + EXPECT_EQ("data%231=val%231", parts[parts.size() - 2]); } TEST_F(LocationProviderTest, ObjectStorageExcludePartitionInPath) { diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build index 95c689620..791340be7 100644 --- a/src/iceberg/test/meson.build +++ b/src/iceberg/test/meson.build @@ -40,6 +40,7 @@ iceberg_tests = { 'schema_util_test.cc', 'sort_field_test.cc', 'sort_order_test.cc', + 'transform_human_string_test.cc', 'transform_test.cc', 'type_test.cc', ), diff --git a/src/iceberg/test/partition_spec_test.cc b/src/iceberg/test/partition_spec_test.cc index ea3ea6e12..6f1b49952 100644 --- a/src/iceberg/test/partition_spec_test.cc +++ b/src/iceberg/test/partition_spec_test.cc @@ -458,8 +458,7 @@ TEST(PartitionSpecTest, PartitionPath) { PartitionValues part_data( {Literal::Int(123), Literal::String("val2"), Literal::Date(19489)}); ICEBERG_UNWRAP_OR_FAIL(auto path, spec->PartitionPath(part_data)); - std::string expected = - "id_partition=123/name_partition=%22val2%22/ts_partition=19489"; + std::string expected = "id_partition=123/name_partition=val2/ts_partition=2023-05-12"; EXPECT_EQ(expected, path); } @@ -469,7 +468,7 @@ TEST(PartitionSpecTest, PartitionPath) { {Literal::Int(123), Literal::String("val#2"), Literal::Date(19489)}); ICEBERG_UNWRAP_OR_FAIL(auto path, spec->PartitionPath(part_data)); std::string expected = - "id_partition=123/name_partition=%22val%232%22/ts_partition=19489"; + "id_partition=123/name_partition=val%232/ts_partition=2023-05-12"; EXPECT_EQ(expected, path); } } diff --git a/src/iceberg/test/transform_human_string_test.cc b/src/iceberg/test/transform_human_string_test.cc new file mode 100644 index 000000000..76aa218c0 --- /dev/null +++ b/src/iceberg/test/transform_human_string_test.cc @@ -0,0 +1,185 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include
+#include
+
+#include
+
+#include "iceberg/test/matchers.h"
+#include "iceberg/transform.h"
+
+namespace iceberg {
+
+struct HumanStringTestParam {
+  std::string test_name;
+  std::shared_ptr source_type;
+  Literal literal;
+  std::vector expecteds;  // one expected string per transform, in fixture order
+};
+
+class IdentityHumanStringTest : public ::testing::TestWithParam {
+ protected:
+  std::vector> transforms_{{Transform::Identity()}};
+};
+
+TEST_P(IdentityHumanStringTest, ToHumanString) {
+  const auto& param = GetParam();
+  for (size_t i = 0; i < transforms_.size(); ++i) {  // unsigned index, like size()
+    EXPECT_THAT(transforms_[i]->ToHumanString(param.literal),
+                HasValue(::testing::Eq(param.expecteds[i])));
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    IdentityHumanStringTestCases, IdentityHumanStringTest,
+    ::testing::Values(
+        HumanStringTestParam{.test_name = "Null",
+                             .literal = Literal::Null(std::make_shared()),
+                             .expecteds{"null"}},
+        HumanStringTestParam{.test_name = "Binary",
+                             .literal = Literal::Binary(std::vector{1, 2, 3}),
+                             .expecteds{"AQID"}},
+        HumanStringTestParam{.test_name = "Fixed",
+                             .literal = Literal::Fixed(std::vector{1, 2, 3}),
+                             .expecteds{"AQID"}},
+        HumanStringTestParam{.test_name = "Date",
+                             .literal = Literal::Date(17501),
+                             .expecteds{"2017-12-01"}},
+        HumanStringTestParam{.test_name = "Time",
+                             .literal =
Literal::Time(36775038194), + .expecteds{"10:12:55.038194"}}, + HumanStringTestParam{.test_name = "TimestampWithZone", + .literal = Literal::TimestampTz(1512151975038194), + .expecteds{"2017-12-01T18:12:55.038194+00:00"}}, + HumanStringTestParam{.test_name = "TimestampWithoutZone", + .literal = Literal::Timestamp(1512123175038194), + .expecteds{"2017-12-01T10:12:55.038194"}}, + HumanStringTestParam{.test_name = "Long", + .literal = Literal::Long(-1234567890000L), + .expecteds{"-1234567890000"}}, + HumanStringTestParam{.test_name = "String", + .literal = Literal::String("a/b/c=d"), + .expecteds{"a/b/c=d"}}), + [](const ::testing::TestParamInfo& info) { + return info.param.test_name; + }); + +class DateHumanStringTest : public ::testing::TestWithParam { + protected: + std::vector> transforms_{ + Transform::Year(), Transform::Month(), Transform::Day()}; +}; + +TEST_P(DateHumanStringTest, ToHumanString) { + const auto& param = GetParam(); + + for (uint32_t i = 0; i < transforms_.size(); i++) { + ICEBERG_UNWRAP_OR_FAIL(auto trans_func, + transforms_[i]->Bind(std::make_shared())); + ICEBERG_UNWRAP_OR_FAIL(auto literal, trans_func->Transform(param.literal)); + EXPECT_THAT(transforms_[i]->ToHumanString(literal), + HasValue(::testing::Eq(param.expecteds[i]))); + } +} + +INSTANTIATE_TEST_SUITE_P( + DateHumanStringTestCases, DateHumanStringTest, + ::testing::Values( + HumanStringTestParam{.test_name = "Date", + .literal = Literal::Date(17501), + .expecteds = {"2017", "2017-12", "2017-12-01"}}, + HumanStringTestParam{.test_name = "NegativeDate", + .literal = Literal::Date(-2), + .expecteds = {"1969", "1969-12", "1969-12-30"}}, + HumanStringTestParam{.test_name = "DateLowerBound", + .literal = Literal::Date(0), + .expecteds = {"1970", "1970-01", "1970-01-01"}}, + HumanStringTestParam{.test_name = "NegativeDateLowerBound", + .literal = Literal::Date(-365), + .expecteds = {"1969", "1969-01", "1969-01-01"}}, + HumanStringTestParam{.test_name = "NegativeDateUpperBound", + .literal 
= Literal::Date(-1), + .expecteds = {"1969", "1969-12", "1969-12-31"}}, + HumanStringTestParam{.test_name = "Null", + .literal = Literal::Null(std::make_shared()), + .expecteds = {"null", "null", "null"}}), + [](const ::testing::TestParamInfo& info) { + return info.param.test_name; + }); + +class TimestampHumanStringTest : public ::testing::TestWithParam { + protected: + std::vector> transforms_{ + Transform::Year(), Transform::Month(), Transform::Day(), Transform::Hour()}; +}; + +TEST_P(TimestampHumanStringTest, ToHumanString) { + const auto& param = GetParam(); + for (uint32_t i = 0; i < transforms_.size(); i++) { + ICEBERG_UNWRAP_OR_FAIL(auto trans_func, transforms_[i]->Bind(param.source_type)); + ICEBERG_UNWRAP_OR_FAIL(auto literal, trans_func->Transform(param.literal)); + EXPECT_THAT(transforms_[i]->ToHumanString(literal), + HasValue(::testing::Eq(param.expecteds[i]))); + } +} + +INSTANTIATE_TEST_SUITE_P( + TimestampHumanStringTestCases, TimestampHumanStringTest, + ::testing::Values( + HumanStringTestParam{ + .test_name = "Timestamp", + .source_type = std::make_shared(), + .literal = Literal::Timestamp(1512123175038194), + .expecteds = {"2017", "2017-12", "2017-12-01", "2017-12-01-10"}}, + HumanStringTestParam{ + .test_name = "NegativeTimestamp", + .source_type = std::make_shared(), + .literal = Literal::Timestamp(-136024961806), + .expecteds = {"1969", "1969-12", "1969-12-30", "1969-12-30-10"}}, + HumanStringTestParam{ + .test_name = "TimestampLowerBound", + .source_type = std::make_shared(), + .literal = Literal::Timestamp(0), + .expecteds = {"1970", "1970-01", "1970-01-01", "1970-01-01-00"}}, + HumanStringTestParam{ + .test_name = "NegativeTimestampLowerBound", + .source_type = std::make_shared(), + .literal = Literal::Timestamp(-172800000000), + .expecteds = {"1969", "1969-12", "1969-12-30", "1969-12-30-00"}, + }, + HumanStringTestParam{ + .test_name = "NegativeTimestampUpperBound", + .source_type = std::make_shared(), + .literal = Literal::Timestamp(-1), 
+            .expecteds = {"1969", "1969-12", "1969-12-31", "1969-12-31-23"}},
+        HumanStringTestParam{
+            .test_name = "TimestampTz",
+            .source_type = std::make_shared(),
+            .literal = Literal::TimestampTz(1512151975038194),
+            .expecteds = {"2017", "2017-12", "2017-12-01", "2017-12-01-18"}},
+        HumanStringTestParam{.test_name = "Null",
+                             .source_type = std::make_shared(),
+                             .literal = Literal::Null(std::make_shared()),
+                             .expecteds = {"null", "null", "null", "null"}}),
+    [](const ::testing::TestParamInfo& info) {
+      return info.param.test_name;
+    });
+
+}  // namespace iceberg
diff --git a/src/iceberg/transform.cc b/src/iceberg/transform.cc
index 560cc3921..3da66e16c 100644
--- a/src/iceberg/transform.cc
+++ b/src/iceberg/transform.cc
@@ -31,6 +31,7 @@
 #include "iceberg/util/checked_cast.h"
 #include "iceberg/util/macros.h"
 #include "iceberg/util/projection_util_internal.h"
+#include "iceberg/util/transform_util.h"
 
 namespace iceberg {
 namespace {
@@ -366,6 +367,50 @@ Result> Transform::ProjectStrict(
   std::unreachable();
 }
 
+Result Transform::ToHumanString(const Literal& value) const {
+  if (value.IsNull()) {
+    return "null";
+  }
+
+  switch (transform_type_) {
+    case TransformType::kYear:
+      return TransformUtil::HumanYear(std::get(value.value()));
+    case TransformType::kMonth:
+      return TransformUtil::HumanMonth(std::get(value.value()));
+    case TransformType::kDay:
+      return TransformUtil::HumanDay(std::get(value.value()));
+    case TransformType::kHour:
+      return TransformUtil::HumanHour(std::get(value.value()));
+    default: {  // every other transform: format by the literal's own type
+      switch (value.type()->type_id()) {
+        case TypeId::kDate:
+          return TransformUtil::HumanDay(std::get(value.value()));
+        case TypeId::kTime:
+          return TransformUtil::HumanTime(std::get(value.value()));
+        case TypeId::kTimestamp:
+          return TransformUtil::HumanTimestamp(std::get(value.value()));
+        case TypeId::kTimestampTz:
+          return TransformUtil::HumanTimestampWithZone(std::get(value.value()));
+        case TypeId::kFixed:
+        case TypeId::kBinary: {  // raw bytes are rendered as Base64 text
+          const auto& binary_data = std::get>(value.value());
+          return TransformUtil::Base64Encode(
+              {reinterpret_cast(binary_data.data()), binary_data.size()});
+        }
+        case TypeId::kDecimal: {  // the scale comes from the type, not the value
+          const auto& decimal_type = internal::checked_cast(*value.type());
+          const auto& decimal = std::get<::iceberg::Decimal>(value.value());
+          return decimal.ToString(decimal_type.scale());
+        }
+        case TypeId::kString:  // returned as-is, not via Literal::ToString()
+          return std::get(value.value());
+        default:
+          return value.ToString();
+      }
+    }
+  }
+}
+
 bool TransformFunction::Equals(const TransformFunction& other) const {
   return transform_type_ == other.transform_type_ && *source_type_ == *other.source_type_;
 }
diff --git a/src/iceberg/transform.h b/src/iceberg/transform.h
index 36da46d91..138fa0be4 100644
--- a/src/iceberg/transform.h
+++ b/src/iceberg/transform.h
@@ -194,6 +194,12 @@ class ICEBERG_EXPORT Transform : public util::Formattable {
   Result> ProjectStrict(
       std::string_view name, const std::shared_ptr& predicate);
 
+  /// \brief Returns a human-readable string representation of a transformed value.
+  ///
+  /// \param value The already-transformed literal (e.g. a partition value) to format.
+  /// \return A human-readable string for the value; "null" if the value is null.
+  Result ToHumanString(const Literal& value) const;
+
   /// \brief Returns a string representation of this transform (e.g., "bucket[16]").
   std::string ToString() const override;
 