From 5ac82cd4cc31cad22d5377137261fe506f24ce74 Mon Sep 17 00:00:00 2001 From: Yibo-Chen13 Date: Mon, 9 Dec 2024 15:15:46 +0800 Subject: [PATCH] support json --- .github/workflows/timeplus_cpp_ci.yml | 2 +- tests/simple/TestFunctions.cpp | 35 +++ tests/simple/TestFunctions.h | 1 + tests/simple/main.cpp | 1 + timeplus/CMakeLists.txt | 3 + timeplus/client.h | 1 + timeplus/columns/factory.cpp | 4 + timeplus/columns/json.cpp | 406 ++++++++++++++++++++++++++ timeplus/columns/json.h | 152 ++++++++++ timeplus/types/type_parser.cpp | 1 + timeplus/types/types.cpp | 7 + timeplus/types/types.h | 5 +- ut/columns_ut.cpp | 128 ++++++++ ut/roundtrip_tests.cpp | 36 ++- ut/value_generators.cpp | 16 + ut/value_generators.h | 3 + 16 files changed, 798 insertions(+), 3 deletions(-) create mode 100644 timeplus/columns/json.cpp create mode 100644 timeplus/columns/json.h diff --git a/.github/workflows/timeplus_cpp_ci.yml b/.github/workflows/timeplus_cpp_ci.yml index 0313650..af5ca18 100644 --- a/.github/workflows/timeplus_cpp_ci.yml +++ b/.github/workflows/timeplus_cpp_ci.yml @@ -118,7 +118,7 @@ jobs: - name: Test - Start timeplusd server in background run: | docker pull timeplus/timeplusd:${{matrix.timeplusd}} - docker run -d --name timeplusd -p 8463:8463 timeplus/timeplusd:${{matrix.timeplusd}} + docker run -d --name timeplusd -p 8463:8463 -v /mnt/timeplusd:/var/lib/timeplusd timeplus/timeplusd:${{matrix.timeplusd}} docker ps -a docker stats -a --no-stream ## Check and wait until timeplusd is ready to accept connections diff --git a/tests/simple/TestFunctions.cpp b/tests/simple/TestFunctions.cpp index a34bd1f..cb48025 100644 --- a/tests/simple/TestFunctions.cpp +++ b/tests/simple/TestFunctions.cpp @@ -879,3 +879,38 @@ void testNestedType(Client& client){ client.Execute("DROP STREAM IF EXISTS nested_example"); } + +void testJsonType(Client& client) { + client.Execute("DROP STREAM IF EXISTS json_example"); + client.Execute("CREATE STREAM IF NOT EXISTS json_example (j json)"); + + { + Block block; + + auto json_col = std::make_shared(); + + auto f1 = std::make_shared(std::vector{3.1415, 9898.5679}); + auto f2 = std::make_shared(std::vector{23.123, 0.999754}); + + auto json1 = std::unordered_map{{"a.b.b.c", f1}, {"`a.b.b`.c", f2}}; + + json_col->Append(json1); + + block.AppendColumn("j", json_col); + + client.Insert("json_example", block); + } + + std::this_thread::sleep_for(std::chrono::seconds(3)); + + client.Select("SELECT j::json FROM table(json_example)", [](const Block& block) { + for (size_t c = 0; c < block.GetRowCount(); ++c) { + auto json = block[0]->As(); + + auto a_b_b_c = (*json)["a.b.b.c"]->As(); + auto abb_c = (*json)["`a.b.b`.c"]->As(); + std::cout << "a.b.b.c[" << c << "]: " << a_b_b_c->At(c) << "\n"; + std::cout << "`a.b.b`.c[" << c << "]: " << abb_c->At(c) << "\n"; + } + }); +} diff --git a/tests/simple/TestFunctions.h b/tests/simple/TestFunctions.h index d540322..6deb4dc 100644 --- a/tests/simple/TestFunctions.h +++ b/tests/simple/TestFunctions.h @@ -43,3 +43,4 @@ void testLowCardinalityStringType(Client& client); void testMapType(Client& client); void testTupleType(Client& client); void testNestedType(Client& client); +void testJsonType(Client& client); diff --git a/tests/simple/main.cpp b/tests/simple/main.cpp index c5a34b9..e8cf68e 100644 --- a/tests/simple/main.cpp +++ b/tests/simple/main.cpp @@ -17,6 +17,7 @@ static void RunTests(Client& client) { testMapType(client); testTupleType(client); testNestedType(client); + testJsonType(client); } int main() diff --git a/timeplus/CMakeLists.txt b/timeplus/CMakeLists.txt index fa5a608..286abdf 100644 --- a/timeplus/CMakeLists.txt +++ b/timeplus/CMakeLists.txt @@ -23,6 +23,7 @@ SET ( timeplus-cpp-lib-src columns/string.cpp columns/tuple.cpp columns/uuid.cpp + columns/json.cpp columns/itemview.cpp @@ -72,6 +73,7 @@ SET ( timeplus-cpp-lib-src columns/tuple.h columns/utils.h columns/uuid.h + columns/json.h types/type_parser.h types/types.h @@ -213,6 +215,7 @@ INSTALL(FILES columns/string.h DESTINATION include/timeplus/columns/) INSTALL(FILES columns/tuple.h DESTINATION include/timeplus/columns/) INSTALL(FILES columns/utils.h DESTINATION include/timeplus/columns/) INSTALL(FILES columns/uuid.h DESTINATION include/timeplus/columns/) +INSTALL(FILES columns/json.h DESTINATION include/timeplus/columns/) # types INSTALL(FILES types/type_parser.h DESTINATION include/timeplus/types/) diff --git a/timeplus/client.h b/timeplus/client.h index eab146e..ed67fea 100644 --- a/timeplus/client.h +++ b/timeplus/client.h @@ -17,6 +17,7 @@ #include "columns/string.h" #include "columns/tuple.h" #include "columns/uuid.h" +#include "columns/json.h" #include #include diff --git a/timeplus/columns/factory.cpp b/timeplus/columns/factory.cpp index f0a8f1c..134bd58 100644 --- a/timeplus/columns/factory.cpp +++ b/timeplus/columns/factory.cpp @@ -16,6 +16,7 @@ #include "string.h" #include "tuple.h" #include "uuid.h" +#include "json.h" #include "../types/type_parser.h" @@ -135,6 +136,9 @@ static ColumnRef CreateTerminalColumn(const TypeAst& ast) { case Type::MultiPolygon: return std::make_shared(); + case Type::Json: + return std::make_shared(); + default: return nullptr; } diff --git a/timeplus/columns/json.cpp b/timeplus/columns/json.cpp new file mode 100644 index 0000000..0d65cbc --- /dev/null +++ b/timeplus/columns/json.cpp @@ -0,0 +1,406 @@ +#include "json.h" +#include "../base/wire_format.h" +#include "../exceptions.h" +#include "array.h" +#include "factory.h" +#include "numeric.h" +#include "string.h" + +namespace timeplus { + +std::string EscapeJsonPath(const std::string& path) { + bool has_escape = false; + bool has_dot = false; + for (auto& ch : path) { + if (ch == '.') { + has_dot = true; + if (!has_escape) { + break; + } + continue; + } else if (ch == '`') { + if (has_escape) { + has_dot = false; + } + has_escape = !has_escape; + } + } + + if (!has_dot) { + return path; + } + + std::string res; + res.push_back('`'); + for (auto& ch : path) { + if (ch == '`') { + res.push_back('\\'); + } + res.push_back(ch); + } + res.push_back('`'); + return res; +} + +std::string UnescapeJsonPath(const std::string& path) { + if (path.size() <= 2 || path[0] != '`' || path.back() != '`') { + return path; + } + std::string res; + for (size_t i = 1; i < path.size() - 1; ++i) { + if (path[i] == '`') { + return path; + } + if (path.substr(i, 2) == "\\`") { + i++; + } + res.push_back(path[i]); + } + return res; +} + +std::vector SplitJsonPath(const std::string& path) { + std::vector parts; + std::string part; + bool has_escape = false; + for (auto& ch : path) { + if (ch == '`') { + has_escape = !has_escape; + } + if (ch == '.' && !has_escape) { + parts.push_back(UnescapeJsonPath(part)); + part.clear(); + continue; + } + part.push_back(ch); + } + + // last part + parts.push_back(UnescapeJsonPath(part)); + return parts; +} + +std::string BuildJsonPath(const std::vector& parts) { + std::string path; + for (size_t i = 0; i < parts.size(); ++i) { + auto escapeed_part = EscapeJsonPath(parts[i]); + path.append(escapeed_part); + if (i != parts.size() - 1) { + path.push_back('.'); + } + } + return path; +} + +JsonValue& ColumnJson::operator[](const JsonKey& path) { + auto it = data_.find(path); + if (it == data_.end()) { + throw ValidationError("path is invaild or not exists."); + } + return it->second; +} + +JsonValue& ColumnJson::At(const JsonKey& path) { + return (*this)[path]; +} + +void ColumnJson::Append(ColumnRef column) { + auto sz = Size(); + if (auto col = column->As()) { + for (auto& [key, value] : col->data_) { + if (data_.find(key) == data_.end()) { + data_[key] = value->CloneEmpty(); + AppendZeroValueToColumn(data_[key], sz); + } + data_[key]->Append(value); + } + + for (auto& [key, value] : data_) { + if (col->data_.find(key) == col->data_.end()) { + AppendZeroValueToColumn(value, col->Size()); + } + } + } +} + +void ColumnJson::Reserve(size_t new_cap) { + for (auto& col : data_) { + col.second->Reserve(new_cap); + } +} + +bool ColumnJson::LoadPrefix(InputStream* input, [[maybe_unused]] size_t rows) { + uint64_t kind; + if (!WireFormat::ReadUInt64(*input, &kind)) { + return false; + } + return true; +} + +bool ColumnJson::LoadBody(InputStream* input, size_t rows) { + std::string type_with_name; + if (!WireFormat::ReadString(*input, &type_with_name)) { + return false; + } + std::vector now_path; + return DeserialisationJson(type_with_name, now_path, input, rows); +} + +void ColumnJson::SavePrefix(OutputStream* output) { + // serialize Json as tuple. + WireFormat::WriteUInt64(*output, 0ull); +} + +void ColumnJson::SaveBody(OutputStream* output) { + auto nested_json = NestedJson(); + std::string type_with_name = nested_json->NestedJsonType(); + WireFormat::WriteString(*output, type_with_name); + nested_json->SerialisationJson(type_with_name, output); +} + +void ColumnJson::Clear() { + data_.clear(); +} + +size_t ColumnJson::Size() const { + if (data_.empty()) { + return 0ul; + } + return data_.begin()->second->Size(); +} + +ColumnRef ColumnJson::Slice(size_t begin, size_t len) const { + if (begin + len > Size()) throw ValidationError("Slice indexes are out of bounds"); + + auto col = std::dynamic_pointer_cast(CloneEmpty()); + for (auto& [key, value] : data_) { + col->data_[key] = value->Slice(begin, len); + } + return col; +} + +ColumnRef ColumnJson::CloneEmpty() const { + return std::make_shared(); +} + +void ColumnJson::Swap(Column& other) { + ColumnJson& col = dynamic_cast(other); + data_.swap(col.data_); +} + +std::shared_ptr ColumnJson::NestedJson() const { + auto rt = std::make_shared(); + std::shared_ptr cur; + for (auto& [key, value] : data_) { + cur = rt; + auto parts = SplitJsonPath(key); + for (size_t i = 0; i < parts.size(); ++i) { + parts[i] = EscapeJsonPath(parts[i]); + if (i == parts.size() - 1) { + cur->data_[parts[i]] = value; + break; + } + if (cur->data_.find(parts[i]) == cur->data_.end()) { + cur->data_[parts[i]] = CloneEmpty(); + } + cur = cur->data_[parts[i]]->As(); + if (cur == nullptr) { + throw ValidationError("same json path with different value type."); + } + } + } + return rt; +} + +std::string ColumnJson::NestedJsonType() { + std::string res; + res.append("tuple("); + size_t cnt = 0; + for (auto& [key, val] : data_) { + res.append(key + " "); + if (auto col = std::dynamic_pointer_cast(val); col != nullptr) { + res.append(col->NestedJsonType()); + } else { + res.append(val->GetType().GetName()); + } + cnt++; + if (cnt != data_.size()) { + res.append(", "); + } + } + res.append(")"); + return res; +} + +void ColumnJson::AppendZeroValueToColumn(ColumnRef col, size_t rows) { + while (rows--) { + switch (col->Type()->GetCode()) { + case Type::Code::Int8: { + auto c = std::dynamic_pointer_cast(col); + c->Append(static_cast(0)); + break; + } + case Type::Code::Int16: { + auto c = std::dynamic_pointer_cast(col); + c->Append(static_cast(0)); + break; + } + case Type::Code::Int32: { + auto c = std::dynamic_pointer_cast(col); + c->Append(static_cast(0)); + break; + } + case Type::Code::Int64: { + auto c = std::dynamic_pointer_cast(col); + c->Append(static_cast(0)); + break; + } + case Type::Code::UInt8: { + auto c = std::dynamic_pointer_cast(col); + c->Append(static_cast(0)); + break; + } + case Type::Code::UInt16: { + auto c = std::dynamic_pointer_cast(col); + c->Append(static_cast(0)); + break; + } + case Type::Code::UInt32: { + auto c = std::dynamic_pointer_cast(col); + c->Append(static_cast(0)); + break; + } + case Type::Code::UInt64: { + auto c = std::dynamic_pointer_cast(col); + c->Append(static_cast(0)); + break; + } + case Type::Code::Float32: { + auto c = std::dynamic_pointer_cast(col); + c->Append(0.0f); + break; + } + case Type::Code::Float64: { + auto c = std::dynamic_pointer_cast(col); + c->Append(0.0); + break; + } + case Type::Code::Array: { + auto c = std::dynamic_pointer_cast(col); + c->AppendAsColumn(std::make_shared()); + break; + } + case Type::Code::String: { + auto c = std::dynamic_pointer_cast(col); + c->Append(""); + break; + } + default: + throw ValidationError("json doesn't suppot type " + col->Type()->GetName()); + } + } +} + +size_t ColumnJson::GetFullComplexType(std::string_view type_with_name) { + int st = 1; + size_t len = 0; + for (auto& ch : type_with_name) { + switch (ch) { + case '(': + st++; + break; + case ')': + st--; + break; + } + len++; + if (st == 0) { + break; + } + } + return len; +} + +std::string ColumnJson::GetSubObjectName(std::string_view type_with_name) { + bool has_escape = false; + size_t name_len = 0; + for (; name_len < type_with_name.size(); name_len++) { + if (type_with_name[name_len] == '`') { + has_escape = !has_escape; + } + if (type_with_name[name_len] == ' ' && !has_escape) { + break; + } + } + return std::string{type_with_name.substr(0, name_len)}; +} + +std::string ColumnJson::GetSubObjectType(std::string_view type_with_name) { + size_t type_len = 0; + /// sub object. + if (type_with_name.substr(0, 5) == "tuple" || type_with_name.substr(0, 5) == "array") { + type_len += 6; + type_len += GetFullComplexType(type_with_name.substr(6 /* skip "tuple(" or "array(" */)); + return std::string{type_with_name.substr(0, type_len)}; + } + + /// terminal type. + for (; type_len < type_with_name.size(); type_len++) { + if (type_with_name[type_len] == ')' || type_with_name[type_len] == ',') { + break; + } + } + return std::string{type_with_name.substr(0, type_len)}; +} + +bool ColumnJson::DeserialisationJson(std::string_view type_with_name, std::vector& now_path, InputStream* input, size_t rows) { + size_t cur = 6; /* skip "tuple(" */ + for (; cur < type_with_name.size() - 1 /* without ")" */; cur += 2 /* skip ", " */) { + std::string name = GetSubObjectName(type_with_name.substr(cur)); + cur += name.size() + 1 /* skip " " */; + if (cur >= type_with_name.size()) return false; + now_path.push_back(name); + + std::string type = GetSubObjectType(type_with_name.substr(cur)); + if (type.substr(0, 5) == "tuple") { + /// Deserialisation sub object. + if (!DeserialisationJson(type, now_path, input, rows)) { + return false; + } + } else { + /// Deserialisation terminal type. + auto path = BuildJsonPath(now_path); + data_[path] = CreateColumnByType(type); + if (!data_[path]->Load(input, rows)) { + return false; + } + } + now_path.pop_back(); + cur += type.size(); + } + return true; +} + +void ColumnJson::SerialisationJson(std::string_view type_with_name, OutputStream* output) { + size_t cur = 6; /* skip "tuple(" */ + for (; cur < type_with_name.size() - 1 /* without ")" */; cur += 2 /* skip ", " */) { + std::string name = GetSubObjectName(type_with_name.substr(cur)); + cur += name.size() + 1 /* skip " " */; + if (cur >= type_with_name.size()) throw ValidationError("serialisation type " + std::string{type_with_name} + " failed."); + + std::string type = GetSubObjectType(type_with_name.substr(cur)); + if (type.substr(0, 5) == "tuple") { + /// Serialisation sub object. + auto sub_col = std::dynamic_pointer_cast(data_[name]); + sub_col->SerialisationJson(type, output); + cur += type.size(); + continue; + } else { + /// Serialisation terminal type. + data_[name]->Save(output); + } + cur += type.size(); + } +} + +} // namespace timeplus diff --git a/timeplus/columns/json.h b/timeplus/columns/json.h new file mode 100644 index 0000000..c9398b8 --- /dev/null +++ b/timeplus/columns/json.h @@ -0,0 +1,152 @@ +#pragma once + +#include +#include +#include +#include +#include "column.h" + +namespace timeplus { + +/// Escaped: +/// 1) x.y -> `x.y` +/// 2) `x.y`.a -> `\`x.y\`.a` +/// 3) `x.y``.a -> `\`x.y\`\`.a` (special case) +/// No Escape: +/// 4) x -> x +/// 5) `x.y` -> `x.y` +/// 6) `x.y`a -> `x.y`a +std::string EscapeJsonPath(const std::string&); + +/// Unescaped: +/// 1) `x.y` -> x.y +/// 2) `\`x.y\`.a` -> `x.y`.a +/// No unescape: +/// 3) x -> x +/// 4) `x.y`.a -> `x.y`.a +/// 5) \`x.y\`.a -> \`x.y\`.a +/// 6) `x.y`.a` -> `x.y`.a` (special case) +/// 7) `` -> `` (special case) +std::string UnescapeJsonPath(const std::string&); + +/// Split the Json path to sub-paths For example: +/// 1) "id" -> ["id"] +/// 2) "id.a" -> ["id", "a"] +/// 3) "`x.y`.z" -> ["x.y", "z"] +/// ... Others: unknown behavior +std::vector SplitJsonPath(const std::string&); + +// Split the sub-paths to a whole path For example: +// 1) ["id"] -> "id" +// 2) ["id", "a"] -> "id.a" +// 3) ["x.y", "z"] -> "`x.y`.z" +// ... Others: unknown behavior +std::string BuildJsonPath(const std::vector&); + +using JsonKey = std::string; +using JsonValue = ColumnRef; + +/// Json type only support (u)int(8/16/32/128/256), float(32/64), bool, string, array types. +class ColumnJson : public Column { + /// Data is stored in leaf node, every leaf node is a column of Json fundamental + /// type (i.e. (u)int(8/16/32/128/256), float(32/64), bool, string, array). + /// for example: + // '{"id": 1, "obj": { "x": "abc", "y": 2}}', the elems is: + // <"id", []int32>, + // <"obj.x", []string>, + // <"obj.y", []int32> +public: + /// Constructing ColumnJson from a map object + template , std::disjunction>>>> + explicit ColumnJson(const std::vector& jsons) : Column(Type::CreateJson()) { + for (auto& json : jsons) { + Append(json); + } + } + + /// Append a sequences elements to column. + template , std::disjunction>>>> + void Append(const Map& json) { + auto sz = Size(); + for (const auto& [key, value] : json) { + if (data_.find(key) == data_.end()) { + data_[key] = value->CloneEmpty(); + AppendZeroValueToColumn(data_[key], sz); + } + data_[key]->Append(value); + } + + for (const auto& [key, value] : data_) { + if (json.find(key) == json.end()) { + AppendZeroValueToColumn(value, std::begin(json)->second->Size()); + } + } + } + + /// Get the value corresponding to the key. + /// If path is invaild or not exists , it will throw a error. + JsonValue& operator[](const JsonKey& path); + JsonValue& At(const JsonKey& path); + +public: + ColumnJson() : Column(Type::CreateJson()) {} + + /// Appends content of given column to the end of current one. + void Append(ColumnRef column) override; + + /// Increase the capacity of the column for large block insertion. + void Reserve(size_t new_cap) override; + + /// Loads column prefix from input stream. + bool LoadPrefix(InputStream* input, size_t rows) override; + + /// Loads column data from input stream. + bool LoadBody(InputStream* input, size_t rows) override; + + /// Saves column prefix to output stream. + void SavePrefix(OutputStream* output) override; + + /// Saves column data to output stream. + void SaveBody(OutputStream* output) override; + + /// Clear column data . + void Clear() override; + + /// Returns count of rows in the column. + size_t Size() const override; + + /// Makes slice of the current column. + ColumnRef Slice(size_t begin, size_t len) const override; + ColumnRef CloneEmpty() const override; + void Swap(Column& other) override; + +private: + /// get a nested json from a (path,value) json. + std::shared_ptr NestedJson() const; + + /// Get full type string from a nested Json. + /// Only ColumnJson from NestedJson can call this function. + /// for example: + /// {"a": 1, "b": {"c": 2, "d": 3}, "e": [1,2,3,4]} + /// -> "tuple(a int,b tuple(c int, d int),e array(int))" + std::string NestedJsonType(); + + size_t GetFullComplexType(std::string_view); + + std::string GetSubObjectName(std::string_view); + std::string GetSubObjectType(std::string_view); + + /// deserialisation a path + value Json from a full type string. + bool DeserialisationJson(std::string_view type_with_name, std::vector& now_path, InputStream* input, size_t rows); + + /// serialisation a path + value Json by a full type string. + void SerialisationJson(std::string_view type_with_name, OutputStream* output); + + /// append %rows empty values to %col. + void AppendZeroValueToColumn(ColumnRef col, size_t rows); + + std::unordered_map data_; +}; +} // namespace timeplus \ No newline at end of file diff --git a/timeplus/types/type_parser.cpp b/timeplus/types/type_parser.cpp index c177e89..e5c375c 100644 --- a/timeplus/types/type_parser.cpp +++ b/timeplus/types/type_parser.cpp @@ -68,6 +68,7 @@ static const std::unordered_map kTypeCode = { { "ring", Type::Ring }, { "polygon", Type::Polygon }, { "multi_polygon", Type::MultiPolygon }, + { "json", Type::Json }, }; template diff --git a/timeplus/types/types.cpp b/timeplus/types/types.cpp index f3c3b7d..9c62129 100644 --- a/timeplus/types/types.cpp +++ b/timeplus/types/types.cpp @@ -55,6 +55,7 @@ const char* Type::TypeName(Type::Code code) { case Type::Code::UInt128: return "uint128"; case Type::Code::Int256: return "int256"; case Type::Code::UInt256: return "uint256"; + case Type::Code::Json: return "json"; } return "Unknown type"; @@ -87,6 +88,7 @@ std::string Type::GetName() const { case UInt128: case Int256: case UInt256: + case Json: return TypeName(code_); case FixedString: return As()->GetName(); @@ -149,6 +151,7 @@ uint64_t Type::GetTypeUniqueId() const { case UInt128: case Int256: case UInt256: + case Json: // For simple types, unique ID is the same as Type::Code return code_; @@ -273,6 +276,10 @@ TypeRef Type::CreateMultiPolygon() { return TypeRef(new Type(Type::MultiPolygon)); } +TypeRef Type::CreateJson() { + return TypeRef(new Type(Type::Json)); +} + /// class ArrayType ArrayType::ArrayType(TypeRef item_type) : Type(Array), item_type_(item_type) { diff --git a/timeplus/types/types.h b/timeplus/types/types.h index cd5733c..ebe4da2 100644 --- a/timeplus/types/types.h +++ b/timeplus/types/types.h @@ -64,7 +64,8 @@ class Type { UInt128, Int256, UInt256, - Decimal256 + Decimal256, + Json }; using EnumItem = std::pair; @@ -150,6 +151,8 @@ class Type { static TypeRef CreateMultiPolygon(); + static TypeRef CreateJson(); + private: uint64_t GetTypeUniqueId() const; diff --git a/ut/columns_ut.cpp b/ut/columns_ut.cpp index 349e95a..04eb921 100644 --- a/ut/columns_ut.cpp +++ b/ut/columns_ut.cpp @@ -972,3 +972,131 @@ TEST(ColumnsCase, ColumnMapT_Wrap) { EXPECT_EQ("123", map_view.At(1)); EXPECT_EQ("abc", map_view.At(2)); } + +TEST(ColumnsCase, ColumnJsonAppend) { + auto col = std::make_shared(); + EXPECT_EQ(0u, col->Size()); + auto data = MakeJson(); + for (auto& json : data) { + col->Append(json); + } + EXPECT_EQ(2u, col->Size()); + + auto check = [](std::shared_ptr col) { + auto obj_a = (*col)["obj.a"]->As(); + auto obj_b = (*col)["obj.b"]->As(); + auto obj_c_e = (*col)["obj.c.e"]->As>(); + auto obj_c_f = (*col)["obj.c.f"]->As>(); + auto abb_c = (*col)["`a.b.b`.c"]->As(); + auto a_b_b_c = (*col)["a.b.b.c"]->As(); + EXPECT_NE(obj_a, nullptr); + EXPECT_NE(obj_a, nullptr); + EXPECT_NE(obj_c_e, nullptr); + EXPECT_NE(obj_c_f, nullptr); + EXPECT_NE(abb_c, nullptr); + EXPECT_NE(a_b_b_c, nullptr); + + EXPECT_EQ(obj_a->Size(), 2u); + EXPECT_EQ(obj_a->Size(), 2u); + EXPECT_EQ(obj_c_f->Size(), 2u); + EXPECT_EQ(obj_c_f->Size(), 2u); + EXPECT_EQ(obj_c_e->At(0).At(0), "timeplus"); + EXPECT_EQ(obj_c_e->At(0).At(1), "proton"); + EXPECT_EQ(obj_c_f->At(0).At(0), 123544); + EXPECT_EQ(obj_c_f->At(0).At(1), 123546); + EXPECT_EQ(obj_c_e->At(1).Size(), 0u); + EXPECT_EQ(obj_c_f->At(1).Size(), 0u); + EXPECT_EQ(abb_c->At(0), 0.0); + EXPECT_EQ(a_b_b_c->At(0), 0.0); + EXPECT_EQ(abb_c->At(1), 23.123); + EXPECT_EQ(a_b_b_c->At(1), 3.1415); + }; + + check(col); + + auto new_col = col->CloneEmpty(); + new_col->Append(col); + col->Clear(); + EXPECT_EQ(col->Size(), 0u); + EXPECT_EQ(new_col->Size(), 2u); + + check(new_col->As()); +} + +TEST(ColumnsCase, ColumnJsonSlice) { + auto col = std::make_shared(MakeJson()); + EXPECT_EQ(col->Size(), 2u); + + auto slice_col = col->Slice(0, 1)->As(); + EXPECT_NE(slice_col, nullptr); + + auto check = [](std::shared_ptr col) { + auto obj_a = (*col)["obj.a"]->As(); + auto obj_b = (*col)["obj.b"]->As(); + auto obj_c_e = (*col)["obj.c.e"]->As>(); + auto obj_c_f = (*col)["obj.c.f"]->As>(); + auto abb_c = (*col)["`a.b.b`.c"]->As(); + auto a_b_b_c = (*col)["a.b.b.c"]->As(); + EXPECT_NE(obj_a, nullptr); + EXPECT_NE(obj_a, nullptr); + EXPECT_NE(obj_c_e, nullptr); + EXPECT_NE(obj_c_f, nullptr); + EXPECT_NE(abb_c, nullptr); + EXPECT_NE(a_b_b_c, nullptr); + + EXPECT_EQ(obj_a->Size(), 1u); + EXPECT_EQ(obj_a->Size(), 1u); + EXPECT_EQ(obj_c_f->Size(), 1u); + EXPECT_EQ(obj_c_f->Size(), 1u); + EXPECT_EQ(abb_c->Size(), 1u); + EXPECT_EQ(a_b_b_c->Size(), 1u); + + EXPECT_EQ(obj_a->At(0), (1u << 19)); + EXPECT_EQ(obj_b->At(0), "timeplus"); + EXPECT_EQ(obj_c_e->At(0).At(0), "timeplus"); + EXPECT_EQ(obj_c_e->At(0).At(1), "proton"); + EXPECT_EQ(obj_c_f->At(0).At(0), 123544); + EXPECT_EQ(obj_c_f->At(0).At(1), 123546); + EXPECT_EQ(abb_c->At(0), 0.0); + EXPECT_EQ(a_b_b_c->At(0), 0.0); + }; + + check(slice_col); + + auto new_col = slice_col->CloneEmpty(); + new_col->Swap(*slice_col); + EXPECT_EQ(slice_col->Size(), 0u); + EXPECT_EQ(new_col->Size(), 1u); + + check(new_col->As()); +} + +TEST(ColumnsCase, ColumnJsonHelper) { + EXPECT_EQ("`x.y`", EscapeJsonPath("x.y")); + EXPECT_EQ("`\\`x.y\\`.a`", EscapeJsonPath("`x.y`.a")); + EXPECT_EQ("`\\`x.y\\`\\`.a`", EscapeJsonPath("`x.y``.a")); + EXPECT_EQ("x", EscapeJsonPath("x")); + EXPECT_EQ("`x.y`", EscapeJsonPath("`x.y`")); + EXPECT_EQ("`x.y`a", EscapeJsonPath("`x.y`a")); + + EXPECT_EQ("x.y", UnescapeJsonPath("`x.y`")); + EXPECT_EQ("`x.y`.a", UnescapeJsonPath("`\\`x.y\\`.a`")); + EXPECT_EQ("x", UnescapeJsonPath("x")); + EXPECT_EQ("`x.y`.a", UnescapeJsonPath("`x.y`.a")); + EXPECT_EQ("\\`x.y\\`.a", UnescapeJsonPath("\\`x.y\\`.a")); + EXPECT_EQ("`x.y`.a`", UnescapeJsonPath("`x.y`.a`")); + EXPECT_EQ("``", UnescapeJsonPath("``")); + + auto parts1 = SplitJsonPath("id"); + auto parts2 = SplitJsonPath("id.a"); + auto parts3 = SplitJsonPath("`x.y`.z"); + EXPECT_EQ("id", parts1[0]); + EXPECT_EQ("id", parts2[0]); + EXPECT_EQ("a", parts2[1]); + EXPECT_EQ("x.y", parts3[0]); + EXPECT_EQ("z", parts3[1]); + + EXPECT_EQ("id", BuildJsonPath(parts1)); + EXPECT_EQ("id.a", BuildJsonPath(parts2)); + EXPECT_EQ("`x.y`.z", BuildJsonPath(parts3)); +} \ No newline at end of file diff --git a/ut/roundtrip_tests.cpp b/ut/roundtrip_tests.cpp index e80558d..9a524fe 100644 --- a/ut/roundtrip_tests.cpp +++ b/ut/roundtrip_tests.cpp @@ -1,11 +1,14 @@ #include -#include "utils.h" #include "roundtrip_column.h" +#include "utils.h" +#include "value_generators.h" #include +#include #include #include +#include using namespace timeplus; @@ -323,6 +326,37 @@ TEST_P(RoundtripCase, RoundtripArrayLowCardinalityTString) { EXPECT_TRUE(CompareRecursive(*array, *result_typed)); } +TEST_P(RoundtripCase, RoundtripJson) { + client_->Execute("DROP STREAM IF EXISTS roundtrip_json"); + client_->Execute("CREATE STREAM roundtrip_json (j json)"); + + Block block; + auto json = std::make_shared(MakeJson()); + block.AppendColumn("j", json); + + client_->Insert("roundtrip_json", block); + std::this_thread::sleep_for(std::chrono::seconds(3)); + + client_->Select("SELECT j::json FROM table(roundtrip_json)", [&json](const Block& block) { + if (block.GetRowCount() == 0) return; + auto rv = block[0]->As(); + EXPECT_NE(rv, nullptr); + + EXPECT_EQ(rv->Size(), 2u); + + EXPECT_TRUE(CompareRecursive(*json->At("obj.c.e")->As>(), + *ColumnArrayT::Wrap(std::move(rv->At("obj.c.e"))))); + EXPECT_TRUE(CompareRecursive(*json->At("obj.c.f")->As>(), + *ColumnArrayT::Wrap(std::move(rv->At("obj.c.f"))))); + EXPECT_TRUE(CompareRecursive(*json->At("obj.a")->As(), *rv->At("obj.a")->As())); + EXPECT_TRUE(CompareRecursive(*json->At("obj.b")->As(), *rv->At("obj.b")->As())); + EXPECT_TRUE(CompareRecursive(*json->At("a.b.b.c")->As(), *rv->At("a.b.b.c")->As())); + EXPECT_TRUE(CompareRecursive(*json->At("`a.b.b`.c")->As(), *rv->At("`a.b.b`.c")->As())); + }); + + client_->Execute("DROP STREAM IF EXISTS roundtrip_json"); +} + const auto LocalHostEndpoint = ClientOptions() .SetHost( getEnvOrDefault("TIMEPLUS_HOST", "localhost")) .SetPort( getEnvOrDefault("TIMEPLUS_PORT", "8463")) diff --git a/ut/value_generators.cpp b/ut/value_generators.cpp index 928445e..24b3f0a 100644 --- a/ut/value_generators.cpp +++ b/ut/value_generators.cpp @@ -111,6 +111,22 @@ std::vector MakeDateTimes() { // }; // } +std::vector> MakeJson() { + auto ui = std::make_shared(std::vector{(1u << 19)}); + auto str = std::make_shared(std::vector{"timeplus"}); + auto f1 = std::make_shared(std::vector{3.1415}); + auto f2 = std::make_shared(std::vector{23.123}); + auto arr1 = std::make_shared>(std::make_shared()); + arr1->Append(std::vector{"timeplus", "proton"}); + auto arr2 = std::make_shared>(std::make_shared()); + arr2->Append(std::vector{123544, 123546}); + + auto j1 = std::unordered_map{{"obj.a", ui}, {"obj.b", str}, {"obj.c.e", arr1}, {"obj.c.f", arr2}}; + + auto j2 = std::unordered_map{{"a.b.b.c", f1}, {"`a.b.b`.c", f2}}; + return {j1, j2}; +} + std::vector MakeDecimals(size_t /*precision*/, size_t scale) { const auto scale_multiplier = static_cast(std::pow(10, scale)); const long long int rhs_value = 12345678910; diff --git a/ut/value_generators.h b/ut/value_generators.h index 50b8896..0cd03ea 100644 --- a/ut/value_generators.h +++ b/ut/value_generators.h @@ -3,11 +3,13 @@ #include // for ipv4-ipv6 platform-specific stuff #include #include +#include #include "utils.h" #include #include +#include inline in_addr MakeIPv4(uint32_t ip) { static_assert(sizeof(in_addr) == sizeof(ip)); @@ -40,6 +42,7 @@ std::vector MakeIPv6s(); std::vector MakeUUIDs(); // std::vector MakeInt128s(); std::vector MakeDecimals(size_t precision, size_t scale); +std::vector> MakeJson(); template ::value, bool> = true> inline std::vector MakeNumbers() {