|
29 | 29 | #include <utility> |
30 | 30 |
|
31 | 31 | #include "beta_rowset.h" |
| 32 | +#include "cloud/config.h" |
32 | 33 | #include "common/config.h" |
33 | 34 | #include "common/logging.h" |
34 | 35 | #include "common/status.h" |
| 36 | +#include "cpp/sync_point.h" |
35 | 37 | #include "io/fs/file_reader.h" |
36 | 38 | #include "io/fs/file_system.h" |
37 | 39 | #include "io/fs/local_file_system.h" |
@@ -71,24 +73,90 @@ Status BetaRowset::init() { |
71 | 73 | return Status::OK(); // no op |
72 | 74 | } |
73 | 75 |
|
| 76 | +namespace { |
| 77 | +Status load_segment_rows_from_footer(BetaRowsetSharedPtr rowset, |
| 78 | + std::vector<uint32_t>* segment_rows, bool enable_segment_cache, |
| 79 | + OlapReaderStatistics* read_stats) { |
| 80 | + SegmentCacheHandle segment_cache_handle; |
| 81 | + RETURN_IF_ERROR(SegmentLoader::instance()->load_segments( |
| 82 | + rowset, &segment_cache_handle, enable_segment_cache, false, read_stats)); |
| 83 | + for (const auto& segment : segment_cache_handle.get_segments()) { |
| 84 | + segment_rows->emplace_back(segment->num_rows()); |
| 85 | + } |
| 86 | + return Status::OK(); |
| 87 | +} |
| 88 | + |
| 89 | +Status check_segment_rows_consistency(const std::vector<uint32_t>& rows_from_meta, |
| 90 | + const std::vector<uint32_t>& rows_from_footer, |
| 91 | + int64_t tablet_id, const std::string& rowset_id) { |
| 92 | + DCHECK_EQ(rows_from_footer.size(), rows_from_meta.size()); |
| 93 | + for (size_t i = 0; i < rows_from_footer.size(); i++) { |
| 94 | + if (rows_from_footer[i] != rows_from_meta[i]) { |
| 95 | + auto msg = fmt::format( |
| 96 | + "segment rows mismatch between rowset meta and segment footer. " |
| 97 | + "segment index: {}, meta rows: {}, footer rows: {}, tablet={}, rowset={}", |
| 98 | + i, rows_from_meta[i], rows_from_footer[i], tablet_id, rowset_id); |
| 99 | + if (config::enable_segment_rows_check_core) { |
| 100 | + CHECK(false) << msg; |
| 101 | + } |
| 102 | + return Status::InternalError(msg); |
| 103 | + } |
| 104 | + } |
| 105 | + return Status::OK(); |
| 106 | +} |
| 107 | +} // namespace |
| 108 | + |
74 | 109 | Status BetaRowset::get_segment_num_rows(std::vector<uint32_t>* segment_rows, |
| 110 | + bool enable_segment_cache, |
75 | 111 | OlapReaderStatistics* read_stats) { |
76 | 112 | // `ROWSET_UNLOADING` is state for closed() called but owned by some readers. |
77 | 113 | // So here `ROWSET_UNLOADING` is allowed. |
78 | 114 | DCHECK_NE(_rowset_state_machine.rowset_state(), ROWSET_UNLOADED); |
79 | 115 |
|
80 | | - RETURN_IF_ERROR(_load_segment_rows_once.call([this, read_stats] { |
| 116 | + RETURN_IF_ERROR(_load_segment_rows_once.call([this, enable_segment_cache, read_stats] { |
81 | 117 | auto segment_count = num_segments(); |
82 | | - _segments_rows.resize(segment_count); |
83 | | - for (int64_t i = 0; i != segment_count; ++i) { |
84 | | - SegmentCacheHandle segment_cache_handle; |
85 | | - RETURN_IF_ERROR(SegmentLoader::instance()->load_segment( |
86 | | - std::static_pointer_cast<BetaRowset>(shared_from_this()), i, |
87 | | - &segment_cache_handle, false, false, read_stats)); |
88 | | - const auto& tmp_segments = segment_cache_handle.get_segments(); |
89 | | - _segments_rows[i] = tmp_segments[0]->num_rows(); |
| 118 | + |
| 119 | + if (!_rowset_meta->get_num_segment_rows().empty()) { |
| 120 | + if (_rowset_meta->get_num_segment_rows().size() == segment_count) { |
| 121 | + // use segment rows in rowset meta if eligible |
| 122 | + TEST_SYNC_POINT("BetaRowset::get_segment_num_rows:use_segment_rows_from_meta"); |
| 123 | + _segments_rows.assign(_rowset_meta->get_num_segment_rows().cbegin(), |
| 124 | + _rowset_meta->get_num_segment_rows().cend()); |
| 125 | + if (config::enable_segment_rows_consistency_check) { |
| 126 | + // verify segment rows from meta match segment footer |
| 127 | + std::vector<uint32_t> rows_from_footer; |
| 128 | + auto self = std::dynamic_pointer_cast<BetaRowset>(shared_from_this()); |
| 129 | + auto load_status = load_segment_rows_from_footer( |
| 130 | + self, &rows_from_footer, enable_segment_cache, read_stats); |
| 131 | + if (load_status.ok()) { |
| 132 | + return check_segment_rows_consistency( |
| 133 | + _segments_rows, rows_from_footer, _rowset_meta->tablet_id(), |
| 134 | + _rowset_meta->rowset_id().to_string()); |
| 135 | + } |
| 136 | + } |
| 137 | + return Status::OK(); |
| 138 | + } else { |
| 139 | + auto msg = fmt::format( |
| 140 | + "corrupted segment rows info in rowset meta. " |
| 141 | + "segment count: {}, segment rows size: {}, tablet={}, rowset={}", |
| 142 | + segment_count, _rowset_meta->get_num_segment_rows().size(), |
| 143 | + _rowset_meta->tablet_id(), _rowset_meta->rowset_id().to_string()); |
| 144 | + if (config::enable_segment_rows_check_core) { |
| 145 | + CHECK(false) << msg; |
| 146 | + } |
| 147 | + LOG_EVERY_SECOND(WARNING) << msg; |
| 148 | + } |
90 | 149 | } |
91 | | - return Status::OK(); |
| 150 | + if (config::fail_when_segment_rows_not_in_rowset_meta) { |
| 151 | + CHECK(false) << "segment rows info not found in rowset meta. tablet=" |
| 152 | + << _rowset_meta->tablet_id() |
| 153 | + << ", rowset=" << _rowset_meta->rowset_id().to_string(); |
| 154 | + } |
| 155 | + // otherwise, read it from segment footer |
| 156 | + TEST_SYNC_POINT("BetaRowset::get_segment_num_rows:load_from_segment_footer"); |
| 157 | + auto self = std::dynamic_pointer_cast<BetaRowset>(shared_from_this()); |
| 158 | + return load_segment_rows_from_footer(self, &_segments_rows, enable_segment_cache, |
| 159 | + read_stats); |
92 | 160 | })); |
93 | 161 | segment_rows->assign(_segments_rows.cbegin(), _segments_rows.cend()); |
94 | 162 | return Status::OK(); |
|
0 commit comments