diff --git a/be/src/tools/meta_tool.cpp b/be/src/tools/meta_tool.cpp index 53d6fdaf14322f..916dd92875abae 100644 --- a/be/src/tools/meta_tool.cpp +++ b/be/src/tools/meta_tool.cpp @@ -15,25 +15,33 @@ // specific language governing permissions and limitations // under the License. -#include #include #include #include +#include + #include #include #include "common/status.h" -#include "util/file_utils.h" +#include "env/env.h" #include "gen_cpp/olap_file.pb.h" -#include "olap/options.h" +#include "gen_cpp/segment_v2.pb.h" +#include "gutil/strings/numbers.h" +#include "gutil/strings/split.h" +#include "gutil/strings/substitute.h" +#include "json2pb/pb_to_json.h" #include "olap/data_dir.h" -#include "olap/tablet_meta_manager.h" #include "olap/olap_define.h" +#include "olap/options.h" +#include "olap/rowset/segment_v2/binary_plain_page.h" +#include "olap/rowset/segment_v2/column_reader.h" +#include "olap/tablet_meta_manager.h" #include "olap/tablet_meta.h" #include "olap/utils.h" -#include "json2pb/pb_to_json.h" -#include "gutil/strings/split.h" -#include "gutil/strings/numbers.h" +#include "util/coding.h" +#include "util/crc32c.h" +#include "util/file_utils.h" using boost::filesystem::path; using doris::DataDir; @@ -44,6 +52,17 @@ using doris::Status; using doris::TabletMeta; using doris::TabletMetaManager; using doris::FileUtils; +using doris::Slice; +using doris::RandomAccessFile; +using strings::Substitute; +using doris::segment_v2::SegmentFooterPB; +using doris::segment_v2::ColumnReader; +using doris::segment_v2::BinaryPlainPageDecoder; +using doris::segment_v2::PageHandle; +using doris::segment_v2::PagePointer; +using doris::segment_v2::ColumnReaderOptions; +using doris::segment_v2::ColumnIteratorOptions; +using doris::segment_v2::PageFooterPB; const std::string HEADER_PREFIX = "tabletmeta_"; @@ -56,6 +75,7 @@ DEFINE_int32(schema_hash, 0, "schema_hash for tablet meta"); DEFINE_string(json_meta_path, "", "absolute json meta file path"); DEFINE_string(pb_meta_path, "", "pb meta file path"); DEFINE_string(tablet_file, "", "file to save a set of tablets"); +DEFINE_string(file, "", "segment file path"); std::string get_usage(const std::string& progname) { std::stringstream ss; @@ -71,6 +91,7 @@ std::string get_usage(const std::string& progname) { "--schema_hash=schemahash\n"; ss << "./meta_tool --operation=delete_meta --tablet_file=file_path\n"; ss << "./meta_tool --operation=show_meta --pb_meta_path=path\n"; + ss << "./meta_tool --operation=show_segment_footer --file=/path/to/segment/file\n"; return ss.str(); } @@ -235,6 +256,77 @@ void batch_delete_meta(const std::string& tablet_file) { return; } +Status get_segment_footer(RandomAccessFile* input_file, SegmentFooterPB* footer) { + // Footer := SegmentFooterPB, FooterPBSize(4), FooterPBChecksum(4), MagicNumber(4) + std::string file_name = input_file->file_name(); + uint64_t file_size; + RETURN_IF_ERROR(input_file->size(&file_size)); + + if (file_size < 12) { + return Status::Corruption(Substitute("Bad segment file $0: file size $1 < 12", file_name, file_size)); + } + + uint8_t fixed_buf[12]; + RETURN_IF_ERROR(input_file->read_at(file_size - 12, Slice(fixed_buf, 12))); + + // validate magic number + const char* k_segment_magic = "D0R1"; + const uint32_t k_segment_magic_length = 4; + if (memcmp(fixed_buf + 8, k_segment_magic, k_segment_magic_length) != 0) { + return Status::Corruption(Substitute("Bad segment file $0: magic number not match", file_name)); + } + + // read footer PB + uint32_t footer_length = doris::decode_fixed32_le(fixed_buf); + if (file_size < 12 + footer_length) { + return Status::Corruption( + Substitute("Bad segment file $0: file size $1 < $2", file_name, file_size, 12 + footer_length)); + } + std::string footer_buf; + footer_buf.resize(footer_length); + RETURN_IF_ERROR(input_file->read_at(file_size - 12 - footer_length, footer_buf)); + + // validate footer PB's checksum + uint32_t expect_checksum = doris::decode_fixed32_le(fixed_buf + 4); + uint32_t actual_checksum = doris::crc32c::Value(footer_buf.data(), footer_buf.size()); + if (actual_checksum != expect_checksum) { + return Status::Corruption( + Substitute("Bad segment file $0: footer checksum not match, actual=$1 vs expect=$2", + file_name, actual_checksum, expect_checksum)); + } + + // deserialize footer PB + if (!footer->ParseFromString(footer_buf)) { + return Status::Corruption(Substitute("Bad segment file $0: failed to parse SegmentFooterPB", file_name)); + } + return Status::OK(); +} + +void show_segment_footer(const std::string& file_name) { + std::unique_ptr input_file; + Status status = doris::Env::Default()->new_random_access_file(file_name, &input_file); + if (!status.ok()) { + std::cout << "open file failed: " << status.to_string() << std::endl; + return; + } + SegmentFooterPB footer; + status = get_segment_footer(input_file.get(), &footer); + if (!status.ok()) { + std::cout << "get footer failed: " << status.to_string() << std::endl; + return; + } + std::string json_footer; + json2pb::Pb2JsonOptions json_options; + json_options.pretty_json = true; + bool ret = json2pb::ProtoMessageToJson(footer, &json_footer, json_options); + if (!ret) { + std::cout << "Convert PB to json failed" << std::endl; + return; + } + std::cout << json_footer << std::endl; + return; +} + int main(int argc, char** argv) { std::string usage = get_usage(argv[0]); gflags::SetUsageMessage(usage); @@ -252,6 +344,12 @@ int main(int argc, char** argv) { } batch_delete_meta(tablet_file); + } else if (FLAGS_operation == "show_segment_footer") { + if (FLAGS_file == "") { + std::cout << "no file flag for show dict" << std::endl; + return -1; + } + show_segment_footer(FLAGS_file); } else { // operations that need root path should be written here std::set valid_operations = {"get_meta", "load_meta", @@ -276,7 +374,7 @@ int main(int argc, char** argv) { } else if (FLAGS_operation == "delete_meta") { delete_meta(data_dir.get()); } else { - std::cout << "invalid operation:" << FLAGS_operation << "\n" + std::cout << "invalid operation: " << FLAGS_operation << "\n" << usage << std::endl; return -1; } diff --git a/docs/en/administrator-guide/operation/tablet-meta-tool.md b/docs/en/administrator-guide/operation/tablet-meta-tool.md index 7cc309fc7b4b00..680519a13ca5d4 100644 --- a/docs/en/administrator-guide/operation/tablet-meta-tool.md +++ b/docs/en/administrator-guide/operation/tablet-meta-tool.md @@ -71,7 +71,7 @@ If the final query is successful, the Tablet Meta will be returned as json. Get Tablet Meta on a disk based on the meta\ tool tool. -Order: +Command: ``` ./lib/meta_tool --root_path=/path/to/root_path --operation=get_meta --tablet_id=xxx --schema_hash=xxx @@ -85,7 +85,7 @@ The result is also a presentation of Tablet Meta in JSON format. The function of loading header is provided to realize manual migration of tablet. This function is based on Tablet Meta in JSON format, so if changes in the shard field and version information are involved, they can be changed directly in the JSON content of Tablet Meta. Then use the following commands to load. -Order: +Command: ``` ./lib/meta_tool --operation=load_meta --root_path=/path/to/root_path --json_header_path=path @@ -130,8 +130,18 @@ Batch delete will skip the line with incorrect tablet information format in `tab This command is to view the old file-based management PB format Tablet Meta, and to display Tablet Meta in JSON format. -Order: +Command: ``` ./lib/meta_tool --operation=show_meta --root_path=/path/to/root_path --pb_header_path=path ``` + +### Segment meta in Pb format + +This command is to view the PB format segment meta, and to display segment meta in JSON format. + +Command: + +``` +./meta_tool --operation=show_segment_footer --file=/path/to/segment/file +``` diff --git a/docs/zh-CN/administrator-guide/operation/tablet-meta-tool.md b/docs/zh-CN/administrator-guide/operation/tablet-meta-tool.md index b44588f0ffd48e..386b5bb5330e49 100644 --- a/docs/zh-CN/administrator-guide/operation/tablet-meta-tool.md +++ b/docs/zh-CN/administrator-guide/operation/tablet-meta-tool.md @@ -136,4 +136,13 @@ api: ./lib/meta_tool --operation=show_meta --root_path=/path/to/root_path --pb_header_path=path ``` +### 展示 pb 格式的 Segment meta + +这个命令是为了查看SegmentV2 的segment meta信息,以json 形式展示出来 + +命令: + +``` +./meta_tool --operation=show_segment_footer --file=/path/to/segment/file +