diff --git a/build.sh b/build.sh
index 9f8614df4ee33b..b0078fdd67eade 100755
--- a/build.sh
+++ b/build.sh
@@ -651,6 +651,7 @@ if [[ "${BUILD_CLOUD}" -eq 1 ]]; then
         -DMAKE_TEST=OFF \
         "${CMAKE_USE_CCACHE}" \
         -DUSE_LIBCPP="${USE_LIBCPP}" \
+        -DENABLE_HDFS_STORAGE_VAULT=${ENABLE_HDFS_STORAGE_VAULT:-ON} \
         -DSTRIP_DEBUG_INFO="${STRIP_DEBUG_INFO}" \
         -DUSE_DWARF="${USE_DWARF}" \
         -DUSE_JEMALLOC="${USE_JEMALLOC}" \
diff --git a/cloud/CMakeLists.txt b/cloud/CMakeLists.txt
index 575e55f297f34a..02f883339901b7 100644
--- a/cloud/CMakeLists.txt
+++ b/cloud/CMakeLists.txt
@@ -266,15 +266,19 @@ include_directories(
     ${GPERFTOOLS_HOME}/include
 )
 
-if ("${DORIS_JAVA_HOME}" STREQUAL "")
-    set(DORIS_JAVA_HOME "$ENV{JAVA_HOME}")
-endif()
+option(ENABLE_HDFS_STORAGE_VAULT "Enable HDFS storage support" ON)
+if (ENABLE_HDFS_STORAGE_VAULT)
+    add_compile_definitions(ENABLE_HDFS_STORAGE_VAULT)
+    if ("${DORIS_JAVA_HOME}" STREQUAL "")
+        set(DORIS_JAVA_HOME "$ENV{JAVA_HOME}")
+    endif()
 
-include_directories(${DORIS_JAVA_HOME}/include)
-if (NOT OS_MACOSX)
-    include_directories(${DORIS_JAVA_HOME}/include/linux)
-else()
-    include_directories(${DORIS_JAVA_HOME}/include/darwin)
+    include_directories(${DORIS_JAVA_HOME}/include)
+    if (NOT OS_MACOSX)
+        include_directories(${DORIS_JAVA_HOME}/include/linux)
+    else()
+        include_directories(${DORIS_JAVA_HOME}/include/darwin)
+    endif()
 endif()
 
 set(WL_START_GROUP "-Wl,--start-group")
@@ -316,10 +320,6 @@ set(DORIS_DEPENDENCIES
 
 message(STATUS "DORIS_DEPENDENCIES is ${DORIS_DEPENDENCIES}")
 
-if ("${DORIS_JAVA_HOME}" STREQUAL "")
-    set(DORIS_JAVA_HOME "$ENV{JAVA_HOME}")
-endif()
-
 # Add all external dependencies. They should come after the project's libs.
 # static link gcc's lib
 set(DORIS_LINK_LIBS ${DORIS_LINK_LIBS}
@@ -337,9 +337,14 @@ set(DORIS_LINK_LIBS ${DORIS_LINK_LIBS}
     -static-libstdc++
     -static-libgcc
     -lresolv
-    -L${DORIS_JAVA_HOME}/lib/server
-    -ljvm
 )
+
+if (ENABLE_HDFS_STORAGE_VAULT)
+    set(DORIS_LINK_LIBS ${DORIS_LINK_LIBS}
+        -L${DORIS_JAVA_HOME}/lib/server
+        -ljvm)
+endif()
+
 if (NOT (USE_LIBCPP AND COMPILER_CLANG))
     set(DORIS_LINK_LIBS ${DORIS_LINK_LIBS} -lstdc++fs)
 endif()
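The two hunks above make HDFS support a configure-time switch that defaults to ON, so existing builds behave as before. A minimal usage sketch for a build without it; the `--cloud` flag is an assumption about the usual build.sh invocation, not part of this patch:

    # hypothetical invocation: configure the cloud module with HDFS off,
    # skipping the JNI include paths and the -ljvm linkage added above
    ENABLE_HDFS_STORAGE_VAULT=OFF ./build.sh --cloud
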
diff --git a/cloud/script/start.sh b/cloud/script/start.sh
index 506a279ad78250..befe0a9f2e9df8 100644
--- a/cloud/script/start.sh
+++ b/cloud/script/start.sh
@@ -54,8 +54,15 @@ if [[ ${RUN_RECYCLYER} -eq 1 ]]; then
 fi
 
 # echo "$@" "daemonized=${daemonized}"}
-# export env variables from doris_cloud.conf
-# read from doris_cloud.conf
+custom_start="${DORIS_HOME}/bin/custom_start.sh"
+if [[ -f "${custom_start}" ]]; then
+    source "${custom_start}"
+fi
+enable_hdfs=${enable_hdfs:-1}
+process_name="${process_name:-doris_cloud}"
+
+# export env variables from ${process_name}.conf
+# read from ${process_name}.conf
 while read -r line; do
     envline="$(echo "${line}" |
         sed 's/[[:blank:]]*=[[:blank:]]*/=/g' |
@@ -66,7 +73,7 @@ while read -r line; do
     if [[ "${envline}" == *"="* ]]; then
         eval 'export "${envline}"'
     fi
-done <"${DORIS_HOME}/conf/doris_cloud.conf"
+done <"${DORIS_HOME}/conf/${process_name}.conf"
 
 role=''
 if [[ ${RUN_METASERVICE} -eq 0 ]] && [[ ${RUN_RECYCLYER} -eq 0 ]]; then
@@ -78,53 +85,59 @@ elif [[ ${RUN_METASERVICE} -eq 0 ]] && [[ ${RUN_RECYCLYER} -eq 1 ]]; then
 elif [[ ${RUN_METASERVICE} -eq 1 ]] && [[ ${RUN_RECYCLYER} -eq 1 ]]; then
     role='MetaService and Recycler'
 fi
-process=doris_cloud
 
-if [[ ${RUN_VERSION} -eq 0 ]] && [[ -f "${DORIS_HOME}/bin/${process}.pid" ]]; then
-    pid=$(cat "${DORIS_HOME}/bin/${process}.pid")
+if [[ ${RUN_VERSION} -eq 0 ]] && [[ -f "${DORIS_HOME}/bin/${process_name}.pid" ]]; then
+    pid=$(cat "${DORIS_HOME}/bin/${process_name}.pid")
     if [[ "${pid}" != "" ]]; then
-        if kill -0 "$(cat "${DORIS_HOME}/bin/${process}.pid")" >/dev/null 2>&1; then
+        if kill -0 "$(cat "${DORIS_HOME}/bin/${process_name}.pid")" >/dev/null 2>&1; then
             echo "pid file existed, ${role} have already started, pid=${pid}"
             exit 1
         fi
     fi
     echo "pid file existed but process not alive, remove it, pid=${pid}"
-    rm -f "${DORIS_HOME}/bin/${process}.pid"
+    rm -f "${DORIS_HOME}/bin/${process_name}.pid"
 fi
 
 lib_path="${DORIS_HOME}/lib"
-bin="${DORIS_HOME}/lib/doris_cloud"
+bin="${DORIS_HOME}/lib/${process_name}"
 export LD_LIBRARY_PATH="${lib_path}:${LD_LIBRARY_PATH}"
-chmod 550 "${DORIS_HOME}/lib/doris_cloud"
+chmod 550 "${DORIS_HOME}/lib/${process_name}"
 
-if [[ -z "${JAVA_HOME}" ]]; then
-    echo "The JAVA_HOME environment variable is not defined correctly"
-    echo "This environment variable is needed to run this program"
-    echo "NB: JAVA_HOME should point to a JDK not a JRE"
-    echo "You can set it in doris_cloud.conf"
-    exit 1
-fi
+if [[ ${enable_hdfs} -eq 1 ]]; then
+    if [[ -z "${JAVA_HOME}" ]]; then
+        echo "The JAVA_HOME environment variable is not defined correctly"
+        echo "This environment variable is needed to run this program"
+        echo "NB: JAVA_HOME should point to a JDK not a JRE"
+        echo "You can set it in doris_cloud.conf"
+        exit 1
+    fi
 
-if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then
-    # add hadoop libs
-    for f in "${DORIS_HOME}/lib/hadoop_hdfs/common"/*.jar; do
-        DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
-    done
-    for f in "${DORIS_HOME}/lib/hadoop_hdfs/common/lib"/*.jar; do
-        DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
-    done
-    for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs"/*.jar; do
-        DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
-    done
-    for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs/lib"/*.jar; do
-        DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
-    done
-fi
+    if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then
+        # add hadoop libs
+        for f in "${DORIS_HOME}/lib/hadoop_hdfs/common"/*.jar; do
+            DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
+        done
+        for f in "${DORIS_HOME}/lib/hadoop_hdfs/common/lib"/*.jar; do
+            DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
+        done
+        for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs"/*.jar; do
+            DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
+        done
+        for f in "${DORIS_HOME}/lib/hadoop_hdfs/hdfs/lib"/*.jar; do
+            DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
+        done
+    fi
+
+    export CLASSPATH="${DORIS_CLASSPATH}"
 
-export CLASSPATH="${DORIS_CLASSPATH}"
+    export LD_LIBRARY_PATH="${JAVA_HOME}/lib/server:${LD_LIBRARY_PATH}"
 
-export LD_LIBRARY_PATH="${JAVA_HOME}/lib/server:${LD_LIBRARY_PATH}"
+    ## set libhdfs3 conf
+    if [[ -f "${DORIS_HOME}/conf/hdfs-site.xml" ]]; then
+        export LIBHDFS3_CONF="${DORIS_HOME}/conf/hdfs-site.xml"
+    fi
+fi
 
 # filter known leak
 export LSAN_OPTIONS=suppressions=${DORIS_HOME}/conf/lsan_suppr.conf
@@ -136,13 +149,6 @@ export UBSAN_OPTIONS=suppressions=${DORIS_HOME}/conf/ubsan_suppr.conf
 export ASAN_OPTIONS=symbolize=1:abort_on_error=1:disable_coredump=0:unmap_shadow_on_exit=1:detect_container_overflow=0:check_malloc_usable_size=0:${ASAN_OPTIONS}
 export UBSAN_OPTIONS=print_stacktrace=1:${UBSAN_OPTIONS}
 
-## set libhdfs3 conf
-if [[ -f "${DORIS_HOME}/conf/hdfs-site.xml" ]]; then
-    export LIBHDFS3_CONF="${DORIS_HOME}/conf/hdfs-site.xml"
-fi
-
-# echo "LIBHDFS3_CONF=${LIBHDFS3_CONF}"
-
 # to enable dump jeprof heap stats prodigally, change `prof_active:false` to `prof_active:true` or curl http://be_host:be_webport/jeheap/prof/true
 # to control the dump interval change `lg_prof_interval` to a specific value, it is pow/exponent of 2 in size of bytes, default 34 means 2 ** 34 = 16GB
 # to control the dump path, change `prof_prefix` to a specific path, e.g. /doris_cloud/log/ms_, by default it dumps at the path where the start command called
@@ -155,7 +161,7 @@ fi
 mkdir -p "${DORIS_HOME}/log"
 
 echo "$(date +'%F %T') start with args: $*"
-out_file=${DORIS_HOME}/log/${process}.out
+out_file=${DORIS_HOME}/log/${process_name}.out
 if [[ "${RUN_DAEMON}" -eq 1 ]]; then
     # append 10 blank lines to ensure the following tail -n10 works correctly
     printf "\n\n\n\n\n\n\n\n\n\n" >>"${out_file}"
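The start.sh rework above reads two optional knobs, `enable_hdfs` and `process_name`, from a `custom_start.sh` that is sourced before the conf file is loaded (which is why the conf file name itself can be overridden). A minimal sketch of such an override file; the file name and variable names come from the diff, the values are illustrative:

    # ${DORIS_HOME}/bin/custom_start.sh -- sourced by start.sh when present
    enable_hdfs=0             # skip the JAVA_HOME check, Hadoop classpath and libhdfs3 setup
    process_name=doris_cloud  # conf file, pid file, binary and .out log names derive from this
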
diff --git a/cloud/src/meta-service/meta_service_resource.cpp b/cloud/src/meta-service/meta_service_resource.cpp
index 67bd81c10a65f0..b146e339760699 100644
--- a/cloud/src/meta-service/meta_service_resource.cpp
+++ b/cloud/src/meta-service/meta_service_resource.cpp
@@ -359,6 +359,17 @@ bool normalize_hdfs_fs_name(std::string& fs_name) {
 static int add_hdfs_storage_vault(InstanceInfoPB& instance, Transaction* txn,
                                   StorageVaultPB& hdfs_param, MetaServiceCode& code,
                                   std::string& msg) {
+#ifndef ENABLE_HDFS_STORAGE_VAULT
+    code = MetaServiceCode::INVALID_ARGUMENT;
+    msg = fmt::format(
+            "HDFS is disabled (via the ENABLE_HDFS_STORAGE_VAULT build option), "
+            "but HDFS storage vaults were detected: {}",
+            hdfs_param.name());
+    LOG(ERROR) << "HDFS is disabled (via the ENABLE_HDFS_STORAGE_VAULT build option), "
+               << "but HDFS storage vaults were detected: " << hdfs_param.name();
+    return -1;
+#endif
+
     if (!hdfs_param.has_hdfs_info()) {
         code = MetaServiceCode::INVALID_ARGUMENT;
         msg = fmt::format("vault_name={} passed invalid argument", hdfs_param.name());
diff --git a/cloud/src/recycler/CMakeLists.txt b/cloud/src/recycler/CMakeLists.txt
index 6dbb8a0d696423..12dc735185386b 100644
--- a/cloud/src/recycler/CMakeLists.txt
+++ b/cloud/src/recycler/CMakeLists.txt
@@ -9,6 +9,10 @@ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lfdb_c -L${THIRDPARTY_DIR
 
 file(GLOB_RECURSE SRC_LIST CONFIGURE_DEPENDS *.cpp)
 
+if (NOT ENABLE_HDFS_STORAGE_VAULT)
+    list(REMOVE_ITEM SRC_LIST ${CMAKE_CURRENT_SOURCE_DIR}/hdfs_accessor.cpp)
+endif()
+
 if(BUILD_AZURE STREQUAL "OFF")
     list(REMOVE_ITEM SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/azure_obj_client.cpp")
 endif()
diff --git a/cloud/src/recycler/checker.cpp b/cloud/src/recycler/checker.cpp
index b3a88d2005a7bc..f75d19f4772302 100644
--- a/cloud/src/recycler/checker.cpp
+++ b/cloud/src/recycler/checker.cpp
@@ -51,7 +51,9 @@
 #include "meta-store/keys.h"
 #include "meta-store/txn_kv.h"
 #include "meta-store/txn_kv_error.h"
+#ifdef ENABLE_HDFS_STORAGE_VAULT
 #include "recycler/hdfs_accessor.h"
+#endif
 #include "recycler/s3_accessor.h"
 #include "recycler/storage_vault_accessor.h"
 #ifdef UNIT_TEST
@@ -451,6 +453,7 @@ int InstanceChecker::init_storage_vault_accessors(const InstanceInfoPB& instance
         TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault",
                                  &accessor_map_, &vault);
         if (vault.has_hdfs_info()) {
+#ifdef ENABLE_HDFS_STORAGE_VAULT
             auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info());
             int ret = accessor->init();
             if (ret != 0) {
@@ -460,6 +463,10 @@ int InstanceChecker::init_storage_vault_accessors(const InstanceInfoPB& instance
             }
 
             accessor_map_.emplace(vault.id(), std::move(accessor));
+#else
+            LOG(ERROR) << "HDFS is disabled (via the ENABLE_HDFS_STORAGE_VAULT build option), "
+                       << "but HDFS storage vaults were detected";
+#endif
         } else if (vault.has_obj_info()) {
 #ifdef UNIT_TEST
             auto accessor = std::make_shared<MockAccessor>();
diff --git a/cloud/src/recycler/recycler.cpp b/cloud/src/recycler/recycler.cpp
index 17bf92c08aae24..9f789cf48e8c12 100644
--- a/cloud/src/recycler/recycler.cpp
+++ b/cloud/src/recycler/recycler.cpp
@@ -47,7 +47,9 @@
 #include "meta-store/txn_kv.h"
 #include "meta-store/txn_kv_error.h"
 #include "recycler/checker.h"
+#ifdef ENABLE_HDFS_STORAGE_VAULT
 #include "recycler/hdfs_accessor.h"
+#endif
 #include "recycler/s3_accessor.h"
 #include "recycler/storage_vault_accessor.h"
 #ifdef UNIT_TEST
@@ -591,6 +593,7 @@ int InstanceRecycler::init_storage_vault_accessors() {
         TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault",
                                  &accessor_map_, &vault);
         if (vault.has_hdfs_info()) {
+#ifdef ENABLE_HDFS_STORAGE_VAULT
             auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info());
             int ret = accessor->init();
             if (ret != 0) {
@@ -603,6 +606,10 @@ int InstanceRecycler::init_storage_vault_accessors() {
                       << " resource_id=" << vault.id() << " name=" << vault.name()
                       << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
             accessor_map_.emplace(vault.id(), std::move(accessor));
+#else
+            LOG(ERROR) << "HDFS is disabled (via the ENABLE_HDFS_STORAGE_VAULT build option), "
+                       << "but HDFS storage vaults were detected";
+#endif
         } else if (vault.has_obj_info()) {
             auto s3_conf = S3Conf::from_obj_store_info(vault.obj_info());
             if (!s3_conf) {
diff --git a/cloud/test/CMakeLists.txt b/cloud/test/CMakeLists.txt
index d7bed4ad311bab..4208eb7019f0a7 100644
--- a/cloud/test/CMakeLists.txt
+++ b/cloud/test/CMakeLists.txt
@@ -53,7 +53,11 @@ add_executable(s3_accessor_test s3_accessor_test.cpp)
 
 add_executable(s3_accessor_mock_test s3_accessor_mock_test.cpp)
 
-add_executable(hdfs_accessor_test hdfs_accessor_test.cpp)
+option(ENABLE_HDFS_STORAGE_VAULT "Enable HDFS storage support" ON)
+if (ENABLE_HDFS_STORAGE_VAULT)
+    add_compile_definitions(ENABLE_HDFS_STORAGE_VAULT)
+    add_executable(hdfs_accessor_test hdfs_accessor_test.cpp)
+endif()
 
 add_executable(stopwatch_test stopwatch_test.cpp)
@@ -94,7 +98,10 @@ target_link_libraries(s3_accessor_test ${TEST_LINK_LIBS})
 
 target_link_libraries(s3_accessor_mock_test ${TEST_LINK_LIBS})
 
-target_link_libraries(hdfs_accessor_test ${TEST_LINK_LIBS})
+option(ENABLE_HDFS_STORAGE_VAULT "Enable HDFS storage support" ON)
+if (ENABLE_HDFS_STORAGE_VAULT)
+    target_link_libraries(hdfs_accessor_test ${TEST_LINK_LIBS})
+endif()
 
 target_link_libraries(stopwatch_test ${TEST_LINK_LIBS})
diff --git a/run-cloud-ut.sh b/run-cloud-ut.sh
index c74eee7b5f3eb4..62f84c1b6eb88c 100755
--- a/run-cloud-ut.sh
+++ b/run-cloud-ut.sh
@@ -192,6 +192,7 @@ find . -name "*.gcda" -exec rm {} \;
     -DGLIBC_COMPATIBILITY="${GLIBC_COMPATIBILITY}" \
     -DUSE_LIBCPP="${USE_LIBCPP}" \
     -DUSE_DWARF="${USE_DWARF}" \
+    -DENABLE_HDFS_STORAGE_VAULT=${ENABLE_HDFS_STORAGE_VAULT:-ON} \
     -DUSE_MEM_TRACKER=ON \
     -DUSE_JEMALLOC=OFF \
     -DSTRICT_MEMORY_USE=OFF \
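The same default-ON switch is plumbed into the unit-test build. A usage sketch for running the cloud UTs with the HDFS accessor and its test target compiled out; the `--run` flag is an assumption about the usual run-cloud-ut.sh invocation, not part of this patch:

    # hypothetical: configure, build and run cloud UTs with HDFS support disabled
    ENABLE_HDFS_STORAGE_VAULT=OFF ./run-cloud-ut.sh --run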