diff --git a/VERSION b/VERSION index ca222b7cf394..16eb94e711f8 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.23.0 +0.21.3 diff --git a/analytical_engine/java/pom.xml b/analytical_engine/java/pom.xml index 31bea3c28efc..0b302c340a57 100644 --- a/analytical_engine/java/pom.xml +++ b/analytical_engine/java/pom.xml @@ -64,7 +64,7 @@ - 0.23.0 + 0.21.3 0.1.2 0.19 3.3.11 diff --git a/charts/graphscope-store/Chart.yaml b/charts/graphscope-store/Chart.yaml index 4377550f38dc..54d3c9544485 100644 --- a/charts/graphscope-store/Chart.yaml +++ b/charts/graphscope-store/Chart.yaml @@ -19,7 +19,7 @@ sources: maintainers: - name: GraphScope url: https://github.com/alibaba/graphscope -version: 0.23.0 +version: 0.21.3 dependencies: - condition: kafka.enabled diff --git a/charts/graphscope/Chart.yaml b/charts/graphscope/Chart.yaml index 8e2302f278b1..73ea93e47881 100644 --- a/charts/graphscope/Chart.yaml +++ b/charts/graphscope/Chart.yaml @@ -26,10 +26,10 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.23.0 +version: 0.21.3 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. 
-appVersion: "0.23.0" +appVersion: "0.21.3" diff --git a/flex/CMakeLists.txt b/flex/CMakeLists.txt index 3d7554c850ed..28d6454d2701 100644 --- a/flex/CMakeLists.txt +++ b/flex/CMakeLists.txt @@ -70,7 +70,9 @@ add_subdirectory(engines) add_subdirectory(bin) -set(CPACK_PACKAGE_NAME "graphscope_flex") +if (NOT DEFINED CPACK_PACKAGE_NAME) + set(CPACK_PACKAGE_NAME "graphscope_flex") +endif () set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Flex module of GraphScope") set(CPACK_PACKAGE_VENDOR "GraphScope") set(CPACK_PACKAGE_VERSION ${FLEX_VERSION}) diff --git a/flex/README.md b/flex/README.md new file mode 100644 index 000000000000..7c091ab2196e --- /dev/null +++ b/flex/README.md @@ -0,0 +1,102 @@ +# GraphScope Flex: A Graph Computing Stack with LEGO-Like Modularity + +### Introduction + +Graph applications in real life have diverse workloads, programming interfaces, and storage formats. GraphScope is a one-stop solution that addresses these variations. However, challenges remain due to: +- Various deployment modes are needed, such as an offline analytical pipeline for low latency, online services requiring high throughput, or a learning task benefiting from heterogeneous hardware. +- An all-inclusive solution may not be the best option as it could lead to increased resource and cost requirements. + +GraphScope Flex is an ongoing evolution of GraphScope. It champions a modular design that diminishes resource and cost requirements while delivering a seamless, user-friendly interface for flexible deployment. Presently, GraphScope Flex is actively being developed. + +### Architecture + +
+ GraphScope Flex architecture +
+ +The GraphScope Flex stack (as shown in the figure) consists of multiple components that users can combine like LEGO bricks to customize their graph computing deployments. The components are classified into three layers: +- Application Layer, which includes pre-built libraries of algorithms and GNN models, as well as SDKs and APIs; +- Execution Layer, which comprises multiple engines that are specialized for their respective domains; +- Storage Layer, which establishes a uniform interface for managing graph data across various storage backends. + +## How to Build + +### Dependencies + +Please use `scripts/install_dependencies.sh` to install dependencies. +Alternatively, you can manually install a subset of dependencies required by your selected components. + +Please refer to [scripts/install_dependencies.sh](https://github.com/alibaba/GraphScope/blob/main/flex/scripts/install_dependencies.sh) for the full list of dependencies. + +### Building + +GraphScope Flex comes with a useful script `flexbuild` that allows you to build a customized stack using specific components. `flexbuild` has some parameters and two of them are critical for building: + +- argument `COMPONENTS` specifies which "lego bricks" you want to select. The available components are illustrated in the figure above or listed in the `--help` section. +- flag `--app` specifies the application type of the built artifacts you want to build. The available types are `db`, `olap`, `ldbcdriver`, `gnn`, and `docker` (WIP). + +By selecting and combining the components that best suit your requirements, you can use the `flexbuild` script to create a tailored deployment of GraphScope Flex for your specific use case. + +Please use `flexbuild --help` to learn more. + +## Use Cases and Examples + +### Case 1: For online BI analysis + +
+ GraphScope Flex usecase-1 +
+ +BI analysis is for analysts who interactively analyze data in a WebUI. While high concurrency is unlikely, low latency for complex queries is crucial. + +GraphScope Flex compiles Cypher and Gremlin queries into a **unified intermediate representation (IR)** and optimizes it using a **universal query optimizer** and **catalog** module. The optimized IR is passed to **Gaia Codegen** and executed on **Gaia**, a distributed dataflow engine that reduces query latency through data parallelism. Graph data is accessed from a mutable csr-based persistent storage via a unified interface. + +To build the artifacts for this use case, run the following command: +```bash +./flexbuild cypher gaia cppsp mcsr --app db +# To be supported. Please try scripts for other cases. +``` + +### Case 2: For high QPS queries + +
+ GraphScope Flex usecase-2 +
+ +In some service scenarios, e.g., recommendation or searching, graph queries arrive at an extremely high rate and demand high throughput. In these scenarios, GraphScope Flex can be deployed with a different component set. The **compiler** generates an optimized query plan and **Hiactor Codegen** produces a physical plan tailored for **Hiactor**, a high-performance and concurrent actor framework for OLTP-like queries. + +To build the artifacts for this use case, run the following command: +```bash +./flexbuild hiactor cppsp mcsr --app db +``` + +Please note that we use the artifacts built by this command for LDBC SNB benchmarking. + + +### Case 3: For offline graph analytics + +
+ GraphScope Flex usecase-3 +
+ +GraphScope Flex is an efficient and user-friendly platform for performing graph analytics. It offers **built-in algorithms**, as well as **interfaces** for developing customized algorithms. The runtime, based on **GRAPE**, is fragment-centric and extensible, supporting multiple programming models like **FLASH**, **PIE**, and **Pregel**. Sequential algorithms can be easily parallelized or incrementalized using the **Ingress** component. To achieve high performance, an **in-memory graph store** is deployed in this stack. + +To build the artifacts for this use case, run the following command: +```bash +./flexbuild builtin grape-cpu --app olap +# or +./flexbuild builtin grape-gpu --app ldbcdriver +``` + +### Case 4: For graph learning tasks + +
+ GraphScope Flex usecase-4 +
+ +GraphScope Flex's GNN framework supports billion-scale graphs in industrial scenarios. It provides GNN model development paradigms, **example models**, and the flexibility to choose between **TensorFlow** or **PyTorch** as the training backend. Furthermore, the framework employs decoupled sampling and training processes, which can be independently scaled for optimal end-to-end throughput, providing superior performance. + +To build the artifacts for this use case, run the following command: +```bash +./flexbuild gnnmodels graphlearn tensorflow vineyard --app gnn +``` diff --git a/flex/flexbuild b/flex/flexbuild new file mode 100755 index 000000000000..df096c8188f7 --- /dev/null +++ b/flex/flexbuild @@ -0,0 +1,426 @@ +#!/usr/bin/env bash +# This script was generated by bashly 0.8.10 (https://bashly.dannyb.co) +# Modifying it manually is not recommended + +# :wrapper.bash3_bouncer +if [[ "${BASH_VERSINFO:-0}" -lt 4 ]]; then + printf "bash version 4 or higher is required\n" >&2 + exit 1 +fi + +# :command.master_script +# :command.root_command +root_command() { + # src/root_command.sh + # echo "# this file is located in 'src/root_command.sh'" + # echo "# you can edit it freely and regenerate (it will not be overwritten)" + # inspect_args + + function build_grape_cpu { + target_name=$1 + + tmp_dir=`mktemp -d` + pushd ${tmp_dir} > /dev/null + cmake_options="" + # if [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then + if [[ $(uname -s) == Linux* ]];then + # Do something under GNU/Linux platform + if grep -q avx512 /proc/cpuinfo; then + cmake_options="${cmake_options} -DUSE_SIMD=ON" + fi + if grep -q HugePages_Total /proc/meminfo; then + hpn=`grep HugePages_Total /proc/meminfo | cut -d ':' -f 2` + if [[ $hpn -gt 0 ]]; then + cmake_options="${cmake_options} -DUSE_HUGEPAGES=ON" + fi + fi + fi + cmd="git clone https://github.com/alibaba/libgrape-lite.git && cd libgrape-lite && git submodule update --init --recursive && mkdir build && cd build && cmake ${cmake_options} 
.. && make analytical_apps -j && mv run_app ${target_name}" + echo $cmd + eval $cmd + popd > /dev/null + } + + function build_grape_gpu { + target_name=$1 + if nvidia-smi &> /dev/null; then + tmp_dir=`mktemp -d` + pushd ${tmp_dir} > /dev/null + cmd="git clone https://github.com/alibaba/libgrape-lite.git && cd libgrape-lite && git submodule update --init --recursive && mkdir build && cd build && cmake .. && make gpu_analytical_apps -j && mv run_cuda_app ${target_name}" + echo $cmd + eval $cmd + popd > /dev/null + else + echo "Building libgrape-gpu failed: GPU is not found" + fi + } + + function build_grape_ldbc_driver { + package_name=$1 + + tmp_dir=`mktemp -d` + pushd ${tmp_dir} > /dev/null + cmd="git clone https://github.com/alibaba/libgrape-lite.git && cd libgrape-lite/ldbc_driver/ && mvn package && mv graphalytics-*-bin.tar.gz ${package_name}" + echo $cmd + eval $cmd + popd > /dev/null + } + + SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + + ars=() + + output_dir=`realpath ${args[--output_dir]}` + mkdir -p ${output_dir} + + if [ ${args[--app]} == "db" ]; then + package_name="graphscope_flex_${args[--app]}" + comps="" + eval "comps=(${args[components]})" + for i in "${comps[@]}"; do + package_name="${package_name}_${i}" + done + build_dir=`mktemp -d` + pushd $build_dir > /dev/null + cmd="cmake -DCPACK_PACKAGE_NAME=${package_name} ${SCRIPT_DIR} && make -j && make package && mv ${package_name}*.deb ${output_dir}/" + echo $cmd + eval $cmd + fname=`ls ${output_dir}/*.deb` + ars+=("${fname}") + popd > /dev/null + elif [ ${args[--app]} == "olap" ]; then + target_name="graphscope_flex_${args[--app]}" + comps="" + eval "comps=(${args[components]})" + for i in "${comps[@]}"; do + target_name="${target_name}_${i}" + done + if [[ " ${comps[*]} " =~ " grape-cpu " ]]; then + build_grape_cpu ${output_dir}/${target_name} + elif [[ " ${comps[*]} " =~ " grape-gpu " ]]; then + build_grape_gpu ${output_dir}/${target_name} + fi + 
ars+=("${target_name}") + elif [ ${args[--app]} == "ldbcdriver" ]; then + target_name="graphscope_flex_olap" + comps="" + eval "comps=(${args[components]})" + for i in "${comps[@]}"; do + target_name="${target_name}_${i}" + done + if [[ " ${comps[*]} " =~ " grape-cpu " ]]; then + build_grape_cpu ${output_dir}/${target_name} + elif [[ " ${comps[*]} " =~ " grape-gpu " ]]; then + build_grape_gpu ${output_dir}/${target_name} + fi + + package_name="graphscope_flex_ldbcdriver" + for i in "${comps[@]}"; do + package_name="${package_name}_${i}" + done + package_name="${package_name}-SNAPSHOT-bin.tar.gz" + + build_grape_ldbc_driver ${output_dir}/${package_name} + ars+=("${package_name}") + + elif [ ${args[--app]} == "gnn" ]; then + target_name="graphscope_flex_gnn_gnnmodels_graphlearn_tensorflow_vineyard" + pushd ${SCRIPT_DIR}/../ > /dev/null + make learning + mv learning_engine/graph-learn/graphlearn/built/lib/libgraphlearn_shared* ${output_dir}/ + fname=`ls ${output_dir}/libgraphlearn_shared*` + ext="${fname##*.}" + mv ${fname} ${output_dir}/${target_name}.${ext} + ars+=("${target_name}.${ext}") + popd > /dev/null + else + echo "$(red Unsupported composition: ${args[--app]} and ${args[components]})" + fi + + echo "$(green_bold GraphScope Flex artifacts for customized deployment are built.)" + echo + + echo "artifacts: " + for x in "${ars[@]}"; do + echo "$(green $x)" + done + +} + +# :command.version_command +version_command() { + echo "$version" +} + +# :command.usage +flexbuild_usage() { + if [[ -n $long_usage ]]; then + printf "flexbuild - Utility script to build artifacts for GraphScope Flex\n" + echo + + else + printf "flexbuild - Utility script to build artifacts for GraphScope Flex\n" + echo + + fi + + printf "Usage:\n" + printf " flexbuild COMPONENTS... 
[OPTIONS]\n" + printf " flexbuild --help | -h\n" + printf " flexbuild --version | -v\n" + echo + + # :command.long_usage + if [[ -n $long_usage ]]; then + printf "Options:\n" + + # :command.usage_fixed_flags + echo " --help, -h" + printf " Show this help\n" + echo + echo " --version, -v" + printf " Show version number\n" + echo + + # :command.usage_flags + # :flag.usage + echo " --app, -a ATYPE (required)" + printf " Application type\n" + printf " Allowed: db, olap, ldbcdriver, gnn\n" + echo + + # :flag.usage + echo " --output_dir, -o NAME" + printf " Output prefix\n" + printf " Default: .\n" + echo + + # :command.usage_args + printf "Arguments:\n" + + # :argument.usage + echo " COMPONENTS..." + printf " Components (LEGO bricks) to include and link\n" + printf " Allowed: cppsp, builtin, hiactor, gaia, grape-cpu, grape-gpu, vineyard, mcsr, gnnmodels, graphlearn, tensorflow\n" + echo + + # :command.usage_examples + printf "Examples:\n" + printf " ./flexbuild cppsp hiactor mcsr --app db --output_dir ./output\n" + printf " ./flexbuild builtin grape-cpu --app olap\n" + printf " ./flexbuild builtin grape-gpu --app ldbcdriver\n" + echo + + fi +} + +# :command.normalize_input +normalize_input() { + local arg flags + + while [[ $# -gt 0 ]]; do + arg="$1" + if [[ $arg =~ ^(--[a-zA-Z0-9_\-]+)=(.+)$ ]]; then + input+=("${BASH_REMATCH[1]}") + input+=("${BASH_REMATCH[2]}") + elif [[ $arg =~ ^(-[a-zA-Z0-9])=(.+)$ ]]; then + input+=("${BASH_REMATCH[1]}") + input+=("${BASH_REMATCH[2]}") + elif [[ $arg =~ ^-([a-zA-Z0-9][a-zA-Z0-9]+)$ ]]; then + flags="${BASH_REMATCH[1]}" + for (( i=0 ; i < ${#flags} ; i++ )); do + input+=("-${flags:i:1}") + done + else + input+=("$arg") + fi + + shift + done +} +# :command.inspect_args +inspect_args() { + readarray -t sorted_keys < <(printf '%s\n' "${!args[@]}" | sort) + if (( ${#args[@]} )); then + echo args: + for k in "${sorted_keys[@]}"; do echo "- \${args[$k]} = ${args[$k]}"; done + else + echo args: none + fi + + if (( ${#other_args[@]} )); 
then + echo + echo other_args: + echo "- \${other_args[*]} = ${other_args[*]}" + for i in "${!other_args[@]}"; do + echo "- \${other_args[$i]} = ${other_args[$i]}" + done + fi +} + +# :command.user_lib +# src/lib/colors.sh +print_in_color() { + local color="$1" + shift + if [[ -z ${NO_COLOR+x} ]]; then + printf "$color%b\e[0m\n" "$*"; + else + printf "%b\n" "$*"; + fi +} + +red() { print_in_color "\e[31m" "$*"; } +green() { print_in_color "\e[32m" "$*"; } +yellow() { print_in_color "\e[33m" "$*"; } +blue() { print_in_color "\e[34m" "$*"; } +magenta() { print_in_color "\e[35m" "$*"; } +cyan() { print_in_color "\e[36m" "$*"; } +bold() { print_in_color "\e[1m" "$*"; } +underlined() { print_in_color "\e[4m" "$*"; } +red_bold() { print_in_color "\e[1;31m" "$*"; } +green_bold() { print_in_color "\e[1;32m" "$*"; } +yellow_bold() { print_in_color "\e[1;33m" "$*"; } +blue_bold() { print_in_color "\e[1;34m" "$*"; } +magenta_bold() { print_in_color "\e[1;35m" "$*"; } +cyan_bold() { print_in_color "\e[1;36m" "$*"; } +red_underlined() { print_in_color "\e[4;31m" "$*"; } +green_underlined() { print_in_color "\e[4;32m" "$*"; } +yellow_underlined() { print_in_color "\e[4;33m" "$*"; } +blue_underlined() { print_in_color "\e[4;34m" "$*"; } +magenta_underlined() { print_in_color "\e[4;35m" "$*"; } +cyan_underlined() { print_in_color "\e[4;36m" "$*"; } + +# :command.command_functions + +# :command.parse_requirements +parse_requirements() { + # :command.fixed_flags_filter + case "${1:-}" in + --version | -v ) + version_command + exit + ;; + + --help | -h ) + long_usage=yes + flexbuild_usage + exit + ;; + + esac + + # :command.command_filter + action="root" + + # :command.parse_requirements_while + while [[ $# -gt 0 ]]; do + key="$1" + case "$key" in + # :flag.case + --app | -a ) + + # :flag.case_arg + if [[ -n ${2+x} ]]; then + + args[--app]="$2" + shift + shift + else + printf "%s\n" "--app requires an argument: --app, -a ATYPE" >&2 + exit 1 + fi + ;; + + # :flag.case + --output_dir | 
-o ) + + # :flag.case_arg + if [[ -n ${2+x} ]]; then + + args[--output_dir]="$2" + shift + shift + else + printf "%s\n" "--output_dir requires an argument: --output_dir, -o NAME" >&2 + exit 1 + fi + ;; + + -?* ) + printf "invalid option: %s\n" "$key" >&2 + exit 1 + ;; + + * ) + # :command.parse_requirements_case + # :command.parse_requirements_case_repeatable + if [[ -z ${args[components]+x} ]]; then + + args[components]="\"$1\"" + shift + else + args[components]="${args[components]} \"$1\"" + shift + fi + + ;; + + esac + done + + # :command.required_args_filter + if [[ -z ${args[components]+x} ]]; then + printf "missing required argument: COMPONENTS\nusage: flexbuild COMPONENTS... [OPTIONS]\n" >&2 + exit 1 + fi + + # :command.required_flags_filter + if [[ -z ${args[--app]+x} ]]; then + printf "missing required flag: --app, -a ATYPE\n" >&2 + exit 1 + fi + + # :command.default_assignments + [[ -n ${args[--output_dir]:-} ]] || args[--output_dir]="." + + # :command.whitelist_filter + eval "input_array=(${args[components]})" + for i in "${input_array[@]}"; do + if [[ ! $i =~ ^(cppsp|builtin|hiactor|gaia|grape-cpu|grape-gpu|vineyard|mcsr|gnnmodels|graphlearn|tensorflow)$ ]]; then + printf "%s\n" "components must be one of: cppsp, builtin, hiactor, gaia, grape-cpu, grape-gpu, vineyard, mcsr, gnnmodels, graphlearn, tensorflow" >&2 + exit 1 + fi + done + if [[ ! 
${args[--app]} =~ ^(db|olap|ldbcdriver|gnn)$ ]]; then + printf "%s\n" "--app must be one of: db, olap, ldbcdriver, gnn" >&2 + exit 1 + fi + +} + +# :command.initialize +initialize() { + version="0.1.0" + long_usage='' + set -e + + # src/initialize.sh + +} + +# :command.run +run() { + declare -A args=() + declare -a other_args=() + declare -a input=() + normalize_input "$@" + parse_requirements "${input[@]}" + + if [[ $action == "root" ]]; then + root_command + fi +} + +initialize +run "$@" diff --git a/flex/scripts/install_dependencies.sh b/flex/scripts/install_dependencies.sh index 32dfa9fefc52..43268ccc1110 100644 --- a/flex/scripts/install_dependencies.sh +++ b/flex/scripts/install_dependencies.sh @@ -6,7 +6,7 @@ apt install -y \ libboost-all-dev libxml2-dev apt install -y xfslibs-dev libgnutls28-dev liblz4-dev maven openssl pkg-config \ libsctp-dev gcc make python3 systemtap-sdt-dev libtool libyaml-cpp-dev \ - libc-ares-dev stow libfmt-dev diffutils valgrind doxygen python3-pip net-tools + libc-ares-dev stow libfmt-dev diffutils valgrind doxygen python3-pip net-tools graphviz git clone https://github.com/alibaba/libgrape-lite.git cd libgrape-lite