Merge branch 'master' into optimize/move-join-probe-row

vesoft-inc · Jun 13, 2022 · bda2797 · bda2797
2 parents 4e1a718 + 2b35538
commit bda2797
Show file tree

Hide file tree

Showing 139 changed files with 5,683 additions and 1,029 deletions.
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
@@ -1,6 +1,6 @@
 #Require an approved review in PRs including files with a designated code owner.
 /conf/ @vesoft-inc/tech-committee-reviewers
-/src/kvstore/raftex/ @critical27 @sherman-the-tank
-/cmake/ @sherman-the-tank @yixinglu @dutor
+/src/kvstore/raftex/ @critical27
+/cmake/ @vesoft-inc/tech-committee-reviewers
 *.thrift @vesoft-inc/tech-committee-reviewers
 *.yy @CPWstatic @dutor
diff --git a/.github/workflows/auto_cherry_pick.yml b/.github/workflows/auto_cherry_pick.yml
@@ -0,0 +1,29 @@
+# Manually triggered workflow that invokes the cherry-pick-robot action with the
+# label supplied by the operator.
+name: Auto Cherry Pick
+on:
+  workflow_dispatch:
+    inputs:
+      pr_label:
+        description: "Cherry pick label such as 'cherry-pick-v3.2'"
+        required: true
+
+defaults:
+  run:
+    shell: bash
+
+jobs:
+  auto-cherry-pick:
+    #if: ${{ startsWith(github.event.pull_request.labels.*.name, 'cherry-pick-') && github.event.pull_request.merged == true }}
+    runs-on: ubuntu-latest
+    container:
+      image: reg.vesoft-inc.com/dashboard/dashboard-dev:centos7
+    steps:
+      - name: keep workspace empty
+        run: |
+          rm -rf *
+      - name: auto cherry pick
+        uses: xigongdaEricyang/cherry-pick-robot@with-python
+        with:
+          repo_token: ${{ secrets.GH_BOT_PAT }}
+          # The key must match the input declared under workflow_dispatch.inputs
+          # (pr_label); an undeclared key expands to an empty string.
+          pr_label: ${{ github.event.inputs.pr_label }}
+          #pr_num: ${{ github.event.pull_request.number }}
+          auto_merge: true
diff --git a/conf/tuned/README.md b/conf/tuned/README.md
@@ -0,0 +1,8 @@
+# Summary
+
+This is a tuned profile that configures the system to optimize it for the Nebula Graph service.
+
+Follow the steps below to use it:
+ * Install the tuned service if absent, and enable it with `systemctl`.
+ * Copy the __nebula__ directory into `/etc/tuned`.
+ * Execute `tuned-adm profile nebula` to activate the profile.
diff --git a/conf/tuned/nebula/tuned.conf b/conf/tuned/nebula/tuned.conf
@@ -0,0 +1,32 @@
+[main]
+summary=Optimize for Nebula Graph DBMS
+include=latency-performance
+
+
+[vm]
+transparent_hugepages=never
+
+
+[sysctl]
+kernel.core_pattern=core
+kernel.core_uses_pid=1
+kernel.numa_balancing=0
+
+vm.swappiness=0
+vm.oom_dump_tasks=1
+# It is suggested to set min_free_kbytes to approximately 2% of the total memory,
+# with a minimum of 1GB and a maximum of 5GB.
+vm.min_free_kbytes=5242880
+vm.max_map_count=131060
+vm.dirty_background_ratio = 3
+vm.dirty_ratio = 20
+vm.dirty_expire_centisecs = 500
+vm.dirty_writeback_centisecs = 100
+
+net.core.busy_read=50
+net.core.busy_poll=50
+net.core.somaxconn=4096
+net.ipv4.tcp_max_syn_backlog=4096
+net.core.netdev_max_backlog=10240
+net.ipv4.tcp_fastopen=3
+net.ipv4.tcp_slow_start_after_idle=0
diff --git a/src/common/base/Arena.cpp b/src/common/base/Arena.cpp
@@ -0,0 +1,44 @@
+// Copyright (c) 2022 vesoft inc. All rights reserved.
+//
+// This source code is licensed under Apache 2.0 License.
+
+#include "common/base/Arena.h"
+
+#include <cstdint>
+
+namespace nebula {
+
+// Returns a pointer to `alloc` bytes of uninitialized storage aligned to kAlignment.
+//
+// The storage is carved out of the current chunk when it fits. Otherwise a new chunk of
+// max(alloc, kMinChunkSize) bytes is started and whatever was left in the previous chunk
+// is abandoned.
+//
+// `alloc` must be non-zero (enforced by DCHECK only). Requests larger than kMaxChunkSize
+// are rejected: DLOG(FATAL) is issued and nullptr is returned, so callers must be
+// prepared for a null result.
+void* Arena::allocateAligned(const std::size_t alloc) {
+  DCHECK_NE(alloc, 0);  // don't allow zero sized allocation
+  // kAlignment being a power of 2 lets a bit-and stand in for the modulo operation
+  static_assert(kAlignment && !(kAlignment & (kAlignment - 1)), "Align must be power of 2.");
+  constexpr std::size_t kAlignMask = kAlignment - 1;
+  // The limit applies to the requested size itself, so whether a request is accepted
+  // does not depend on where the cursor currently happens to be.
+  if (UNLIKELY(alloc > kMaxChunkSize)) {
+    DLOG(FATAL) << "Arena can't allocate so large memory.";
+    return nullptr;
+  }
+  // Bytes to skip so that the returned address is aligned. The trailing mask folds the
+  // "already aligned" case to 0 instead of wasting a whole kAlignment block.
+  const std::size_t pad =
+      (kAlignment - (reinterpret_cast<uintptr_t>(currentPtr_) & kAlignMask)) & kAlignMask;
+  const std::size_t consumption = alloc + pad;
+  // The null check keeps an empty arena from handing out nullptr for a zero-sized
+  // request in case the DCHECK above is not active.
+  if (LIKELY(consumption <= availableSize_ && currentPtr_ != nullptr)) {
+    void* ptr = currentPtr_ + pad;
+    currentPtr_ += consumption;
+#ifndef NDEBUG
+    allocatedSize_ += consumption;
+#endif
+    availableSize_ -= consumption;
+    return ptr;
+  } else {
+    newChunk(std::max(alloc, kMinChunkSize));
+    // The new operator will allocate the aligned memory
+    DCHECK_EQ(reinterpret_cast<uintptr_t>(currentPtr_) & (kAlignment - 1), 0);
+    void* ptr = currentPtr_;
+    currentPtr_ += alloc;
+#ifndef NDEBUG
+    allocatedSize_ += alloc;
+#endif
+    availableSize_ -= alloc;
+    return ptr;
+  }
+}
+
+} // namespace nebula
diff --git a/src/common/base/Arena.h b/src/common/base/Arena.h
@@ -0,0 +1,90 @@
+// Copyright (c) 2022 vesoft inc. All rights reserved.
+//
+// This source code is licensed under Apache 2.0 License.
+
+#pragma once
+
+#include <folly/Likely.h>
+
+#include <boost/core/noncopyable.hpp>
+#include <cstddef>
+#include <limits>
+#include <type_traits>
+
+#include "common/base/Logging.h"
+#include "common/cpp/helpers.h"
+
+namespace nebula {
+
+// MT-unsafe arena allocator.
+// It's optimized for constructing/destructing many small objects: storage is handed out
+// from chunks that are linked together and released all at once by the destructor.
+class Arena : public boost::noncopyable, cpp::NonMovable {
+ public:
+  // Releases every chunk. Objects constructed in the arena are NOT destructed here.
+  ~Arena() {
+    while (LIKELY(currentChunk_ != nullptr)) {
+      auto *prev = currentChunk_->prev;
+      // The chunk was obtained with `new std::byte[]` in newChunk(), so it has to be
+      // released through a std::byte pointer as well; the Chunk header sits at the very
+      // beginning of that buffer.
+      delete[] reinterpret_cast<std::byte *>(currentChunk_);
+      currentChunk_ = prev;
+    }
+#ifndef NDEBUG
+    allocatedSize_ = 0;
+#endif
+    availableSize_ = 0;
+    currentPtr_ = nullptr;
+  }
+
+  // Returns storage of `alloc` bytes whose address is aligned to kAlignment.
+  // CPUs access memory most efficiently at aligned addresses, so constructing objects at
+  // aligned addresses reduces the number of memory accesses and speeds up reads/writes.
+  void *allocateAligned(const std::size_t alloc);
+
+#ifndef NDEBUG
+  // Debug builds only: bytes handed out so far, including alignment padding.
+  std::size_t allocatedSize() const {
+    return allocatedSize_;
+  }
+#endif
+
+  // Bytes still unused in the current chunk.
+  std::size_t availableSize() const {
+    return availableSize_;
+  }
+
+ private:
+  // Smallest payload size of a newly created chunk
+  static constexpr std::size_t kMinChunkSize = 4096;
+  // Upper bound checked by allocateAligned() before serving a request
+  static constexpr std::size_t kMaxChunkSize = std::numeric_limits<uint16_t>::max();
+  // Alignment of every address returned by allocateAligned()
+  static constexpr std::size_t kAlignment = std::alignment_of<std::max_align_t>::value;
+
+  // Header stored at the beginning of every chunk; links to the previously created one.
+  struct Chunk {
+    explicit Chunk(Chunk *p) : prev{p} {}
+
+    union {
+      Chunk *prev{nullptr};
+      // Pads the header to at least kAlignment bytes
+      std::byte aligned[kAlignment];
+    };
+  };
+
+  // Allocates a new chunk with `size` payload bytes and makes it the current one.
+  // The payload starts right after the Chunk header, so the current pointer keeps its
+  // alignment as long as the buffer returned by new[] is itself aligned to kAlignment.
+  void newChunk(std::size_t size) {
+    DCHECK_NE(size, 0);
+    std::byte *ptr = new std::byte[size + sizeof(Chunk)];
+    currentChunk_ = new (ptr) Chunk(currentChunk_);
+    availableSize_ = size;
+    currentPtr_ = (ptr + sizeof(Chunk));
+  }
+
+  // Most recently created chunk; older chunks are reachable through Chunk::prev
+  Chunk *currentChunk_{nullptr};
+// These are debug info
+// Remove to speed up in Release build
+#ifndef NDEBUG
+  // total size allocated
+  std::size_t allocatedSize_{0};
+#endif
+  // Size still available to allocate from the current chunk
+  std::size_t availableSize_{0};
+  // Current pointer to available memory address
+  std::byte *currentPtr_{nullptr};
+};
+
+} // namespace nebula
diff --git a/src/common/base/CMakeLists.txt b/src/common/base/CMakeLists.txt
@@ -13,6 +13,7 @@ nebula_add_library(
  Status.cpp
  SanitizerOptions.cpp
  SignalHandler.cpp
+ Arena.cpp
  ${gdb_debug_script}
 )
 

diff --git a/src/common/base/ObjectPool.h b/src/common/base/ObjectPool.h
@@ -13,6 +13,7 @@
 #include <list>
 #include <type_traits>
 
+#include "common/base/Arena.h"
 #include "common/base/Logging.h"
 #include "common/cpp/helpers.h"
 
@@ -26,26 +27,19 @@ class ObjectPool final : private boost::noncopyable, private cpp::NonMovable {
  public:
  ObjectPool() {}
 
- ~ObjectPool() = default;
+ ~ObjectPool() {
+ clear();
+ }
 
  void clear() {
  SLGuard g(lock_);
  objects_.clear();
  }
 
- template <typename T>
- T *add(T *obj) {
- if constexpr (std::is_base_of<Expression, T>::value) {
- VLOG(3) << "New expression added into pool: " << obj->toString();
- }
- SLGuard g(lock_);
- objects_.emplace_back(obj);
- return obj;
- }
-
  template <typename T, typename... Args>
  T *makeAndAdd(Args &&... args) {
- return add(new T(std::forward<Args>(args)...));
+ void *ptr = arena_.allocateAligned(sizeof(T));
+ return add(new (ptr) T(std::forward<Args>(args)...));
  }
 
  bool empty() const {
@@ -58,7 +52,7 @@ class ObjectPool final : private boost::noncopyable, private cpp::NonMovable {
  public:
  template <typename T>
  explicit OwnershipHolder(T *obj)
- : obj_(obj), deleteFn_([](void *p) { delete reinterpret_cast<T *>(p); }) {}
+ : obj_(obj), deleteFn_([](void *p) { reinterpret_cast<T *>(p)->~T(); }) {}
 
  ~OwnershipHolder() {
  deleteFn_(obj_);
@@ -69,7 +63,18 @@ class ObjectPool final : private boost::noncopyable, private cpp::NonMovable {
  std::function<void(void *)> deleteFn_;
  };
 
+ template <typename T>
+ T *add(T *obj) {
+ if constexpr (std::is_base_of<Expression, T>::value) {
+ VLOG(3) << "New expression added into pool: " << obj->toString();
+ }
+ SLGuard g(lock_);
+ objects_.emplace_back(obj);
+ return obj;
+ }
+
  std::list<OwnershipHolder> objects_;
+ Arena arena_;
 
  folly::SpinLock lock_;
 };

diff --git a/src/common/base/test/ArenaBenchmark.cpp b/src/common/base/test/ArenaBenchmark.cpp
@@ -0,0 +1,88 @@
+// Copyright (c) 2022 vesoft inc. All rights reserved.
+//
+// This source code is licensed under Apache 2.0 License.
+
+#include <folly/Benchmark.h>
+#include <folly/init/Init.h>
+#include <folly/memory/Arena.h>
+
+#include <string>
+#include <type_traits>
+
+#include "common/base/Arena.h"
+#include "common/expression/LabelExpression.h"
+
+namespace nebula {
+
+// Minimal LabelExpression subclass used as the benchmark payload.
+class TestExpr : public LabelExpression {
+ public:
+  explicit TestExpr(const std::string &name = "") : LabelExpression(fakePool(), name) {}
+
+ private:
+  // Placeholder pool pointer handed to the base class; it is only a sentinel value and
+  // does not refer to a real ObjectPool.
+  static ObjectPool *fakePool() {
+    return reinterpret_cast<ObjectPool *>(1);
+  }
+};
+
+// Baseline: each round heap-allocates one TestExpr with new and frees it with delete.
+BENCHMARK(DefaultAllocator, iters) {
+  std::size_t remaining = iters * 1000;
+  while (remaining-- > 0) {
+    auto *heapExpr = new TestExpr("Label");
+    delete heapExpr;
+  }
+}
+
+// Each round constructs one TestExpr inside storage taken from nebula::Arena and then
+// destructs it in place; the storage itself is reclaimed when the arena goes out of scope.
+BENCHMARK_RELATIVE(ArenaAllocator, iters) {
+  std::size_t remaining = iters * 1000;
+  Arena arena;
+  while (remaining-- > 0) {
+    void *slot = arena.allocateAligned(sizeof(TestExpr));
+    auto *placed = new (slot) TestExpr("Label");
+    placed->~TestExpr();
+  }
+}
+
+// Same workload as ArenaAllocator, but the storage comes from folly::SysArena.
+BENCHMARK_RELATIVE(FollyArenaAllocator, iters) {
+  std::size_t remaining = iters * 1000;
+  folly::SysArena arena;
+  while (remaining-- > 0) {
+    void *slot = arena.allocate(sizeof(TestExpr));
+    auto *placed = new (slot) TestExpr("Label");
+    placed->~TestExpr();
+  }
+}
+
+BENCHMARK_DRAW_LINE();
+
+} // namespace nebula
+
+// Benchmark entry point: initializes folly with the command line, then runs every
+// registered benchmark.
+int main(int argc, char **argv) {
+  constexpr bool kRemoveFlags = true;
+  folly::init(&argc, &argv, kRemoveFlags);
+
+  folly::runBenchmarks();
+  return 0;
+}
+
+// CPU info
+// Brand Raw: Intel(R) Xeon(R) CPU E5-2690 v2 @ 3.00GHz
+// Hz Advertised Friendly: 3.0000 GHz
+// Hz Actual Friendly: 3.2942 GHz
+// Hz Advertised: (3000000000, 0)
+// Hz Actual: (3294220000, 0)
+// Arch: X86_64
+// Bits: 64
+// Count: 40
+// Arch String Raw: x86_64
+// L1 Data Cache Size: 32768
+// L1 Instruction Cache Size: 32768
+// L2 Cache Size: 262144
+// L2 Cache Line Size: 256
+// L2 Cache Associativity: 6
+// L3 Cache Size: 26214400
+//
+// Build in Release mode
+//
+// ============================================================================
+// /home/shylock.huang/nebula/src/common/base/test/ArenaBenchmark.cpprelative time/iter iters/s
+// ============================================================================
+// DefaultAllocator 36.59us 27.33K
+// ArenaAllocator 145.89% 25.08us 39.87K
+// FollyArenaAllocator 138.96% 26.33us 37.98K
+// ----------------------------------------------------------------------------
+// ============================================================================