Skip to content

Commit

Permalink
Merge branch 'master' into optimize/move-join-probe-row
Browse files Browse the repository at this point in the history
  • Loading branch information
Sophie-Xie authored Jun 13, 2022
2 parents 4e1a718 + 2b35538 commit bda2797
Show file tree
Hide file tree
Showing 139 changed files with 5,683 additions and 1,029 deletions.
4 changes: 2 additions & 2 deletions .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#Require an approved review in PRs including files with a designated code owner.
/conf/ @vesoft-inc/tech-committee-reviewers
/src/kvstore/raftex/ @critical27 @sherman-the-tank
/cmake/ @sherman-the-tank @yixinglu @dutor
/src/kvstore/raftex/ @critical27
/cmake/ @vesoft-inc/tech-committee-reviewers
*.thrift @vesoft-inc/tech-committee-reviewers
*.yy @CPWstatic @dutor
29 changes: 29 additions & 0 deletions .github/workflows/auto_cherry_pick.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Manually triggered workflow that cherry-picks pull requests carrying the
# given label (e.g. 'cherry-pick-v3.2').
name: Auto Cherry Pick
on:
  workflow_dispatch:
    inputs:
      pr_label:
        description: "Cherry pick label such as 'cherry-pick-v3.2'"
        required: true

defaults:
  run:
    shell: bash

jobs:
  auto-cherry-pick:
    #if: ${{ startsWith(github.event.pull_request.labels.*.name, 'cherry-pick-') && github.event.pull_request.merged == true }}
    runs-on: ubuntu-latest
    container:
      image: reg.vesoft-inc.com/dashboard/dashboard-dev:centos7
    steps:
      - name: keep workspace empty
        run: |
          rm -rf *
      - name: auto cherry pick
        uses: xigongdaEricyang/cherry-pick-robot@with-python
        with:
          repo_token: ${{ secrets.GH_BOT_PAT }}
          # Must match the input key declared under workflow_dispatch.inputs
          # above; a misspelled key evaluates to an empty string.
          pr_label: ${{ github.event.inputs.pr_label }}
          #pr_num: ${{ github.event.pull_request.number }}
          auto_merge: true
8 changes: 8 additions & 0 deletions conf/tuned/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Summary

This directory contains a `tuned` profile that configures the system for running the Nebula Graph service.

Follow the steps below to use it:
* Install the tuned service if absent, and enable it with `systemctl`.
* Copy the __nebula__ directory into `/etc/tuned`.
* Execute `tuned-adm profile nebula` to activate the profile.
32 changes: 32 additions & 0 deletions conf/tuned/nebula/tuned.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
[main]
summary=Optimize for Nebula Graph DBMS
include=latency-performance


[vm]
transparent_hugepages=never


[sysctl]
kernel.core_pattern=core
kernel.core_uses_pid=1
kernel.numa_balancing=0

vm.swappiness=0
vm.oom_dump_tasks=1
# min_free_kbytes should be set to approximately 2% of total memory,
# but no less than 1GB and no more than 5GB.
# The value below (5242880 kB = 5GB) is the upper bound of that range.
vm.min_free_kbytes=5242880
vm.max_map_count=131060
vm.dirty_background_ratio = 3
vm.dirty_ratio = 20
vm.dirty_expire_centisecs = 500
vm.dirty_writeback_centisecs = 100

net.core.busy_read=50
net.core.busy_poll=50
net.core.somaxconn=4096
net.ipv4.tcp_max_syn_backlog=4096
net.core.netdev_max_backlog=10240
net.ipv4.tcp_fastopen=3
net.ipv4.tcp_slow_start_after_idle=0
44 changes: 44 additions & 0 deletions src/common/base/Arena.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright (c) 2022 vesoft inc. All rights reserved.
//
// This source code is licensed under Apache 2.0 License.

#include "common/base/Arena.h"

#include <cstdint>

namespace nebula {

// Returns `alloc` bytes of uninitialized storage whose address is a multiple of
// kAlignment, taken from the current chunk when it fits and from a freshly
// allocated chunk otherwise.
//
// `alloc` must be non-zero (checked in debug builds only). Requests larger than
// kMaxChunkSize are rejected: fatal log in debug builds, nullptr otherwise.
void* Arena::allocateAligned(const std::size_t alloc) {
  DCHECK_NE(alloc, 0);  // don't allow zero sized allocation
  // A power-of-two alignment lets the modulo be computed with a bit mask.
  static_assert(kAlignment && !(kAlignment & (kAlignment - 1)), "Align must be power of 2.");
  // A request is served either from the current chunk or from a new chunk of
  // max(alloc, kMinChunkSize) bytes, so the chunk limit applies to `alloc`
  // itself, not to `alloc` plus the padding needed in the current chunk.
  if (UNLIKELY(alloc > kMaxChunkSize)) {
    DLOG(FATAL) << "Arena can't allocate so large memory.";
    return nullptr;
  }
  constexpr std::size_t kMask = kAlignment - 1;
  // Bytes to skip so that the returned address is aligned. The trailing mask
  // maps the "already aligned" case to 0 instead of a full kAlignment, which
  // would otherwise be wasted on every aligned allocation.
  const std::size_t pad =
      (kAlignment - (reinterpret_cast<uintptr_t>(currentPtr_) & kMask)) & kMask;
  const std::size_t consumption = alloc + pad;
  if (LIKELY(consumption <= availableSize_)) {
    void* ptr = currentPtr_ + pad;
    currentPtr_ += consumption;
#ifndef NDEBUG
    allocatedSize_ += consumption;
#endif
    availableSize_ -= consumption;
    return ptr;
  }

  // Not enough room: start a new chunk. Whatever was left in the previous
  // chunk is no longer reachable for allocation.
  newChunk(std::max(alloc, kMinChunkSize));
  // newChunk() leaves currentPtr_ right after the chunk header, which is aligned.
  DCHECK_EQ(reinterpret_cast<uintptr_t>(currentPtr_) & kMask, 0);
  void* ptr = currentPtr_;
  currentPtr_ += alloc;
#ifndef NDEBUG
  allocatedSize_ += alloc;
#endif
  availableSize_ -= alloc;
  return ptr;
}

} // namespace nebula
90 changes: 90 additions & 0 deletions src/common/base/Arena.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// Copyright (c) 2022 vesoft inc. All rights reserved.
//
// This source code is licensed under Apache 2.0 License.

#pragma once

#include <folly/Likely.h>

#include <boost/core/noncopyable.hpp>
#include <cstddef>
#include <limits>
#include <type_traits>

#include "common/base/Logging.h"
#include "common/cpp/helpers.h"

namespace nebula {

// MT-unsafe arena allocator.
// Optimized for constructing/destructing many small objects: storage is carved
// sequentially out of heap chunks, there is no per-allocation free, and every
// chunk is released together when the arena is destroyed.
class Arena : public boost::noncopyable, cpp::NonMovable {
 public:
  // Releases every chunk, walking the list from the newest chunk backwards.
  ~Arena() {
    while (LIKELY(currentChunk_ != nullptr)) {
      auto *prev = currentChunk_->prev;
      // newChunk() obtained this storage with `new std::byte[]`, so it must be
      // released through a std::byte pointer; `delete[]` on a Chunk* would not
      // match the allocation. Chunk is trivially destructible (asserted
      // below), so no destructor call is required first.
      delete[] reinterpret_cast<std::byte *>(currentChunk_);
      currentChunk_ = prev;
    }
#ifndef NDEBUG
    allocatedSize_ = 0;
#endif
    availableSize_ = 0;
    currentPtr_ = nullptr;
  }

  // Returns `alloc` bytes of uninitialized storage aligned to kAlignment.
  // Aligned addresses let the CPU read/write an object with fewer memory
  // accesses. `alloc` must be non-zero.
  void *allocateAligned(const std::size_t alloc);

#ifndef NDEBUG
  // Debug builds only: bytes handed out so far, including alignment padding.
  std::size_t allocatedSize() const {
    return allocatedSize_;
  }
#endif

  // Bytes still unused in the current chunk.
  std::size_t availableSize() const {
    return availableSize_;
  }

 private:
  static constexpr std::size_t kMinChunkSize = 4096;
  static constexpr std::size_t kMaxChunkSize = std::numeric_limits<uint16_t>::max();
  static constexpr std::size_t kAlignment = std::alignment_of<std::max_align_t>::value;

  // Header placed at the start of every chunk; links to the previous chunk.
  // The byte array pads the header to kAlignment bytes so that the payload
  // following it starts on an aligned address.
  struct Chunk {
    explicit Chunk(Chunk *p) : prev{p} {}

    union {
      Chunk *prev{nullptr};
      std::byte aligned[kAlignment];
    };
  };

  // Layout assumptions relied on by newChunk() and ~Arena().
  static_assert(sizeof(Chunk) % kAlignment == 0,
                "Chunk header must keep the payload aligned.");
  static_assert(std::is_trivially_destructible<Chunk>::value,
                "Chunk storage is released without running a destructor.");

  // Allocates a new chunk with `size` payload bytes and makes it current.
  // The current pointer is left on an aligned address (header size is a
  // multiple of kAlignment).
  void newChunk(std::size_t size) {
    DCHECK_NE(size, 0);
    std::byte *ptr = new std::byte[size + sizeof(Chunk)];
    currentChunk_ = new (ptr) Chunk(currentChunk_);
    availableSize_ = size;
    currentPtr_ = (ptr + sizeof(Chunk));
  }

  // Newest chunk; older chunks are reachable through Chunk::prev.
  Chunk *currentChunk_{nullptr};
  // Debug-only bookkeeping, compiled out of Release builds to save work.
#ifndef NDEBUG
  // Total bytes handed out (including padding) across all chunks.
  std::size_t allocatedSize_{0};
#endif
  // Bytes still unused in the current chunk.
  // Total payload of all chunks
  //   = allocatedSize_ + availableSize_ + abandoned tails of earlier chunks
  //     (space that was too small for the request that triggered a new chunk).
  std::size_t availableSize_{0};
  // Next free address inside the current chunk.
  std::byte *currentPtr_{nullptr};
};

} // namespace nebula
1 change: 1 addition & 0 deletions src/common/base/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ nebula_add_library(
Status.cpp
SanitizerOptions.cpp
SignalHandler.cpp
Arena.cpp
${gdb_debug_script}
)

Expand Down
31 changes: 18 additions & 13 deletions src/common/base/ObjectPool.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <list>
#include <type_traits>

#include "common/base/Arena.h"
#include "common/base/Logging.h"
#include "common/cpp/helpers.h"

Expand All @@ -26,26 +27,19 @@ class ObjectPool final : private boost::noncopyable, private cpp::NonMovable {
public:
ObjectPool() {}

~ObjectPool() = default;
// Destroys the pooled objects explicitly before the members are torn down.
// arena_ is declared after objects_, so it is destroyed first; objects built by
// makeAndAdd() live in storage owned by arena_ and their destructors must run
// while that storage still exists.
~ObjectPool() {
clear();
}

// Destroys every object registered in the pool while holding lock_.
// NOTE(review): only objects_ is emptied here; nothing visible in this hunk
// hands memory back to arena_ -- confirm that is intended.
void clear() {
SLGuard g(lock_);
objects_.clear();
}

template <typename T>
T *add(T *obj) {
if constexpr (std::is_base_of<Expression, T>::value) {
VLOG(3) << "New expression added into pool: " << obj->toString();
}
SLGuard g(lock_);
objects_.emplace_back(obj);
return obj;
}

template <typename T, typename... Args>
T *makeAndAdd(Args &&... args) {
return add(new T(std::forward<Args>(args)...));
void *ptr = arena_.allocateAligned(sizeof(T));
return add(new (ptr) T(std::forward<Args>(args)...));
}

bool empty() const {
Expand All @@ -58,7 +52,7 @@ class ObjectPool final : private boost::noncopyable, private cpp::NonMovable {
public:
template <typename T>
explicit OwnershipHolder(T *obj)
: obj_(obj), deleteFn_([](void *p) { delete reinterpret_cast<T *>(p); }) {}
: obj_(obj), deleteFn_([](void *p) { reinterpret_cast<T *>(p)->~T(); }) {}

~OwnershipHolder() {
deleteFn_(obj_);
Expand All @@ -69,7 +63,18 @@ class ObjectPool final : private boost::noncopyable, private cpp::NonMovable {
std::function<void(void *)> deleteFn_;
};

// Hands `obj` over to the pool and returns the same pointer.
// Expression-derived objects are additionally traced at verbose level 3.
template <typename T>
T *add(T *obj) {
  if constexpr (std::is_base_of_v<Expression, T>) {
    VLOG(3) << "New expression added into pool: " << obj->toString();
  }
  {
    // Only the container update needs the lock.
    SLGuard guard(lock_);
    objects_.emplace_back(obj);
  }
  return obj;
}

std::list<OwnershipHolder> objects_;
Arena arena_;

folly::SpinLock lock_;
};
Expand Down
88 changes: 88 additions & 0 deletions src/common/base/test/ArenaBenchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// Copyright (c) 2022 vesoft inc. All rights reserved.
//
// This source code is licensed under Apache 2.0 License.

#include <folly/Benchmark.h>
#include <folly/init/Init.h>
#include <folly/memory/Arena.h>

#include <string>
#include <type_traits>

#include "common/base/Arena.h"
#include "common/expression/LabelExpression.h"

namespace nebula {

// Concrete expression type used as the allocation payload by the benchmarks.
// NOTE(review): the ObjectPool pointer handed to the base class is a dummy
// non-null value (address 1), not a real pool -- presumably LabelExpression
// only stores it; confirm it is never dereferenced.
class TestExpr : public LabelExpression {
public:
explicit TestExpr(const std::string &name = "")
: LabelExpression(reinterpret_cast<ObjectPool *>(1), name) {}
};

// Baseline: each round heap-allocates one TestExpr with `new` and immediately
// releases it with `delete`. Runs 1000 rounds per benchmark iteration.
BENCHMARK(DefaultAllocator, iters) {
  const std::size_t total = iters * 1000;
  for (std::size_t remaining = total; remaining != 0; --remaining) {
    TestExpr *expr = new TestExpr("Label");
    delete expr;
  }
}

// nebula::Arena variant: each round placement-constructs one TestExpr in
// arena storage and runs its destructor. The storage itself stays with the
// arena until the arena goes out of scope. Runs 1000 rounds per iteration.
BENCHMARK_RELATIVE(ArenaAllocator, iters) {
  const std::size_t total = iters * 1000;
  Arena arena;
  for (std::size_t remaining = total; remaining != 0; --remaining) {
    void *storage = arena.allocateAligned(sizeof(TestExpr));
    TestExpr *expr = new (storage) TestExpr("Label");
    expr->~TestExpr();
  }
}

// folly::SysArena variant: same construct/destroy round as the nebula::Arena
// benchmark, with storage taken from a folly arena instead.
// Runs 1000 rounds per iteration.
BENCHMARK_RELATIVE(FollyArenaAllocator, iters) {
  const std::size_t total = iters * 1000;
  folly::SysArena arena;
  for (std::size_t remaining = total; remaining != 0; --remaining) {
    auto *storage = arena.allocate(sizeof(TestExpr));
    TestExpr *expr = new (storage) TestExpr("Label");
    expr->~TestExpr();
  }
}

BENCHMARK_DRAW_LINE();

} // namespace nebula

// Benchmark entry point: initializes folly with the command-line arguments,
// then runs every benchmark registered in this binary.
int main(int argc, char **argv) {
// NOTE(review): the third argument is presumably folly's "remove recognized
// flags from argv" switch -- confirm against folly/init/Init.h.
folly::init(&argc, &argv, true);

folly::runBenchmarks();
return 0;
}

// CPU info
// Brand Raw: Intel(R) Xeon(R) CPU E5-2690 v2 @ 3.00GHz
// Hz Advertised Friendly: 3.0000 GHz
// Hz Actual Friendly: 3.2942 GHz
// Hz Advertised: (3000000000, 0)
// Hz Actual: (3294220000, 0)
// Arch: X86_64
// Bits: 64
// Count: 40
// Arch String Raw: x86_64
// L1 Data Cache Size: 32768
// L1 Instruction Cache Size: 32768
// L2 Cache Size: 262144
// L2 Cache Line Size: 256
// L2 Cache Associativity: 6
// L3 Cache Size: 26214400
//
// Build in Release mode
//
// ============================================================================
// /home/shylock.huang/nebula/src/common/base/test/ArenaBenchmark.cpprelative time/iter iters/s
// ============================================================================
// DefaultAllocator 36.59us 27.33K
// ArenaAllocator 145.89% 25.08us 39.87K
// FollyArenaAllocator 138.96% 26.33us 37.98K
// ----------------------------------------------------------------------------
// ============================================================================
Loading

0 comments on commit bda2797

Please sign in to comment.