Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial implementation of file-backed durable cuckoo storage engine #217

Merged
merged 2 commits into from
Mar 22, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ option(TARGET_RESPCLI "build resp-cli binary" ON)
option(HAVE_RUST "build features written in rust" OFF)
option(RUST_USE_MUSL "build rust deps against musl" OFF)
option(BUILD_AND_INSTALL_CHECK "build our own version of check and link against it" OFF)
option(USE_PMEM "build persistent memory features" OFF)

option(COVERAGE "code coverage" OFF)

Expand Down Expand Up @@ -126,7 +127,10 @@ add_subdirectory(${CCOMMON_SOURCE_DIR} ${PROJECT_BINARY_DIR}/ccommon)
include(FindPackageHandleStandardArgs)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake")

find_package(PkgConfig QUIET)

find_package(Check)

if(NOT CHECK_FOUND)
message(WARNING "Check is required to build and run tests")
endif(NOT CHECK_FOUND)
Expand All @@ -137,6 +141,15 @@ if(CHECK_FOUND)
endif(NOT CHECK_WORKING)
endif(CHECK_FOUND)

if (USE_PMEM)
if(PKG_CONFIG_FOUND)
pkg_check_modules(LIBPMEM REQUIRED libpmem>=1.0)
else()
find_package(LIBPMEM REQUIRED 1.0)
endif()
link_directories(${LIBPMEM_LIBRARY_DIRS})
endif(USE_PMEM)

find_package(Threads)

if(TARGET_CDB)
Expand Down
4 changes: 4 additions & 0 deletions deps/ccommon/include/cc_mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ extern "C" {
#define cc_munmap(_p, _s) \
_cc_munmap(_p, (size_t)(_s), __FILE__, __LINE__)

#define cc_alloc_usable_size(_p) \
_cc_alloc_usable_size(_p, __FILE__, __LINE__)

void * _cc_alloc(size_t size, const char *name, int line);
void * _cc_zalloc(size_t size, const char *name, int line);
void * _cc_calloc(size_t nmemb, size_t size, const char *name, int line);
Expand All @@ -74,6 +77,7 @@ void * _cc_realloc_move(void *ptr, size_t size, const char *name, int line);
void _cc_free(void *ptr, const char *name, int line);
void * _cc_mmap(size_t size, const char *name, int line);
int _cc_munmap(void *p, size_t size, const char *name, int line);
size_t _cc_alloc_usable_size(void *ptr, const char *name, int line);

#ifdef __cplusplus
}
Expand Down
11 changes: 11 additions & 0 deletions deps/ccommon/src/cc_mm.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
/* TODO(yao): detect OS in one place and use one variable everywhere */
#if defined(__APPLE__) && defined(__MACH__)
# define MAP_ANONYMOUS MAP_ANON
#include <malloc/malloc.h>
#define malloc_usable_size malloc_size
#else
#include <malloc.h>
#endif

void *
Expand Down Expand Up @@ -167,3 +171,10 @@ _cc_munmap(void *p, size_t size, const char *name, int line)

return status;
}

size_t
_cc_alloc_usable_size(void *ptr, const char *name, int line)
{
log_vverb("malloc_usable_size(%p) @ %s:%d", ptr, name, line);
return malloc_usable_size(ptr);
}
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ add_subdirectory(protocol ${PROJECT_BINARY_DIR}/protocol)
add_subdirectory(storage ${PROJECT_BINARY_DIR}/storage)
add_subdirectory(time ${PROJECT_BINARY_DIR}/time)
add_subdirectory(util ${PROJECT_BINARY_DIR}/util)
add_subdirectory(datapool ${PROJECT_BINARY_DIR}/datapool)

# executables
add_custom_target(service)
Expand Down
9 changes: 9 additions & 0 deletions src/datapool/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
add_library(datapool datapool.h)

if(USE_PMEM)
target_sources(datapool PRIVATE datapool_pmem.c)
target_include_directories(datapool PRIVATE ${LIBPMEM_INCLUDE_DIRS})
target_link_libraries(datapool ${LIBPMEM_LIBRARIES})
else()
target_sources(datapool PRIVATE datapool_shm.c)
endif(USE_PMEM)
12 changes: 12 additions & 0 deletions src/datapool/datapool.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#pragma once

#include <stddef.h>

struct datapool;

struct datapool *datapool_open(const char *path, size_t size,
int *fresh);
void datapool_close(struct datapool *pool);

void *datapool_addr(struct datapool *pool);
size_t datapool_size(struct datapool *pool);
227 changes: 227 additions & 0 deletions src/datapool/datapool_pmem.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
/*
* File-backed datapool.
* Retains its contents if the pool has been closed correctly.
*
*/
#include "datapool.h"

#include <cc_mm.h>
#include <cc_debug.h>

#include <inttypes.h>
#include <libpmem.h>
#include <errno.h>

#define DATAPOOL_SIGNATURE ("PELIKAN") /* 8 bytes */
#define DATAPOOL_SIGNATURE_LEN (sizeof(DATAPOOL_SIGNATURE))

/*
* Size of the data pool header.
* Big enough to fit all necessary metadata, but most of this size is left
* unused for future expansion.
*/
#define DATAPOOL_HEADER_LEN 4096
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is the header designed to be precisely 1 page? if so, what if the platform has different sized pages? would appreciate if this was documented as well as the reason behind that.

Copy link
Contributor Author

@pbalcer pbalcer Mar 22, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you expect to run Pelikan on architectures that do not support 4k pages? :)

But no, there no correctness (or otherwise) reasons for this number. It's just big enough for any future expansion.

Also, msync() doesn't have to internally align the size if we pass it a round number.

Added a comment.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

got it. we don't plan to use such architectures afaik (though we have evaluated the effect of hugepage before), but this is an open source project so ideally we would like to support a multitude of use cases if we can. 4 KB seems quite large to me but considering the amount of memory we are dealing with using pmem, this seems fine to me.

#define DATAPOOL_VERSION 1

#define DATAPOOL_FLAG_DIRTY (1 << 0)
#define DATAPOOL_VALID_FLAGS (DATAPOOL_FLAG_DIRTY)

/*
* Header at the beginning of the file, it's verified every time the pool is
* opened.
*/
struct datapool_header {
uint8_t signature[DATAPOOL_SIGNATURE_LEN];
uint64_t version;
uint64_t size;
uint64_t flags;
uint8_t unused[DATAPOOL_HEADER_LEN - 32];
};

struct datapool {
void *addr;

struct datapool_header *hdr;
void *user_addr;
size_t mapped_len;
int is_pmem;
int file_backed;
};

static void
datapool_sync_hdr(struct datapool *pool)
{
int ret = pmem_msync(pool->hdr, DATAPOOL_HEADER_LEN);
ASSERT(ret == 0);
}

static void
datapool_sync(struct datapool *pool)
{
int ret = pmem_msync(pool->addr, pool->mapped_len);
ASSERT(ret == 0);
}

static bool
datapool_valid(struct datapool *pool)
{
if (memcmp(pool->hdr->signature,
DATAPOOL_SIGNATURE, DATAPOOL_SIGNATURE_LEN) != 0) {
log_info("no signature found in datapool");
return false;
}

if (pool->hdr->version != DATAPOOL_VERSION) {
log_info("incompatible datapool version (is: %d, expecting: %d)",
pool->hdr->version, DATAPOOL_SIGNATURE);
return false;
}

if (pool->hdr->size == 0) {
log_error("datapool has 0 size");
return false;
}

if (pool->hdr->size > pool->mapped_len) {
log_error("datapool has invalid size (is: %d, expecting: %d)",
pool->mapped_len, pool->hdr->size);
return false;
}

if (pool->hdr->flags & ~DATAPOOL_VALID_FLAGS) {
log_error("datapool has invalid flags set");
return false;
}

if (pool->hdr->flags & DATAPOOL_FLAG_DIRTY) {
log_info("datapool has a valid header but is dirty");
return false;
}

return true;
}

static void
datapool_initialize(struct datapool *pool)
{
log_info("initializing fresh datapool");

/* 1. clear the header from any leftovers */
memset(pool->hdr, 0, DATAPOOL_HEADER_LEN);
datapool_sync_hdr(pool);

/* 2. fill in the data */
pool->hdr->version = DATAPOOL_VERSION;
pool->hdr->size = pool->mapped_len;
pool->hdr->flags = 0;
datapool_sync_hdr(pool);

/* 3. set the signature */
memcpy(pool->hdr->signature, DATAPOOL_SIGNATURE, DATAPOOL_SIGNATURE_LEN);
datapool_sync_hdr(pool);
}

static void
datapool_flag_set(struct datapool *pool, int flag)
{
pool->hdr->flags |= flag;
datapool_sync_hdr(pool);
}

static void
datapool_flag_clear(struct datapool *pool, int flag)
{
pool->hdr->flags &= ~flag;
datapool_sync_hdr(pool);
}

/*
* Opens, and if necessary initializes, a datapool that resides in the given
* file. If no file is provided, the pool is allocated through cc_zalloc.
*
* The the datapool to retain its contents, the datapool_close() call must
* finish successfully.
*/
struct datapool *
datapool_open(const char *path, size_t size, int *fresh)
{
struct datapool *pool = cc_alloc(sizeof(*pool));
if (pool == NULL) {
log_error("unable to create allocate memory for pmem mapping");
goto err_alloc;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wonder if using goto really makes sense here; each goto label only has one goto sending it there

Copy link
Contributor Author

@pbalcer pbalcer Mar 22, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a common error handling idiom when cleanup is needed before exiting the function. There should be only one goto per label because this allows for stacking the cleanup functions. You could say that this is poor's man RAII.

I'm fine with changing this if this isn't inline with your coding style.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cool, i'm fine with leaving this as is. it isn't explicitly out of line with our style guide, and i don't think it really has a negative impact on readability; i just haven't seen this idiom before.

}

size_t map_size = size + sizeof(struct datapool_header);

if (path == NULL) { /* fallback to DRAM if pmem is not configured */
pool->addr = cc_zalloc(map_size);
pool->mapped_len = map_size;
pool->is_pmem = 0;
pool->file_backed = 0;
} else {
pool->addr = pmem_map_file(path, map_size, PMEM_FILE_CREATE, 0600,
&pool->mapped_len, &pool->is_pmem);
pool->file_backed = 1;
}

if (pool->addr == NULL) {
log_error(path == NULL ? strerror(errno) : pmem_errormsg());
goto err_map;
}

log_info("mapped datapool %s with size %llu, is_pmem: %d",
path, pool->mapped_len, pool->is_pmem);

pool->hdr = pool->addr;
pool->user_addr = (uint8_t *)pool->addr + sizeof(struct datapool_header);

if (fresh) {
*fresh = 0;
}

if (!datapool_valid(pool)) {
if (fresh) {
*fresh = 1;
}

datapool_initialize(pool);
}

datapool_flag_set(pool, DATAPOOL_FLAG_DIRTY);

return pool;

err_map:
cc_free(pool);
err_alloc:
return NULL;
}

void
datapool_close(struct datapool *pool)
{
datapool_sync(pool);
datapool_flag_clear(pool, DATAPOOL_FLAG_DIRTY);

if (pool->file_backed) {
int ret = pmem_unmap(pool->addr, pool->mapped_len);
ASSERT(ret == 0);
} else {
cc_free(pool->addr);
}

cc_free(pool);
}

void *
datapool_addr(struct datapool *pool)
{
return pool->user_addr;
}

size_t
datapool_size(struct datapool *pool)
{
return pool->mapped_len - sizeof(struct datapool_header);
}

43 changes: 43 additions & 0 deletions src/datapool/datapool_shm.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* Anonymous shared memory backed datapool.
* Loses all its contents after closing.
*/
#include "datapool.h"

#include <cc_debug.h>
#include <cc_mm.h>

struct datapool *
datapool_open(const char *path, size_t size, int *fresh)
{
if (path != NULL) {
log_warn("attempted to open a file-based data pool without"
"pmem features enabled");
return NULL;
}

if (fresh) {
*fresh = 1;
}

return cc_zalloc(size);
}

void
datapool_close(struct datapool *pool)
{
cc_free(pool);
}

void *
datapool_addr(struct datapool *pool)
{
return pool;
}

size_t
datapool_size(struct datapool *pool)
{
return cc_alloc_usable_size(pool);
}

1 change: 1 addition & 0 deletions src/storage/cuckoo/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
add_library(cuckoo cuckoo.c)
target_link_libraries(cuckoo datapool)
Loading