Skip to content

Commit

Permalink
blk/kernel: add plugin system for devices with compression and move VDO support into plugin
Browse files Browse the repository at this point in the history

The current VDO support implementation is buried inside common/blkdev.cc
with a simple interface used by KernelDevice. It is not easily extensible
and cannot easily be used for other devices providing similar capabilities.
This patch adds a plugin system whose structure is modeled on the
erasure code plugin system and moves the VDO support code into a VDO plugin.

Signed-off-by: Martin Ohmacht <mohmacht@us.ibm.com>
  • Loading branch information
mohmacht committed Sep 28, 2022
1 parent 652bf75 commit a6658c9
Show file tree
Hide file tree
Showing 26 changed files with 868 additions and 221 deletions.
3 changes: 3 additions & 0 deletions ceph.spec.in
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ BuildRequires: libaio-devel
BuildRequires: libblkid-devel >= 2.17
BuildRequires: cryptsetup-devel
BuildRequires: libcurl-devel
BuildRequires: libcap-devel
BuildRequires: libcap-ng-devel
BuildRequires: fmt-devel >= 6.2.1
BuildRequires: pkgconfig(libudev)
Expand Down Expand Up @@ -1563,6 +1564,8 @@ rm -rf %{_vpath_builddir}
%dir %{_libdir}/ceph
%dir %{_libdir}/ceph/erasure-code
%{_libdir}/ceph/erasure-code/libec_*.so*
%dir %{_libdir}/ceph/extblkdev
%{_libdir}/ceph/extblkdev/libceph_*.so*
%dir %{_libdir}/ceph/compressor
%{_libdir}/ceph/compressor/libceph_*.so*
%{_unitdir}/ceph-crash.service
Expand Down
1 change: 1 addition & 0 deletions debian/ceph-base.install
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ usr/bin/osdmaptool
usr/bin/ceph-kvstore-tool
usr/libexec/ceph/ceph_common.sh
usr/lib/ceph/erasure-code/*
usr/lib/ceph/extblkdev/*
usr/lib/rados-classes/*
usr/sbin/ceph-create-keys
usr/share/doc/ceph/sample.ceph.conf
Expand Down
1 change: 1 addition & 0 deletions debian/control
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ Build-Depends: automake,
libcrypto++-dev <pkg.ceph.crimson>,
libcryptsetup-dev,
libcap-ng-dev,
libcap-dev,
libcunit1-dev,
libcurl4-openssl-dev,
libevent-dev,
Expand Down
5 changes: 3 additions & 2 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,7 @@ add_subdirectory(common)
add_subdirectory(crush)
add_subdirectory(msg)
add_subdirectory(arch)
add_subdirectory(extblkdev)

set(ceph_common_objs
$<TARGET_OBJECTS:common-auth-objs>
Expand All @@ -446,7 +447,7 @@ set(ceph_common_objs
$<TARGET_OBJECTS:common_mountcephfs_objs>
$<TARGET_OBJECTS:crush_objs>)
set(ceph_common_deps
json_spirit erasure_code arch crc32
json_spirit erasure_code extblkdev arch crc32
${LIB_RESOLV}
Boost::thread
Boost::system
Expand Down Expand Up @@ -678,7 +679,7 @@ set(ceph_osd_srcs
ceph_osd.cc)

add_executable(ceph-osd ${ceph_osd_srcs})
add_dependencies(ceph-osd erasure_code_plugins)
add_dependencies(ceph-osd erasure_code_plugins extblkdev_plugins)
target_link_libraries(ceph-osd osd os global-static common
${ALLOC_LIBS}
${BLKID_LIBRARIES})
Expand Down
5 changes: 3 additions & 2 deletions src/blk/BlockDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "acconfig.h"
#include "common/ceph_mutex.h"
#include "include/common_fwd.h"
#include "extblkdev/ExtBlkDevInterface.h"

#if defined(HAVE_LIBAIO) || defined(HAVE_POSIXAIO)
#include "aio/aio.h"
Expand Down Expand Up @@ -237,8 +238,8 @@ class BlockDevice {
uint64_t get_optimal_io_size() const { return optimal_io_size; }

/// hook to provide utilization of thinly-provisioned device
virtual bool get_thin_utilization(uint64_t *total, uint64_t *avail) const {
return false;
virtual int get_ebd_state(ExtBlkDevState &state) const {
return -ENOENT;
}

virtual int collect_metadata(const std::string& prefix, std::map<std::string,std::string> *pm) const = 0;
Expand Down
2 changes: 1 addition & 1 deletion src/blk/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ if(libblk_srcs)
endif()

if(HAVE_LIBAIO)
target_link_libraries(blk PUBLIC ${AIO_LIBRARIES})
target_link_libraries(blk PUBLIC ${AIO_LIBRARIES} extblkdev)
endif(HAVE_LIBAIO)

if(WITH_SPDK)
Expand Down
43 changes: 17 additions & 26 deletions src/blk/kernel/KernelDevice.cc
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,12 @@ int KernelDevice::open(const string& p)
support_discard = blkdev_buffered.support_discard();
optimal_io_size = blkdev_buffered.get_optimal_io_size();
this->devname = devname;
_detect_vdo();
// check if any extended block device plugin recognizes this device
// detect_vdo has moved into the VDO plugin
int rc = extblkdev::detect_device(cct, devname, ebd_impl);
if (rc != 0) {
dout(20) << __func__ << " no plugin volume maps to " << devname << dendl;
}
}
}

Expand Down Expand Up @@ -305,10 +310,7 @@ void KernelDevice::close()
_discard_stop();
_pre_close();

if (vdo_fd >= 0) {
VOID_TEMP_FAILURE_RETRY(::close(vdo_fd));
vdo_fd = -1;
}
extblkdev::release_device(ebd_impl);

for (int i = 0; i < WRITE_LIFE_MAX; i++) {
assert(fd_directs[i] >= 0);
Expand All @@ -335,11 +337,10 @@ int KernelDevice::collect_metadata(const string& prefix, map<string,string> *pm)
} else {
(*pm)[prefix + "type"] = "ssd";
}
if (vdo_fd >= 0) {
(*pm)[prefix + "vdo"] = "true";
uint64_t total, avail;
get_vdo_utilization(vdo_fd, &total, &avail);
(*pm)[prefix + "vdo_physical_size"] = stringify(total);
// if compression device detected, collect meta data for device
// VDO specific meta data has moved into VDO plugin
if (ebd_impl) {
ebd_impl->collect_metadata(prefix, pm);
}

{
Expand Down Expand Up @@ -407,24 +408,14 @@ int KernelDevice::collect_metadata(const string& prefix, map<string,string> *pm)
return 0;
}

void KernelDevice::_detect_vdo()
{
vdo_fd = get_vdo_stats_handle(devname.c_str(), &vdo_name);
if (vdo_fd >= 0) {
dout(1) << __func__ << " VDO volume " << vdo_name
<< " maps to " << devname << dendl;
} else {
dout(20) << __func__ << " no VDO volume maps to " << devname << dendl;
}
return;
}

bool KernelDevice::get_thin_utilization(uint64_t *total, uint64_t *avail) const
int KernelDevice::get_ebd_state(ExtBlkDevState &state) const
{
if (vdo_fd < 0) {
return false;
// use compression driver plugin to determine physical size and availability
// VDO specific get_thin_utilization has moved into VDO plugin
if (ebd_impl) {
return ebd_impl->get_state(state);
}
return get_vdo_utilization(vdo_fd, total, avail);
return -ENOENT;
}

int KernelDevice::choose_fd(bool buffered, int write_hint) const
Expand Down
7 changes: 3 additions & 4 deletions src/blk/kernel/KernelDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

#include "aio/aio.h"
#include "BlockDevice.h"
#include "extblkdev/ExtBlkDevPlugin.h"

#define RW_IO_MAX (INT_MAX & CEPH_PAGE_MASK)

Expand All @@ -35,8 +36,7 @@ class KernelDevice : public BlockDevice {
bool enable_wrt = true;
bool aio, dio;

int vdo_fd = -1; ///< fd for vdo sysfs directory
std::string vdo_name;
ExtBlkDevInterfaceRef ebd_impl; // structure for retrieving compression state from extended block device

std::string devname; ///< kernel dev name (/sys/block/$devname), if any

Expand Down Expand Up @@ -109,7 +109,6 @@ class KernelDevice : public BlockDevice {
void debug_aio_link(aio_t& aio);
void debug_aio_unlink(aio_t& aio);

void _detect_vdo();
int choose_fd(bool buffered, int write_hint) const;

ceph::unique_leakable_ptr<buffer::raw> create_custom_aligned(size_t len, IOContext* ioc) const;
Expand All @@ -130,7 +129,7 @@ class KernelDevice : public BlockDevice {
}
int get_devices(std::set<std::string> *ls) const override;

bool get_thin_utilization(uint64_t *total, uint64_t *avail) const override;
int get_ebd_state(ExtBlkDevState &state) const override;

int read(uint64_t off, uint64_t len, ceph::buffer::list *pbl,
IOContext *ioc,
Expand Down
9 changes: 9 additions & 0 deletions src/ceph_osd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "mon/MonClient.h"
#include "include/ceph_features.h"
#include "common/config.h"
#include "extblkdev/ExtBlkDevPlugin.h"

#include "mon/MonMap.h"

Expand Down Expand Up @@ -472,6 +473,14 @@ int main(int argc, const char **argv)
forker.exit(0);
}

{
int r = extblkdev::preload(g_ceph_context);
if (r < 0) {
derr << "Failed preloading extblkdev plugins, error code: " << r << dendl;
forker.exit(1);
}
}

string magic;
uuid_d cluster_fsid, osd_fsid;
ceph_release_t require_osd_release = ceph_release_t::unknown;
Expand Down
134 changes: 0 additions & 134 deletions src/common/blkdev.cc
Original file line number Diff line number Diff line change
Expand Up @@ -338,95 +338,6 @@ void get_raw_devices(const std::string& in,
}
}

int _get_vdo_stats_handle(const char *devname, std::string *vdo_name)
{
int vdo_fd = -1;

// we need to go from the raw devname (e.g., dm-4) to the VDO volume name.
// currently the best way seems to be to look at /dev/mapper/* ...
std::string expect = std::string("../") + devname; // expected symlink target
DIR *dir = ::opendir("/dev/mapper");
if (!dir) {
return -1;
}
struct dirent *de = nullptr;
while ((de = ::readdir(dir))) {
if (de->d_name[0] == '.')
continue;
char fn[4096], target[4096];
snprintf(fn, sizeof(fn), "/dev/mapper/%s", de->d_name);
int r = readlink(fn, target, sizeof(target));
if (r < 0 || r >= (int)sizeof(target))
continue;
target[r] = 0;
if (expect == target) {
snprintf(fn, sizeof(fn), "/sys/kvdo/%s/statistics", de->d_name);
vdo_fd = ::open(fn, O_RDONLY|O_CLOEXEC); //DIRECTORY);
if (vdo_fd >= 0) {
*vdo_name = de->d_name;
break;
}
}
}
closedir(dir);
return vdo_fd;
}

int get_vdo_stats_handle(const char *devname, std::string *vdo_name)
{
std::set<std::string> devs = { devname };
while (!devs.empty()) {
std::string dev = *devs.begin();
devs.erase(devs.begin());
int fd = _get_vdo_stats_handle(dev.c_str(), vdo_name);
if (fd >= 0) {
// yay, it's vdo
return fd;
}
// ok, see if there are constituent devices
if (dev.find("dm-") == 0) {
get_dm_parents(dev, &devs);
}
}
return -1;
}

int64_t get_vdo_stat(int vdo_fd, const char *property)
{
int64_t ret = 0;
int fd = ::openat(vdo_fd, property, O_RDONLY|O_CLOEXEC);
if (fd < 0) {
return 0;
}
char buf[1024];
int r = ::read(fd, buf, sizeof(buf) - 1);
if (r > 0) {
buf[r] = 0;
ret = atoll(buf);
}
TEMP_FAILURE_RETRY(::close(fd));
return ret;
}

bool get_vdo_utilization(int fd, uint64_t *total, uint64_t *avail)
{
int64_t block_size = get_vdo_stat(fd, "block_size");
int64_t physical_blocks = get_vdo_stat(fd, "physical_blocks");
int64_t overhead_blocks_used = get_vdo_stat(fd, "overhead_blocks_used");
int64_t data_blocks_used = get_vdo_stat(fd, "data_blocks_used");
if (!block_size
|| !physical_blocks
|| !overhead_blocks_used
|| !data_blocks_used) {
return false;
}
int64_t avail_blocks =
physical_blocks - overhead_blocks_used - data_blocks_used;
*total = block_size * physical_blocks;
*avail = block_size * avail_blocks;
return true;
}

std::string _decode_model_enc(const std::string& in)
{
auto v = boost::replace_all_copy(in, "\\x20", " ");
Expand Down Expand Up @@ -908,21 +819,6 @@ void get_raw_devices(const std::string& in,
{
}

int get_vdo_stats_handle(const char *devname, std::string *vdo_name)
{
return -1;
}

int64_t get_vdo_stat(int fd, const char *property)
{
return 0;
}

bool get_vdo_utilization(int fd, uint64_t *total, uint64_t *avail)
{
return false;
}

std::string get_device_id(const std::string& devname,
std::string *err)
{
Expand Down Expand Up @@ -1083,21 +979,6 @@ void get_raw_devices(const std::string& in,
{
}

int get_vdo_stats_handle(const char *devname, std::string *vdo_name)
{
return -1;
}

int64_t get_vdo_stat(int fd, const char *property)
{
return 0;
}

bool get_vdo_utilization(int fd, uint64_t *total, uint64_t *avail)
{
return false;
}

std::string get_device_id(const std::string& devname,
std::string *err)
{
Expand Down Expand Up @@ -1237,21 +1118,6 @@ void get_raw_devices(const std::string& in,
{
}

int get_vdo_stats_handle(const char *devname, std::string *vdo_name)
{
return -1;
}

int64_t get_vdo_stat(int fd, const char *property)
{
return 0;
}

bool get_vdo_utilization(int fd, uint64_t *total, uint64_t *avail)
{
return false;
}

std::string get_device_id(const std::string& devname,
std::string *err)
{
Expand Down
6 changes: 0 additions & 6 deletions src/common/blkdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,6 @@ extern int block_device_get_metrics(const std::string& devname, int timeout,
extern void get_raw_devices(const std::string& in,
std::set<std::string> *ls);

// for VDO
/// return an op fd for the sysfs stats dir, if this is a VDO device
extern int get_vdo_stats_handle(const char *devname, std::string *vdo_name);
extern int64_t get_vdo_stat(int fd, const char *property);
extern bool get_vdo_utilization(int fd, uint64_t *total, uint64_t *avail);

class BlkDev {
public:
BlkDev(int fd);
Expand Down
Loading

0 comments on commit a6658c9

Please sign in to comment.