Skip to content

Commit

Permalink
xfs: create a big array data structure
Browse files Browse the repository at this point in the history
Create a simple 'big array' data structure for storage of fixed-size
metadata records that will be used to reconstruct a btree index.  For
repair operations, the most important operations are append, iterate,
and sort.

Earlier implementations of the big array used linked lists and suffered
from severe problems -- pinning all records in kernel memory was not a
good idea and frequently led to OOM situations; random access was very
inefficient; and record overhead for the lists was unacceptably high at
40-60%.

Therefore, the big memory array relies on the 'xfile' abstraction, which
creates a memfd file and stores the records in page cache pages.  Since
the memfd is created in tmpfs, the memory pages can be pushed out to
disk if necessary and we have a built-in usage limit of 50% of physical
memory.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Kent Overstreet <kent.overstreet@linux.dev>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
  • Loading branch information
Darrick J. Wong committed Aug 10, 2023
1 parent 014ad53 commit 3934e8e
Show file tree
Hide file tree
Showing 8 changed files with 922 additions and 1 deletion.
1 change: 1 addition & 0 deletions fs/xfs/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ config XFS_ONLINE_SCRUB
bool "XFS online metadata check support"
default n
depends on XFS_FS
depends on TMPFS && SHMEM
select XFS_DRAIN_INTENTS
help
If you say Y here you will be able to check metadata on a
Expand Down
2 changes: 2 additions & 0 deletions fs/xfs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ xfs-y += $(addprefix scrub/, \
rmap.o \
scrub.o \
symlink.o \
xfarray.o \
xfile.o \
)

xfs-$(CONFIG_XFS_RT) += scrub/rtbitmap.o
Expand Down
4 changes: 3 additions & 1 deletion fs/xfs/scrub/trace.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "scrub/scrub.h"
#include "xfs_ag.h"
#include "scrub/scrub.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"

/* Figure out which block the btree cursor was pointing to. */
static inline xfs_fsblock_t
Expand Down
121 changes: 121 additions & 0 deletions fs/xfs/scrub/trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
#include <linux/tracepoint.h>
#include "xfs_bit.h"

struct xfile;
struct xfarray;

/*
* ftrace's __print_symbolic requires that all enum values be wrapped in the
* TRACE_DEFINE_ENUM macro so that the enum value can be encoded in the ftrace
Expand Down Expand Up @@ -725,6 +728,124 @@ TRACE_EVENT(xchk_refcount_incorrect,
__entry->seen)
)

/*
 * Trace the creation of an xfile.  Records the inode number of the file
 * backing @xf and the path that file_path() reports for it, or "(unknown)"
 * if the path cannot be resolved.
 */
TRACE_EVENT(xfile_create,
	TP_PROTO(struct xfile *xf),
	TP_ARGS(xf),
	TP_STRUCT__entry(
		/*
		 * NOTE(review): dev is never assigned or printed by this
		 * event -- confirm whether it should be filled in or dropped.
		 * It is kept here so the record layout does not change.
		 */
		__field(dev_t, dev)
		__field(unsigned long, ino)
		__array(char, pathname, 256)
	),
	TP_fast_assign(
		char		pathname[257];
		char		*path;

		__entry->ino = file_inode(xf->file)->i_ino;

		/* Resolve the path into a zeroed on-stack bounce buffer. */
		memset(pathname, 0, sizeof(pathname));
		path = file_path(xf->file, pathname, sizeof(pathname) - 1);
		if (IS_ERR(path))
			path = "(unknown)";

		/*
		 * strncpy() does not promise NUL termination; termination
		 * used to depend on the bounce buffer being one byte smaller
		 * than the destination.  strscpy_pad() always terminates the
		 * destination and still zero-fills the tail, so no stale ring
		 * buffer bytes are left after the string.
		 */
		strscpy_pad(__entry->pathname, path,
			    sizeof(__entry->pathname));
	),
	TP_printk("xfino 0x%lx path '%s'",
		  __entry->ino,
		  __entry->pathname)
);

/*
 * Trace the destruction of an xfile.  Records the inode number of the backing
 * file together with the byte count and size reported by xfile_stat(); both
 * are logged as -1 if xfile_stat() fails.
 */
TRACE_EVENT(xfile_destroy,
	TP_PROTO(struct xfile *xf),
	TP_ARGS(xf),
	TP_STRUCT__entry(
		__field(unsigned long, ino)
		__field(unsigned long long, bytes)
		__field(loff_t, size)
	),
	TP_fast_assign(
		struct xfile_stat	sb;

		__entry->ino = file_inode(xf->file)->i_ino;

		if (xfile_stat(xf, &sb) == 0) {
			__entry->bytes = sb.bytes;
			__entry->size = sb.size;
		} else {
			/* No usable stat data; log all-ones sentinels. */
			__entry->bytes = -1;
			__entry->size = -1;
		}
	),
	TP_printk("xfino 0x%lx mem_bytes 0x%llx isize 0x%llx",
		  __entry->ino,
		  __entry->bytes,
		  __entry->size)
);

/*
 * Event class for xfile operations that act on a byte range.  Each event
 * records the backing file's inode number, the caller-supplied position and
 * byte count, and the byte usage and size reported by xfile_stat() (logged as
 * -1 when xfile_stat() fails).
 */
DECLARE_EVENT_CLASS(xfile_class,
	TP_PROTO(struct xfile *xf, loff_t pos, unsigned long long bytecount),
	TP_ARGS(xf, pos, bytecount),
	TP_STRUCT__entry(
		__field(unsigned long, ino)
		__field(unsigned long long, bytes_used)
		__field(loff_t, pos)
		__field(loff_t, size)
		__field(unsigned long long, bytecount)
	),
	TP_fast_assign(
		struct xfile_stat	sb;

		/* Values that come straight from the caller. */
		__entry->ino = file_inode(xf->file)->i_ino;
		__entry->pos = pos;
		__entry->bytecount = bytecount;

		/* Values that need a stat of the xfile. */
		if (xfile_stat(xf, &sb) == 0) {
			__entry->bytes_used = sb.bytes;
			__entry->size = sb.size;
		} else {
			__entry->bytes_used = -1;
			__entry->size = -1;
		}
	),
	TP_printk("xfino 0x%lx mem_bytes 0x%llx pos 0x%llx bytecount 0x%llx isize 0x%llx",
		  __entry->ino,
		  __entry->bytes_used,
		  __entry->pos,
		  __entry->bytecount,
		  __entry->size)
);
/*
 * Define a tracepoint called @name that belongs to xfile_class and takes the
 * class's (xf, pos, bytecount) arguments.
 */
#define DEFINE_XFILE_EVENT(name) \
DEFINE_EVENT(xfile_class, name, \
TP_PROTO(struct xfile *xf, loff_t pos, unsigned long long bytecount), \
TP_ARGS(xf, pos, bytecount))
/* Tracepoints instantiated from xfile_class. */
DEFINE_XFILE_EVENT(xfile_pread);
DEFINE_XFILE_EVENT(xfile_pwrite);
DEFINE_XFILE_EVENT(xfile_seek_data);

/*
 * Trace the creation of an xfarray.  Records the inode number of the file
 * behind the array's xfile, the max_nr, obj_size and obj_size_log values
 * copied from @xfa, and the capacity the caller asked for.
 */
TRACE_EVENT(xfarray_create,
	TP_PROTO(struct xfarray *xfa, unsigned long long required_capacity),
	TP_ARGS(xfa, required_capacity),
	TP_STRUCT__entry(
		__field(unsigned long, ino)
		__field(uint64_t, max_nr)
		__field(size_t, obj_size)
		__field(int, obj_size_log)
		__field(unsigned long long, required_capacity)
	),
	TP_fast_assign(
		/* Filled in the same order as the fields are declared. */
		__entry->ino = file_inode(xfa->xfile->file)->i_ino;
		__entry->max_nr = xfa->max_nr;
		__entry->obj_size = xfa->obj_size;
		__entry->obj_size_log = xfa->obj_size_log;
		__entry->required_capacity = required_capacity;
	),
	TP_printk("xfino 0x%lx max_nr %llu reqd_nr %llu objsz %zu objszlog %d",
		  __entry->ino,
		  __entry->max_nr,
		  __entry->required_capacity,
		  __entry->obj_size,
		  __entry->obj_size_log)
);

/* repair tracepoints */
#if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)

Expand Down
Loading

0 comments on commit 3934e8e

Please sign in to comment.