Skip to content

Commit

Permalink
xfs: enable sorting of xfile-backed arrays
Browse files Browse the repository at this point in the history
The btree bulk loading code requires that records be provided in the
correct record sort order for the given btree type.  In general, repair
code cannot be required to collect records in order, and it is not
feasible to insert new records in the middle of an array to maintain
sort order.

Implement a sorting algorithm so that we can sort the records just prior
to bulk loading.  In principle, an xfarray could consume many gigabytes
of memory and its backing pages can be sent out to disk at any time.
This means that we cannot map the entire array into memory at once, so
we must find a way to divide the work into smaller portions (e.g. a
page) that /can/ be mapped into memory.

Quicksort seems like a reasonable fit for this purpose, since it uses a
divide and conquer strategy to keep its average runtime at O(n log n).
The solution presented here is a port of the glibc implementation, which
itself is derived from the median-of-three and tail call recursion
strategies outlined by Sedgewick.

Subsequent patches will optimize the implementation further by utilizing
the kernel's heapsort on directly-mapped memory whenever possible, and
improving the quicksort pivot selection algorithm to try to avoid O(n^2)
collapses.

Note: The sorting functionality gets its own patch because the basic big
array mechanisms were plenty for a single code patch.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Kent Overstreet <kent.overstreet@linux.dev>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
  • Loading branch information
Darrick J. Wong committed Aug 10, 2023
1 parent 3934e8e commit 232ea05
Show file tree
Hide file tree
Showing 3 changed files with 750 additions and 0 deletions.
114 changes: 114 additions & 0 deletions fs/xfs/scrub/trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

struct xfile;
struct xfarray;
struct xfarray_sortinfo;

/*
* ftrace's __print_symbolic requires that all enum values be wrapped in the
Expand Down Expand Up @@ -846,6 +847,119 @@ TRACE_EVENT(xfarray_create,
__entry->obj_size_log)
);

/*
 * Tracepoint xfarray_isort: logs the inode number of the file backing the
 * xfarray referenced by @si, together with the @lo and @hi values supplied by
 * the caller.
 *
 * NOTE(review): the name suggests this fires when an insertion sort is run
 * over a subrange of the array -- confirm against the call site.
 * NOTE(review): "elts" is printed as hi - lo; if @hi is an inclusive index
 * this undercounts by one -- confirm against the caller's convention.
 */
TRACE_EVENT(xfarray_isort,
TP_PROTO(struct xfarray_sortinfo *si, uint64_t lo, uint64_t hi),
TP_ARGS(si, lo, hi),
TP_STRUCT__entry(
__field(unsigned long, ino)
__field(unsigned long long, lo)
__field(unsigned long long, hi)
),
TP_fast_assign(
/* Identify the array by the inode of its backing xfile. */
__entry->ino = file_inode(si->array->xfile->file)->i_ino;
__entry->lo = lo;
__entry->hi = hi;
),
TP_printk("xfino 0x%lx lo %llu hi %llu elts %llu",
__entry->ino,
__entry->lo,
__entry->hi,
/* Element count is derived at print time rather than stored. */
__entry->hi - __entry->lo)
);

/*
 * Tracepoint xfarray_qsort: logs the inode number of the file backing the
 * xfarray referenced by @si, the @lo and @hi values supplied by the caller,
 * and the sortinfo's stack_depth and max_stack_depth at the time of the
 * event.
 *
 * NOTE(review): the name suggests this fires for each quicksort partition
 * being processed -- confirm against the call site.
 * NOTE(review): "elts" is printed as hi - lo; if @hi is an inclusive index
 * this undercounts by one -- confirm against the caller's convention.
 */
TRACE_EVENT(xfarray_qsort,
TP_PROTO(struct xfarray_sortinfo *si, uint64_t lo, uint64_t hi),
TP_ARGS(si, lo, hi),
TP_STRUCT__entry(
__field(unsigned long, ino)
__field(unsigned long long, lo)
__field(unsigned long long, hi)
__field(int, stack_depth)
__field(int, max_stack_depth)
),
TP_fast_assign(
/* Identify the array by the inode of its backing xfile. */
__entry->ino = file_inode(si->array->xfile->file)->i_ino;
__entry->lo = lo;
__entry->hi = hi;
__entry->stack_depth = si->stack_depth;
__entry->max_stack_depth = si->max_stack_depth;
),
/* "stack %d/%d" is printed as stack_depth/max_stack_depth. */
TP_printk("xfino 0x%lx lo %llu hi %llu elts %llu stack %d/%d",
__entry->ino,
__entry->lo,
__entry->hi,
/* Element count is derived at print time rather than stored. */
__entry->hi - __entry->lo,
__entry->stack_depth,
__entry->max_stack_depth)
);

/*
 * Tracepoint xfarray_sort: logs the parameters of the array referenced by
 * @si: the inode number of its backing file, the array's nr and obj_size
 * fields, the sortinfo's max_stack_depth, and the caller-supplied @bytes.
 *
 * NOTE(review): the meaning of @bytes is defined by the caller and is not
 * visible here -- presumably a size related to the sort's working memory;
 * confirm against the call site.
 */
TRACE_EVENT(xfarray_sort,
TP_PROTO(struct xfarray_sortinfo *si, size_t bytes),
TP_ARGS(si, bytes),
TP_STRUCT__entry(
__field(unsigned long, ino)
__field(unsigned long long, nr)
__field(size_t, obj_size)
__field(size_t, bytes)
__field(unsigned int, max_stack_depth)
),
TP_fast_assign(
__entry->nr = si->array->nr;
__entry->obj_size = si->array->obj_size;
/* Identify the array by the inode of its backing xfile. */
__entry->ino = file_inode(si->array->xfile->file)->i_ino;
__entry->bytes = bytes;
__entry->max_stack_depth = si->max_stack_depth;
),
TP_printk("xfino 0x%lx nr %llu objsz %zu stack %u bytes %zu",
__entry->ino,
__entry->nr,
__entry->obj_size,
__entry->max_stack_depth,
__entry->bytes)
);

/*
 * Tracepoint xfarray_sort_stats: logs the inode number of the file backing
 * the xfarray referenced by @si, the sortinfo's max_stack_used and
 * max_stack_depth, and the caller-supplied @error code.
 *
 * When DEBUG is defined the event additionally records the sortinfo's loads,
 * stores and compares counters.  The record layout, the format string and the
 * argument list are all conditional on DEBUG and must be kept in step with
 * one another.
 */
TRACE_EVENT(xfarray_sort_stats,
TP_PROTO(struct xfarray_sortinfo *si, int error),
TP_ARGS(si, error),
TP_STRUCT__entry(
__field(unsigned long, ino)
#ifdef DEBUG
__field(unsigned long long, loads)
__field(unsigned long long, stores)
__field(unsigned long long, compares)
#endif
__field(unsigned int, max_stack_depth)
__field(unsigned int, max_stack_used)
__field(int, error)
),
TP_fast_assign(
/* Identify the array by the inode of its backing xfile. */
__entry->ino = file_inode(si->array->xfile->file)->i_ino;
#ifdef DEBUG
__entry->loads = si->loads;
__entry->stores = si->stores;
__entry->compares = si->compares;
#endif
__entry->max_stack_depth = si->max_stack_depth;
__entry->max_stack_used = si->max_stack_used;
__entry->error = error;
),
TP_printk(
#ifdef DEBUG
"xfino 0x%lx loads %llu stores %llu compares %llu stack_depth %u/%u error %d",
#else
"xfino 0x%lx stack_depth %u/%u error %d",
#endif
__entry->ino,
#ifdef DEBUG
__entry->loads,
__entry->stores,
__entry->compares,
#endif
/* "stack_depth %u/%u" is printed as max_stack_used/max_stack_depth. */
__entry->max_stack_used,
__entry->max_stack_depth,
__entry->error)
);

/* repair tracepoints */
#if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)

Expand Down
Loading

0 comments on commit 232ea05

Please sign in to comment.