Skip to content
This repository has been archived by the owner on May 3, 2024. It is now read-only.

Commit

Permalink
lib: fix panic at m0_locality_chores_run() on NUMA nodes (#833)
Browse files Browse the repository at this point in the history
lib: fix panic at m0_locality_chores_run() on NUMA nodes

On NUMA nodes, with numad service enabled, CPU affinity of
locality threads can be changed (especially, for compute-
intensive applications which use a lot of CPU and/or memory).
As a result, this leads to the following panic:

    Motr panic: (locality == m0_locality_here()) at m0_locality_chores_run() lib/locality.c:310 (errno: 0) (last failed: none) [git: sage-base-1.0-389-g09ff618]

    #0  0x00002b25fbcb6387 in raise () from /usr/lib64/libc.so.6
    #1  0x00002b25fbcb7a78 in abort () from /usr/lib64/libc.so.6
    #2  0x00002b25fc5e7acd in m0_arch_panic (c=c@entry=0x2b25fca74400 <__pctx.15545>, ap=ap@entry=0x2b26032c7a18) at lib/user_space/uassert.c:131
    #3  0x00002b25fc5d7e44 in m0_panic (ctx=ctx@entry=0x2b25fca74400 <__pctx.15545>) at lib/assert.c:52
    #4  0x00002b25fc5dbf6a in m0_locality_chores_run (locality=locality@entry=0x106f778) at lib/locality.c:310
    #5  0x00002b25fc5abcaa in loc_handler_thread (th=0xf5d160) at fop/fom.c:926
    #6  0x00002b25fc5de2ee in m0_thread_trampoline (arg=arg@entry=0xf5d168) at lib/thread.c:117
    #7  0x00002b25fc5e882d in uthread_trampoline (arg=0xf5d168) at lib/user_space/uthread.c:98
    #8  0x00002b25fba6bea5 in start_thread () from /usr/lib64/libpthread.so.0
    #9  0x00002b25fbd7e96d in clone () from /usr/lib64/libc.so.6

The reason is that we link our localities to the CPUs in the
very beginning, when the application is just started and Motr
is initialised. And we use the CPU id to figure out the current
locality at m0_locality_here(). Of course, when the affinity
is changed later and the thread is moved to another CPU, this
gives the wrong locality result.

Solution: stash the initial CPU id in TLS and use it at
m0_locality_here().

Kudos to Nikita Danilov for the idea.

Reviewed-by: Nikita Danilov <nikita.danilov@seagate.com>
Reviewed-by: Huang Hua <hua.huang@seagate.com>
Signed-off-by: Andriy Tkachuk <andriy.tkachuk@seagate.com>
  • Loading branch information
andriytk authored Aug 17, 2021
1 parent 46ab3c2 commit 4c27187
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 9 deletions.
8 changes: 5 additions & 3 deletions lib/linux_kernel/kthread.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,8 @@ M0_INTERNAL int m0_thread_signal(struct m0_thread *q, int sig)
return M0_ERR(-ENOSYS);
}

M0_INTERNAL int m0_thread_confine(struct m0_thread *q,
const struct m0_bitmap *processors)
M0_INTERNAL int m0_thread_arch_confine(struct m0_thread *q,
const struct m0_bitmap *processors)
{
int result = 0;
size_t idx;
Expand All @@ -207,9 +207,11 @@ M0_INTERNAL int m0_thread_confine(struct m0_thread *q,
if (!zalloc_cpumask_var(&cpuset, GFP_KERNEL))
return M0_ERR(-ENOMEM);

for (idx = 0; idx < nr_bits; ++idx)
for (idx = 0; idx < nr_bits; ++idx) {
if (m0_bitmap_get(processors, idx))
cpumask_set_cpu(idx, cpuset);
}

nr_allowed = cpumask_weight(cpuset);

if (nr_allowed == 0) {
Expand Down
12 changes: 10 additions & 2 deletions lib/locality.c
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,19 @@ M0_INTERNAL void m0_locality_fini(struct m0_locality *loc)
M0_INTERNAL struct m0_locality *m0_locality_here(void)
{
struct locality_global *glob = loc_glob();
struct m0_thread_tls *tls;

if (glob->lg_dom == NULL || m0_thread_self() == &glob->lg_ast_thread)
return &glob->lg_fallback;
else
return m0_locality_get(m0_processor_id_get());
else {
tls = m0_thread_tls();
if (tls->tls_loci != m0_processor_id_get() &&
!tls->tls_warned) {
M0_LOG(M0_WARN, "thread migrated to another CPU core");
tls->tls_warned = true;
}
return m0_locality_get(tls->tls_loci);
}
}

M0_INTERNAL struct m0_locality *m0_locality_get(uint64_t value)
Expand Down
14 changes: 14 additions & 0 deletions lib/thread.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ int m0_thread_init(struct m0_thread *q, int (*init)(void *),
* of `q'. */
q->t_tls.tls_m0_instance = m0_get();
q->t_tls.tls_self = q;
q->t_tls.tls_loci = m0_processor_id_get();

result = m0_thread_init_impl(q, q->t_namebuf);
if (result != 0)
Expand Down Expand Up @@ -136,6 +137,19 @@ M0_INTERNAL void m0_thread_shun(void)
m0_thread_arch_shun();
}

M0_INTERNAL int m0_thread_confine(struct m0_thread *q,
const struct m0_bitmap *processors)
{
int cpu_idx = m0_bitmap_ffs(processors);

M0_PRE(cpu_idx >= 0);

q->t_tls.tls_loci = cpu_idx;

return m0_thread_arch_confine(q, processors);
}


/** @} end of thread group */

/*
Expand Down
26 changes: 24 additions & 2 deletions lib/thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
# include "lib/linux_kernel/thread.h"
#endif
#include "lib/semaphore.h"
#include "lib/processor.h"

#include "lib/list.h"
#include "addb2/counter.h" /* m0_addb2_sensor */
Expand Down Expand Up @@ -63,11 +64,22 @@ struct m0_thread;
struct m0_thread_tls {
/** m0 instance this thread belong to. */
struct m0 *tls_m0_instance;
/** Platform specific part of tls. Defined in lib/PLATFORM/thread.h. */
struct m0_thread_arch_tls tls_arch;
struct m0_addb2_mach *tls_addb2_mach;
struct m0_thread *tls_self;
/**
* First cpu to which the thread was confined (or 0 if not confined).
* Used as thread's locality index in m0_locality_here().
*
* @note m0_processor_id_get() cannot be used for this purpose,
* because thread's affinity can be re-set externally by,
* say, numad service.
*/
m0_processor_nr_t tls_loci;
/** Warned about the thread migration to another CPU. */
bool tls_warned;
struct m0_addb2_sensor tls_clock;
/** Platform specific part of tls. Defined in lib/PLATFORM/thread.h. */
struct m0_thread_arch_tls tls_arch;
};

/**
Expand Down Expand Up @@ -231,6 +243,13 @@ M0_INTERNAL int m0_thread_signal(struct m0_thread *q, int sig);
The user space implementation calls pthread_setaffinity_np and the kernel
implementation modifies fields of the task_struct directly.
@note the function sets tls_loci to the 1st cpu specified at @processors
bitmap, which is used later by m0_locality_here(). That's why this
function can be called one time only for a specific thread (usually
during the process initialisation). In case there is a need to call
it several times to re-set thread's affinity, the logic of setting
tls_loci should be moved to a new API.
@see http://www.kernel.org/doc/man-pages/online/pages/man3/pthread_setaffinity_np.3.html
@see lib/processor.h
@see kthread
Expand Down Expand Up @@ -318,6 +337,9 @@ M0_INTERNAL int m0_thread_arch_adopt(struct m0_thread *thread,
struct m0 *instance, bool full);
M0_INTERNAL void m0_thread_arch_shun(void);

M0_INTERNAL int m0_thread_arch_confine(struct m0_thread *q,
const struct m0_bitmap *processors);

/** @} end of thread group */
#endif /* __MOTR_LIB_THREAD_H__ */

Expand Down
4 changes: 2 additions & 2 deletions lib/user_space/uthread.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,8 @@ M0_INTERNAL int m0_thread_signal(struct m0_thread *q, int sig)
return -pthread_kill(q->t_h.h_id, sig);
}

M0_INTERNAL int m0_thread_confine(struct m0_thread *q,
const struct m0_bitmap *processors)
M0_INTERNAL int m0_thread_arch_confine(struct m0_thread *q,
const struct m0_bitmap *processors)
{
size_t idx;
size_t nr_bits = min64u(processors->b_nr, CPU_SETSIZE);
Expand Down

0 comments on commit 4c27187

Please sign in to comment.