Skip to content
This repository has been archived by the owner on May 3, 2024. It is now read-only.

Commit

Permalink
EOS-25109 be/io_sched: fix op done ordering bug
Browse files Browse the repository at this point in the history
It was possible for the op passed to m0_be_io_sched_add() to become
DONE before the sched lock is taken in be_io_sched_cb(), because the parent
op could become DONE before the child op's M0_BOS_GC callback is called.
This situation led to a failed invariant, assertion failures [1], or just
spinning in an infinite loop in m0_tlist_invariant() due to a broken list [2].

The patch fixes the issue by moving the op (passed to
m0_be_io_sched_add()) to DONE only after processing of the
corresponding m0_be_io has finished.

The bug was discovered during EOS-25005 patch testing.

[1]

be-ut
  io_sched  motr[15961]:  e4b0  FATAL  [lib/assert.c:50:m0_panic]  panic: (io->bio_ext.e_start == sched->bis_pos) at be_io_sched_cb() (be/io_sched.c:132)  [git: 2.0.0-307-37-gd35074f6-dirty] /w/var/m0ut/m0trace.15961
Motr panic: (io->bio_ext.e_start == sched->bis_pos) at be_io_sched_cb() be/io_sched.c:132 (errno: 0) (last failed: none) [git: 2.0.0-307-37-gd35074f6-dirty] pid: 15961  /w/var/m0ut/m0trace.15961
...

[2]

  (gdb) bt
  #0  __lll_lock_wait () at ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:135
  #1  0x00007f432af53e1b in _L_lock_812 () from /lib64/libpthread.so.0
  #2  0x00007f432af53ce8 in __GI___pthread_mutex_lock (mutex=0x7f432d714a80 <be_ut_io_sched_scheduler+32>) at ../nptl/pthread_mutex_lock.c:79
  #3  0x00007f432b8c3520 in m0_arch_mutex_lock (mutex=0x7f432d714a80 <be_ut_io_sched_scheduler+32>) at lib/user_space/umutex.c:56
  #4  0x00007f432b8aee6f in m0_mutex_lock (mutex=0x7f432d714a80 <be_ut_io_sched_scheduler+32>) at lib/mutex.c:55
  #5  0x00007f432b76673b in m0_be_io_sched_lock (sched=0x7f432d714a60 <be_ut_io_sched_scheduler>) at be/io_sched.c:72
  #6  0x00007f432b766a75 in be_io_sched_cb (op=0x7f4288000998, param=0x7f42880008c0) at be/io_sched.c:123
  #7  0x00007f432b771bb3 in be_op_done (op=0x7f4288000998) at be/op.c:257
  #8  0x00007f432b771cb6 in m0_be_op_done (op=0x7f4288000998) at be/op.c:276
  #9  0x00007f432b765715 in be_io_finished (bio=0x7f42880008c0) at be/io.c:555
  #10 0x00007f432b7658e1 in be_io_cb (link=0x7f4288001040) at be/io.c:587
  #11 0x00007f432b8a53a0 in clink_signal (clink=0x7f4288001040) at lib/chan.c:135
  #12 0x00007f432b8a54ff in chan_signal_nr (chan=0x7f4288000f38, nr=0) at lib/chan.c:154
  #13 0x00007f432b8a55d4 in m0_chan_broadcast (chan=0x7f4288000f38) at lib/chan.c:174
  #14 0x00007f432b8a55fa in m0_chan_broadcast_lock (chan=0x7f4288000f38) at lib/chan.c:181
  #15 0x00007f432ba03cfe in ioq_complete (ioq=0x1367bc0, qev=0x7f42886a3fe0, res=8, res2=0) at stob/ioq.c:587
  #16 0x00007f432ba041d1 in stob_ioq_thread (ioq=0x1367bc0) at stob/ioq.c:669
  #17 0x00007f432b8b01b8 in m0_thread_trampoline (arg=0x1367be0) at lib/thread.c:117
  #18 0x00007f432b8c37fc in uthread_trampoline (arg=0x1367be0) at lib/user_space/uthread.c:98
  #19 0x00007f432af51e25 in start_thread (arg=0x7f42ccff9700) at pthread_create.c:308
  #20 0x00007f4329999bad in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:113

  (gdb) thread 2
  [Switching to thread 2 (Thread 0x7f428ffff700 (LWP 8730))]
  #0  0x00007f432b8b270b in m0_tlist_invariant (d=0x7f432bd94600 <sched_io_tl>, list=0x7f432d714a68 <be_ut_io_sched_scheduler+8>) at lib/tlist.c:253
  253                     if (!M0_CHECK_EX(m0_tlink_invariant(d, amb(d, scan))))
  (gdb) bt
  #0  0x00007f432b8b270b in m0_tlist_invariant (d=0x7f432bd94600 <sched_io_tl>, list=0x7f432d714a68 <be_ut_io_sched_scheduler+8>) at lib/tlist.c:253
  #1  0x00007f432b8b1f13 in m0_tlist_add (d=0x7f432bd94600 <sched_io_tl>, list=0x7f432d714a68 <be_ut_io_sched_scheduler+8>, obj=0x7f42880008c0) at lib/tlist.c:128
  #2  0x00007f432b7664c8 in sched_io_tlist_add (list=0x7f432d714a68 <be_ut_io_sched_scheduler+8>, amb=0x7f42880008c0) at be/io_sched.c:48
  #3  0x00007f432b766d88 in be_io_sched_insert (sched=0x7f432d714a60 <be_ut_io_sched_scheduler>, io=0x7f42880008c0) at be/io_sched.c:161
  #4  0x00007f432b767144 in m0_be_io_sched_add (sched=0x7f432d714a60 <be_ut_io_sched_scheduler>, io=0x7f42880008c0, ext=0x7f428fffed00, op=0x7f4288000c00) at be/io_sched.c:193
  #5  0x00007f432cf8a008 in be_ut_io_sched_thread (param=0x136a2b0) at be/ut/io_sched.c:241
  #6  0x00007f432b8b01b8 in m0_thread_trampoline (arg=0x136bad0) at lib/thread.c:117
  #7  0x00007f432b8c37fc in uthread_trampoline (arg=0x136bad0) at lib/user_space/uthread.c:98
  #8  0x00007f432af51e25 in start_thread (arg=0x7f428ffff700) at pthread_create.c:308
  #9  0x00007f4329999bad in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:113

Signed-off-by: Maksym Medvied <maksym.medvied@seagate.com>
  • Loading branch information
max-seagate committed Sep 30, 2021
1 parent fc31309 commit e878095
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 1 deletion.
2 changes: 2 additions & 0 deletions be/io.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ struct m0_be_io {
struct m0_tlink bio_sched_link;
uint64_t bio_sched_magic;
struct m0_be_op bio_sched_op;
/** The op passed to m0_be_io_sched_add() */
struct m0_be_op *bio_sched_op_user;
struct m0_ext bio_ext;
};

Expand Down
4 changes: 3 additions & 1 deletion be/io_sched.c
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ static void be_io_sched_launch_next(struct m0_be_io_sched *sched)
sched->bis_io_in_progress = true;
M0_LOG(M0_DEBUG, "sched=%p io=%p pos=%"PRId64,
sched, io, sched->bis_pos);
m0_be_op_active(io->bio_sched_op_user);
m0_be_io_launch(io, &io->bio_sched_op);
}
}
Expand Down Expand Up @@ -137,6 +138,7 @@ static void be_io_sched_cb(struct m0_be_op *op, void *param)
sched->bis_pos = io->bio_ext.e_end;
m0_be_io_sched_unlock(sched);

m0_be_op_done(io->bio_sched_op_user);
be_io_sched_launch_next_locked(sched);
}

Expand Down Expand Up @@ -201,7 +203,7 @@ M0_INTERNAL void m0_be_io_sched_add(struct m0_be_io_sched *sched,
m0_be_op_init(&io->bio_sched_op);
m0_be_op_callback_set(&io->bio_sched_op, &be_io_sched_cb,
io, M0_BOS_GC);
m0_be_op_set_add(op, &io->bio_sched_op);
io->bio_sched_op_user = op;
be_io_sched_launch_next(sched);
}

Expand Down

0 comments on commit e878095

Please sign in to comment.