Skip to content

Commit

Permalink
tcmur_device: skip reporting the event if device in recovery
Browse files Browse the repository at this point in the history
If the device is in recovery, we can defer reporting the event in
the recovery when reopening the device. And if the device is stopped
or stopping we can just skip it.

Just wait for the report event to finish when recoverying the device,
because the recovery will close and then open the device during which
the private data maybe released. And it may cause use-after-free crash
in report event routine.

Signed-off-by: Xiubo Li <xiubli@redhat.com>
  • Loading branch information
lxbsz committed Sep 9, 2021
1 parent 314b56c commit 96199cb
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 3 deletions.
Binary file added kmod-devel-25-16.el8.x86_64.rpm
Binary file not shown.
14 changes: 13 additions & 1 deletion main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1044,9 +1044,15 @@ static int dev_added(struct tcmu_device *dev)
goto cleanup_format_lock;
}

ret = pthread_cond_init(&rdev->report_event_cond, NULL);
if (ret) {
ret = -ret;
goto cleanup_rdev_lock;
}

ret = setup_io_work_queue(dev);
if (ret < 0)
goto cleanup_rdev_lock;
goto cleanup_event_cond;

ret = setup_aio_tracking(rdev);
if (ret < 0)
Expand Down Expand Up @@ -1088,6 +1094,8 @@ static int dev_added(struct tcmu_device *dev)
cleanup_aio_tracking(rdev);
cleanup_io_work_queue:
cleanup_io_work_queue(dev, true);
cleanup_event_cond:
pthread_cond_destroy(&rdev->report_event_cond);
cleanup_rdev_lock:
pthread_mutex_destroy(&rdev->rdev_lock);
cleanup_format_lock:
Expand Down Expand Up @@ -1130,6 +1138,10 @@ static void dev_removed(struct tcmu_device *dev)

tcmur_destroy_work(rdev->event_work);

ret = pthread_cond_destroy(&rdev->report_event_cond);
if (ret != 0)
tcmu_err("could not cleanup report event cond %d\n", ret);

ret = pthread_mutex_destroy(&rdev->rdev_lock);
if (ret != 0)
tcmu_err("could not cleanup state lock %d\n", ret);
Expand Down
6 changes: 6 additions & 0 deletions target.c
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,12 @@ static void tgt_port_grp_recovery_work_fn(void *arg)
*/
list_for_each_safe(&tpg->devs, rdev, tmp_rdev, recovery_entry) {
list_del(&rdev->recovery_entry);

pthread_mutex_lock(&rdev->rdev_lock);
if (rdev->flags & TCMUR_DEV_FLAG_REPORTING_EVENT)
pthread_cond_wait(&rdev->report_event_cond, &rdev->rdev_lock);
pthread_mutex_unlock(&rdev->rdev_lock);

ret = __tcmu_reopen_dev(rdev->dev, -1);
if (ret) {
tcmu_dev_err(rdev->dev, "Could not reinitialize device. (err %d).\n",
Expand Down
22 changes: 20 additions & 2 deletions tcmur_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ int __tcmu_reopen_dev(struct tcmu_device *dev, int retries)
rhandler->close(dev);
}

pthread_mutex_lock(&rdev->rdev_lock);
ret = -EIO;
pthread_mutex_lock(&rdev->rdev_lock);
while (ret != 0 && !(rdev->flags & TCMUR_DEV_FLAG_STOPPING) &&
(retries < 0 || attempt <= retries)) {
pthread_mutex_unlock(&rdev->rdev_lock);
Expand Down Expand Up @@ -128,7 +128,10 @@ int tcmu_reopen_dev(struct tcmu_device *dev, int retries)
pthread_mutex_unlock(&rdev->rdev_lock);
return -EBUSY;
}

rdev->flags |= TCMUR_DEV_FLAG_IN_RECOVERY;
if (rdev->flags & TCMUR_DEV_FLAG_REPORTING_EVENT)
pthread_cond_wait(&rdev->report_event_cond, &rdev->rdev_lock);
pthread_mutex_unlock(&rdev->rdev_lock);

return __tcmu_reopen_dev(dev, retries);
Expand Down Expand Up @@ -168,10 +171,25 @@ static void __tcmu_report_event(void *data)
sleep(1);

pthread_mutex_lock(&rdev->rdev_lock);
/*
* If the device is in recovery, will skip reporting the event
* this time because the event should be report when the device
* is reopening.
*/
if (rdev->flags & (TCMUR_DEV_FLAG_IN_RECOVERY |
TCMUR_DEV_FLAG_STOPPING |
TCMUR_DEV_FLAG_STOPPED)) {
pthread_mutex_unlock(&rdev->rdev_lock);
return;
}

rdev->flags |= TCMUR_DEV_FLAG_REPORTING_EVENT;
pthread_mutex_unlock(&rdev->rdev_lock);

ret = rhandler->report_event(dev);
pthread_cond_signal(&rdev->report_event_cond);
if (ret)
tcmu_dev_err(dev, "Could not report events. Error %d.\n", ret);
pthread_mutex_unlock(&rdev->rdev_lock);
}

static void tcmu_report_event(struct tcmu_device *dev)
Expand Down
3 changes: 3 additions & 0 deletions tcmur_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#define TCMUR_DEV_FLAG_IS_OPEN (1 << 2)
#define TCMUR_DEV_FLAG_STOPPING (1 << 3)
#define TCMUR_DEV_FLAG_STOPPED (1 << 4)
#define TCMUR_DEV_FLAG_REPORTING_EVENT (1 << 5)

#define TCMUR_UA_DEV_SIZE_CHANGED 0

Expand Down Expand Up @@ -83,6 +84,8 @@ struct tcmur_device {

int cmd_time_out;

pthread_cond_t report_event_cond;

pthread_spinlock_t cmds_list_lock; /* protects cmds_list */
struct list_head cmds_list;
};
Expand Down

0 comments on commit 96199cb

Please sign in to comment.