/*
 * Patch overview (reviewer notes; everything below the first diff header
 * is unchanged).
 *
 * Files touched: libtcmu.c, main.c, tcmur_device.c, tcmur_device.h.
 *
 * libtcmu.c, open_devices():
 *   When add_device() returns a negative value the loop no longer
 *   continues with the next entry. It frees dev_name and jumps to a new
 *   "cleanup" label, which calls close_devices(ctx) and returns -1.
 *   NOTE(review): the new label is placed after the existing
 *   "free(dirent_list); return num_good_devs;" lines, so the error path
 *   appears to bypass that free. Confirm whether dirent_list and its
 *   entries are leaked on this path.
 *
 * main.c, dev_added():
 *   The ALUA group sync (list_head_init, tcmu_get_alua_grps,
 *   tcmu_release_alua_grps) is removed from its position right after
 *   rhandler->open() and re-added after tcmu_set_dev_unmap_gran_align(),
 *   just before the cmdproc thread is created. Between the get and the
 *   release it now calls tcmur_update_lock_state(); a non-zero result
 *   jumps to cleanup_lock_cond.
 *   NOTE(review): what cleanup_lock_cond unwinds is not visible in this
 *   patch. Confirm it is the right target now that the handler has been
 *   opened and TCMUR_DEV_FLAG_IS_OPEN has been set.
 *
 * tcmur_device.c, tcmu_get_lock_tag():
 *   The early return taken when failover_type is not
 *   TMCUR_DEV_FAILOVER_EXPLICIT now returns TCMU_STS_OK instead of a
 *   literal 0.
 *
 * tcmur_device.c, tcmur_update_lock_state() (new):
 *   Returns 0 when tcmu_get_enabled_port() finds no port, when
 *   tcmu_get_lock_tag() reports TCMU_STS_NO_LOCK_HOLDERS, or when the tag
 *   differs from port->grp->id. When the tag matches it logs
 *   "Detected stale lock info" and calls
 *   tcmu_acquire_dev_lock(dev, true, tag). Returns -EIO, after a warning,
 *   when the tag lookup fails with any other status or when the lock
 *   cannot be re-acquired.
 *   NOTE(review): the early TCMU_STS_OK return in tcmu_get_lock_tag()
 *   shown in this patch does not visibly store anything through the tag
 *   pointer, so the local "tag" here may be compared while still
 *   uninitialized on that path. Confirm against the full function.
 *
 * tcmur_device.h:
 *   Adds the prototype for tcmur_update_lock_state().
 */
diff --git a/libtcmu.c b/libtcmu.c index 084a8f5..d6f0575 100644 --- a/libtcmu.c +++ b/libtcmu.c @@ -711,8 +711,8 @@ static int open_devices(struct tcmulib_context *ctx) continue; if (add_device(ctx, dirent_list[i]->d_name, dev_name, true) < 0) { - free (dev_name); - continue; + free(dev_name); + goto cleanup; } free(dev_name); @@ -724,6 +724,10 @@ static int open_devices(struct tcmulib_context *ctx) free(dirent_list); return num_good_devs; + +cleanup: + close_devices(ctx); + return -1; } static void release_resources(struct tcmulib_context *ctx) diff --git a/main.c b/main.c index f2e65bf..1ec34fa 100644 --- a/main.c +++ b/main.c @@ -788,13 +788,6 @@ static int dev_added(struct tcmu_device *dev) ret = rhandler->open(dev, false); if (ret) goto cleanup_aio_tracking; - /* - * On the initial creation ALUA will probably not yet have been setup, - * but for reopens it will be so we need to sync our failover state. - */ - list_head_init(&group_list); - tcmu_get_alua_grps(dev, &group_list); - tcmu_release_alua_grps(&group_list); rdev->flags |= TCMUR_DEV_FLAG_IS_OPEN; @@ -810,6 +803,19 @@ static int dev_added(struct tcmu_device *dev) tcmu_set_dev_opt_unmap_gran(dev, max_xfer_length); tcmu_set_dev_unmap_gran_align(dev, 0); + /* + * On the initial creation ALUA will probably not yet have been setup, + * but for reopens it will be so we need to sync our failover state. 
+ */ + list_head_init(&group_list); + tcmu_get_alua_grps(dev, &group_list); + + ret = tcmur_update_lock_state(dev, &group_list); + + tcmu_release_alua_grps(&group_list); + if (ret) + goto cleanup_lock_cond; + ret = pthread_create(&rdev->cmdproc_thread, NULL, tcmur_cmdproc_thread, dev); if (ret < 0) diff --git a/tcmur_device.c b/tcmur_device.c index efbbc75..d924f0c 100644 --- a/tcmur_device.c +++ b/tcmur_device.c @@ -265,7 +265,7 @@ int tcmu_get_lock_tag(struct tcmu_device *dev, uint16_t *tag) int retry = 0, ret; if (rdev->failover_type != TMCUR_DEV_FAILOVER_EXPLICIT) - return 0; + return TCMU_STS_OK; retry: ret = rhandler->get_lock_tag(dev, tag); @@ -387,3 +387,52 @@ done: return ret; } + +int tcmur_update_lock_state(struct tcmu_device *dev, + struct list_head *group_list) +{ + struct tgt_port *port; + uint16_t tag; + int ret; + + port = tcmu_get_enabled_port(group_list); + if (!port) { + /* + * TODO: + * + * If the user completely cleared the target from the kernel + * and is restarting from scratch then there will be no ports + * setup yet. Most likely this is a call from systemd restarting + * just runner after a crash so just handle that for now. + */ + return 0; + } + + ret = tcmu_get_lock_tag(dev, &tag); + if (ret == TCMU_STS_NO_LOCK_HOLDERS) + return 0; + + if (ret != TCMU_STS_OK) { + tcmu_dev_warn(dev, "Could not check lock state after an unclean tcmu-runner shutdown. tcmu-runner might need to be restarted. Error %d.\n", + ret); + return -EIO; + } + + if (tag != port->grp->id) + return 0; + + tcmu_dev_info(dev, "Detected stale lock info. Updating state.\n"); + /* + * The tag still points to us, so grab the lock. If another + * node is in the process of grabbing the lock we will race, but + * the initiator will figure things out. + */ + ret = tcmu_acquire_dev_lock(dev, true, tag); + if (ret != TCMU_STS_OK) { + tcmu_dev_warn(dev, "Could not re-acquire lock after an unclean tcmu-runner shutdown. tcmu-runner might need to be restarted. 
Error %d.\n", + ret); + return -EIO; + } + + return 0; +} diff --git a/tcmur_device.h b/tcmur_device.h index 128a8ba..134d00f 100644 --- a/tcmur_device.h +++ b/tcmur_device.h @@ -83,5 +83,7 @@ int tcmu_reopen_dev(struct tcmu_device *dev, bool in_lock_thread, int retries); int tcmu_acquire_dev_lock(struct tcmu_device *dev, bool is_sync, uint16_t tag); void tcmu_release_dev_lock(struct tcmu_device *dev); int tcmu_get_lock_tag(struct tcmu_device *dev, uint16_t *tag); +int tcmur_update_lock_state(struct tcmu_device *dev, + struct list_head *group_list); #endif