Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add retry for instance start and stop for VPC instance #1934

Merged
merged 1 commit into from
Sep 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 107 additions & 8 deletions ibm/resource_ibm_is_instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,12 @@ func resourceIBMISInstance() *schema.Resource {
Computed: true,
Description: "The resource group name in which resource is provisioned",
},

"force_recovery_time": {
Description: "Define timeout to force the instances to start/stop in minutes.",
Type: schema.TypeInt,
Optional: true,
},
},
}
}
Expand Down Expand Up @@ -834,15 +840,21 @@ func isWaitForClassicInstanceAvailable(instanceC *vpcclassicv1.VpcClassicV1, id
// isWaitForInstanceAvailable polls the instance identified by id until it
// leaves the pending states ("retry" / isInstanceProvisioning) and reaches one
// of the target states, or until timeout elapses. It returns whatever
// StateChangeConf.WaitForState returns.
//
// When "force_recovery_time" is set on d, isRestartStartAction is started in a
// separate goroutine for the duration of the wait.
func isWaitForInstanceAvailable(instanceC *vpcv1.VpcV1, id string, timeout time.Duration, d *schema.ResourceData) (interface{}, error) {
log.Printf("Waiting for instance (%s) to be available.", id)

// Unbuffered channel shared by the refresh func and the isRestartStartAction
// goroutine started below.
communicator := make(chan interface{})

stateConf := &resource.StateChangeConf{
Pending: []string{"retry", isInstanceProvisioning},
Target: []string{isInstanceStatusRunning, "available", "failed", ""},
// NOTE(review): two Refresh entries follow; they look like the before/after
// lines of a diff hunk (only the second passes communicator) — confirm and
// keep a single entry.
Refresh: isInstanceRefreshFunc(instanceC, id, d),
Refresh: isInstanceRefreshFunc(instanceC, id, d, communicator),
Timeout: timeout,
Delay: 10 * time.Second,
MinTimeout: 10 * time.Second,
}

// GetOk reports ok only for a non-zero value, so the goroutine is started
// only when force_recovery_time was given a non-zero value.
if v, ok := d.GetOk("force_recovery_time"); ok {
forceTimeout := v.(int)
go isRestartStartAction(instanceC, id, d, forceTimeout, communicator)
}
return stateConf.WaitForState()
}

Expand All @@ -866,7 +878,7 @@ func isClassicInstanceRefreshFunc(instanceC *vpcclassicv1.VpcClassicV1, id strin
}
}

func isInstanceRefreshFunc(instanceC *vpcv1.VpcV1, id string, d *schema.ResourceData) resource.StateRefreshFunc {
func isInstanceRefreshFunc(instanceC *vpcv1.VpcV1, id string, d *schema.ResourceData, communicator chan interface{}) resource.StateRefreshFunc {
return func() (interface{}, string, error) {
getinsOptions := &vpcv1.GetInstanceOptions{
ID: &id,
Expand All @@ -877,14 +889,61 @@ func isInstanceRefreshFunc(instanceC *vpcv1.VpcV1, id string, d *schema.Resource
}
d.Set(isInstanceStatus, *instance.Status)

select {
case data := <-communicator:
return nil, "", data.(error)
default:
fmt.Println("no message sent")
}

if *instance.Status == "available" || *instance.Status == "failed" || *instance.Status == "running" {
// let know the isRestartStartAction() to stop
close(communicator)
return instance, *instance.Status, nil
}

}
return instance, isInstanceProvisioning, nil
}
}

// isRestartStartAction periodically forces an instance that is still starting
// to recover by issuing a "stop" action followed by a "start" action.
//
// It is meant to run in its own goroutine. Every forceTimeout minutes it
// requests a stop, waits up to one minute for the instance to stop, and then
// requests a start. The loop ends when:
//   - a value is received on communicator or communicator is closed, or
//   - one of the instance actions fails, in which case the error is sent on
//     communicator before returning.
//
// Parameters:
//   - instanceC:    VPC client used to issue the instance actions.
//   - id:           ID of the instance to restart.
//   - d:            resource data, passed through to isWaitForInstanceActionStop.
//   - forceTimeout: retry interval in minutes; non-positive values disable the retry.
//   - communicator: channel shared with the caller's refresh function.
func isRestartStartAction(instanceC *vpcv1.VpcV1, id string, d *schema.ResourceData, forceTimeout int, communicator chan interface{}) {
	// time.NewTicker panics on a non-positive interval, which would crash the
	// whole process from inside this goroutine.
	if forceTimeout <= 0 {
		log.Printf("Ignoring non-positive force_recovery_time (%d) for instance (%s).", forceTimeout, id)
		return
	}

	// report hands err over on communicator. The other side may close the
	// channel at any moment, and a send on a closed channel panics; in that
	// case nobody is interested in the error any more, so the panic is dropped.
	report := func(err error) {
		defer func() { _ = recover() }()
		communicator <- err
	}

	subticker := time.NewTicker(time.Duration(forceTimeout) * time.Minute)
	// Release the ticker on every exit path, including the error returns.
	defer subticker.Stop()

	for {
		select {
		case <-subticker.C:
			log.Println("Instance is still in starting state, force retry by restarting the instance.")

			actiontype := "stop"
			createinsactoptions := &vpcv1.CreateInstanceActionOptions{
				InstanceID: &id,
				Type:       &actiontype,
			}
			_, response, err := instanceC.CreateInstanceAction(createinsactoptions)
			if err != nil {
				report(fmt.Errorf("Error retrying instance action start: %s\n%s", err, response))
				return
			}

			// Best effort: give the instance up to a minute to stop. A failure
			// here is not fatal, the start action is attempted regardless.
			waitTimeout := time.Duration(1) * time.Minute
			if _, waitErr := isWaitForInstanceActionStop(instanceC, waitTimeout, id, d); waitErr != nil {
				log.Printf("Instance (%s) did not reach the stopped state before restart: %s", id, waitErr)
			}

			actiontype = "start"
			createinsactoptions = &vpcv1.CreateInstanceActionOptions{
				InstanceID: &id,
				Type:       &actiontype,
			}
			_, response, err = instanceC.CreateInstanceAction(createinsactoptions)
			if err != nil {
				report(fmt.Errorf("Error retrying instance action start: %s\n%s", err, response))
				return
			}

		case <-communicator:
			// The other side signalled (or closed the channel): stop retrying.
			return
		}
	}
}
func resourceIBMisInstanceRead(d *schema.ResourceData, meta interface{}) error {
userDetails, err := meta.(ClientSession).BluemixUserDetails()
if err != nil {
Expand Down Expand Up @@ -1836,7 +1895,7 @@ func instanceDelete(d *schema.ResourceData, meta interface{}, id string) error {
}
return fmt.Errorf("Error Creating Instance Action: %s\n%s", err, response)
}
_, err = isWaitForInstanceActionStop(instanceC, d, meta, id)
_, err = isWaitForInstanceActionStop(instanceC, d.Timeout(schema.TimeoutDelete), id, d)
if err != nil {
return err
}
Expand Down Expand Up @@ -2043,8 +2102,8 @@ func isWaitForClassicInstanceActionStop(instanceC *vpcclassicv1.VpcClassicV1, d

return stateConf.WaitForState()
}
func isWaitForInstanceActionStop(instanceC *vpcv1.VpcV1, d *schema.ResourceData, meta interface{}, id string) (interface{}, error) {

func isWaitForInstanceActionStop(instanceC *vpcv1.VpcV1, timeout time.Duration, id string, d *schema.ResourceData) (interface{}, error) {
communicator := make(chan interface{})
stateConf := &resource.StateChangeConf{
Pending: []string{isInstanceStatusRunning, isInstanceStatusPending, isInstanceActionStatusStopping},
Target: []string{isInstanceActionStatusStopped, isInstanceStatusFailed, ""},
Expand All @@ -2056,19 +2115,59 @@ func isWaitForInstanceActionStop(instanceC *vpcv1.VpcV1, d *schema.ResourceData,
if err != nil {
return nil, "", fmt.Errorf("Error Getting Instance: %s\n%s", err, response)
}
select {
case data := <-communicator:
return nil, "", data.(error)
default:
fmt.Println("no message sent")
}
if *instance.Status == isInstanceStatusFailed {
return instance, *instance.Status, fmt.Errorf("The instance %s failed to stop: %v", d.Id(), err)
// let know the isRestartStopAction() to stop
close(communicator)
return instance, *instance.Status, fmt.Errorf("The instance %s failed to stop: %v", id, err)
}
return instance, *instance.Status, nil
},
Timeout: d.Timeout(schema.TimeoutDelete),
Timeout: timeout,
Delay: 10 * time.Second,
MinTimeout: 10 * time.Second,
}

if v, ok := d.GetOk("force_recovery_time"); ok {
forceTimeout := v.(int)
go isRestartStopAction(instanceC, id, d, forceTimeout, communicator)
}

return stateConf.WaitForState()
}

// isRestartStopAction periodically re-issues the "stop" action for an instance
// that is still stopping.
//
// It is meant to run in its own goroutine. Every forceTimeout minutes it
// requests a stop again. The loop ends when:
//   - a value is received on communicator or communicator is closed, or
//   - the stop action fails, in which case the error is sent on communicator
//     before returning.
//
// Parameters:
//   - instanceC:    VPC client used to issue the instance action.
//   - id:           ID of the instance to stop.
//   - d:            resource data of the instance (not read by this function).
//   - forceTimeout: retry interval in minutes; non-positive values disable the retry.
//   - communicator: channel shared with the caller's refresh function.
func isRestartStopAction(instanceC *vpcv1.VpcV1, id string, d *schema.ResourceData, forceTimeout int, communicator chan interface{}) {
	// time.NewTicker panics on a non-positive interval, which would crash the
	// whole process from inside this goroutine.
	if forceTimeout <= 0 {
		log.Printf("Ignoring non-positive force_recovery_time (%d) for instance (%s).", forceTimeout, id)
		return
	}

	subticker := time.NewTicker(time.Duration(forceTimeout) * time.Minute)
	// Release the ticker on every exit path, including the error return.
	defer subticker.Stop()

	for {
		select {
		case <-subticker.C:
			log.Println("Instance is still in stopping state, retrying to stop with -force")
			actiontype := "stop"
			createinsactoptions := &vpcv1.CreateInstanceActionOptions{
				InstanceID: &id,
				Type:       &actiontype,
			}
			_, response, err := instanceC.CreateInstanceAction(createinsactoptions)
			if err != nil {
				// The other side may close communicator at any moment, and a
				// send on a closed channel panics; in that case nobody is
				// interested in the error any more, so the panic is dropped.
				func() {
					defer func() { _ = recover() }()
					communicator <- fmt.Errorf("Error retrying instance action stop: %s\n%s", err, response)
				}()
				return
			}

		case <-communicator:
			// The other side signalled (or closed the channel): stop retrying.
			return
		}
	}
}

func isWaitForClassicInstanceVolumeAttached(instanceC *vpcclassicv1.VpcClassicV1, d *schema.ResourceData, id, volID string) (interface{}, error) {
log.Printf("Waiting for instance volume (%s) to be attched.", id)

Expand Down
1 change: 1 addition & 0 deletions website/docs/r/is_instance.html.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ Nested `network_interfaces` block have the following structure:
* `user_data` - (Optional, string) User data to transfer to the server instance.
* `resource_group` - (Optional, Forces new resource, string) The resource group ID for this instance.
* `tags` - (Optional, array of strings) Tags associated with the instance.
* `force_recovery_time` - (Optional, int) Timeout, in minutes, after which the is_instance is forced to recover from a perpetual "starting" state during provisioning or, similarly, from a perpetual "stopping" state during deprovisioning. **Note**: the recovery action is retried each time `force_recovery_time` elapses, until the operation times out.

## Attribute Reference

Expand Down