Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add retries to all resources and datasources #286

Merged
merged 11 commits into from
Oct 23, 2024
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ go 1.21
toolchain go1.22.1

require (
github.com/avast/retry-go/v4 v4.6.0
github.com/go-test/deep v1.1.1
github.com/google/uuid v1.6.0
github.com/hashicorp/go-retryablehttp v0.7.7
github.com/hashicorp/terraform-plugin-docs v0.19.4
github.com/hashicorp/terraform-plugin-framework v1.11.0
github.com/hashicorp/terraform-plugin-framework-jsontypes v0.1.0
Expand Down Expand Up @@ -41,7 +41,6 @@ require (
github.com/hashicorp/go-hclog v1.6.3 // indirect
github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/hashicorp/go-plugin v1.6.0 // indirect
github.com/hashicorp/go-retryablehttp v0.7.7 // indirect
github.com/hashicorp/go-uuid v1.0.3 // indirect
github.com/hashicorp/go-version v1.7.0 // indirect
github.com/hashicorp/hc-install v0.8.0 // indirect
Expand All @@ -67,6 +66,7 @@ require (
github.com/posener/complete v1.2.3 // indirect
github.com/shopspring/decimal v1.3.1 // indirect
github.com/spf13/cast v1.5.0 // indirect
github.com/stretchr/testify v1.9.0 // indirect
github.com/vmihailenco/msgpack v4.0.4+incompatible // indirect
github.com/vmihailenco/msgpack/v5 v5.4.1 // indirect
github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect
Expand Down
2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ github.com/apparentlymart/go-textseg/v15 v15.0.0 h1:uYvfpb3DyLSCGWnctWKGj857c6ew
github.com/apparentlymart/go-textseg/v15 v15.0.0/go.mod h1:K8XmNZdhEBkdlyDdvbmmsvpAG721bKi0joRfFdHIWJ4=
github.com/armon/go-radix v1.0.0 h1:F4z6KzEeeQIMeLFa97iZU6vupzoecKdU5TX24SNppXI=
github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
github.com/avast/retry-go/v4 v4.6.0 h1:K9xNA+KeB8HHc2aWFuLb25Offp+0iVRXEvFx8IinRJA=
github.com/avast/retry-go/v4 v4.6.0/go.mod h1:gvWlPhBVsvBbLkVGDg/KwvBv0bEkCOLRRSHKIr2PyOE=
github.com/bgentry/speakeasy v0.1.0 h1:ByYyxL9InA1OWqxJqqp2A5pYHUrCiAL6K3J+LKSsQkY=
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
github.com/bmatcuk/doublestar/v4 v4.6.1 h1:FH9SifrbvJhnlQpztAx++wlkk70QBf0iBWDwNy7PA4I=
Expand Down
65 changes: 44 additions & 21 deletions internal/client/client.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package client

import (
"context"
"errors"
"fmt"
"net/http"
Expand All @@ -10,14 +11,38 @@ import (
"github.com/google/uuid"

"github.com/prefecthq/terraform-provider-prefect/internal/api"

retryablehttp "github.com/hashicorp/go-retryablehttp"
)

var _ = api.PrefectClient(&Client{})

// New creates and returns new client instance.
func New(opts ...Option) (*Client, error) {
// Uses the retryablehttp package for built-in retries
// with exponential backoff.
//
// Some notable defaults from that package include:
// - max retries: 4
// - retry wait minimum seconds: 1
// - retry wait maximum seconds: 30
//
// All defaults are defined in
// https://github.com/hashicorp/go-retryablehttp/blob/main/client.go#L48-L51.
retryableClient := retryablehttp.NewClient()

// By default, retryablehttp will only retry requests if there was some kind
// of transient server or networking error. We can be more specific with this
// by providing a custom function for determining whether or not to retry.
retryableClient.CheckRetry = checkRetryPolicy

// Finally, convert the retryablehttp client to a standard http client.
// This allows us to retain the `http.Client` interface, and avoid specifying
// the `retryablehttp.Client` interface in our client methods.
httpClient := retryableClient.StandardClient()
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This part is pretty sweet. It saves a lot of lines changed - see f86254b.


client := &Client{
hc: http.DefaultClient,
hc: httpClient,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the main change here - we're replacing the default HTTP client with our retry-able HTTP client.

}

var errs []error
Expand All @@ -36,26 +61,6 @@ func New(opts ...Option) (*Client, error) {
return client, nil
}

// MustNew returns a new client or panics if an error occurred.
func MustNew(opts ...Option) *Client {
client, err := New(opts...)
if err != nil {
panic(fmt.Sprintf("error occurred during construction: %s", err))
}

return client
}

// WithClient configures the underlying http.Client used to send
// requests.
func WithClient(httpClient *http.Client) Option {
return func(client *Client) error {
client.hc = httpClient

return nil
}
}

Comment on lines -39 to -58
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These were unused.

// WithEndpoint configures the client to communicate with a self-hosted
// Prefect server or Prefect Cloud.
func WithEndpoint(endpoint string) Option {
Expand Down Expand Up @@ -97,3 +102,21 @@ func WithDefaults(accountID uuid.UUID, workspaceID uuid.UUID) Option {
return nil
}
}

func checkRetryPolicy(ctx context.Context, resp *http.Response, err error) (bool, error) {
// If the response is a 409 (StatusConflict), that means the request
// eventually succeeded and we don't need to make the request again.
if resp.StatusCode == http.StatusConflict {
return false, nil
}

// If the response is a 404 (NotFound), try again. This is particularly
// relevant for block-related objects that are created asynchronously.
if resp.StatusCode == http.StatusNotFound {
return true, err
}

// Fall back to the default retry policy for any other status codes.
//nolint:wrapcheck // we've extended this method, no need to wrap error
return retryablehttp.DefaultRetryPolicy(ctx, resp, err)
}
Comment on lines +105 to +122
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here we can be specific about what warrants a retry. This is cool because we knew in the past we didn't need to retry 409s, but the retry implementation we had at the time didn't really let us achieve that.

44 changes: 11 additions & 33 deletions internal/provider/resources/block.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"fmt"
"strings"

"github.com/avast/retry-go/v4"
"github.com/google/uuid"
"github.com/hashicorp/terraform-plugin-framework-jsontypes/jsontypes"
"github.com/hashicorp/terraform-plugin-framework/diag"
Expand All @@ -19,7 +18,6 @@ import (
"github.com/prefecthq/terraform-provider-prefect/internal/api"
"github.com/prefecthq/terraform-provider-prefect/internal/provider/customtypes"
"github.com/prefecthq/terraform-provider-prefect/internal/provider/helpers"
"github.com/prefecthq/terraform-provider-prefect/internal/utils"
)

type BlockResource struct {
Expand Down Expand Up @@ -165,59 +163,39 @@ func (r *BlockResource) Create(ctx context.Context, req resource.CreateRequest,

return
}

blockSchemaClient, err := r.client.BlockSchemas(plan.AccountID.ValueUUID(), plan.WorkspaceID.ValueUUID())
if err != nil {
resp.Diagnostics.Append(helpers.CreateClientErrorDiagnostic("Block Schema", err))

return
}

blockDocumentClient, err := r.client.BlockDocuments(plan.AccountID.ValueUUID(), plan.WorkspaceID.ValueUUID())
if err != nil {
resp.Diagnostics.Append(helpers.CreateClientErrorDiagnostic("Block Document", err))

return
}

// NOTE: here, we add retries for BlockType and BlockSchema fetches.
// BlockTypes / BlockSchemas are tied to a particular Workspace, and
// they are created asynchronously after a new Workspace request resolves.
// This means that if a `prefect_block` is created in the same plan as a
// new `prefect_workspace`, there is a potential race condition that could occur if the
// Workspace is created + resolves, but the BlockTypes / BlockSchemas have yet to have
// been created - this has been observed to happen even with a depends_on relationship defined.
// To eliminate spurious create failures, test flakiness, and general user confusion,
// we'll add a retry for these specific operations.
blockType, err := retry.DoWithData(
func() (*api.BlockType, error) {
return blockTypeClient.GetBySlug(ctx, plan.TypeSlug.ValueString())
},
utils.DefaultRetryOptions...,
)
blockType, err := blockTypeClient.GetBySlug(ctx, plan.TypeSlug.ValueString())
if err != nil {
resp.Diagnostics.Append(helpers.ResourceClientErrorDiagnostic("Block Type", "get_by_slug", err))

return
}

blockSchemas, err := retry.DoWithData(
func() ([]*api.BlockSchema, error) {
blockSchemas, err := blockSchemaClient.List(ctx, []uuid.UUID{blockType.ID})
if err != nil {
return nil, fmt.Errorf("http request to fetch block schemas failed: %w", err)
}
blockSchemas, err := blockSchemaClient.List(ctx, []uuid.UUID{blockType.ID})
if err != nil {
resp.Diagnostics.Append(helpers.ResourceClientErrorDiagnostic("Block Schema", "list", err))

if len(blockSchemas) == 0 {
return nil, fmt.Errorf("no block schemas found for %s block type slug", plan.TypeSlug.ValueString())
}
return
}

return blockSchemas, nil
},
utils.DefaultRetryOptions...,
)
if err != nil {
if len(blockSchemas) == 0 {
resp.Diagnostics.AddError(
"Failed to fetch block schemas",
fmt.Sprintf("Failed to fetch block schemas for %s block type slug due to: %s", plan.TypeSlug.ValueString(), err.Error()),
"No block schemas found",
fmt.Sprintf("No block schemas found for %s block type slug", plan.TypeSlug.ValueString()),
)

return
Expand Down
18 changes: 5 additions & 13 deletions internal/provider/resources/variable.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"context"
"strings"

"github.com/avast/retry-go/v4"
"github.com/google/uuid"
"github.com/hashicorp/terraform-plugin-framework/attr"
"github.com/hashicorp/terraform-plugin-framework/diag"
Expand All @@ -20,7 +19,6 @@ import (
"github.com/prefecthq/terraform-provider-prefect/internal/api"
"github.com/prefecthq/terraform-provider-prefect/internal/provider/customtypes"
"github.com/prefecthq/terraform-provider-prefect/internal/provider/helpers"
"github.com/prefecthq/terraform-provider-prefect/internal/utils"
)

var (
Expand Down Expand Up @@ -177,17 +175,11 @@ func (r *VariableResource) Create(ctx context.Context, req resource.CreateReques
return
}

variable, err := retry.DoWithData(
func() (*api.Variable, error) {
return client.Create(ctx, api.VariableCreate{
Name: plan.Name.ValueString(),
Value: plan.Value.ValueString(),
Tags: tags,
})
},
utils.DefaultRetryOptions...,
)

variable, err := client.Create(ctx, api.VariableCreate{
Name: plan.Name.ValueString(),
Value: plan.Value.ValueString(),
Tags: tags,
})
if err != nil {
resp.Diagnostics.Append(helpers.ResourceClientErrorDiagnostic("Variable", "create", err))

Expand Down
24 changes: 6 additions & 18 deletions internal/provider/resources/workspace.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import (
"fmt"
"strings"

"github.com/avast/retry-go/v4"
"github.com/google/uuid"
"github.com/hashicorp/terraform-plugin-framework/diag"
"github.com/hashicorp/terraform-plugin-framework/path"
Expand All @@ -18,7 +17,6 @@ import (
"github.com/prefecthq/terraform-provider-prefect/internal/api"
"github.com/prefecthq/terraform-provider-prefect/internal/provider/customtypes"
"github.com/prefecthq/terraform-provider-prefect/internal/provider/helpers"
"github.com/prefecthq/terraform-provider-prefect/internal/utils"
)

var (
Expand Down Expand Up @@ -156,24 +154,14 @@ func (r *WorkspaceResource) Create(ctx context.Context, req resource.CreateReque
return
}

// In certain scenarios, such as in tests when multiple Workspaces are created at the same time
// for test isolation, Prefect will return a "503 Service Unavailable" error.
// To mitigate this, we will retry the Workspace creation.
// See https://github.com/PrefectHQ/terraform-provider-prefect/issues/241 for more information.
workspace, err := retry.DoWithData(
func() (*api.Workspace, error) {
return client.Create(
ctx,
api.WorkspaceCreate{
Name: plan.Name.ValueString(),
Handle: plan.Handle.ValueString(),
Description: plan.Description.ValueStringPointer(),
},
)
workspace, err := client.Create(
ctx,
api.WorkspaceCreate{
Name: plan.Name.ValueString(),
Handle: plan.Handle.ValueString(),
Description: plan.Description.ValueStringPointer(),
},
utils.DefaultRetryOptions...,
)

if err != nil {
resp.Diagnostics.Append(helpers.ResourceClientErrorDiagnostic("Workspace", "create", err))

Expand Down
15 changes: 0 additions & 15 deletions internal/utils/retry.go

This file was deleted.

Loading