From 7d50a8cca8de3dff58c086135c3ecb31240a5619 Mon Sep 17 00:00:00 2001 From: Dustin Popp Date: Tue, 7 May 2024 15:00:25 -0500 Subject: [PATCH] fix(errors): flatten sdk problem chains to reduce hash complexity Previously, any SDKProblem instance could be used as the "caused by" for another SDKProblem instance and its ID would become a part of the hash. This meant that problems created between service SDKs and the Go SDK Core would have a lot of complexity - the error scenarios tracked would be unnecessarily granular and numerous. This change updates the system to maintain one problem scenario per *context* (where a context is either HTTP, SDK, or Terraform) rather than per every component within a given context. It also provides for keeping knowledge of problem scenarios originating in the core for easier debugging and easier attaching of HTTP errors to service SDK problems. Signed-off-by: Dustin Popp --- core/base_service.go | 2 +- core/http_problem.go | 28 ++++++++++--- core/http_problem_test.go | 30 ++++++++++++++ core/sdk_problem.go | 59 +++++++++++++++++++++++++- core/sdk_problem_test.go | 87 ++++++++++++++++++++++++++++++++++++++- core/sdk_problem_utils.go | 16 +++++++ 6 files changed, 212 insertions(+), 10 deletions(-) diff --git a/core/base_service.go b/core/base_service.go index 1a6fe26..6a9f85a 100644 --- a/core/base_service.go +++ b/core/base_service.go @@ -389,7 +389,7 @@ func (service *BaseService) Request(req *http.Request, result interface{}) (deta var sdkErr *SDKProblem if errors.As(authenticateError, &authErr) { detailedResponse = authErr.Response - err = SDKErrorf(authErr.HTTPProblem, fmt.Sprintf(ERRORMSG_AUTHENTICATE_ERROR, authErr.Error()), "auth-failed", getComponentInfo()) + err = SDKErrorf(authErr.HTTPProblem, fmt.Sprintf(ERRORMSG_AUTHENTICATE_ERROR, authErr.Error()), "auth-request-failed", getComponentInfo()) } else if errors.As(authenticateError, &sdkErr) { sdkErr := RepurposeSDKProblem(authenticateError, "auth-failed") // For compatibility. diff --git a/core/http_problem.go b/core/http_problem.go index 338a554..c6e2ac7 100644 --- a/core/http_problem.go +++ b/core/http_problem.go @@ -150,16 +150,32 @@ func EnrichHTTPProblem(err error, operationID string, component *ProblemComponen // If the problem originated from an HTTP error response, populate the // HTTPProblem instance with details from the SDK that weren't available // in the core at problem creation time. - var httpProp *HTTPProblem - if errors.As(err, &httpProp) { - enrichHTTPProblem(httpProp, operationID, component) + var httpProb *HTTPProblem + + // In the case of an SDKProblem instance originating in the core, + // it will not track an HTTPProblem instance in its "caused by" + // chain, but we still want to be able to enrich it. It will be + // stored in the private "httpProblem" field. + var sdkProb *SDKProblem + + if errors.As(err, &httpProb) { + enrichHTTPProblem(httpProb, operationID, component) + } else if errors.As(err, &sdkProb) && sdkProb.httpProblem != nil { + enrichHTTPProblem(sdkProb.httpProblem, operationID, component) } } // enrichHTTPProblem takes an HTTPProblem instance alongside information about the request // and adds the extra info to the instance. It also loosely deserializes the response // in order to set additional information, like the error code. -func enrichHTTPProblem(httpProp *HTTPProblem, operationID string, component *ProblemComponent) { - httpProp.Component = component - httpProp.OperationID = operationID +func enrichHTTPProblem(httpProb *HTTPProblem, operationID string, component *ProblemComponent) { + // If this problem is already populated with service-level information, + // we should not enrich it any further. Most likely, this is an authentication + // error passed from the core to the SDK. + if httpProb.Component.Name != "" { + return + } + + httpProb.Component = component + httpProb.OperationID = operationID } diff --git a/core/http_problem_test.go b/core/http_problem_test.go index 8ffe016..767b33c 100644 --- a/core/http_problem_test.go +++ b/core/http_problem_test.go @@ -274,6 +274,23 @@ func TestPublicEnrichHTTPProblemWithinSDKProblem(t *testing.T) { assert.Equal(t, "delete_resource", httpProb.OperationID) } +func TestPublicEnrichHTTPProblemWithinCoreProblem(t *testing.T) { + httpProb := httpErrorf("Bad request", &DetailedResponse{}) + assert.Empty(t, httpProb.Component) + assert.Empty(t, httpProb.OperationID) + + sdkProb := SDKErrorf(httpProb, "Wrong!", "disc", getComponentInfo()) + assert.Nil(t, sdkProb.causedBy) + assert.NotNil(t, sdkProb.httpProblem) + + EnrichHTTPProblem(sdkProb, "delete_resource", NewProblemComponent("test", "v2")) + + assert.NotEmpty(t, httpProb.Component) + assert.Equal(t, "test", httpProb.Component.Name) + assert.Equal(t, "v2", httpProb.Component.Version) + assert.Equal(t, "delete_resource", httpProb.OperationID) +} + func TestPrivateEnrichHTTPProblem(t *testing.T) { httpProb := httpErrorf("Bad request", &DetailedResponse{}) assert.Empty(t, httpProb.Component) @@ -286,6 +303,19 @@ func TestPrivateEnrichHTTPProblem(t *testing.T) { assert.Equal(t, "delete_resource", httpProb.OperationID) } +func TestPrivateEnrichHTTPProblemWithPopulatedProblem(t *testing.T) { + httpProb := httpErrorf("Bad request", &DetailedResponse{}) + httpProb.Component = NewProblemComponent("some-api", "v3") + httpProb.OperationID = "get_resource" + assert.NotEmpty(t, httpProb.Component) + assert.NotEmpty(t, httpProb.OperationID) + + enrichHTTPProblem(httpProb, "delete_resource", NewProblemComponent("test", "v2")) + assert.Equal(t, "some-api", httpProb.Component.Name) + assert.Equal(t, "v3", httpProb.Component.Version) + assert.Equal(t, "get_resource", httpProb.OperationID) +} + func getPopulatedHTTPProblem() *HTTPProblem { return &HTTPProblem{ IBMProblem: &IBMProblem{ diff --git a/core/sdk_problem.go b/core/sdk_problem.go index 570ca6b..b4b6727 100644 --- a/core/sdk_problem.go +++ b/core/sdk_problem.go @@ -34,6 +34,19 @@ type SDKProblem struct { // function names, files, and line numbers invoked // leading up to the origination of the problem. stack []sdkStackFrame + + // If the problem instance originated in the core, we + // want to keep track of the information for debugging + // purposes (even though we aren't using the problem + // as a "caused by" problem). + coreProblem *sparseSDKProblem + + // If the problem instance originated in the core and + // was caused by an HTTP request, we don't use the + // HTTPProblem instance as a "caused by" but we need + // to store the instance to pass it to a downstream + // SDKProblem instance. + httpProblem *HTTPProblem } // GetConsoleMessage returns all public fields of @@ -84,6 +97,10 @@ func (e *SDKProblem) GetDebugOrderedMaps() *OrderedMaps { orderedMaps.Add("stack", e.stack) + if e.coreProblem != nil { + orderedMaps.Add("core_problem", e.coreProblem) + } + var orderableCausedBy OrderableProblem if errors.As(e.GetCausedBy(), &orderableCausedBy) { orderedMaps.Add("caused_by", orderableCausedBy.GetDebugOrderedMaps().GetMaps()) @@ -97,11 +114,49 @@ func SDKErrorf(err error, summary, discriminator string, component *ProblemCompo function := computeFunctionName(component.Name) stack := getStackInfo(component.Name) - return &SDKProblem{ - IBMProblem: IBMErrorf(err, component, summary, discriminator), + ibmProb := IBMErrorf(err, component, summary, discriminator) + newSDKProb := &SDKProblem{ + IBMProblem: ibmProb, Function: function, stack: stack, } + + // Flatten chains of SDKProblem instances in order to present a single, + // unique error scenario for the SDK context. Multiple Golang components + // can invoke each other, but we only want to track "caused by" problems + // through context boundaries (like API, SDK, Terraform, etc.). This + // eliminates unnecessary granularity of problem scenarios for the SDK + // context. If the problem originated in this library (the Go SDK Core), + // we still want to track that info for debugging purposes. + var sdkCausedBy *SDKProblem + if errors.As(err, &sdkCausedBy) { + // Not a "native" caused by but allows us to maintain compatibility through "Unwrap". + newSDKProb.nativeCausedBy = sdkCausedBy + newSDKProb.causedBy = nil + + if isCoreProblem(sdkCausedBy) { + newSDKProb.coreProblem = newSparseSDKProblem(sdkCausedBy) + + // If we stored an HTTPProblem instance in the core, we'll want to use + // it as the actual "caused by" problem for the new SDK problem. + if sdkCausedBy.httpProblem != nil { + newSDKProb.causedBy = sdkCausedBy.httpProblem + } + } + } + + // We can't use HTTPProblem instances as "caused by" problems for Go SDK Core + // problems because 1) it prevents us from enumerating hashes in the core and + // 2) core problems will almost never be the instances that users interact with + // and the HTTPProblem will need to be used as the "caused by" of the problems + // coming from actual service SDK libraries. + var httpCausedBy *HTTPProblem + if errors.As(err, &httpCausedBy) && isCoreProblem(newSDKProb) { + newSDKProb.httpProblem = httpCausedBy + newSDKProb.causedBy = nil + } + + return newSDKProb } // RepurposeSDKProblem provides a convenient way to take a problem from diff --git a/core/sdk_problem_test.go b/core/sdk_problem_test.go index ff1a4e7..7959adf 100644 --- a/core/sdk_problem_test.go +++ b/core/sdk_problem_test.go @@ -73,6 +73,27 @@ caused_by: assert.Equal(t, expected, message) } +func TestSDKProblemGetDebugMessageWithCoreProblem(t *testing.T) { + coreProb := SDKErrorf(nil, "", "", getComponentInfo()) + sdkProb := SDKErrorf(coreProb, "Wrong!", "disc", NewProblemComponent("a", "b")) + message := sdkProb.GetDebugMessage() + expected := `--- +id: sdk-1518356c +summary: Wrong! +severity: error +function: github.com/IBM/go-sdk-core/v5/core.TestSDKProblemGetDebugMessageWithCoreProblem +component: + name: a + version: b +stack: [] +core_problem: + id: sdk-fd790a1d + function: core.TestSDKProblemGetDebugMessageWithCoreProblem +--- +` + assert.Equal(t, expected, message) +} + func TestSDKProblemGetID(t *testing.T) { sdkProb := getPopulatedSDKProblem() assert.Equal(t, "sdk-32d4ac5e", sdkProb.GetID()) @@ -170,7 +191,7 @@ func TestSDKErrorf(t *testing.T) { assert.Equal(t, "github.com/IBM/go-sdk-core/v5/core.TestSDKErrorf", stack[0].Function) assert.Contains(t, stack[0].File, "core/sdk_problem_test.go") // This might be too fragile. If it becomes an issue, we can remove it. - assert.Equal(t, 158, stack[0].Line) + assert.Equal(t, 179, stack[0].Line) } func TestSDKErrorfNoCausedBy(t *testing.T) { @@ -217,6 +238,51 @@ func TestSDKErrorfNoSummary(t *testing.T) { assert.Contains(t, stack[0].File, "core/sdk_problem_test.go") } +func TestSDKErrorfDoesntUseSDKCausedBy(t *testing.T) { + sdkProb := getPopulatedSDKProblem(); + newSDKProb := SDKErrorf(sdkProb, "", "", NewProblemComponent("a", "b")) + assert.Nil(t, newSDKProb.causedBy) + assert.NotNil(t, newSDKProb.nativeCausedBy) + assert.Equal(t, sdkProb, newSDKProb.nativeCausedBy) + assert.Nil(t, newSDKProb.coreProblem) +} + +func TestSDKErrorfStoreCoreProblem(t *testing.T) { + coreProb := SDKErrorf(nil, "", "", getComponentInfo()) + sdkProb := SDKErrorf(coreProb, "", "", NewProblemComponent("a", "b")) + assert.Nil(t, sdkProb.causedBy) + assert.NotNil(t, sdkProb.nativeCausedBy) + assert.Equal(t, coreProb, sdkProb.nativeCausedBy) + assert.NotNil(t, sdkProb.coreProblem) + assert.Equal(t, coreProb.GetID(), sdkProb.coreProblem.ID) + assert.Equal(t, coreProb.Function, sdkProb.coreProblem.Function) +} + +func TestSDKErrorfHTTPCausedByNotSetForCoreProblem(t *testing.T) { + httpProb := getPopulatedHTTPProblem() + coreProb := SDKErrorf(httpProb, "", "", getComponentInfo()) + assert.Nil(t, coreProb.causedBy) + assert.Nil(t, coreProb.nativeCausedBy) + assert.NotNil(t, coreProb.httpProblem) + assert.Equal(t, httpProb, coreProb.httpProblem) +} + +func TestSDKErrorfGetHTTPCausedByFromCoreProblem(t *testing.T) { + httpProb := getPopulatedHTTPProblem() + + coreProb := SDKErrorf(httpProb, "", "", getComponentInfo()) + assert.Nil(t, coreProb.causedBy) + assert.NotNil(t, coreProb.httpProblem) + assert.Equal(t, httpProb, coreProb.httpProblem) + + sdkProb := SDKErrorf(coreProb, "", "", NewProblemComponent("a", "b")) + assert.NotNil(t, sdkProb.causedBy) + assert.Equal(t, httpProb, sdkProb.causedBy) + + assert.NotNil(t, sdkProb.nativeCausedBy) + assert.Equal(t, coreProb, sdkProb.nativeCausedBy) +} + func TestRepurposeSDKProblem(t *testing.T) { sdkProb := getPopulatedSDKProblem() assert.Equal(t, "some-issue", sdkProb.discriminator) @@ -254,6 +320,25 @@ func TestSDKProblemIsWithNative(t *testing.T) { assert.True(t, errors.Is(secondProb, context.Canceled)) } +func TestSDKProblemIsWithCoreProblem(t *testing.T) { + firstProb := SDKErrorf(nil, "msg", "disc", getComponentInfo()) + secondProb := SDKErrorf(firstProb, "", "other-disc", NewProblemComponent("a", "b")) + assert.False(t, errors.Is(firstProb, secondProb)) + assert.True(t, errors.Is(secondProb, firstProb)) +} + +func TestNewSparseSDKProblem(t *testing.T) { + sparse := newSparseSDKProblem(getPopulatedSDKProblem()) + assert.NotNil(t, sparse) + assert.Equal(t, "sdk-32d4ac5e", sparse.ID) + assert.Equal(t, "mysdk.(*MySdkV1).GetResource", sparse.Function) +} + +func TestIsCoreProblem(t *testing.T) { + assert.False(t, isCoreProblem(getPopulatedSDKProblem())) + assert.True(t, isCoreProblem(SDKErrorf(nil, "", "", getComponentInfo()))) +} + func getPopulatedSDKProblem() *SDKProblem { return &SDKProblem{ IBMProblem: &IBMProblem{ diff --git a/core/sdk_problem_utils.go b/core/sdk_problem_utils.go index 775260c..080a1bf 100644 --- a/core/sdk_problem_utils.go +++ b/core/sdk_problem_utils.go @@ -106,3 +106,19 @@ func formatFrames(pcs []uintptr, componentName string) []sdkStackFrame { return result } + +type sparseSDKProblem struct { + ID string + Function string +} + +func newSparseSDKProblem(prob *SDKProblem) *sparseSDKProblem { + return &sparseSDKProblem{ + ID: prob.GetID(), + Function: prob.Function, + } +} + +func isCoreProblem(prob *SDKProblem) bool { + return prob.Component != nil && prob.Component.Name == MODULE_NAME +}