Skip to content

Commit

Permalink
Attempt to resolve flaky test TestAcmeBasicWorkflow test (#20960)
Browse files Browse the repository at this point in the history
* Attempt to resolve flaky test TestAcmeBasicWorkflow test

 - Since we can't control the challenge engine, flush the validation records it leverages prior to manually updating the authorization/challenge statuses

 ```
     path_acme_test.go:261: csr: &{[] [] [] [] 0 [] 0 0 <nil> CN=*.localdomain [] [] [] [localhost.localdomain *.localdomain] [] [] []}
     path_acme_test.go:300:
         	Error Trace:	/home/runner/actions-runner/_work/vault-enterprise/vault-enterprise/builtin/logical/pki/path_acme_test.go:300
         	Error:      	Received unexpected error:
         	            	403 urn:ietf:params:acme:error:orderNotReady: The request attempted to finalize an order that is not ready to be finalized: order is status pending, needs to be in ready state
         	Test:       	TestAcmeBasicWorkflow/role
         	Messages:   	failed finalizing order
 ```

* make fmt
  • Loading branch information
stevendpclark authored Jun 2, 2023
1 parent 0115b5e commit 3dbdee5
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 91 deletions.
20 changes: 10 additions & 10 deletions builtin/logical/pki/acme_billing_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,42 +67,42 @@ func TestACMEBilling(t *testing.T) {
expectedCount := validateClientCount(t, client, "", -1, "initial fetch")

// Unique identifier: should increase by one.
doACMEForDomainWithDNS(t, dns, &acmeClientPKI, []string{"dadgarcorp.com"})
doACMEForDomainWithDNS(t, dns, acmeClientPKI, []string{"dadgarcorp.com"})
expectedCount = validateClientCount(t, client, "pki", expectedCount+1, "new certificate")

// Different identifier; should increase by one.
doACMEForDomainWithDNS(t, dns, &acmeClientPKI, []string{"example.dadgarcorp.com"})
doACMEForDomainWithDNS(t, dns, acmeClientPKI, []string{"example.dadgarcorp.com"})
expectedCount = validateClientCount(t, client, "pki", expectedCount+1, "new certificate")

// While same identifiers, used together and so thus are unique; increase by one.
doACMEForDomainWithDNS(t, dns, &acmeClientPKI, []string{"example.dadgarcorp.com", "dadgarcorp.com"})
doACMEForDomainWithDNS(t, dns, acmeClientPKI, []string{"example.dadgarcorp.com", "dadgarcorp.com"})
expectedCount = validateClientCount(t, client, "pki", expectedCount+1, "new certificate")

// Same identifiers in different order are not unique; keep the same.
doACMEForDomainWithDNS(t, dns, &acmeClientPKI, []string{"dadgarcorp.com", "example.dadgarcorp.com"})
doACMEForDomainWithDNS(t, dns, acmeClientPKI, []string{"dadgarcorp.com", "example.dadgarcorp.com"})
expectedCount = validateClientCount(t, client, "pki", expectedCount, "different order; same identifiers")

// Using a different mount shouldn't affect counts.
doACMEForDomainWithDNS(t, dns, &acmeClientPKI2, []string{"dadgarcorp.com"})
doACMEForDomainWithDNS(t, dns, acmeClientPKI2, []string{"dadgarcorp.com"})
expectedCount = validateClientCount(t, client, "", expectedCount, "different mount; same identifiers")

// But using a different identifier should.
doACMEForDomainWithDNS(t, dns, &acmeClientPKI2, []string{"pki2.dadgarcorp.com"})
doACMEForDomainWithDNS(t, dns, acmeClientPKI2, []string{"pki2.dadgarcorp.com"})
expectedCount = validateClientCount(t, client, "pki2", expectedCount+1, "different mount with different identifiers")

// A new identifier in a unique namespace will affect results.
doACMEForDomainWithDNS(t, dns, &acmeClientPKINS1, []string{"unique.dadgarcorp.com"})
doACMEForDomainWithDNS(t, dns, acmeClientPKINS1, []string{"unique.dadgarcorp.com"})
expectedCount = validateClientCount(t, client, "ns1/pki", expectedCount+1, "unique identifier in a namespace")

// But in a different namespace with the existing identifier will not.
doACMEForDomainWithDNS(t, dns, &acmeClientPKINS2, []string{"unique.dadgarcorp.com"})
doACMEForDomainWithDNS(t, dns, acmeClientPKINS2, []string{"unique.dadgarcorp.com"})
expectedCount = validateClientCount(t, client, "", expectedCount, "existing identifier in a namespace")
doACMEForDomainWithDNS(t, dns, &acmeClientPKI2, []string{"unique.dadgarcorp.com"})
doACMEForDomainWithDNS(t, dns, acmeClientPKI2, []string{"unique.dadgarcorp.com"})
expectedCount = validateClientCount(t, client, "", expectedCount, "existing identifier outside of a namespace")

// Creating a unique identifier in a namespace with a mount with the
// same name as another namespace should increase counts as well.
doACMEForDomainWithDNS(t, dns, &acmeClientPKINS2, []string{"very-unique.dadgarcorp.com"})
doACMEForDomainWithDNS(t, dns, acmeClientPKINS2, []string{"very-unique.dadgarcorp.com"})
expectedCount = validateClientCount(t, client, "ns2/pki", expectedCount+1, "unique identifier in a different namespace")

// Check the current fragment
Expand Down
166 changes: 85 additions & 81 deletions builtin/logical/pki/path_acme_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,33 +225,7 @@ func TestAcmeBasicWorkflow(t *testing.T) {

// HACK: Update authorization/challenge to completed as we can't really do it properly in this workflow
// test.
pkiMount := findStorageMountUuid(t, client, "pki")
accountId := acct.URI[strings.LastIndex(acct.URI, "/"):]
for _, authURI := range getOrder.AuthzURLs {
authId := authURI[strings.LastIndex(authURI, "/"):]

rawPath := path.Join("/sys/raw/logical/", pkiMount, getAuthorizationPath(accountId, authId))
resp, err := client.Logical().ReadWithContext(testCtx, rawPath)
require.NoError(t, err, "failed looking up authorization storage")
require.NotNil(t, resp, "sys raw response was nil")
require.NotEmpty(t, resp.Data["value"], "no value field in sys raw response")

var authz ACMEAuthorization
err = jsonutil.DecodeJSON([]byte(resp.Data["value"].(string)), &authz)
require.NoError(t, err, "error decoding authorization: %w", err)
authz.Status = ACMEAuthorizationValid
for _, challenge := range authz.Challenges {
challenge.Status = ACMEChallengeValid
}

encodeJSON, err := jsonutil.EncodeJSON(authz)
require.NoError(t, err, "failed encoding authz json")
_, err = client.Logical().WriteWithContext(testCtx, rawPath, map[string]interface{}{
"value": base64.StdEncoding.EncodeToString(encodeJSON),
"encoding": "base64",
})
require.NoError(t, err, "failed writing authorization storage")
}
markAuthorizationSuccess(t, client, acmeClient, acct, getOrder)

// Make sure sending a CSR with the account key gets rejected.
goodCr := &x509.CertificateRequest{
Expand Down Expand Up @@ -738,33 +712,7 @@ func TestAcmeTruncatesToIssuerExpiry(t *testing.T) {

// HACK: Update authorization/challenge to completed as we can't really do it properly in this workflow
// test.
pkiMount := findStorageMountUuid(t, client, "pki")
accountId := acct.URI[strings.LastIndex(acct.URI, "/"):]
for _, authURI := range order.AuthzURLs {
authId := authURI[strings.LastIndex(authURI, "/"):]

rawPath := path.Join("/sys/raw/logical/", pkiMount, getAuthorizationPath(accountId, authId))
resp, err := client.Logical().ReadWithContext(testCtx, rawPath)
require.NoError(t, err, "failed looking up authorization storage")
require.NotNil(t, resp, "sys raw response was nil")
require.NotEmpty(t, resp.Data["value"], "no value field in sys raw response")

var authz ACMEAuthorization
err = jsonutil.DecodeJSON([]byte(resp.Data["value"].(string)), &authz)
require.NoError(t, err, "error decoding authorization: %w", err)
authz.Status = ACMEAuthorizationValid
for _, challenge := range authz.Challenges {
challenge.Status = ACMEChallengeValid
}

encodeJSON, err := jsonutil.EncodeJSON(authz)
require.NoError(t, err, "failed encoding authz json")
_, err = client.Logical().WriteWithContext(testCtx, rawPath, map[string]interface{}{
"value": base64.StdEncoding.EncodeToString(encodeJSON),
"encoding": "base64",
})
require.NoError(t, err, "failed writing authorization storage")
}
markAuthorizationSuccess(t, client, acmeClient, acct, order)

// Build a proper CSR, with the correct name and signed with a different key works.
goodCr := &x509.CertificateRequest{DNSNames: []string{identifiers[0]}}
Expand Down Expand Up @@ -834,7 +782,7 @@ func TestAcmeIgnoresRoleExtKeyUsage(t *testing.T) {
require.NoError(t, err, "failed creating order")

// HACK: Update authorization/challenge to completed as we can't really do it properly in this workflow test.
markAuthorizationSuccess(t, client, acct, order)
markAuthorizationSuccess(t, client, acmeClient, acct, order)

// Build a proper CSR, with the correct name and signed with a different key works.
goodCr := &x509.CertificateRequest{DNSNames: []string{identifiers[0]}}
Expand All @@ -854,38 +802,94 @@ func TestAcmeIgnoresRoleExtKeyUsage(t *testing.T) {
"mismatch of ExtKeyUsage flags")
}

func markAuthorizationSuccess(t *testing.T, client *api.Client, acct *acme.Account, order *acme.Order) {
func markAuthorizationSuccess(t *testing.T, client *api.Client, acmeClient *acme.Client, acct *acme.Account,
order *acme.Order,
) {
testCtx := context.Background()

pkiMount := findStorageMountUuid(t, client, "pki")
accountId := acct.URI[strings.LastIndex(acct.URI, "/"):]
for _, authURI := range order.AuthzURLs {
authId := authURI[strings.LastIndex(authURI, "/"):]

rawPath := path.Join("/sys/raw/logical/", pkiMount, getAuthorizationPath(accountId, authId))
resp, err := client.Logical().ReadWithContext(testCtx, rawPath)
require.NoError(t, err, "failed looking up authorization storage")
require.NotNil(t, resp, "sys raw response was nil")
require.NotEmpty(t, resp.Data["value"], "no value field in sys raw response")

var authz ACMEAuthorization
err = jsonutil.DecodeJSON([]byte(resp.Data["value"].(string)), &authz)
require.NoError(t, err, "error decoding authorization: %w", err)
authz.Status = ACMEAuthorizationValid
for _, challenge := range authz.Challenges {
challenge.Status = ACMEChallengeValid

// Delete any and all challenge validation entries to stop the engine from overwriting our hack here
i := 0
for {
deleteCvEntries(t, client, pkiMount)

accountId := acct.URI[strings.LastIndex(acct.URI, "/"):]
for _, authURI := range order.AuthzURLs {
authId := authURI[strings.LastIndex(authURI, "/"):]

rawPath := path.Join("/sys/raw/logical/", pkiMount, getAuthorizationPath(accountId, authId))
resp, err := client.Logical().ReadWithContext(testCtx, rawPath)
require.NoError(t, err, "failed looking up authorization storage")
require.NotNil(t, resp, "sys raw response was nil")
require.NotEmpty(t, resp.Data["value"], "no value field in sys raw response")

var authz ACMEAuthorization
err = jsonutil.DecodeJSON([]byte(resp.Data["value"].(string)), &authz)
require.NoError(t, err, "error decoding authorization: %w", err)
authz.Status = ACMEAuthorizationValid
for _, challenge := range authz.Challenges {
challenge.Status = ACMEChallengeValid
}

encodeJSON, err := jsonutil.EncodeJSON(authz)
require.NoError(t, err, "failed encoding authz json")
_, err = client.Logical().WriteWithContext(testCtx, rawPath, map[string]interface{}{
"value": base64.StdEncoding.EncodeToString(encodeJSON),
"encoding": "base64",
})
require.NoError(t, err, "failed writing authorization storage")
}

encodeJSON, err := jsonutil.EncodeJSON(authz)
require.NoError(t, err, "failed encoding authz json")
_, err = client.Logical().WriteWithContext(testCtx, rawPath, map[string]interface{}{
"value": base64.StdEncoding.EncodeToString(encodeJSON),
"encoding": "base64",
})
require.NoError(t, err, "failed writing authorization storage")
// Give some time
time.Sleep(200 * time.Millisecond)

// Check to see if we have fixed up the status and no new entries have appeared.
if !deleteCvEntries(t, client, pkiMount) {
// No entries found
// Look to see if we raced against the engine
orderLookup, err := acmeClient.GetOrder(testCtx, order.URI)
require.NoError(t, err, "failed loading order status after manually ")

if orderLookup.Status == string(ACMEOrderReady) {
// Our order seems to be in the proper status, should be safe-ish to go ahead now
break
} else {
t.Logf("order status was not ready, retrying")
}
} else {
t.Logf("new challenge entries appeared after deletion, retrying")
}

if i > 5 {
t.Fatalf("We are constantly deleting cv entries or order status is not changing, something is wrong")
}

i++
}
}

func deleteCvEntries(t *testing.T, client *api.Client, pkiMount string) bool {
testCtx := context.Background()

cvPath := path.Join("/sys/raw/logical/", pkiMount, acmeValidationPrefix)
resp, err := client.Logical().ListWithContext(testCtx, cvPath)
require.NoError(t, err, "failed listing cv path items")

deletedEntries := false
if resp != nil {
cvEntries := resp.Data["keys"].([]interface{})
for _, cvEntry := range cvEntries {
cvEntryPath := path.Join(cvPath, cvEntry.(string))
_, err = client.Logical().DeleteWithContext(testCtx, cvEntryPath)
require.NoError(t, err, "failed to delete cv entry")
deletedEntries = true
}
}

return deletedEntries
}

func setupAcmeBackend(t *testing.T) (*vault.TestCluster, *api.Client, string) {
cluster, client := setupTestPkiCluster(t)

Expand Down Expand Up @@ -1161,7 +1165,7 @@ func setupTestPkiCluster(t *testing.T) (*vault.TestCluster, *api.Client) {
return cluster, client
}

func getAcmeClientForCluster(t *testing.T, cluster *vault.TestCluster, baseUrl string, key crypto.Signer) acme.Client {
func getAcmeClientForCluster(t *testing.T, cluster *vault.TestCluster, baseUrl string, key crypto.Signer) *acme.Client {
coreAddr := cluster.Cores[0].Listeners[0].Address
tlsConfig := cluster.Cores[0].TLSConfig()

Expand All @@ -1178,7 +1182,7 @@ func getAcmeClientForCluster(t *testing.T, cluster *vault.TestCluster, baseUrl s
baseUrl = "v1/" + baseUrl
}
baseAcmeURL := fmt.Sprintf("https://%s/%s", coreAddr.String(), baseUrl)
return acme.Client{
return &acme.Client{
Key: key,
HTTPClient: httpClient,
DirectoryURL: baseAcmeURL + "directory",
Expand Down

0 comments on commit 3dbdee5

Please sign in to comment.