Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: operator retries and error logging #511

Merged
merged 23 commits into from
Jul 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
630 changes: 262 additions & 368 deletions package-lock.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/istio/common/manifests/pepr-istio-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Have to pre-create the namespace and also patch it with the istio-injection label later because
# Helm is kind of dumb: https://github.com/helm/helm/issues/350
kind: Namespace
apiVersion: v1
metadata:
name: pepr-system
labels:
Expand Down
130 changes: 79 additions & 51 deletions src/pepr/operator/controllers/keycloak/client-sync.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,16 @@ import { Sso, UDSPackage } from "../../crd";
import { getOwnerRef } from "../utils";
import { Client } from "./types";

const apiURL =
let apiURL =
"http://keycloak-http.keycloak.svc.cluster.local:8080/realms/uds/clients-registrations/default";
const samlDescriptorUrl =
"http://keycloak-http.keycloak.svc.cluster.local:8080/realms/uds/protocol/saml/descriptor";

// Support dev mode with port-forwarded keycloak svc
if (process.env.PEPR_MODE === "dev") {
apiURL = "http://localhost:8080/realms/uds/clients-registrations/default";
}

// Template regex to match clientField() references, see https://regex101.com/r/e41Dsk/3 for details
const secretTemplateRegex = new RegExp(
'clientField\\(([a-zA-Z]+)\\)(?:\\["?([\\w]+)"?\\]|(\\.json\\(\\)))?',
Expand Down Expand Up @@ -78,70 +83,88 @@ async function syncClient(
isRetry = false,
) {
Log.debug(pkg.metadata, `Processing client request: ${clientReq.clientId}`);
// Not including the CR data in the ref because Keycloak client IDs must be unique already
const name = `sso-client-${clientReq.clientId}`;
let client: Client;
handleClientGroups(clientReq);

try {
// Not including the CR data in the ref because Keycloak client IDs must be unique already
const name = `sso-client-${clientReq.clientId}`;
const token = Store.getItem(name);

let client: Client;
// Get keycloak client token from the store if this is an existing client
const token = Store.getItem(name);

handleClientGroups(clientReq);

// If an existing client is found, update it
try {
// If an existing client is found, use the token to update the client
if (token && !isRetry) {
mjnagel marked this conversation as resolved.
Show resolved Hide resolved
Log.debug(pkg.metadata, `Found existing token for ${clientReq.clientId}`);
client = await apiCall(clientReq, "PUT", token);
} else {
Log.debug(pkg.metadata, `Creating new client for ${clientReq.clientId}`);
client = await apiCall(clientReq);
}
} catch (err) {
const msg =
`Failed to process Keycloak request for client '${clientReq.clientId}', package ` +
`${pkg.metadata?.namespace}/${pkg.metadata?.name}. Error: ${err.message}`;

// Throw the error if this is the retry or was an initial client creation attempt
if (isRetry || !token) {
Log.error(`${msg}, retry failed.`);
// Throw the original error captured from the first attempt
throw new Error(msg);
} else {
// Retry the request without the token in case we have a bad token stored
Log.error(msg);

try {
return await syncClient(clientReq, pkg, true);
} catch (retryErr) {
// If the retry fails, log the retry error and throw the original error
const retryMsg =
`Retry of Keycloak request failed for client '${clientReq.clientId}', package ` +
`${pkg.metadata?.namespace}/${pkg.metadata?.name}. Error: ${retryErr.message}`;
Log.error(retryMsg);
// Throw the error from the original attempt since our retry without token failed
throw new Error(msg);
}
}
}

// Write the new token to the store
// Write the new token to the store
try {
await Store.setItemAndWait(name, client.registrationAccessToken!);
} catch (err) {
throw Error(
`Failed to set token in store for client '${clientReq.clientId}', package ` +
`${pkg.metadata?.namespace}/${pkg.metadata?.name}`,
);
}

// Remove the registrationAccessToken from the client object to avoid problems (one-time use token)
delete client.registrationAccessToken;
// Remove the registrationAccessToken from the client object to avoid problems (one-time use token)
delete client.registrationAccessToken;

if (clientReq.protocol === "saml") {
client.samlIdpCertificate = await getSamlCertificate();
}
if (clientReq.protocol === "saml") {
client.samlIdpCertificate = await getSamlCertificate();
}

// Create or update the client secret
await K8s(kind.Secret).Apply({
metadata: {
namespace: pkg.metadata!.namespace,
// Use the CR secret name if provided, otherwise use the client name
name: secretName || name,
labels: {
"uds/package": pkg.metadata!.name,
},
// Use the CR as the owner ref for each VirtualService
ownerReferences: getOwnerRef(pkg),
// Create or update the client secret
await K8s(kind.Secret).Apply({
metadata: {
namespace: pkg.metadata!.namespace,
// Use the CR secret name if provided, otherwise use the client name
name: secretName || name,
labels: {
"uds/package": pkg.metadata!.name,
},
data: generateSecretData(client, secretTemplate),
});

if (isAuthSvcClient) {
// Do things here
}

return name;
} catch (err) {
const msg =
`Failed to process client request '${clientReq.clientId}' for ` +
`${pkg.metadata?.namespace}/${pkg.metadata?.name}. This can occur if a client already exists with the same ID that Pepr isn't tracking.`;
Log.error({ err }, msg);

if (isRetry) {
Log.error(`${msg}, retry failed, aborting`);
throw new Error(`${msg}. RETRY FAILED, aborting: ${JSON.stringify(err)}`);
}

// Retry the request
Log.warn(`${msg}, retrying`);
return syncClient(clientReq, pkg, true);
// Use the CR as the owner ref for each VirtualService
ownerReferences: getOwnerRef(pkg),
},
data: generateSecretData(client, secretTemplate),
});

if (isAuthSvcClient) {
// Do things here
}

return name;
}

/**
Expand Down Expand Up @@ -183,7 +206,8 @@ async function apiCall(sso: Partial<Sso>, method = "POST", authToken = "") {
// When not creating a new client, add the client ID and registrationAccessToken
if (authToken) {
req.headers.Authorization = `Bearer ${authToken}`;
url += `/${sso.clientId}`;
// Ensure that we URI encode the clientId in the request URL
url += `/${encodeURIComponent(sso.clientId!)}`;
}

// Remove the body for DELETE requests
Expand All @@ -195,7 +219,11 @@ async function apiCall(sso: Partial<Sso>, method = "POST", authToken = "") {
const resp = await fetch<Client>(url, req);

if (!resp.ok) {
throw new Error(`Failed to ${method} client: ${resp.statusText}`);
if (resp.data) {
throw new Error(`${JSON.stringify(resp.statusText)}, ${JSON.stringify(resp.data)}`);
} else {
throw new Error(`${JSON.stringify(resp.statusText)}`);
}
}

return resp.data;
Expand Down
3 changes: 2 additions & 1 deletion src/pepr/operator/crd/generated/package-v1alpha1.ts
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ export enum ClientAuthenticatorType {
*/
export interface Groups {
/**
* List of group allowed to access to client
* List of groups allowed to access to client
*/
anyOf?: string[];
}
Expand All @@ -562,6 +562,7 @@ export enum Phase {
Failed = "Failed",
Pending = "Pending",
Ready = "Ready",
Retrying = "Retrying",
}

RegisterKind(Package, {
Expand Down
2 changes: 1 addition & 1 deletion src/pepr/operator/crd/sources/package/v1alpha1.ts
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ export const v1alpha1: V1CustomResourceDefinitionVersion = {
type: "integer",
},
phase: {
enum: ["Pending", "Ready", "Failed"],
enum: ["Pending", "Ready", "Failed", "Retrying"],
type: "string",
},
ssoClients: {
Expand Down
6 changes: 6 additions & 0 deletions src/pepr/operator/crd/validators/package-validator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@ export async function validator(req: PeprValidateRequest<UDSPackage>) {
return req.Deny(`The client ID "${client.clientId}" is not unique`);
}
clientIDs.add(client.clientId);
// Don't allow illegal k8s resource names for the secret name
if (client.secretName && client.secretName !== sanitizeResourceName(client.secretName)) {
return req.Deny(
`The client ID "${client.clientId}" uses an invalid secret name ${client.secretName}`,
);
}
}

return req.Approve();
Expand Down
2 changes: 2 additions & 0 deletions src/pepr/operator/reconcilers/index.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ describe("handleFailure", () => {
expect(PatchStatus).toHaveBeenCalledWith({
metadata: { namespace: "default", name: "test" },
status: {
phase: Phase.Retrying,
retryAttempt: 1,
},
});
Expand Down Expand Up @@ -224,6 +225,7 @@ describe("handleFailure", () => {
status: {
observedGeneration: 1,
phase: Phase.Failed,
retryAttempt: 0,
},
});
});
Expand Down
20 changes: 17 additions & 3 deletions src/pepr/operator/reconcilers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ export const uidSeen = new Set<string>();
* @returns true if the CRD is pending or the current generation has been processed
*/
export function shouldSkip(cr: UDSPackage) {
const isRetrying = cr.status?.phase === Phase.Retrying;
const isPending = cr.status?.phase === Phase.Pending;
const isCurrentGeneration = cr.metadata?.generation === cr.status?.observedGeneration;

Expand All @@ -22,6 +23,12 @@ export function shouldSkip(cr: UDSPackage) {
return false;
}

// If the CR is retrying, it should not be skipped
if (isRetrying) {
Log.debug(cr, `Should skip? No, retrying`);
return false;
}

// This is the second time the CR has been seen, so check if it is pending or the current generation
if (isPending || isCurrentGeneration) {
Log.debug(cr, `Should skip? Yes, pending or current generation and not first time seen`);
Expand Down Expand Up @@ -50,6 +57,9 @@ export async function updateStatus(cr: UDSPackage, status: PkgStatus) {
},
status,
});

// Track the UID of the CRD to know if it has been seen before
uidSeen.add(cr.metadata!.uid!);
}

/**
Expand Down Expand Up @@ -95,19 +105,22 @@ export async function handleFailure(err: { status: number; message: string }, cr
const identifier = `${metadata.namespace}/${metadata.name}`;
let status: Status;

// todo: identify exact 404 we are targetting, possibly in `updateStatus`
// todo: identify exact 404 we are targeting, possibly in `updateStatus`
if (err.status === 404) {
Log.warn({ err }, `Package metadata seems to have been deleted`);
return;
}

const retryAttempt = cr.status?.retryAttempt || 0;

if (retryAttempt < 5) {
// retryAttempt starts at 0, we perform 4 retries, 5 total attempts
if (retryAttempt < 4) {
const currRetry = retryAttempt + 1;

Log.error({ err }, `Reconciliation attempt ${currRetry} failed for ${identifier}, retrying...`);

status = {
phase: Phase.Retrying,
retryAttempt: currRetry,
};
} else {
Expand All @@ -116,11 +129,12 @@ export async function handleFailure(err: { status: number; message: string }, cr
status = {
phase: Phase.Failed,
observedGeneration: metadata.generation,
retryAttempt: 0, // todo: make this nullable when kfc generates the type
};
}

// Write an event for the error
void writeEvent(cr, { message: err.message });
await writeEvent(cr, { message: err.message });

// Update the status of the package with the error
updateStatus(cr, status).catch(finalErr => {
Expand Down
5 changes: 1 addition & 4 deletions src/pepr/operator/reconcilers/package-reconciler.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { Log } from "pepr";

import { handleFailure, shouldSkip, uidSeen, updateStatus } from ".";
import { handleFailure, shouldSkip, updateStatus } from ".";
import { UDSConfig } from "../../config";
import { enableInjection } from "../controllers/istio/injection";
import { istioResources } from "../controllers/istio/istio-resources";
Expand Down Expand Up @@ -64,9 +64,6 @@ export async function packageReconciler(pkg: UDSPackage) {
observedGeneration: metadata.generation,
retryAttempt: 0, // todo: make this nullable when kfc generates the type
});

// Update to indicate this version of pepr-core has reconciled the package successfully once
uidSeen.add(pkg.metadata!.uid!);
} catch (err) {
void handleFailure(err, pkg);
}
Expand Down