Skip to content

Commit

Permalink
Include service URL and request method in r11s request errors (micros…
Browse files Browse the repository at this point in the history
…oft#22851)

Troubleshooting live-site issues and customer reported issues will
become easier. Ex: If Container creation failures have service URL and
response code, it'll be easier to identify point of failure and come-up
with a fix to mitigate/fix the incident.

This gives AFR a good idea about the APIs which are failing, which
cluster it was going to, and which cluster group it was going to. It
helps AFR correlate the errors on the server better.

## Examples

Here are some examples of request calls. Only the bolded parts can be
logged, and the first line of each entry below is just the type of
request being made (no need to log it):

**CreateDocument** (or **CreateContainer**)
**POST** https://**us.fluidrelay.azure.com/documents**/someId

**Connect**  
**GET**
https://**alfred.southcentralus-1.fluidrelay.azure.com/socket.io**/?documentId=someId&tenantId=someId&EIO=4&transport=websocket
 
**GetDelta**  
**GET**
https://**alfred.southcentralus-1.fluidrelay.azure.com/deltas**/someId/someId?from=0&to=2001

**GetSummary**  
**GET**
https://**historian.southcentralus-1.fluidrelay.azure.com/repos**/someId/git/summaries/latest?token=someToken&disableCache=true

**PostSummary**  
**POST**
https://**historian.southcentralus-1.fluidrelay.azure.com/repos**/someId/git/summaries?token=someToken

**FindSession** – **First Request**
**POST**
https://**alfred.southcentralus-1.fluidrelay.azure.com/documents**/someId/session/someId

Fixes
[AB#18452](https://dev.azure.com/fluidframework/235294da-091d-4c29-84fc-cdfc3d90890b/_workitems/edit/18452)
  • Loading branch information
kian-thompson authored Oct 29, 2024
1 parent 645b9ed commit 2f1175f
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 24 deletions.
24 changes: 16 additions & 8 deletions packages/drivers/driver-base/src/documentDeltaConnection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,11 @@ import {
ISequencedDocumentMessage,
ISignalMessage,
} from "@fluidframework/driver-definitions/internal";
import { UsageError, createGenericNetworkError } from "@fluidframework/driver-utils/internal";
import {
UsageError,
createGenericNetworkError,
type DriverErrorTelemetryProps,
} from "@fluidframework/driver-utils/internal";
import {
ITelemetryLoggerExt,
EventEmitterWithErrorHandling,
Expand Down Expand Up @@ -789,13 +793,17 @@ export class DocumentDeltaConnection
return createGenericNetworkError(
`socket.io (${handler}): ${this.getErrorMessage(error)}`,
{ canRetry },
{
driverVersion,
details: JSON.stringify({
...this.getConnectionDetailsProps(),
}),
scenarioName: handler,
},
this.getAdditionalErrorProps(handler),
);
}

protected getAdditionalErrorProps(handler: string): DriverErrorTelemetryProps {
return {
driverVersion,
details: JSON.stringify({
...this.getConnectionDetailsProps(),
}),
scenarioName: handler,
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,16 @@ import {
IAnyDriverError,
IConnect,
} from "@fluidframework/driver-definitions/internal";
import type { DriverErrorTelemetryProps } from "@fluidframework/driver-utils/internal";
import { ITelemetryLoggerExt } from "@fluidframework/telemetry-utils/internal";
import type { Socket } from "socket.io-client";

import { IR11sSocketError, errorObjectFromSocketError } from "./errorUtils.js";
import {
IR11sSocketError,
errorObjectFromSocketError,
getUrlForTelemetry,
socketIoPath,
} from "./errorUtils.js";
import { pkgVersion as driverVersion } from "./packageVersion.js";
import { SocketIOClientStatic } from "./socketModule.js";

Expand Down Expand Up @@ -60,13 +67,24 @@ export class R11sDocumentDeltaConnection extends DocumentDeltaConnection {
socket,
id,
logger,
url,
enableLongPollingDowngrade,
);

await deltaConnection.initialize(connectMessage, timeoutMs);
return deltaConnection;
}

private constructor(
socket: Socket,
documentId: string,
logger: ITelemetryLoggerExt,
private readonly url: string,
enableLongPollingDowngrades?: boolean,
) {
super(socket, documentId, logger, enableLongPollingDowngrades);
}

/**
* Error raising for socket.io issues
*/
Expand All @@ -75,7 +93,18 @@ export class R11sDocumentDeltaConnection extends DocumentDeltaConnection {
// - a socketError: add it to the R11sError object for driver to be able to parse it and reason over it.
// - anything else: let base class handle it
return canRetry && Number.isInteger(error?.code) && typeof error?.message === "string"
? errorObjectFromSocketError(error as IR11sSocketError, handler)
? errorObjectFromSocketError(
error as IR11sSocketError,
handler,
this.getAdditionalErrorProps(handler),
)
: super.createErrorObject(handler, error, canRetry);
}

protected getAdditionalErrorProps(handler: string): DriverErrorTelemetryProps {
return {
...super.getAdditionalErrorProps(handler),
url: getUrlForTelemetry(this.url, socketIoPath),
};
}
}
39 changes: 35 additions & 4 deletions packages/drivers/routerlicious-driver/src/errorUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,11 @@ export function createR11sNetworkError(
errorMessage: string,
statusCode: number,
retryAfterMs?: number,
additionalProps?: DriverErrorTelemetryProps,
internalErrorCode?: string | number,
): IFluidErrorBase & R11sError {
let error: IFluidErrorBase & R11sError;
const props = { statusCode, driverVersion };
const props = { ...additionalProps, statusCode, driverVersion };
switch (statusCode) {
case 401:
// The first 401 is manually retried in RouterliciousRestWrapper with a refreshed token,
Expand Down Expand Up @@ -148,10 +149,9 @@ export function throwR11sNetworkError(
errorMessage: string,
statusCode: number,
retryAfterMs?: number,
additionalProps?: DriverErrorTelemetryProps,
): never {
const networkError = createR11sNetworkError(errorMessage, statusCode, retryAfterMs);

throw networkError;
throw createR11sNetworkError(errorMessage, statusCode, retryAfterMs, additionalProps);
}

/**
Expand All @@ -160,13 +160,15 @@ export function throwR11sNetworkError(
export function errorObjectFromSocketError(
socketError: IR11sSocketError,
handler: string,
additionalProps?: DriverErrorTelemetryProps,
): R11sError {
// pre-0.58 error message prefix: R11sSocketError
const message = `R11s socket error (${handler}): ${socketError.message}`;
const error = createR11sNetworkError(
message,
socketError.code,
socketError.retryAfterMs,
additionalProps,
socketError.internalErrorCode,
);
error.addTelemetryProperties({
Expand All @@ -176,3 +178,32 @@ export function errorObjectFromSocketError(
});
return error;
}

/** Simulate the pathname for socket connection */
export const socketIoPath = "socket.io";

/**
* Get a stripped version of a URL safe for r11s telemetry
* @returns undefined if no appropriate hostName is provided
*/
export function getUrlForTelemetry(hostName: string, path?: string): string | undefined {
// Strip off "http://" or "https://"
const hostNameMatch = hostName.match(/^(?:https?:\/\/)?([^/]+)/);
if (!hostNameMatch) {
return undefined;
}
const strippedHostName = hostNameMatch[1];

let extractedPath: string | undefined;
if (path !== undefined) {
// Extract the first portion of the path and explicitly match it to known path names
const pathMatch = path.match(/^\/?([^/]+)/);
if (pathMatch && [socketIoPath, "repos", "deltas", "documents"].includes(pathMatch[1])) {
extractedPath = pathMatch[1];
}
}

return extractedPath !== undefined
? `${strippedHostName}/${extractedPath}`
: strippedHostName;
}
39 changes: 29 additions & 10 deletions packages/drivers/routerlicious-driver/src/restWrapper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@ import fetch from "cross-fetch";
import safeStringify from "json-stringify-safe";

import type { AxiosRequestConfig, RawAxiosRequestHeaders } from "./axios.cjs";
import { RouterliciousErrorTypes, throwR11sNetworkError } from "./errorUtils.js";
import {
getUrlForTelemetry,
RouterliciousErrorTypes,
throwR11sNetworkError,
} from "./errorUtils.js";
import { pkgVersion as driverVersion } from "./packageVersion.js";
import { addOrUpdateQueryParams, type QueryStringType } from "./queryStringUtils.js";
import { RestWrapper } from "./restWrapperBase.js";
Expand Down Expand Up @@ -174,6 +178,13 @@ class RouterliciousRestWrapper extends RestWrapper {
// on failure, add the request entry into the retryCounter map to count the subsequent retries, if any
this.retryCounter.set(requestKey, requestRetryCount ? requestRetryCount + 1 : 1);

const telemetryProps = {
driverVersion,
retryCount: requestRetryCount,
url: getUrlForTelemetry(completeRequestUrl.hostname, completeRequestUrl.pathname),
requestMethod: fetchRequestConfig.method,
};

// Browser Fetch throws a TypeError on network error, `node-fetch` throws a FetchError
const isNetworkError = ["TypeError", "FetchError"].includes(error?.name);
const errorMessage = isNetworkError
Expand All @@ -185,14 +196,16 @@ class RouterliciousRestWrapper extends RestWrapper {
// If there exists a self-signed SSL certificates error, throw a NonRetryableError
// TODO: instead of relying on string matching, filter error based on the error code like we do for websocket connections
const err = errorMessage.includes("failed, reason: self signed certificate")
? new NonRetryableError(errorMessage, RouterliciousErrorTypes.sslCertError, {
driverVersion,
retryCount: requestRetryCount,
})
: new GenericNetworkError(errorMessage, errorMessage.startsWith("NetworkError"), {
driverVersion,
retryCount: requestRetryCount,
});
? new NonRetryableError(
errorMessage,
RouterliciousErrorTypes.sslCertError,
telemetryProps,
)
: new GenericNetworkError(
errorMessage,
errorMessage.startsWith("NetworkError"),
telemetryProps,
);
throw err;
},
);
Expand All @@ -203,6 +216,7 @@ class RouterliciousRestWrapper extends RestWrapper {
});

const response = res.response;
const headers = headersToMap(response.headers);

let start = performance.now();
const text = await response.text();
Expand All @@ -222,7 +236,6 @@ class RouterliciousRestWrapper extends RestWrapper {
// on successful response, remove the entry from the retryCounter map
this.retryCounter.delete(requestKey);
const result = responseBody as T;
const headers = headersToMap(response.headers);
return {
content: result,
headers,
Expand Down Expand Up @@ -268,6 +281,12 @@ class RouterliciousRestWrapper extends RestWrapper {
`R11s fetch error: ${responseSummary}`,
response.status,
responseBody?.retryAfter,
{
...getPropsToLogFromResponse(headers),
driverVersion,
url: getUrlForTelemetry(completeRequestUrl.hostname, completeRequestUrl.pathname),
requestMethod: fetchRequestConfig.method,
},
);
}

Expand Down
29 changes: 29 additions & 0 deletions packages/drivers/routerlicious-driver/src/test/errorUtils.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import {
RouterliciousErrorTypes,
createR11sNetworkError,
errorObjectFromSocketError,
getUrlForTelemetry,
socketIoPath,
throwR11sNetworkError,
} from "../errorUtils.js";

Expand Down Expand Up @@ -291,4 +293,31 @@ describe("ErrorUtils", () => {
assert.strictEqual((error as any).statusCode, 400);
});
});

describe("getUrlForTelemetry", () => {
// 0:hostName 1:path 2:expectedOutput
const testCases = [
["", undefined, undefined],
["/", undefined, undefined],
["http://some.url.com", undefined, "some.url.com"],
["http://some.url.com/", undefined, "some.url.com"],
["https://some.url.com/", "", "some.url.com"],
["something://some.url.com/", "", "something:"],
["some.url.com/path", undefined, "some.url.com"],
["some.url.com/", "randomPath", "some.url.com"],
["some.url.com/", socketIoPath, `some.url.com/${socketIoPath}`],
["http://some.url.com/", "repos", "some.url.com/repos"],
["some.url.com/", "deltas", "some.url.com/deltas"],
["https://some.url.com", "documents", "some.url.com/documents"],
["https://some.url.com/", "/documents/", "some.url.com/documents"],
["https://some.url.com/", "documents/morePath", "some.url.com/documents"],
];

testCases.forEach((testCase) => {
it(`Parses URL as expected hostName:[${testCase[0]}] path:[${testCase[1]}]`, () => {
const actualOutput = getUrlForTelemetry(testCase[0]!, testCase[1]);
assert.strictEqual(actualOutput, testCase[2]);
});
});
});
});

0 comments on commit 2f1175f

Please sign in to comment.