Skip to content

Commit

Permalink
Emit more information for summarizer observability (#23217)
Browse files Browse the repository at this point in the history
By providing a way to programmatically observe summarization failures,
we can make progress in mitigating/reducing summarizer-related issues.
Customer OCEs will be able to catch summarization bugs on their own
and decide what to do with their end-users (either alert them via UI,
alert themselves via Telemetry, etc.). Customers can also do their own
summarization testing, and thus catch some of these bugs before they
ship.

Included in this change are new events being emitted off the
`ContainerRuntime` class and `IContainerRuntime` interface. They
include:
- `"summarizeAllAttemptsFailed"`: All attempts to summarize have failed.
- `"summarizerStop"`: The summarizer has stopped, providing information
on why it stopped and any associated error.
- `"summarizerStart"`: The summarizer has started.
- `"summarizerStartupFailed"`: The summarizer has failed to start up,
usually meaning it wasn't able to catch up. Includes information on why
startup failed.


[AB#24393](https://dev.azure.com/fluidframework/235294da-091d-4c29-84fc-cdfc3d90890b/_workitems/edit/24393)

---------

Co-authored-by: jzaffiro <110866475+jzaffiro@users.noreply.github.com>
  • Loading branch information
kian-thompson and jzaffiro authored Dec 10, 2024
1 parent 51a1728 commit cd88ee2
Show file tree
Hide file tree
Showing 19 changed files with 436 additions and 120 deletions.
10 changes: 10 additions & 0 deletions .changeset/dirty-crabs-try.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
"@fluidframework/container-runtime": minor
---
---
"section": deprecation
---

Deprecated SummarizerStopReason, ISummarizeEventProps, and ISummarizerEvents

`SummarizerStopReason`, `ISummarizeEventProps`, and `ISummarizerEvents` have all been deprecated from the `"@fluidframework/container-runtime"` package. Please migrate all uses of these APIs to their respective copies in the `"@fluidframework/container-runtime-definitions"` package.
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ export interface IContainerRuntime extends IProvideFluidDataStoreRegistry, ICont
export type IContainerRuntimeBaseWithCombinedEvents = IContainerRuntimeBase & IEventProvider<IContainerRuntimeEvents>;

// @alpha @sealed
export interface IContainerRuntimeEvents extends IContainerRuntimeBaseEvents {
export interface IContainerRuntimeEvents extends IContainerRuntimeBaseEvents, ISummarizerEvents {
// (undocumented)
(event: "dirty" | "disconnected" | "saved" | "attached", listener: () => void): any;
// (undocumented)
Expand All @@ -46,6 +46,84 @@ export interface IContainerRuntimeWithResolveHandle_Deprecated extends IContaine
resolveHandle(request: IRequest): Promise<IResponse>;
}

// @alpha @sealed (undocumented)
export interface ISummarizeEventProps {
// (undocumented)
currentAttempt: number;
// (undocumented)
error?: any;
failureMessage?: string;
isLastSummary?: boolean;
// (undocumented)
maxAttempts: number;
// (undocumented)
result: "success" | "failure" | "canceled";
}

// @alpha @sealed (undocumented)
export interface ISummarizerEvents extends IEvent {
// (undocumented)
(event: "summarize", listener: (props: ISummarizeEventProps & ISummarizerObservabilityProps) => void): any;
// (undocumented)
(event: "summarizeAllAttemptsFailed", listener: (props: Omit<ISummarizeEventProps, "result"> & ISummarizerObservabilityProps) => void): any;
// (undocumented)
(event: "summarizerStop", listener: (props: {
stopReason: SummarizerStopReason;
error?: any;
} & ISummarizerObservabilityProps) => void): any;
// (undocumented)
(event: "summarizerStart", listener: (props: {
onBehalfOf: string;
} & ISummarizerObservabilityProps) => void): any;
// (undocumented)
(event: "summarizerStartupFailed", listener: (props: {
reason: SummarizerStopReason;
} & ISummarizerObservabilityProps) => void): any;
}

// @alpha @sealed (undocumented)
export interface ISummarizerObservabilityProps {
// (undocumented)
numUnsummarizedNonRuntimeOps: number;
// (undocumented)
numUnsummarizedRuntimeOps: number;
}

// @alpha @sealed (undocumented)
export type SummarizerStopReason =
/**
* Summarizer client failed to summarize in all attempts.
*/
"failToSummarize"
/**
* Parent client reported that it is no longer connected.
*/
| "parentNotConnected"
/**
* Parent client reported that it is no longer elected the summarizer.
* This is the normal flow; a disconnect will always trigger the parent
* client to no longer be elected as responsible for summaries. Then it
* tries to stop its spawned summarizer client.
*/
| "notElectedParent"
/**
* We are not already running the summarizer and we are not the current elected client id.
*/
| "notElectedClient"
/**
* Summarizer client was disconnected
*/
| "summarizerClientDisconnected"
/**
* running summarizer threw an exception
*/
| "summarizerException"
/**
* The previous summary state on the summarizer is not the most recently acked summary. this also happens when the
* first submitSummary attempt fails for any reason and there's a 2nd summary attempt without an ack
*/
| "latestSummaryStateStale";

// (No @packageDocumentation comment for this package)

```
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import type { AttachState } from "@fluidframework/container-definitions";
import type { IDeltaManager } from "@fluidframework/container-definitions/internal";
import type {
FluidObject,
IEvent,
IEventProvider,
IRequest,
IResponse,
Expand Down Expand Up @@ -41,11 +42,118 @@ export interface IContainerRuntimeWithResolveHandle_Deprecated extends IContaine
* @alpha
* @sealed
*/
export interface IContainerRuntimeEvents extends IContainerRuntimeBaseEvents {
export interface IContainerRuntimeEvents
extends IContainerRuntimeBaseEvents,
ISummarizerEvents {
(event: "dirty" | "disconnected" | "saved" | "attached", listener: () => void);
(event: "connected", listener: (clientId: string) => void);
}

/**
* Identifies why a summarizer stopped running or failed to start up.
*
* Delivered to listeners as `stopReason` on the "summarizerStop" event and as
* `reason` on the "summarizerStartupFailed" event of {@link ISummarizerEvents}.
*
* @legacy
* @alpha
* @sealed
*/
export type SummarizerStopReason =
/**
* Summarizer client failed to summarize in all attempts.
*/
| "failToSummarize"
/**
* Parent client reported that it is no longer connected.
*/
| "parentNotConnected"
/**
* Parent client reported that it is no longer elected the summarizer.
* This is the normal flow; a disconnect will always trigger the parent
* client to no longer be elected as responsible for summaries. Then it
* tries to stop its spawned summarizer client.
*/
| "notElectedParent"
/**
* We are not already running the summarizer and we are not the current elected client id.
*/
| "notElectedClient"
/**
* Summarizer client was disconnected.
*/
| "summarizerClientDisconnected"
/**
* The running summarizer threw an exception.
*/
| "summarizerException"
/**
* The previous summary state on the summarizer is not the most recently acked summary. This also happens when the
* first submitSummary attempt fails for any reason and there's a 2nd summary attempt without an ack.
*/
| "latestSummaryStateStale";

/**
* Properties describing a summarize attempt.
*
* Delivered (together with {@link ISummarizerObservabilityProps}) to listeners of the
* "summarize" event, and — without `result` — to listeners of the
* "summarizeAllAttemptsFailed" event. See {@link ISummarizerEvents}.
*
* @legacy
* @alpha
* @sealed
*/
export interface ISummarizeEventProps {
/**
* Outcome of the summarize attempt.
*/
result: "success" | "failure" | "canceled";
/**
* Which attempt this event refers to.
* NOTE(review): whether this is 0- or 1-based is not visible here — confirm against the emitter.
*/
currentAttempt: number;
/**
* NOTE(review): presumably the maximum number of attempts that will be made — confirm against the emitter.
*/
maxAttempts: number;
/**
* NOTE(review): presumably the error raised by a failed attempt, when there is one — confirm against the emitter.
*/
// eslint-disable-next-line @typescript-eslint/no-explicit-any
error?: any;
/**
* Result message of a failed summarize attempt
*/
failureMessage?: string;
/**
* Was this summarize attempt part of the lastSummary process?
*/
isLastSummary?: boolean;
}

/**
* Op counters included in the props of every event declared on {@link ISummarizerEvents}.
*
* @legacy
* @alpha
* @sealed
*/
export interface ISummarizerObservabilityProps {
/**
* NOTE(review): presumably the number of runtime ops not yet covered by a summary — confirm against the emitter.
*/
numUnsummarizedRuntimeOps: number;
/**
* NOTE(review): presumably the number of non-runtime ops not yet covered by a summary — confirm against the emitter.
*/
numUnsummarizedNonRuntimeOps: number;
}

/**
* Events for observing summarizer activity and failures.
*
* Every listener receives {@link ISummarizerObservabilityProps} in addition to the
* event-specific properties listed on each overload.
*
* @legacy
* @alpha
* @sealed
*/
export interface ISummarizerEvents extends IEvent {
/**
* "summarize": reports the result of a summarize attempt (see {@link ISummarizeEventProps}).
* NOTE(review): presumably emitted once per attempt — confirm against the emitter.
*/
(
event: "summarize",
listener: (props: ISummarizeEventProps & ISummarizerObservabilityProps) => void,
);
/**
* "summarizeAllAttemptsFailed": all attempts to summarize have failed.
* The props are {@link ISummarizeEventProps} without `result`.
*/
(
event: "summarizeAllAttemptsFailed",
listener: (
props: Omit<ISummarizeEventProps, "result"> & ISummarizerObservabilityProps,
) => void,
);
/**
* "summarizerStop": the summarizer has stopped. `stopReason` says why, and `error`
* carries the associated error when there is one.
*/
(
event: "summarizerStop",
listener: (
// eslint-disable-next-line @typescript-eslint/no-explicit-any
props: { stopReason: SummarizerStopReason; error?: any } & ISummarizerObservabilityProps,
) => void,
);
/**
* "summarizerStart": the summarizer has started.
* NOTE(review): `onBehalfOf` presumably identifies the client the summarizer runs for — confirm.
*/
(
event: "summarizerStart",
listener: (props: { onBehalfOf: string } & ISummarizerObservabilityProps) => void,
);
/**
* "summarizerStartupFailed": the summarizer failed to start up, usually meaning it
* wasn't able to catch up. `reason` says why startup failed.
*/
(
event: "summarizerStartupFailed",
listener: (
props: { reason: SummarizerStopReason } & ISummarizerObservabilityProps,
) => void,
);
}

/**
* @legacy
* @alpha
Expand Down
4 changes: 4 additions & 0 deletions packages/runtime/container-runtime-definitions/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,8 @@ export type {
IContainerRuntimeBaseWithCombinedEvents,
IContainerRuntimeEvents,
IContainerRuntimeWithResolveHandle_Deprecated,
SummarizerStopReason,
ISummarizeEventProps,
ISummarizerObservabilityProps,
ISummarizerEvents,
} from "./containerRuntime.js";
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ export enum ContainerMessageType {
}

// @alpha
export class ContainerRuntime extends TypedEventEmitter<IContainerRuntimeEvents & ISummarizerEvents> implements IContainerRuntime, IRuntime, ISummarizerRuntime, ISummarizerInternalsProvider, IProvideFluidHandleContext {
export class ContainerRuntime extends TypedEventEmitter<IContainerRuntimeEvents> implements IContainerRuntime, IRuntime, ISummarizerRuntime, ISummarizerInternalsProvider, IProvideFluidHandleContext {
protected constructor(context: IContainerContext, registry: IFluidDataStoreRegistry, metadata: IContainerRuntimeMetadata | undefined, electedSummarizerData: ISerializedElection | undefined, chunks: [string, string[]][], dataStoreAliasMap: [string, string][], runtimeOptions: Readonly<Required<IContainerRuntimeOptions>>, containerScope: FluidObject, baseLogger: ITelemetryBaseLogger, existing: boolean, blobManagerSnapshot: IBlobManagerLoadInfo, _storage: IDocumentStorageService, createIdCompressor: () => Promise<IIdCompressor & IIdCompressorCore>, documentsSchemaController: DocumentsSchemaController, featureGatesForTelemetry: Record<string, boolean | number | undefined>, provideEntryPoint: (containerRuntime: IContainerRuntime) => Promise<FluidObject>, requestHandler?: ((request: IRequest, runtime: IContainerRuntime) => Promise<IResponse>) | undefined, summaryConfiguration?: ISummaryConfiguration, recentBatchInfo?: [number, string][]);
// (undocumented)
protected addContainerStateToSummary(summaryTree: ISummaryTreeWithStats, fullTree: boolean, trackState: boolean, telemetryContext?: ITelemetryContext): void;
Expand Down Expand Up @@ -287,7 +287,7 @@ export interface IBroadcastSummaryResult {
// @alpha
export interface ICancellableSummarizerController extends ISummaryCancellationToken {
// (undocumented)
stop(reason: SummarizerStopReason): void;
stop(reason: SummarizerStopReason_2): void;
}

// @alpha
Expand Down Expand Up @@ -538,7 +538,7 @@ export interface ISubmitSummaryOptions extends ISummarizeOptions {
readonly summaryLogger: ITelemetryLoggerExt;
}

// @alpha (undocumented)
// @alpha @deprecated (undocumented)
export interface ISummarizeEventProps {
// (undocumented)
currentAttempt: number;
Expand All @@ -556,15 +556,15 @@ export interface ISummarizeOptions {
}

// @alpha (undocumented)
export interface ISummarizer extends IEventProvider<ISummarizerEvents> {
export interface ISummarizer extends IEventProvider<ISummarizerEvents_2> {
// (undocumented)
close(): void;
enqueueSummarize(options: IEnqueueSummarizeOptions): EnqueueSummarizeResult;
readonly ISummarizer?: ISummarizer;
// (undocumented)
run(onBehalfOf: string): Promise<SummarizerStopReason>;
run(onBehalfOf: string): Promise<SummarizerStopReason_2>;
// (undocumented)
stop(reason: SummarizerStopReason): void;
stop(reason: SummarizerStopReason_2): void;
summarizeOnDemand(options: IOnDemandSummarizeOptions): ISummarizeResults;
}

Expand All @@ -575,7 +575,7 @@ export interface ISummarizeResults {
readonly summarySubmitted: Promise<SummarizeResultPart<SubmitSummaryResult, SubmitSummaryFailureData>>;
}

// @alpha (undocumented)
// @alpha @deprecated (undocumented)
export interface ISummarizerEvents extends IEvent {
// (undocumented)
(event: "summarize", listener: (props: ISummarizeEventProps) => void): any;
Expand Down Expand Up @@ -632,7 +632,7 @@ export interface ISummaryBaseConfiguration {
}

// @alpha
export type ISummaryCancellationToken = ICancellationToken<SummarizerStopReason>;
export type ISummaryCancellationToken = ICancellationToken<SummarizerStopReason_2>;

// @alpha (undocumented)
export interface ISummaryCollectionOpEvents extends IEvent {
Expand Down Expand Up @@ -750,7 +750,7 @@ export interface SubmitSummaryFailureData {
export type SubmitSummaryResult = IBaseSummarizeResult | IGenerateSummaryTreeResult | IUploadSummaryResult | ISubmitSummaryOpResult;

// @alpha
export class Summarizer extends TypedEventEmitter<ISummarizerEvents> implements ISummarizer {
export class Summarizer extends TypedEventEmitter<ISummarizerEvents_2> implements ISummarizer {
constructor(
runtime: ISummarizerRuntime, configurationGetter: () => ISummaryConfiguration,
internalsProvider: ISummarizerInternalsProvider, handleContext: IFluidHandleContext, summaryCollection: SummaryCollection, runCoordinatorCreateFn: (runtime: IConnectableRuntime) => Promise<ICancellableSummarizerController>);
Expand All @@ -764,9 +764,9 @@ export class Summarizer extends TypedEventEmitter<ISummarizerEvents> implements
// (undocumented)
recordSummaryAttempt?(summaryRefSeqNum?: number): void;
// (undocumented)
run(onBehalfOf: string): Promise<SummarizerStopReason>;
stop(reason: SummarizerStopReason): void;
static stopReasonCanRunLastSummary(stopReason: SummarizerStopReason): boolean;
run(onBehalfOf: string): Promise<SummarizerStopReason_2>;
stop(reason: SummarizerStopReason_2): void;
static stopReasonCanRunLastSummary(stopReason: SummarizerStopReason_2): boolean;
// (undocumented)
summarizeOnDemand(options: IOnDemandSummarizeOptions): ISummarizeResults;
// (undocumented)
Expand All @@ -784,7 +784,7 @@ export type SummarizeResultPart<TSuccess, TFailure = undefined> = {
error: IRetriableFailureError;
};

// @alpha (undocumented)
// @alpha @deprecated (undocumented)
export type SummarizerStopReason =
/** Summarizer client failed to summarize in all attempts. */
"failToSummarize"
Expand Down
17 changes: 13 additions & 4 deletions packages/runtime/container-runtime/src/containerRuntime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,6 @@ import {
ISubmitSummaryOptions,
ISummarizeResults,
ISummarizer,
ISummarizerEvents,
ISummarizerInternalsProvider,
ISummarizerRuntime,
ISummaryMetadataMessage,
Expand Down Expand Up @@ -840,7 +839,7 @@ export async function loadContainerRuntime(
* @alpha
*/
export class ContainerRuntime
extends TypedEventEmitter<IContainerRuntimeEvents & ISummarizerEvents>
extends TypedEventEmitter<IContainerRuntimeEvents>
implements
IContainerRuntime,
IRuntime,
Expand Down Expand Up @@ -2015,9 +2014,19 @@ export class ContainerRuntime
initialDelayMs: this.initialSummarizerDelayMs,
},
);
this.summaryManager.on("summarize", (eventProps) => {
this.emit("summarize", eventProps);
// Forward events from SummaryManager
[
"summarize",
"summarizeAllAttemptsFailed",
"summarizerStop",
"summarizerStart",
"summarizerStartupFailed",
].forEach((eventName) => {
this.summaryManager?.on(eventName, (...args: any[]) => {
this.emit(eventName, ...args);
});
});

this.summaryManager.start();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,10 @@
* Licensed under the MIT License.
*/

import type { SummarizerStopReason } from "@fluidframework/container-runtime-definitions/internal";
import { assert, Deferred } from "@fluidframework/core-utils/internal";

import {
IConnectableRuntime,
ISummaryCancellationToken,
SummarizerStopReason,
} from "./summarizerTypes.js";
import { IConnectableRuntime, ISummaryCancellationToken } from "./summarizerTypes.js";

/**
* Similar to AbortController, but using promise instead of events
Expand Down
Loading

0 comments on commit cd88ee2

Please sign in to comment.