Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(UserManager): handle concurrent token refresh requests via leader election #434

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions src/LockManager.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// This is temporary until oidc-client-ts updates to a newer TypeScript version.
// @see https://github.com/microsoft/TypeScript-DOM-lib-generator/pull/1291
// Ambient typings for the Web Locks API (`navigator.locks`), merged into the
// global scope so they are visible wherever this declaration file is included.
declare global {
// Adds the `locks` entry point to the global `Navigator` type.
// NOTE(review): declared as always present, while `UserManager` in this change
// still guards with `if (!navigator.locks)` — consider whether this should be optional.
interface Navigator {
locks : LockManager;
}

// Entry point for requesting named locks and inspecting lock state.
interface LockManager {
// Requests the lock `name` with default options; the returned promise carries
// the callback's result type `T`.
// NOTE(review): the Web Locks spec appears to pass `null` (not `undefined`) to the
// callback when a lock is not granted — confirm whether `lock?` should be `Lock | null`.
request<T>(
name : string,
callback : (lock? : Lock) => Promise<T> | T
) : Promise<T>;

// Same as above, with explicit `LockOptions` between the name and the callback.
request<T>(
name : string,
options : LockOptions,
callback : (lock? : Lock) => Promise<T> | T
) : Promise<T>;

// Resolves with a snapshot listing held and pending locks.
query() : Promise<LockManagerSnapshot>;
}

// The two lock modes a request can ask for.
type LockMode = "shared" | "exclusive";

// Optional settings accepted by the three-argument `request` overload.
interface LockOptions {
mode? : LockMode;
ifAvailable? : boolean;
steal? : boolean;
signal? : AbortSignal;
}

// Result shape of `LockManager.query()`.
interface LockManagerSnapshot {
held : LockInfo[];
pending : LockInfo[];
}

// Description of a single held or pending lock inside a snapshot.
// NOTE(review): upstream DOM typings may mark these fields optional — confirm.
interface LockInfo {
name : string;
mode : LockMode;
clientId : string;
}

// The lock object handed to a `request` callback.
interface Lock {
name : string;
mode : LockMode;
}
}

export {};
63 changes: 63 additions & 0 deletions src/UserManager.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,69 @@ describe("UserManager", () => {
}),
);
});

// Two overlapping signinSilent() calls must result in exactly one
// useRefreshToken request, and both callers must see the refreshed token.
it("should only perform one refresh concurrently", async () => {
// arrange
// Stored user that carries a refresh token.
const user = new User({
access_token: "access_token",
token_type: "token_type",
refresh_token: "refresh_token",
profile: {
sub: "sub",
nickname: "Nick",
} as UserProfile,
});

// Stub the token endpoint call so it resolves with a new access token, and keep
// the spy so the number of refresh requests can be counted.
const useRefreshTokenSpy = jest.spyOn(subject["_client"], "useRefreshToken").mockResolvedValue({
access_token: "new_access_token",
profile: {
sub: "sub",
nickname: "Nicholas",
},
} as unknown as SigninResponse);
// Every _loadUser call resolves to the stored user defined above.
subject["_loadUser"] = jest.fn().mockResolvedValue(user);

// act
// Both calls are started before either one is awaited.
const refreshedUsers = await Promise.all([subject.signinSilent(), subject.signinSilent()]);
// assert
expect(refreshedUsers[0]).toHaveProperty("access_token", "new_access_token");
expect(refreshedUsers[1]).toHaveProperty("access_token", "new_access_token");
expect(useRefreshTokenSpy).toBeCalledTimes(1);
});

// signinSilent() must still refresh the token when `navigator.locks` is undefined.
it("should not fail when Web Locks API is unavailable", async () => {
// arrange
// Stored user that carries a refresh token.
const user = new User({
access_token: "access_token",
token_type: "token_type",
refresh_token: "refresh_token",
profile: {
sub: "sub",
nickname: "Nick",
} as UserProfile,
});

// Stub the token endpoint call so it resolves with a new access token.
const useRefreshTokenSpy = jest.spyOn(subject["_client"], "useRefreshToken").mockResolvedValue({
access_token: "new_access_token",
profile: {
sub: "sub",
nickname: "Nicholas",
},
} as unknown as SigninResponse);
// Every _loadUser call resolves to the stored user defined above.
subject["_loadUser"] = jest.fn().mockResolvedValue(user);

// Remember the current lock manager so it can be put back after the test.
const originalLocks = globalThis.navigator.locks;
// @ts-expect-error It is normally disallowed to do this, fine for the test though.
globalThis.navigator.locks = undefined;

// act
try {
const refreshedUser = await subject.signinSilent();
// assert
expect(refreshedUser).toHaveProperty("access_token", "new_access_token");
expect(useRefreshTokenSpy).toBeCalledTimes(1);
} finally {
// Restore the global even when an expectation above throws.
globalThis.navigator.locks = originalLocks;
}
});
});

describe("signinSilentCallback", () => {
Expand Down
41 changes: 34 additions & 7 deletions src/UserManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -263,15 +263,42 @@ export class UserManager {
}

protected async _useRefreshToken(state: RefreshState): Promise<User> {
const response = await this._client.useRefreshToken({
state,
timeoutInSeconds: this.settings.silentRequestTimeoutInSeconds,
const refreshUser = async (): Promise<User> => {
const response = await this._client.useRefreshToken({
state,
timeoutInSeconds: this.settings.silentRequestTimeoutInSeconds,
});
return new User({ ...state, ...response });
};

if (!navigator.locks) {
// Legacy option for older browser which don't support `navigator.locks`.
const user = await refreshUser();
await this.storeUser(user);
this._events.load(user);
return user;
}

const broadcastChannel = new BroadcastChannel(`refresh_token_${state.refresh_token}`);
DASPRiD marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We still need to decide which key would be best for the BroadcastChannel and the Lock.
@kherock

let user : User | null = null;

broadcastChannel.addEventListener("message", (event : MessageEvent<User>) => {
DASPRiD marked this conversation as resolved.
Show resolved Hide resolved
user = event.data;
});
const user = new User({ ...state, ...response });

await this.storeUser(user);
this._events.load(user);
return user;
return await navigator.locks.request(
`refresh_token_${state.refresh_token}`,
async () => {
if (!user) {
user = await refreshUser();
broadcastChannel.postMessage(user);
}
Comment on lines +292 to +295
Copy link
Collaborator

@kherock kherock Apr 4, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The way that this code handles competing requests seems leaky. It looks like when two tabs both ask for a lock on the refresh token state, they can still both send refresh tokens sequentially. It seems to make an assumption that sending a message informing the other tab of the new user via the broadcast channel is a synchronous operation, but if this were the case, no web locks would be necessary at all. I doubt that there will ever be a case where releasing and re-obtaining the lock takes longer than sending a BroadcastChannel message, but I think the possibility is there.

Instead, I think it would be better to use the Web Locks API to perform leader election:

const topic = `refresh_token_${state.refresh_token}`;
const broadcastChannel = new BroadcastChannel(topic);
const bcUserPromise = new Promise(resolve => broadcastChannel.addEventListener("message", ev => resolve(ev.data)));

const user = await navigator.locks.request(topic, { ifAvailable: true }, async (lock) => {
  // a null `lock` implies that this tab lost in leader election
  if (lock) {
    const user = await refreshUser();
    await this.storeUser(user);
    broadcastChannel.postMessage(user);
    return user;
  } else {
    return await bcUserPromise;
  }
});
broadcastChannel.close();
this._events.load(user);

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like this implementation best.

@kherock, just a few questions:

  1. Why is storeUser(user) only performed by the leader? (and not by everyone at the end, like _events.load)
  2. Is a timeout necessary in case postMessage never occurs? (e.g. refreshUser() takes too long and, in the meantime, the user closes the leader tab)
  3. Still wondering about the proper lock key to use... 🤔

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't think about the scenario of the tab being interrupted, but more realistically, my code doesn't handle when the leader encounters an error either.

storeUser writes the user to localStorage/sessionStorage which is presumably shared by both tabs in this scenario, so it only needs to be serialized and stored once. The user actually doesn't even need to be sent over the channel at all, the other tab could just as well do return await this._loadUser() after receiving the notification from the leader (or by waiting for the lock to release, though this doesn't account for the lock releasing because of a failure or interruption).

I think finding a "proper" lock key to use is a separate issue. My hunch is that we should lock onto session_state when the IDP implements OIDC sessions (e.g. Keycloak) and fall back to the refresh token.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • Agreed that some error handling needs to be made.

  • which is presumably shared by both tabs in this scenario

    If I'm not mistaken, sessionStorage is not shared between tabs - so it won't work in that case.

  • I've seen session_state being returned by Keycloak (but not Auth0), so yes, we could use it if we have it and fall back otherwise. But I'm still not confident in using the refresh token as the fallback. Each tab receives a different refresh token, which defeats the purpose of the lock entirely.

Copy link
Collaborator

@kherock kherock Apr 4, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In instances where sessionStorage isn't shared, how would two tabs obtain the same refresh token in the first place? Also, see this comment: #434 (comment)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@DASPRiD, I'm using neither LocalStorage nor SessionStorage; I'm keeping everything in memory. But as I see it, this concurrency issue exists no matter the type of storage, so I would opt for a way to solve it once and for all (and not only for LocalStorage). Why do you think your proposal only covers LocalStorage? I was convinced that simply using a common lock key would be enough to solve it for SessionStorage/MemoryStorage too. Don't you think?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually no, this issue does not affect memory storage, as memory storage is always bound to its own session without sharing it with any other process.

The problem with SessionStorage is the part about session storage duplication when a tab is duplicated. In that instance, they will share the same session (from the perspective of the IDP), but not from their own perspective.

Copy link
Contributor Author

@DASPRiD DASPRiD Apr 6, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kherock Actually, I do see an issue with using client ID + sub as the lock key: It would block unrelated tabs with different sessions but same client ID + sub from performing their refresh, that's why I was using the refresh token.

Copy link
Contributor

@Badisi Badisi Apr 6, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, then we are talking about two different concurrency issues.

  • The one you are facing, ie. when tokens are physically shared between tabs (over a storage). Tabs should wait for the first one to refresh it for the others.
  • And the one I'm facing (only seen with Keycloak and might be a bug, but I haven't gotten any answers from them yet), where the first tab to do a refresh will cancel any previously generated tokens for that same client_id/session (i.e. the ones generated from the other tabs). So here, tabs should also wait for the first one to refresh the tokens and send them to the others. This feels hacky and weird (because they all start with different tokens and end with identical ones), but at least your PR could solve it, as long as the lock key is identical for each tab 🙂.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well yeah, the issue is absolutely on their side. Solving it with a shared lock key like that is a hacky way to solve your issue though, especially since it would only really solve it as long as you are using local storage. If you are using anything else (based on kherock's requested change of not using the BroadcastChannel), this would not help with your issue.

It would, as mentioned above, also introduce the issue that it would block possibly unrelated tabs from performing their refresh. I really only see one way here to address your issue without affecting all other users:

  1. You would have to opt into local storage (since Keycloak forces a shared session on you anyway).
  2. Add an option to the Usermanager so you can opt into using client ID + sub as the lock key, although the default would be the refresh token.

@kherock Actually, instead of using the refresh token, we could also add a randomly generated ID to the store which is used as the common identifier for the lock, but when session_state is available, store that as the common identifier instead. That way it would address @Badisi's issue, and we would not rely on the value of the refresh token.

Although, to ultimately address their issue, we'd need to use the BroadcastChannel. Thinking about that, we could add a delay of maybe 250ms or so to the leader function. That would address any likely race condition. We might not even need such a long delay.


await this.storeUser(user);
this._events.load(user);
return user;
},
);
}

/**
Expand Down
93 changes: 92 additions & 1 deletion test/setup.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,97 @@
import { Log } from "../src";

beforeAll(() => {
// While NodeJs 15.4 has an experimental implementation, it is not API compatible with the browser version.
/**
 * Test-only, in-process replacement for the browser `BroadcastChannel`.
 *
 * Every instance created with the same `name` shares a single `EventTarget`,
 * and `postMessage` dispatches on it synchronously. All "message" listeners
 * registered for that name are therefore invoked, including ones added through
 * the posting instance itself.
 */
class BroadcastChannelPolyfill {
    public onmessage = null;
    public onmessageerror = null;
    // Shared event targets keyed by channel name. A Map (rather than a plain
    // object probed with `in`) keeps names such as "constructor" from matching
    // inherited Object.prototype keys.
    private static readonly _eventTargets = new Map<string, EventTarget>();
    // The shared target for this instance's channel name.
    private readonly _target: EventTarget;

    /**
     * @param name - Channel name; instances with equal names exchange messages.
     */
    public constructor(public readonly name: string) {
        let target = BroadcastChannelPolyfill._eventTargets.get(name);
        if (!target) {
            target = new EventTarget();
            BroadcastChannelPolyfill._eventTargets.set(name, target);
        }
        this._target = target;
    }

    /** No-op: listeners stay registered on the shared target. */
    public close(): void {
        // no-op
    }

    /** Present for interface compatibility only; dispatches nothing and reports success. */
    public dispatchEvent(): boolean {
        return true;
    }

    /**
     * Synchronously dispatches a "message" event carrying `message` as `data`
     * to every listener registered for this channel name.
     *
     * @param message - Payload exposed to listeners as `event.data` (passed by reference, not cloned).
     */
    public postMessage(message: unknown): void {
        const messageEvent = new Event("message") as Event & { data : unknown };
        messageEvent.data = message;
        this._target.dispatchEvent(messageEvent);
    }

    /**
     * Registers `listener` for events of the given `type` on this channel name.
     *
     * @param type - Event type to listen for, e.g. "message" or "messageerror".
     * @param listener - Callback or listener object to invoke.
     * @param options - Forwarded unchanged to `EventTarget.addEventListener`.
     */
    public addEventListener<K extends keyof BroadcastChannelEventMap>(
        type: K,
        listener: (this: BroadcastChannel, ev: BroadcastChannelEventMap[K]) => unknown,
        options?: boolean | AddEventListenerOptions,
    ): void;
    public addEventListener(
        type: string,
        listener: EventListenerOrEventListenerObject,
        options?: boolean | AddEventListenerOptions,
    ): void {
        // Honour the requested type; hard-coding "message" here would make
        // "messageerror" listeners fire on every ordinary message.
        this._target.addEventListener(type, listener, options);
    }

    /**
     * Removes a listener previously added with the same `type`, `listener` and `options`.
     *
     * @param type - Event type the listener was registered for.
     * @param listener - The listener to remove.
     * @param options - Forwarded unchanged to `EventTarget.removeEventListener`.
     */
    public removeEventListener<K extends keyof BroadcastChannelEventMap>(
        type: K,
        listener: (this: BroadcastChannel, ev: BroadcastChannelEventMap[K]) => unknown,
        options?: boolean | EventListenerOptions,
    ): void;
    public removeEventListener(
        type: string,
        listener: EventListenerOrEventListenerObject,
        options?: boolean | EventListenerOptions,
    ): void {
        this._target.removeEventListener(type, listener, options);
    }
}

globalThis.BroadcastChannel = BroadcastChannelPolyfill;

/**
 * Test-only stand-in for `navigator.locks`.
 *
 * Tracks held lock names in a set and serialises callbacks that request the
 * same name. Only `options.mode` is read (to label the lock handed to the
 * callback); it does not affect how locks are granted.
 */
class LockManagerPolyfill {
    // Names of the locks that are currently held.
    private _held: Set<string> = new Set();

    /**
     * Runs the callback once no other holder of `name` exists and releases the
     * name again when the callback settles.
     *
     * @param name - Lock name to acquire.
     * @param options - Either the lock options or, in the two-argument form, the callback itself.
     * @param callback - The callback when options are supplied as the second argument.
     * @returns The value produced by the callback.
     */
    public async request<T>(
        name: string,
        options: LockOptions | ((lock?: Lock) => Promise<T> | T),
        callback?: (lock?: Lock) => Promise<T> | T,
    ): Promise<T> {
        let task = callback;
        let settings: LockOptions = {};
        if (options instanceof Function) {
            // Two-argument form: the second argument is the callback.
            task = options;
        } else {
            settings = options;
        }

        // Poll every 10 ms until the name is free; no waiting happens when it is free already.
        for (;;) {
            if (!this._held.has(name)) {
                break;
            }
            await new Promise(resolve => setTimeout(resolve, 10));
        }

        this._held.add(name);

        try {
            const grantedMode = settings.mode ?? "exclusive";
            return await task!({ name, mode: grantedMode });
        } finally {
            // Always release, even when the callback throws or rejects.
            this._held.delete(name);
        }
    }

    /** Always reports an empty snapshot, regardless of the locks actually held. */
    public async query(): Promise<LockManagerSnapshot> {
        const emptySnapshot = {
            held: [],
            pending: [],
        };
        return await Promise.resolve(emptySnapshot);
    }
}

globalThis.navigator.locks = new LockManagerPolyfill();

beforeAll(async () => {
globalThis.fetch = jest.fn();

const unload = () => window.dispatchEvent(new Event("unload"));
Expand Down
2 changes: 1 addition & 1 deletion tsconfig.build.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@
"emitDeclarationOnly": true,
"tsBuildInfoFile": "tsconfig.build.tsbuildinfo"
},
"files": ["src/index.ts"],
"files": ["src/index.ts", "src/LockManager.d.ts"],
"include": []
}