Skip to content

Commit 8f7bed0

Browse files
committed
fix(ui): improve provider rate limit display
1 parent e851b93 commit 8f7bed0

File tree

25 files changed

+234
-25
lines changed

25 files changed

+234
-25
lines changed

packages/evals/src/cli/runTask.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,7 @@ export const runTask = async ({ run, task, publish, logger, jobToken }: RunTaskO
301301
"diff_error",
302302
"condense_context",
303303
"condense_context_error",
304+
"api_req_rate_limit_wait",
304305
"api_req_retry_delayed",
305306
"api_req_retried",
306307
]
@@ -314,7 +315,7 @@ export const runTask = async ({ run, task, publish, logger, jobToken }: RunTaskO
314315
if (
315316
eventName === RooCodeEventName.Message &&
316317
payload[0].message.say &&
317-
["api_req_retry_delayed", "api_req_retried"].includes(payload[0].message.say)
318+
["api_req_rate_limit_wait", "api_req_retry_delayed", "api_req_retried"].includes(payload[0].message.say)
318319
) {
319320
isApiUnstable = true
320321
}

packages/types/src/message.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ export function isNonBlockingAsk(ask: ClineAsk): ask is NonBlockingAsk {
129129
* - `api_req_finished`: Indicates an API request has completed successfully
130130
* - `api_req_retried`: Indicates an API request is being retried after a failure
131131
* - `api_req_retry_delayed`: Indicates an API request retry has been delayed
132+
* - `api_req_rate_limit_wait`: Indicates a configured rate-limit wait (not an error)
132133
* - `api_req_deleted`: Indicates an API request has been deleted/cancelled
133134
* - `text`: General text message or assistant response
134135
* - `reasoning`: Assistant's reasoning or thought process (often hidden from user)
@@ -155,6 +156,7 @@ export const clineSays = [
155156
"api_req_finished",
156157
"api_req_retried",
157158
"api_req_retry_delayed",
159+
"api_req_rate_limit_wait",
158160
"api_req_deleted",
159161
"text",
160162
"image",

src/core/task/Task.ts

Lines changed: 57 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2343,6 +2343,17 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
23432343
const modelId = getModelId(this.apiConfiguration)
23442344
const apiProtocol = getApiProtocol(this.apiConfiguration.apiProvider, modelId)
23452345

2346+
// Respect user-configured provider rate limiting BEFORE we emit api_req_started.
2347+
// This prevents the UI from showing an "API Request..." spinner while we are
2348+
// intentionally waiting due to the rate limit slider.
2349+
//
2350+
// NOTE: We also set Task.lastGlobalApiRequestTime here to reserve this slot
2351+
// before we build environment details (which can take time).
2352+
// This ensures subsequent requests (including subtasks) still honour the
2353+
// provider rate-limit window.
2354+
await this.maybeWaitForProviderRateLimit(currentItem.retryAttempt ?? 0)
2355+
Task.lastGlobalApiRequestTime = performance.now()
2356+
23462357
await this.say(
23472358
"api_req_started",
23482359
JSON.stringify({
@@ -2550,7 +2561,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
25502561
// Yields only if the first chunk is successful, otherwise will
25512562
// allow the user to retry the request (most likely due to rate
25522563
// limit error, which gets thrown on the first chunk).
2553-
const stream = this.attemptApiRequest()
2564+
const stream = this.attemptApiRequest(currentItem.retryAttempt ?? 0, { skipProviderRateLimit: true })
25542565
let assistantMessage = ""
25552566
let reasoningMessage = ""
25562567
let pendingGroundingSources: GroundingSource[] = []
@@ -3652,7 +3663,43 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
36523663
await this.providerRef.deref()?.postMessageToWebview({ type: "condenseTaskContextResponse", text: this.taskId })
36533664
}
36543665

3655-
public async *attemptApiRequest(retryAttempt: number = 0): ApiStream {
3666+
/**
3667+
* Enforce the user-configured provider rate limit.
3668+
*
3669+
* NOTE: This is intentionally treated as expected behavior and is surfaced via
3670+
* the `api_req_rate_limit_wait` say type (not an error).
3671+
*/
3672+
private async maybeWaitForProviderRateLimit(retryAttempt: number): Promise<void> {
3673+
const state = await this.providerRef.deref()?.getState()
3674+
const rateLimitSeconds =
3675+
state?.apiConfiguration?.rateLimitSeconds ?? this.apiConfiguration?.rateLimitSeconds ?? 0
3676+
3677+
if (rateLimitSeconds <= 0 || !Task.lastGlobalApiRequestTime) {
3678+
return
3679+
}
3680+
3681+
const now = performance.now()
3682+
const timeSinceLastRequest = now - Task.lastGlobalApiRequestTime
3683+
const rateLimitDelay = Math.ceil(
3684+
Math.min(rateLimitSeconds, Math.max(0, rateLimitSeconds * 1000 - timeSinceLastRequest) / 1000),
3685+
)
3686+
3687+
// Only show the countdown UX on the first attempt. Retry flows have their own delay messaging.
3688+
if (rateLimitDelay > 0 && retryAttempt === 0) {
3689+
for (let i = rateLimitDelay; i > 0; i--) {
3690+
const delayMessage = `Rate limiting for ${i} seconds...`
3691+
await this.say("api_req_rate_limit_wait", delayMessage, undefined, true)
3692+
await delay(1000)
3693+
}
3694+
// Finalize the partial message so the UI doesn't keep rendering an in-progress spinner.
3695+
await this.say("api_req_rate_limit_wait", undefined, undefined, false)
3696+
}
3697+
}
3698+
3699+
public async *attemptApiRequest(
3700+
retryAttempt: number = 0,
3701+
options: { skipProviderRateLimit?: boolean } = {},
3702+
): ApiStream {
36563703
const state = await this.providerRef.deref()?.getState()
36573704

36583705
const {
@@ -3689,29 +3736,17 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
36893736
}
36903737
}
36913738

3692-
let rateLimitDelay = 0
3693-
3694-
// Use the shared timestamp so that subtasks respect the same rate-limit
3695-
// window as their parent tasks.
3696-
if (Task.lastGlobalApiRequestTime) {
3697-
const now = performance.now()
3698-
const timeSinceLastRequest = now - Task.lastGlobalApiRequestTime
3699-
const rateLimit = apiConfiguration?.rateLimitSeconds || 0
3700-
rateLimitDelay = Math.ceil(Math.min(rateLimit, Math.max(0, rateLimit * 1000 - timeSinceLastRequest) / 1000))
3701-
}
3702-
3703-
// Only show rate limiting message if we're not retrying. If retrying, we'll include the delay there.
3704-
if (rateLimitDelay > 0 && retryAttempt === 0) {
3705-
// Show countdown timer
3706-
for (let i = rateLimitDelay; i > 0; i--) {
3707-
const delayMessage = `Rate limiting for ${i} seconds...`
3708-
await this.say("api_req_retry_delayed", delayMessage, undefined, true)
3709-
await delay(1000)
3710-
}
3739+
if (!options.skipProviderRateLimit) {
3740+
await this.maybeWaitForProviderRateLimit(retryAttempt)
37113741
}
37123742

3713-
// Update last request time before making the request so that subsequent
3743+
// Update last request time right before making the request so that subsequent
37143744
// requests — even from new subtasks — will honour the provider's rate-limit.
3745+
//
3746+
// NOTE: When recursivelyMakeClineRequests handles rate limiting, it sets the
3747+
// timestamp earlier to include the environment details build. We still set it
3748+
// here for direct callers (tests) and for the case where we didn't rate-limit
3749+
// in the caller.
37153750
Task.lastGlobalApiRequestTime = performance.now()
37163751

37173752
const systemPrompt = await this.getSystemPrompt()

src/core/task/__tests__/Task.spec.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1041,6 +1041,9 @@ describe("Cline", () => {
10411041
startTask: false,
10421042
})
10431043

1044+
// Spy on child.say to verify the emitted message type
1045+
const saySpy = vi.spyOn(child, "say")
1046+
10441047
// Mock the child's API stream
10451048
const childMockStream = {
10461049
async *[Symbol.asyncIterator]() {
@@ -1067,6 +1070,17 @@ describe("Cline", () => {
10671070
// Verify rate limiting was applied
10681071
expect(mockDelay).toHaveBeenCalledTimes(mockApiConfig.rateLimitSeconds)
10691072
expect(mockDelay).toHaveBeenCalledWith(1000)
1073+
1074+
// Verify we used the non-error rate-limit wait message type
1075+
expect(saySpy).toHaveBeenCalledWith(
1076+
"api_req_rate_limit_wait",
1077+
expect.stringContaining("Rate limiting for"),
1078+
undefined,
1079+
true,
1080+
)
1081+
1082+
// Verify the wait message was finalized
1083+
expect(saySpy).toHaveBeenCalledWith("api_req_rate_limit_wait", undefined, undefined, false)
10701084
}, 10000) // Increase timeout to 10 seconds
10711085

10721086
it("should not apply rate limiting if enough time has passed", async () => {

webview-ui/src/components/chat/ChatRow.tsx

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,8 @@ export const ChatRowContent = ({
298298
style={{ color: successColor, marginBottom: "-1.5px" }}></span>,
299299
<span style={{ color: successColor, fontWeight: "bold" }}>{t("chat:taskCompleted")}</span>,
300300
]
301+
case "api_req_rate_limit_wait":
302+
return []
301303
case "api_req_retry_delayed":
302304
return []
303305
case "api_req_started":
@@ -327,8 +329,10 @@ export const ChatRowContent = ({
327329
getIconSpan("arrow-swap", normalColor)
328330
) : apiRequestFailedMessage ? (
329331
getIconSpan("error", errorColor)
330-
) : (
332+
) : isLast ? (
331333
<ProgressIndicator />
334+
) : (
335+
getIconSpan("arrow-swap", normalColor)
332336
),
333337
apiReqCancelReason !== null && apiReqCancelReason !== undefined ? (
334338
apiReqCancelReason === "user_cancelled" ? (
@@ -356,7 +360,17 @@ export const ChatRowContent = ({
356360
default:
357361
return [null, null]
358362
}
359-
}, [type, isCommandExecuting, message, isMcpServerResponding, apiReqCancelReason, cost, apiRequestFailedMessage, t])
363+
}, [
364+
type,
365+
isCommandExecuting,
366+
message,
367+
isMcpServerResponding,
368+
apiReqCancelReason,
369+
cost,
370+
apiRequestFailedMessage,
371+
t,
372+
isLast,
373+
])
360374

361375
const headerStyle: React.CSSProperties = {
362376
display: "flex",
@@ -1149,6 +1163,50 @@ export const ChatRowContent = ({
11491163
errorDetails={rawError}
11501164
/>
11511165
)
1166+
case "api_req_rate_limit_wait": {
1167+
const isWaiting = message.partial === true
1168+
const getIconSpan = (iconName: string, color: string) => (
1169+
<div
1170+
style={{
1171+
width: 16,
1172+
height: 16,
1173+
display: "flex",
1174+
alignItems: "center",
1175+
justifyContent: "center",
1176+
}}>
1177+
<span
1178+
className={`codicon codicon-${iconName}`}
1179+
style={{ color, fontSize: 16, marginBottom: "-1.5px" }}
1180+
/>
1181+
</div>
1182+
)
1183+
1184+
const waitSeconds = (() => {
1185+
if (!message.text) return undefined
1186+
const match = message.text.match(/Rate limiting for (\d+) seconds\.{3}/)
1187+
return match ? Number(match[1]) : undefined
1188+
})()
1189+
1190+
return (
1191+
<div
1192+
className={`group text-sm transition-opacity ${isWaiting ? "opacity-100" : "opacity-40 hover:opacity-100"}`}
1193+
style={{
1194+
...headerStyle,
1195+
marginBottom: 0,
1196+
justifyContent: "space-between",
1197+
}}>
1198+
<div style={{ display: "flex", alignItems: "center", gap: "10px", flexGrow: 1 }}>
1199+
{isWaiting ? <ProgressIndicator /> : getIconSpan("clock", cancelledColor)}
1200+
<span style={{ color: normalColor }}>{t("chat:apiRequest.rateLimitWait")}</span>
1201+
</div>
1202+
{isWaiting && waitSeconds !== undefined && (
1203+
<span className="text-xs font-light text-vscode-descriptionForeground">
1204+
{waitSeconds}s
1205+
</span>
1206+
)}
1207+
</div>
1208+
)
1209+
}
11521210
case "api_req_finished":
11531211
return null // we should never see this message type
11541212
case "text":

webview-ui/src/components/chat/ChatView.tsx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,7 @@ const ChatViewComponent: React.ForwardRefRenderFunction<ChatViewRef, ChatViewPro
400400
// an "ask" while ask is waiting for response.
401401
switch (lastMessage.say) {
402402
case "api_req_retry_delayed":
403+
case "api_req_rate_limit_wait":
403404
setSendingDisabled(true)
404405
break
405406
case "api_req_started":
@@ -957,6 +958,7 @@ const ChatViewComponent: React.ForwardRefRenderFunction<ChatViewRef, ChatViewPro
957958
case "api_req_deleted":
958959
return false
959960
case "api_req_retry_delayed":
961+
case "api_req_rate_limit_wait":
960962
const last1 = modifiedMessages.at(-1)
961963
const last2 = modifiedMessages.at(-2)
962964
if (last1?.ask === "resume_task" && last2 === message) {
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
import React from "react"
2+
3+
import { render, screen } from "@/utils/test-utils"
4+
import { QueryClient, QueryClientProvider } from "@tanstack/react-query"
5+
import { ExtensionStateContextProvider } from "@src/context/ExtensionStateContext"
6+
import { ChatRowContent } from "../ChatRow"
7+
8+
// Mock i18n
9+
vi.mock("react-i18next", () => ({
10+
useTranslation: () => ({
11+
t: (key: string) => {
12+
const map: Record<string, string> = {
13+
"chat:apiRequest.rateLimitWait": "Rate limiting",
14+
}
15+
return map[key] ?? key
16+
},
17+
}),
18+
Trans: ({ children }: { children?: React.ReactNode }) => <>{children}</>,
19+
initReactI18next: { type: "3rdParty", init: () => {} },
20+
}))
21+
22+
const queryClient = new QueryClient()
23+
24+
function renderChatRow(message: any) {
25+
return render(
26+
<ExtensionStateContextProvider>
27+
<QueryClientProvider client={queryClient}>
28+
<ChatRowContent
29+
message={message}
30+
isExpanded={false}
31+
isLast={false}
32+
isStreaming={false}
33+
onToggleExpand={() => {}}
34+
onSuggestionClick={() => {}}
35+
onBatchFileResponse={() => {}}
36+
onFollowUpUnmount={() => {}}
37+
isFollowUpAnswered={false}
38+
/>
39+
</QueryClientProvider>
40+
</ExtensionStateContextProvider>,
41+
)
42+
}
43+
44+
describe("ChatRow - rate limit wait", () => {
45+
it("renders a non-error progress row for api_req_rate_limit_wait", () => {
46+
const message: any = {
47+
type: "say",
48+
say: "api_req_rate_limit_wait",
49+
ts: Date.now(),
50+
partial: true,
51+
text: "Rate limiting for 1 seconds...",
52+
}
53+
54+
renderChatRow(message)
55+
56+
expect(screen.getByText("Rate limiting")).toBeInTheDocument()
57+
// Should show countdown, but should NOT show the error-details affordance.
58+
expect(screen.getByText("1s")).toBeInTheDocument()
59+
expect(screen.queryByText("Details")).toBeNull()
60+
})
61+
62+
it("renders a greyed-out completed row with a clock icon when wait is complete", () => {
63+
const message: any = {
64+
type: "say",
65+
say: "api_req_rate_limit_wait",
66+
ts: Date.now(),
67+
partial: false,
68+
text: undefined,
69+
}
70+
71+
renderChatRow(message)
72+
73+
expect(screen.getByText("Rate limiting")).toBeInTheDocument()
74+
// Completed row should not show countdown
75+
expect(screen.queryByText(/\ds/)).toBeNull()
76+
// And should render a clock icon (codicon-clock) in the DOM.
77+
expect(document.querySelector(".codicon-clock")).not.toBeNull()
78+
})
79+
})

webview-ui/src/i18n/locales/ca/chat.json

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

webview-ui/src/i18n/locales/de/chat.json

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

webview-ui/src/i18n/locales/en/chat.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@
144144
"streaming": "API Request...",
145145
"cancelled": "API Request Cancelled",
146146
"streamingFailed": "API Streaming Failed",
147+
"rateLimitWait": "Rate limiting",
147148
"errorTitle": "Provider Error {{code}}",
148149
"errorMessage": {
149150
"docs": "Docs",

0 commit comments

Comments
 (0)