Skip to content

Commit

Permalink
Temporarily using simple rate limit error handler.
Browse files Browse the repository at this point in the history
  • Loading branch information
jakenuts committed Nov 16, 2024
1 parent be6bc6e commit e43f451
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 58 deletions.
30 changes: 15 additions & 15 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "claude-dev-unleashed",
"displayName": "Cline Unleashed⛓️‍💥 (prev. Claude Dev)",
"description": "Autonomous coding agent right in your IDE, capable of creating/editing files, running commands, using the browser, and more with your permission every step of the way.",
"version": "2.1.6",
"version": "2.1.6-ratelimit",
"icon": "assets/icons/icon.png",
"galleryBanner": {
"color": "#617A91",
Expand Down Expand Up @@ -152,9 +152,9 @@
"typescript": "^5.4.5"
},
"dependencies": {
"@anthropic-ai/bedrock-sdk": "^0.10.2",
"@anthropic-ai/sdk": "^0.26.0",
"@anthropic-ai/vertex-sdk": "^0.4.1",
"@anthropic-ai/bedrock-sdk": "^0.11.2",
"@anthropic-ai/sdk": "^0.32.1",
"@anthropic-ai/vertex-sdk": "^0.5.2",
"@google/generative-ai": "^0.18.0",
"@types/clone-deep": "^4.0.4",
"@types/pdf-parse": "^1.1.4",
Expand Down
34 changes: 17 additions & 17 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

52 changes: 30 additions & 22 deletions src/api/providers/anthropic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,34 +40,40 @@ export class AnthropicHandler implements ApiHandler {
)
const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1
const secondLastMsgUserIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1

const outgoingMessages:Array<any> = messages.map((message, index) => {
if (index === lastUserMsgIndex || index === secondLastMsgUserIndex) {
return {
...message,
content:
typeof message.content === "string"
? [
{
type: "text",
text: message.content,
cache_control: { type: "ephemeral" },
},
]
: message.content.map((content, contentIndex) =>
contentIndex === message.content.length - 1
? { ...content, cache_control: { type: "ephemeral" } }
: content
),
}
}
return message
});

const currentTime = new Date().toLocaleTimeString([], { hour: '2-digit', minute: '2-digit', second: '2-digit' });
console.log(`[${currentTime}] 🤖 Sending ${outgoingMessages.length} messages to the prompt caching API`);

stream = await this.client.beta.promptCaching.messages.create(
{
model: modelId,
max_tokens: this.getModel().info.maxTokens || 8192,
temperature: 0,
system: [{ text: systemPrompt, type: "text", cache_control: { type: "ephemeral" } }], // setting cache breakpoint for system prompt so new tasks can reuse it
messages: messages.map((message, index) => {
if (index === lastUserMsgIndex || index === secondLastMsgUserIndex) {
return {
...message,
content:
typeof message.content === "string"
? [
{
type: "text",
text: message.content,
cache_control: { type: "ephemeral" },
},
]
: message.content.map((content, contentIndex) =>
contentIndex === message.content.length - 1
? { ...content, cache_control: { type: "ephemeral" } }
: content
),
}
}
return message
}),
messages: outgoingMessages,
// tools, // cache breakpoints go from tools > system > messages, and since tools dont change, we can just set the breakpoint at the end of system (this avoids having to set a breakpoint at the end of tools which by itself does not meet min requirements for haiku caching)
// tool_choice: { type: "auto" },
// tools: tools,
Expand All @@ -93,6 +99,8 @@ export class AnthropicHandler implements ApiHandler {
break
}
default: {
console.log(`🤖 Sending ${messages.length} to the API`, messages)

stream = (await this.client.messages.create({
model: modelId,
max_tokens: this.getModel().info.maxTokens || 8192,
Expand Down
14 changes: 14 additions & 0 deletions src/core/Cline.ts
Original file line number Diff line number Diff line change
Expand Up @@ -776,12 +776,26 @@ export class Cline {

const stream = this.api.createMessage(systemPrompt, this.apiConversationHistory)
const iterator = stream[Symbol.asyncIterator]()
let rateLimitsErrors = 0;
let rateLimitDelay = 30;

try {
// awaiting first chunk to see if it will throw an error
const firstChunk = await iterator.next()
yield firstChunk.value
} catch (error) {

if (error.status === 429 && rateLimitsErrors <= 3) {

const currentTime = new Date().toLocaleTimeString([], { hour: '2-digit', minute: '2-digit', second: '2-digit' });
console.log(`[${currentTime}] 🤖 Rate limited, waiting ${rateLimitDelay} seconds to retry`, error);
await delay(rateLimitDelay * 1000);
rateLimitsErrors++;
rateLimitDelay *= 2;
yield* this.attemptApiRequest(previousApiReqIndex)
return
}

// note that this api_req_failed ask is unique in that we only present this option if the api hasn't streamed any content yet (ie it fails on the first chunk), as it would allow them to hit a retry button. However if the api failed mid-stream, it could be in any arbitrary state where some tools may have executed, so that error is handled differently and requires cancelling the task entirely.
const { response } = await this.ask(
"api_req_failed",
Expand Down

0 comments on commit e43f451

Please sign in to comment.