From e43f45104dfdc66ef7808e0e1ae9179d4bb72f7b Mon Sep 17 00:00:00 2001 From: James White Date: Fri, 15 Nov 2024 16:27:12 -0800 Subject: [PATCH] Temporarily using simple rate limit error handler.. --- package-lock.json | 30 ++++++++++---------- package.json | 8 +++--- pnpm-lock.yaml | 34 +++++++++++----------- src/api/providers/anthropic.ts | 52 ++++++++++++++++++++-------------- src/core/Cline.ts | 14 +++++++++ 5 files changed, 80 insertions(+), 58 deletions(-) diff --git a/package-lock.json b/package-lock.json index beb0d23f7..9442abf5c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,9 +9,9 @@ "version": "2.1.6", "license": "Apache-2.0", "dependencies": { - "@anthropic-ai/bedrock-sdk": "^0.10.2", - "@anthropic-ai/sdk": "^0.26.0", - "@anthropic-ai/vertex-sdk": "^0.4.1", + "@anthropic-ai/bedrock-sdk": "^0.11.2", + "@anthropic-ai/sdk": "^0.32.1", + "@anthropic-ai/vertex-sdk": "^0.5.2", "@google/generative-ai": "^0.18.0", "@types/clone-deep": "^4.0.4", "@types/pdf-parse": "^1.1.4", @@ -59,9 +59,9 @@ } }, "node_modules/@anthropic-ai/bedrock-sdk": { - "version": "0.10.4", - "resolved": "https://registry.npmjs.org/@anthropic-ai/bedrock-sdk/-/bedrock-sdk-0.10.4.tgz", - "integrity": "sha512-szduEHbMli6XL934xrraYg5cFuKL/1oMyj/iZuEVjtddQ7eD5cXObzWobsv5mTLWijQmSzMfFD+JAUHDPHlQ/Q==", + "version": "0.11.2", + "resolved": "https://registry.npmjs.org/@anthropic-ai/bedrock-sdk/-/bedrock-sdk-0.11.2.tgz", + "integrity": "sha512-s6YumXjxXAxUW+yS/EUcN1nrKhd4HNZ4bGHxxu2jnU0/y7jYRQ4YHnTtCndDVSgxMq2IKMM0MhLAGKYfWPiing==", "license": "MIT", "dependencies": { "@anthropic-ai/sdk": "^0", @@ -78,9 +78,9 @@ } }, "node_modules/@anthropic-ai/sdk": { - "version": "0.26.1", - "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.26.1.tgz", - "integrity": "sha512-HeMJP1bDFfQPQS3XTJAmfXkFBdZ88wvfkE05+vsoA9zGn5dHqEaHOPsqkazf/i0gXYg2XlLxxZrf6rUAarSqzw==", + "version": "0.32.1", + "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.32.1.tgz", + "integrity": "sha512-U9JwTrDvdQ9iWuABVsMLj8nJVwAyQz6QXvgLsVhryhCEPkLsbcP/MXxm+jYcAwLoV8ESbaTTjnD4kuAFa+Hyjg==", "license": "MIT", "dependencies": { "@types/node": "^18.11.18", @@ -108,9 +108,9 @@ "license": "MIT" }, "node_modules/@anthropic-ai/vertex-sdk": { - "version": "0.4.3", - "resolved": "https://registry.npmjs.org/@anthropic-ai/vertex-sdk/-/vertex-sdk-0.4.3.tgz", - "integrity": "sha512-2Uef0C5P2Hx+T88RnUSRA3u4aZqmqnrRSOb2N64ozgKPiSUPTM5JlggAq2b32yWMj5d3MLYa6spJXKMmHXOcoA==", + "version": "0.5.2", + "resolved": "https://registry.npmjs.org/@anthropic-ai/vertex-sdk/-/vertex-sdk-0.5.2.tgz", + "integrity": "sha512-FLf+OSuJ+opN/cWeQuAWBc7vRDemDwQ52TdCNmd8giEmc3S843Y9Ne3+JybJ9l+/eVkLIG/BZLXHASIckTtGnQ==", "license": "MIT", "dependencies": { "@anthropic-ai/sdk": ">=0.14 <1", @@ -6484,9 +6484,9 @@ } }, "node_modules/es-abstract": { - "version": "1.23.4", - "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.23.4.tgz", - "integrity": "sha512-HR1gxH5OaiN7XH7uiWH0RLw0RcFySiSoW1ctxmD1ahTw3uGBtkmm/ng0tDU1OtYx5OK6EOL5Y6O21cDflG3Jcg==", + "version": "1.23.5", + "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.23.5.tgz", + "integrity": "sha512-vlmniQ0WNPwXqA0BnmwV3Ng7HxiGlh6r5U6JcTMNx8OilcAGqVJBHJcPjqOMaczU9fRuRK5Px2BdVyPRnKMMVQ==", "dev": true, "license": "MIT", "dependencies": { diff --git a/package.json b/package.json index 23dd99a32..26af4d721 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "claude-dev-unleashed", "displayName": "Cline Unleashed⛓️‍💥 (prev. Claude Dev)", "description": "Autonomous coding agent right in your IDE, capable of creating/editing files, running commands, using the browser, and more with your permission every step of the way.", - "version": "2.1.6", + "version": "2.1.6-ratelimit", "icon": "assets/icons/icon.png", "galleryBanner": { "color": "#617A91", @@ -152,9 +152,9 @@ "typescript": "^5.4.5" }, "dependencies": { - "@anthropic-ai/bedrock-sdk": "^0.10.2", - "@anthropic-ai/sdk": "^0.26.0", - "@anthropic-ai/vertex-sdk": "^0.4.1", + "@anthropic-ai/bedrock-sdk": "^0.11.2", + "@anthropic-ai/sdk": "^0.32.1", + "@anthropic-ai/vertex-sdk": "^0.5.2", "@google/generative-ai": "^0.18.0", "@types/clone-deep": "^4.0.4", "@types/pdf-parse": "^1.1.4", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 9664a6e50..56775aa46 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -9,14 +9,14 @@ importers: .: dependencies: '@anthropic-ai/bedrock-sdk': - specifier: ^0.10.2 - version: 0.10.4(@aws-sdk/client-sso-oidc@3.691.0(@aws-sdk/client-sts@3.691.0)) + specifier: ^0.11.2 + version: 0.11.2(@aws-sdk/client-sso-oidc@3.691.0(@aws-sdk/client-sts@3.691.0)) '@anthropic-ai/sdk': - specifier: ^0.26.0 - version: 0.26.1 + specifier: ^0.32.1 + version: 0.32.1 '@anthropic-ai/vertex-sdk': - specifier: ^0.4.1 - version: 0.4.3 + specifier: ^0.5.2 + version: 0.5.2 '@google/generative-ai': specifier: ^0.18.0 version: 0.18.0 @@ -138,14 +138,14 @@ importers: packages: - '@anthropic-ai/bedrock-sdk@0.10.4': - resolution: {integrity: sha512-szduEHbMli6XL934xrraYg5cFuKL/1oMyj/iZuEVjtddQ7eD5cXObzWobsv5mTLWijQmSzMfFD+JAUHDPHlQ/Q==} + '@anthropic-ai/bedrock-sdk@0.11.2': + resolution: {integrity: sha512-s6YumXjxXAxUW+yS/EUcN1nrKhd4HNZ4bGHxxu2jnU0/y7jYRQ4YHnTtCndDVSgxMq2IKMM0MhLAGKYfWPiing==} - '@anthropic-ai/sdk@0.26.1': - resolution: {integrity: sha512-HeMJP1bDFfQPQS3XTJAmfXkFBdZ88wvfkE05+vsoA9zGn5dHqEaHOPsqkazf/i0gXYg2XlLxxZrf6rUAarSqzw==} + '@anthropic-ai/sdk@0.32.1': + resolution: {integrity: sha512-U9JwTrDvdQ9iWuABVsMLj8nJVwAyQz6QXvgLsVhryhCEPkLsbcP/MXxm+jYcAwLoV8ESbaTTjnD4kuAFa+Hyjg==} - '@anthropic-ai/vertex-sdk@0.4.3': - resolution: {integrity: sha512-2Uef0C5P2Hx+T88RnUSRA3u4aZqmqnrRSOb2N64ozgKPiSUPTM5JlggAq2b32yWMj5d3MLYa6spJXKMmHXOcoA==} + '@anthropic-ai/vertex-sdk@0.5.2': + resolution: {integrity: sha512-FLf+OSuJ+opN/cWeQuAWBc7vRDemDwQ52TdCNmd8giEmc3S843Y9Ne3+JybJ9l+/eVkLIG/BZLXHASIckTtGnQ==} '@aws-crypto/crc32@3.0.0': resolution: {integrity: sha512-IzSgsrxUcsrejQbPVilIKy16kAT52EwB6zSaI+M3xxIhKh5+aldEyvI+z6erM7TCLB2BJsFrtHjp6/4/sr+3dA==} @@ -2750,9 +2750,9 @@ packages: snapshots: - '@anthropic-ai/bedrock-sdk@0.10.4(@aws-sdk/client-sso-oidc@3.691.0(@aws-sdk/client-sts@3.691.0))': + '@anthropic-ai/bedrock-sdk@0.11.2(@aws-sdk/client-sso-oidc@3.691.0(@aws-sdk/client-sts@3.691.0))': dependencies: - '@anthropic-ai/sdk': 0.26.1 + '@anthropic-ai/sdk': 0.32.1 '@aws-crypto/sha256-js': 4.0.0 '@aws-sdk/client-bedrock-runtime': 3.691.0 '@aws-sdk/credential-providers': 3.691.0(@aws-sdk/client-sso-oidc@3.691.0(@aws-sdk/client-sts@3.691.0)) @@ -2768,7 +2768,7 @@ snapshots: - aws-crt - encoding - '@anthropic-ai/sdk@0.26.1': + '@anthropic-ai/sdk@0.32.1': dependencies: '@types/node': 18.19.64 '@types/node-fetch': 2.6.12 @@ -2780,9 +2780,9 @@ snapshots: transitivePeerDependencies: - encoding - '@anthropic-ai/vertex-sdk@0.4.3': + '@anthropic-ai/vertex-sdk@0.5.2': dependencies: - '@anthropic-ai/sdk': 0.26.1 + '@anthropic-ai/sdk': 0.32.1 google-auth-library: 9.14.2 transitivePeerDependencies: - encoding diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts index 5014ec86a..8539877be 100644 --- a/src/api/providers/anthropic.ts +++ b/src/api/providers/anthropic.ts @@ -40,34 +40,40 @@ export class AnthropicHandler implements ApiHandler { ) const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1 const secondLastMsgUserIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1 + + const outgoingMessages:Array = messages.map((message, index) => { + if (index === lastUserMsgIndex || index === secondLastMsgUserIndex) { + return { + ...message, + content: + typeof message.content === "string" + ? [ + { + type: "text", + text: message.content, + cache_control: { type: "ephemeral" }, + }, + ] + : message.content.map((content, contentIndex) => + contentIndex === message.content.length - 1 + ? { ...content, cache_control: { type: "ephemeral" } } + : content + ), + } + } + return message + }); + + const currentTime = new Date().toLocaleTimeString([], { hour: '2-digit', minute: '2-digit', second: '2-digit' }); + console.log(`[${currentTime}] 🤖 Sending ${outgoingMessages.length} messages to the prompt caching API`); + stream = await this.client.beta.promptCaching.messages.create( { model: modelId, max_tokens: this.getModel().info.maxTokens || 8192, temperature: 0, system: [{ text: systemPrompt, type: "text", cache_control: { type: "ephemeral" } }], // setting cache breakpoint for system prompt so new tasks can reuse it - messages: messages.map((message, index) => { - if (index === lastUserMsgIndex || index === secondLastMsgUserIndex) { - return { - ...message, - content: - typeof message.content === "string" - ? [ - { - type: "text", - text: message.content, - cache_control: { type: "ephemeral" }, - }, - ] - : message.content.map((content, contentIndex) => - contentIndex === message.content.length - 1 - ? { ...content, cache_control: { type: "ephemeral" } } - : content - ), - } - } - return message - }), + messages: outgoingMessages, // tools, // cache breakpoints go from tools > system > messages, and since tools dont change, we can just set the breakpoint at the end of system (this avoids having to set a breakpoint at the end of tools which by itself does not meet min requirements for haiku caching) // tool_choice: { type: "auto" }, // tools: tools, @@ -93,6 +99,8 @@ export class AnthropicHandler implements ApiHandler { break } default: { + console.log(`🤖 Sending ${messages.length} to the API`, messages) + stream = (await this.client.messages.create({ model: modelId, max_tokens: this.getModel().info.maxTokens || 8192, diff --git a/src/core/Cline.ts b/src/core/Cline.ts index a587dd90a..02a9ee9c9 100644 --- a/src/core/Cline.ts +++ b/src/core/Cline.ts @@ -776,12 +776,26 @@ export class Cline { const stream = this.api.createMessage(systemPrompt, this.apiConversationHistory) const iterator = stream[Symbol.asyncIterator]() + let rateLimitsErrors = 0; + let rateLimitDelay = 30; try { // awaiting first chunk to see if it will throw an error const firstChunk = await iterator.next() yield firstChunk.value } catch (error) { + + if (error.status === 429 && rateLimitsErrors <= 3) { + + const currentTime = new Date().toLocaleTimeString([], { hour: '2-digit', minute: '2-digit', second: '2-digit' }); + console.log(`[${currentTime}] 🤖 Rate limited, waiting ${rateLimitDelay} seconds to retry`, error); + await delay(rateLimitDelay * 1000); + rateLimitsErrors++; + rateLimitDelay *= 2; + yield* this.attemptApiRequest(previousApiReqIndex) + return + } + // note that this api_req_failed ask is unique in that we only present this option if the api hasn't streamed any content yet (ie it fails on the first chunk due), as it would allow them to hit a retry button. However if the api failed mid-stream, it could be in any arbitrary state where some tools may have executed, so that error is handled differently and requires cancelling the task entirely. const { response } = await this.ask( "api_req_failed",