diff --git a/apps/postgres-new/.env.example b/apps/postgres-new/.env.example index b8560b13..b4114483 100644 --- a/apps/postgres-new/.env.example +++ b/apps/postgres-new/.env.example @@ -14,3 +14,7 @@ KV_REST_API_TOKEN="local_token" NEXT_PUBLIC_LEGACY_DOMAIN=https://postgres.new NEXT_PUBLIC_CURRENT_DOMAIN=https://database.build REDIRECT_LEGACY_DOMAIN=false + +# Optional +#LOGFLARE_SOURCE="" +#LOGFLARE_API_KEY="" diff --git a/apps/postgres-new/app/api/chat/route.ts b/apps/postgres-new/app/api/chat/route.ts index b81e4af5..ac5784d0 100644 --- a/apps/postgres-new/app/api/chat/route.ts +++ b/apps/postgres-new/app/api/chat/route.ts @@ -5,6 +5,7 @@ import { ToolInvocation, convertToCoreMessages, streamText } from 'ai' import { codeBlock } from 'common-tags' import { convertToCoreTools, maxMessageContext, maxRowLimit, tools } from '~/lib/tools' import { createClient } from '~/utils/supabase/server' +import { logEvent } from '~/utils/telemetry' // Allow streaming responses up to 30 seconds export const maxDuration = 30 @@ -46,20 +47,31 @@ export async function POST(req: Request) { return new Response('Unauthorized', { status: 401 }) } - const { user } = data + const { + user: { id: userId }, + } = data - const { remaining: inputRemaining } = await inputTokenRateLimit.getRemaining(user.id) - const { remaining: outputRemaining } = await outputTokenRateLimit.getRemaining(user.id) + const { remaining: inputTokensRemaining } = await inputTokenRateLimit.getRemaining(userId) + const { remaining: outputTokensRemaining } = await outputTokenRateLimit.getRemaining(userId) - if (inputRemaining <= 0 || outputRemaining <= 0) { + const { messages, databaseId }: { messages: Message[]; databaseId: string } = await req.json() + + if (inputTokensRemaining <= 0 || outputTokensRemaining <= 0) { + logEvent('chat-rate-limit', { + databaseId, + userId, + inputTokensRemaining, + outputTokensRemaining, + }) return new Response('Rate limited', { status: 429 }) } - const { messages }: { messages: 
Message[] } = await req.json() - // Trim the message context sent to the LLM to mitigate token abuse const trimmedMessageContext = messages.slice(-maxMessageContext) + const coreMessages = convertToCoreMessages(trimmedMessageContext) + const coreTools = convertToCoreTools(tools) + const result = await streamText({ system: codeBlock` You are a helpful database assistant. Under the hood you have access to an in-browser Postgres database called PGlite (https://github.com/electric-sql/pglite). @@ -104,15 +116,38 @@ export async function POST(req: Request) { Feel free to suggest corrections for suspected typos. `, model: openai(chatModel), - messages: convertToCoreMessages(trimmedMessageContext), - tools: convertToCoreTools(tools), - async onFinish({ usage }) { - await inputTokenRateLimit.limit(user.id, { + messages: coreMessages, + tools: coreTools, + async onFinish({ usage, finishReason, toolCalls }) { + await inputTokenRateLimit.limit(userId, { rate: usage.promptTokens, }) - await outputTokenRateLimit.limit(user.id, { + await outputTokenRateLimit.limit(userId, { rate: usage.completionTokens, }) + + // The last message should always be an input message (user message or tool result) + const inputMessage = coreMessages.at(-1) + if (!inputMessage || (inputMessage.role !== 'user' && inputMessage.role !== 'tool')) { + return + } + + // `tool` role indicates a tool result, `user` role indicates a user message + const inputType = inputMessage.role === 'tool' ? 
'tool-result' : 'user-message' + + // +1 for the assistant message just received + const messageCount = coreMessages.length + 1 + + logEvent('chat-inference', { + databaseId, + userId, + messageCount, + inputType, + inputTokens: usage.promptTokens, + outputTokens: usage.completionTokens, + finishReason, + toolCalls: toolCalls?.map((toolCall) => toolCall.toolName), + }) }, }) diff --git a/apps/postgres-new/components/workspace.tsx b/apps/postgres-new/components/workspace.tsx index e0a6dfa9..8ce6b4bc 100644 --- a/apps/postgres-new/components/workspace.tsx +++ b/apps/postgres-new/components/workspace.tsx @@ -71,6 +71,9 @@ export default function Workspace({ maxToolRoundtrips: 10, keepLastMessageOnError: true, onToolCall: onToolCall as any, // our `OnToolCall` type is more specific than `ai` SDK's + body: { + databaseId, + }, initialMessages: existingMessages && existingMessages.length > 0 ? existingMessages : initialMessages, async onFinish(message) { diff --git a/apps/postgres-new/utils/telemetry.ts b/apps/postgres-new/utils/telemetry.ts new file mode 100644 index 00000000..b093cf31 --- /dev/null +++ b/apps/postgres-new/utils/telemetry.ts @@ -0,0 +1,70 @@ +/** + * Event for an AI chat rate limit. Includes the + * remaining input and output tokens in the rate + * limit window (one of these will be <= 0). + */ +export type ChatRateLimitEvent = { + type: 'chat-rate-limit' + metadata: { + databaseId: string + userId: string + inputTokensRemaining: number + outputTokensRemaining: number + } +} + +/** + * Event for an AI chat inference request-response. + * Includes both input and output metadata. 
+ */ +export type ChatInferenceEvent = { + type: 'chat-inference' + metadata: { + databaseId: string + userId: string + messageCount: number + inputType: 'user-message' | 'tool-result' + inputTokens: number + outputTokens: number + finishReason: + | 'stop' + | 'length' + | 'content-filter' + | 'tool-calls' + | 'error' + | 'other' + | 'unknown' + toolCalls?: string[] + } +} + +export type TelemetryEvent = ChatRateLimitEvent | ChatInferenceEvent + +export async function logEvent<E extends TelemetryEvent>(type: E['type'], metadata: E['metadata']) { + if (!process.env.LOGFLARE_SOURCE || !process.env.LOGFLARE_API_KEY) { + if (process.env.DEBUG) { + console.log(type, metadata) + } + return + } + + const response = await fetch( + `https://api.logflare.app/logs?source=${process.env.LOGFLARE_SOURCE}`, + { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'X-API-KEY': process.env.LOGFLARE_API_KEY, + }, + body: JSON.stringify({ + event_message: type, + metadata, + }), + } + ) + + if (!response.ok) { + const { error } = await response.json() + console.error('failed to send logflare event', error) + } +}