From 1516f71e28f853ccb3f067088f3d12055f12cf85 Mon Sep 17 00:00:00 2001 From: ponderingdemocritus Date: Tue, 19 Nov 2024 14:31:48 +1100 Subject: [PATCH 1/8] services --- packages/agent/src/index.ts | 4 +- packages/client-discord/src/index.ts | 4 +- packages/client-discord/src/messages.ts | 60 +++++++++++++------ packages/client-discord/src/voice.ts | 47 ++++++++++----- .../client-telegram/src/telegramClient.ts | 2 +- packages/client-twitter/src/base.ts | 2 +- packages/client-twitter/src/interactions.ts | 4 +- packages/core/src/generation.ts | 41 ++++++++----- packages/core/src/runtime.ts | 9 +-- packages/core/src/types.ts | 6 +- packages/plugin-node/src/services/video.ts | 13 ++-- 11 files changed, 125 insertions(+), 67 deletions(-) diff --git a/packages/agent/src/index.ts b/packages/agent/src/index.ts index 725a65cfca..203f3df825 100644 --- a/packages/agent/src/index.ts +++ b/packages/agent/src/index.ts @@ -21,7 +21,7 @@ import Database from "better-sqlite3"; import fs from "fs"; import readline from "readline"; import yargs from "yargs"; -import { character } from "./character.ts"; +import blobert from "./blobert.ts"; export const wait = (minTime: number = 1000, maxTime: number = 3000) => { const waitTime = @@ -268,7 +268,7 @@ const startAgents = async () => { let charactersArg = args.characters || args.character; - let characters = [character]; + let characters = [blobert]; if (charactersArg) { characters = await loadCharacters(charactersArg); diff --git a/packages/client-discord/src/index.ts b/packages/client-discord/src/index.ts index 992d4e9255..57b70c22dd 100644 --- a/packages/client-discord/src/index.ts +++ b/packages/client-discord/src/index.ts @@ -25,8 +25,8 @@ import { VoiceManager } from "./voice.ts"; export class DiscordClient extends EventEmitter { apiToken: string; - private client: Client; - private runtime: IAgentRuntime; + client: Client; + runtime: IAgentRuntime; character: Character; private messageManager: MessageManager; private voiceManager: VoiceManager; diff --git a/packages/client-discord/src/messages.ts b/packages/client-discord/src/messages.ts index 4d1631664b..cab78a8ad2 100644 --- a/packages/client-discord/src/messages.ts +++ b/packages/client-discord/src/messages.ts @@ -515,10 +515,17 @@ export class MessageManager { } if (message.channel.type === ChannelType.GuildVoice) { // For voice channels, use text-to-speech - const audioStream = await this.runtime - .getService(ServiceType.SPEECH_GENERATION) - .getInstance() - .generate(this.runtime, content.text); + + const speechService = + this.runtime.getService( + ServiceType.SPEECH_GENERATION + ); + + const audioStream = await speechService.generate( + this.runtime, + content.text + ); + await this.voiceManager.playAudioStream( userId, audioStream @@ -603,10 +610,18 @@ export class MessageManager { if (message.channel.type === ChannelType.GuildVoice) { // For voice channels, use text-to-speech for the error message const errorMessage = "Sorry, I had a glitch. What was that?"; - const audioStream = await this.runtime - .getService(ServiceType.SPEECH_GENERATION) - .getInstance() - .generate(this.runtime, errorMessage); + + const speechService = this.runtime.getService( + ServiceType.SPEECH_GENERATION + ); + if (!speechService) { + throw new Error("Speech generation service not found"); + } + + const audioStream = await speechService.generate( + this.runtime, + errorMessage + ); await this.voiceManager.playAudioStream(userId, audioStream); } else { // For text channels, send the error message @@ -670,14 +685,17 @@ export class MessageManager { for (const url of urls) { if ( this.runtime - .getService(ServiceType.VIDEO) - .getInstance() + .getService(ServiceType.VIDEO) .isVideoUrl(url) ) { - const videoInfo = await this.runtime - .getService(ServiceType.VIDEO) - .getInstance() - .processVideo(url); + const videoService = this.runtime.getService( + ServiceType.VIDEO + ); + if (!videoService) { + throw new Error("Video service not found"); + } + const videoInfo = await videoService.processVideo(url); + attachments.push({ id: `youtube-${Date.now()}`, url: url, @@ -687,10 +705,16 @@ export class MessageManager { text: videoInfo.text, }); } else { - const { title, bodyContent } = await this.runtime - .getService(ServiceType.BROWSER) - .getInstance() - .getPageContent(url, this.runtime); + const browserService = this.runtime.getService( + ServiceType.BROWSER + ); + if (!browserService) { + throw new Error("Browser service not found"); + } + + const { title, bodyContent } = + await browserService.getPageContent(url, this.runtime); + const { title: newTitle, description } = await generateSummary( this.runtime, title + "\n" + bodyContent diff --git a/packages/client-discord/src/voice.ts b/packages/client-discord/src/voice.ts index e2abc0927e..6a28a1352a 100644 --- a/packages/client-discord/src/voice.ts +++ b/packages/client-discord/src/voice.ts @@ -64,6 +64,7 @@ export function getWavHeader( } import { messageCompletionFooter } from "@ai16z/eliza/src/parsing.ts"; +import { DiscordClient } from "."; const discordVoiceHandlerTemplate = `# Task: Generate conversational voice dialog for {{agentName}}. @@ -183,7 +184,7 @@ export class VoiceManager extends EventEmitter { { channel: BaseGuildVoiceChannel; monitor: AudioMonitor } > = new Map(); - constructor(client: any) { + constructor(client: DiscordClient) { super(); this.client = client.client; this.runtime = client.runtime; @@ -260,10 +261,10 @@ export class VoiceManager extends EventEmitter { member: GuildMember, channel: BaseGuildVoiceChannel ) { - const userId = member.id; - const userName = member.user.username; - const name = member.user.displayName; - const connection = getVoiceConnection(member.guild.id); + const userId = member?.id; + const userName = member?.user?.username; + const name = member?.user?.displayName; + const connection = getVoiceConnection(member?.guild?.id); const receiveStream = connection?.receiver.subscribe(userId, { autoDestroy: true, emitClose: true, @@ -368,13 +369,12 @@ export class VoiceManager extends EventEmitter { let lastChunkTime = Date.now(); let transcriptionStarted = false; let transcriptionText = ""; - console.log("new audio monitor for: ", userId); const monitor = new AudioMonitor( audioStream, 10000000, async (buffer) => { - console.log("buffer: ", buffer); + // console.log("buffer: ", buffer); const currentTime = Date.now(); const silenceDuration = currentTime - lastChunkTime; if (!buffer) { @@ -397,11 +397,14 @@ export class VoiceManager extends EventEmitter { const wavBuffer = await this.convertOpusToWav(inputBuffer); - console.log("starting transcription"); - const text = await this.runtime - .getService(ServiceType.TRANSCRIPTION) - .getInstance() - .transcribe(wavBuffer); + const transcriptionService = + this.runtime.getService( + ServiceType.TRANSCRIPTION + ); + + const text = + await transcriptionService.transcribe(wavBuffer); + console.log("transcribed text: ", text); transcriptionText += text; } catch (error) { @@ -539,10 +542,22 @@ export class VoiceManager extends EventEmitter { await this.runtime.updateRecentMessageState( state ); - const responseStream = await this.runtime - .getService(ServiceType.SPEECH_GENERATION) - .getInstance() - .generate(this.runtime, content.text); + + const speechService = + this.runtime.getService( + ServiceType.SPEECH_GENERATION + ); + if (!speechService) { + throw new Error( + "Speech generation service not found" + ); + } + + const responseStream = + await speechService.generate( + this.runtime, + content.text + ); if (responseStream) { await this.playAudioStream( diff --git a/packages/client-telegram/src/telegramClient.ts b/packages/client-telegram/src/telegramClient.ts index 462517c2a2..dd769c25f1 100644 --- a/packages/client-telegram/src/telegramClient.ts +++ b/packages/client-telegram/src/telegramClient.ts @@ -32,7 +32,7 @@ export class TelegramClient { this.bot.botInfo = botInfo; }); - console.log(`Bot username: @${this.bot.botInfo?.username}`); + elizaLogger.success(`Bot username: @${this.bot.botInfo?.username}`); this.messageManager.bot = this.bot; diff --git a/packages/client-twitter/src/base.ts b/packages/client-twitter/src/base.ts index 1a910c3769..54d4e4d9f0 100644 --- a/packages/client-twitter/src/base.ts +++ b/packages/client-twitter/src/base.ts @@ -220,7 +220,7 @@ export class ClientBase extends EventEmitter { this.runtime.getSetting("TWITTER_EMAIL"), this.runtime.getSetting("TWITTER_2FA_SECRET") ); - console.log("Logged in to Twitter"); + elizaLogger.log("Logged in to Twitter"); const cookies = await this.twitterClient.getCookies(); fs.writeFileSync( cookiesFilePath, diff --git a/packages/client-twitter/src/interactions.ts b/packages/client-twitter/src/interactions.ts index 75c4f8c2c8..f0a97aa73e 100644 --- a/packages/client-twitter/src/interactions.ts +++ b/packages/client-twitter/src/interactions.ts @@ -238,7 +238,7 @@ export class TwitterInteractionClient extends ClientBase { ); } - console.log("Thread: ", thread); + elizaLogger.debug("Thread: ", thread); const formattedConversation = thread .map( (tweet) => `@${tweet.username} (${new Date( @@ -253,7 +253,7 @@ export class TwitterInteractionClient extends ClientBase { ) .join("\n\n"); - console.log("formattedConversation: ", formattedConversation); + elizaLogger.debug("formattedConversation: ", formattedConversation); const formattedHomeTimeline = `# ${this.runtime.character.name}'s Home Timeline\n\n` + diff --git a/packages/core/src/generation.ts b/packages/core/src/generation.ts index 1b82fae9b9..ba008a0fd7 100644 --- a/packages/core/src/generation.ts +++ b/packages/core/src/generation.ts @@ -244,17 +244,23 @@ export async function generateText({ elizaLogger.debug( "Using local Llama model for text completion." ); - response = await runtime - .getService(ServiceType.TEXT_GENERATION) - .getInstance() - .queueTextCompletion( - context, - temperature, - _stop, - frequency_penalty, - presence_penalty, - max_response_length + const textGenerationService = + runtime.getService( + ServiceType.TEXT_GENERATION ); + + if (!textGenerationService) { + throw new Error("Text generation service not found"); + } + + response = await textGenerationService.queueTextCompletion( + context, + temperature, + _stop, + frequency_penalty, + presence_penalty, + max_response_length + ); elizaLogger.debug("Received response from local Llama model."); break; } @@ -852,16 +858,21 @@ export const generateCaption = async ( description: string; }> => { const { imageUrl } = data; - const resp = await runtime - .getService(ServiceType.IMAGE_DESCRIPTION) - .getInstance() - .describeImage(imageUrl); + const imageDescriptionService = + runtime.getService( + ServiceType.IMAGE_DESCRIPTION + ); + + if (!imageDescriptionService) { + throw new Error("Image description service not found"); + } + + const resp = await imageDescriptionService.describeImage(imageUrl); return { title: resp.title.trim(), description: resp.description.trim(), }; }; - /** * Configuration options for generating objects with a model. */ diff --git a/packages/core/src/runtime.ts b/packages/core/src/runtime.ts index dee40e7cfd..a5bfea5b4a 100644 --- a/packages/core/src/runtime.ts +++ b/packages/core/src/runtime.ts @@ -150,16 +150,17 @@ export class AgentRuntime implements IAgentRuntime { return this.memoryManagers.get(tableName) || null; } - getService(service: ServiceType): typeof Service | null { + getService(service: ServiceType): T | null { const serviceInstance = this.services.get(service); if (!serviceInstance) { elizaLogger.error(`Service ${service} not found`); return null; } - return serviceInstance as typeof Service; + return serviceInstance as T; } + registerService(service: Service): void { - const serviceType = (service as typeof Service).serviceType; + const serviceType = service.serviceType; elizaLogger.log("Registering service:", serviceType); if (this.services.has(serviceType)) { elizaLogger.warn( @@ -168,7 +169,7 @@ export class AgentRuntime implements IAgentRuntime { return; } - this.services.set((service as typeof Service).serviceType, service); + this.services.set(serviceType, service); } /** diff --git a/packages/core/src/types.ts b/packages/core/src/types.ts index b4fefaa114..f5b71e9bd2 100644 --- a/packages/core/src/types.ts +++ b/packages/core/src/types.ts @@ -524,7 +524,7 @@ export interface IMemoryManager { export abstract class Service { private static instance: Service | null = null; - static serviceType: ServiceType; + serviceType: ServiceType; public static getInstance(): T { if (!Service.instance) { @@ -556,7 +556,7 @@ export interface IAgentRuntime { getMemoryManager(name: string): IMemoryManager | null; - getService(service: string): typeof Service | null; + getService(service: ServiceType): T | null; registerService(service: Service): void; @@ -608,6 +608,7 @@ export interface IImageDescriptionService extends Service { } export interface ITranscriptionService extends Service { + getInstance(): ITranscriptionService; transcribeAttachment(audioBuffer: ArrayBuffer): Promise; transcribeAttachmentLocally( audioBuffer: ArrayBuffer @@ -617,6 +618,7 @@ export interface ITranscriptionService extends Service { } export interface IVideoService extends Service { + getInstance(): IVideoService; isVideoUrl(url: string): boolean; processVideo(url: string): Promise; fetchVideoInfo(url: string): Promise; diff --git a/packages/plugin-node/src/services/video.ts b/packages/plugin-node/src/services/video.ts index a8bee25ac7..c4c27ea383 100644 --- a/packages/plugin-node/src/services/video.ts +++ b/packages/plugin-node/src/services/video.ts @@ -327,10 +327,15 @@ export class VideoService extends Service { console.log("Starting transcription..."); const startTime = Date.now(); - const transcript = await runtime - .getService(ServiceType.TRANSCRIPTION) - .getInstance() - .transcribe(audioBuffer); + const transcriptionService = runtime.getService( + ServiceType.TRANSCRIPTION + ); + if (!transcriptionService) { + throw new Error("Transcription service not found"); + } + + const transcript = await transcriptionService.transcribe(audioBuffer); + const endTime = Date.now(); console.log( `Transcription completed in ${(endTime - startTime) / 1000} seconds` From eb72f01b9ae4244df78789a1f5fa73d664b58f56 Mon Sep 17 00:00:00 2001 From: ponderingdemocritus Date: Tue, 19 Nov 2024 14:34:23 +1100 Subject: [PATCH 2/8] remove character --- packages/agent/src/index.ts | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/packages/agent/src/index.ts b/packages/agent/src/index.ts index 203f3df825..5ecb0d5800 100644 --- a/packages/agent/src/index.ts +++ b/packages/agent/src/index.ts @@ -5,13 +5,12 @@ import { DiscordClientInterface } from "@ai16z/client-discord"; import { AutoClientInterface } from "@ai16z/client-auto"; import { TelegramClientInterface } from "@ai16z/client-telegram"; import { TwitterClientInterface } from "@ai16z/client-twitter"; -import { defaultCharacter } from "@ai16z/eliza"; -import { AgentRuntime } from "@ai16z/eliza"; -import { settings } from "@ai16z/eliza"; import { + defaultCharacter, + AgentRuntime, + settings, Character, IAgentRuntime, - IDatabaseAdapter, ModelProviderName, } from "@ai16z/eliza"; import { bootstrapPlugin } from "@ai16z/plugin-bootstrap"; @@ -21,7 +20,7 @@ import Database from "better-sqlite3"; import fs from "fs"; import readline from "readline"; import yargs from "yargs"; -import blobert from "./blobert.ts"; +import { character } from "./character.ts"; export const wait = (minTime: number = 1000, maxTime: number = 3000) => { const waitTime = @@ -268,7 +267,7 @@ const startAgents = async () => { let charactersArg = args.characters || args.character; - let characters = [blobert]; + let characters = [character]; if (charactersArg) { characters = await loadCharacters(charactersArg); From 780f483b6f2abeb8a2d9c4bdb6d89217dc995d1f Mon Sep 17 00:00:00 2001 From: ponderingdemocritus Date: Tue, 19 Nov 2024 14:51:55 +1100 Subject: [PATCH 3/8] fix logging --- packages/agent/src/index.ts | 7 ++++++- packages/client-direct/src/index.ts | 2 +- packages/client-discord/src/index.ts | 2 +- packages/client-discord/src/voice.ts | 8 ++++---- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/packages/agent/src/index.ts b/packages/agent/src/index.ts index 5ecb0d5800..15719c89cd 100644 --- a/packages/agent/src/index.ts +++ b/packages/agent/src/index.ts @@ -12,6 +12,7 @@ import { Character, IAgentRuntime, ModelProviderName, + elizaLogger, } from "@ai16z/eliza"; import { bootstrapPlugin } from "@ai16z/plugin-bootstrap"; import { solanaPlugin } from "@ai16z/plugin-solana"; @@ -218,7 +219,11 @@ export async function createAgent( db: any, token: string ) { - console.log("Creating runtime for character", character.name); + elizaLogger.success( + elizaLogger.successesTitle, + "Creating runtime for character", + character.name + ); return new AgentRuntime({ databaseAdapter: db, token, diff --git a/packages/client-direct/src/index.ts b/packages/client-direct/src/index.ts index 1ca8a97658..123600bf55 100644 --- a/packages/client-direct/src/index.ts +++ b/packages/client-direct/src/index.ts @@ -61,7 +61,7 @@ export class DirectClient { private agents: Map; constructor() { - console.log("DirectClient constructor"); + elizaLogger.log("DirectClient constructor"); this.app = express(); this.app.use(cors()); this.agents = new Map(); diff --git a/packages/client-discord/src/index.ts b/packages/client-discord/src/index.ts index 57b70c22dd..0b27015b65 100644 --- a/packages/client-discord/src/index.ts +++ b/packages/client-discord/src/index.ts @@ -193,7 +193,7 @@ export class DiscordClient extends EventEmitter { } async handleReactionRemove(reaction: MessageReaction, user: User) { - console.log("Reaction removed"); + elizaLogger.log("Reaction removed"); // if (user.bot) return; let emoji = reaction.emoji.name; diff --git a/packages/client-discord/src/voice.ts b/packages/client-discord/src/voice.ts index 6a28a1352a..3ab11c9cdd 100644 --- a/packages/client-discord/src/voice.ts +++ b/packages/client-discord/src/voice.ts @@ -20,7 +20,7 @@ import { import EventEmitter from "events"; import prism from "prism-media"; import { Readable, pipeline } from "stream"; -import { composeContext } from "@ai16z/eliza"; +import { composeContext, elizaLogger } from "@ai16z/eliza"; import { generateMessageResponse } from "@ai16z/eliza"; import { embeddingZeroVector } from "@ai16z/eliza"; import { @@ -121,7 +121,7 @@ export class AudioMonitor { } }); this.readable.on("end", () => { - console.log("AudioMonitor ended"); + elizaLogger.log("AudioMonitor ended"); this.ended = true; if (this.lastFlagged < 0) return; callback(this.getBufferFromStart()); @@ -129,13 +129,13 @@ export class AudioMonitor { }); this.readable.on("speakingStopped", () => { if (this.ended) return; - console.log("Speaking stopped"); + elizaLogger.log("Speaking stopped"); if (this.lastFlagged < 0) return; callback(this.getBufferFromStart()); }); this.readable.on("speakingStarted", () => { if (this.ended) return; - console.log("Speaking started"); + elizaLogger.log("Speaking started"); this.reset(); }); } From 97cd09bda9a077f5a69d53dd2050fc41bbfd4a48 Mon Sep 17 00:00:00 2001 From: ponderingdemocritus Date: Tue, 19 Nov 2024 14:52:12 +1100 Subject: [PATCH 4/8] linting --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index a9abbd61e9..15e68e36bc 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ ## 🌍 README Translations + [中文说明](./README_CN.md) | [日本語の説明](./README_JA.md) | [한국어 설명](./README_KOR.md) | [Français](./README_FR.md) | [Português](./README_PTBR.md) | [Türkçe](./README_TR.md) | [Русский](./README_RU.md) | [Español](./README_ES.md) ## ✨ Features From 6b0baadb30618fc3b76b1124528abaa3fb55a285 Mon Sep 17 00:00:00 2001 From: ponderingdemocritus Date: Tue, 19 Nov 2024 21:59:01 +1100 Subject: [PATCH 5/8] systems work --- packages/agent/src/index.ts | 10 ++--- .../src/actions/download_media.ts | 4 +- packages/client-discord/src/attachments.ts | 45 ++++++++++--------- packages/client-discord/src/voice.ts | 4 ++ .../client-telegram/src/messageManager.ts | 5 +-- packages/client-twitter/src/base.ts | 2 +- packages/client-twitter/src/index.ts | 6 +-- packages/client-twitter/src/post.ts | 6 +-- packages/client-twitter/src/search.ts | 6 ++- packages/core/src/generation.ts | 14 +++--- packages/core/src/tests/goals.test.ts | 17 ++++--- packages/core/src/types.ts | 18 ++++++-- packages/plugin-node/src/index.ts | 39 ++++++++-------- packages/plugin-node/src/services/image.ts | 32 +++++++------ packages/plugin-node/src/services/index.ts | 17 +++++++ packages/plugin-node/src/services/llama.ts | 5 ++- packages/plugin-node/src/services/pdf.ts | 4 +- packages/plugin-node/src/services/speech.ts | 6 ++- .../plugin-node/src/services/transcription.ts | 4 +- packages/plugin-node/src/services/video.ts | 8 ++-- pnpm-lock.yaml | 18 ++++---- 21 files changed, 159 insertions(+), 111 deletions(-) create mode 100644 packages/plugin-node/src/services/index.ts diff --git a/packages/agent/src/index.ts b/packages/agent/src/index.ts index 15719c89cd..76fce33992 100644 --- a/packages/agent/src/index.ts +++ b/packages/agent/src/index.ts @@ -21,7 +21,7 @@ import Database from "better-sqlite3"; import fs from "fs"; import readline from "readline"; import yargs from "yargs"; -import { character } from "./character.ts"; +import blobert from "./blobert.ts"; export const wait = (minTime: number = 1000, maxTime: number = 3000) => { const waitTime = @@ -272,7 +272,7 @@ const startAgents = async () => { let charactersArg = args.characters || args.character; - let characters = [character]; + let characters = [blobert]; if (charactersArg) { characters = await loadCharacters(charactersArg); @@ -283,7 +283,7 @@ const startAgents = async () => { await startAgent(character, directClient); } } catch (error) { - console.error("Error starting agents:", error); + elizaLogger.error("Error starting agents:", error); } function chat() { @@ -296,12 +296,12 @@ const startAgents = async () => { }); } - console.log("Chat started. Type 'exit' to quit."); + elizaLogger.log("Chat started. Type 'exit' to quit."); chat(); }; startAgents().catch((error) => { - console.error("Unhandled error in startAgents:", error); + elizaLogger.error("Unhandled error in startAgents:", error); process.exit(1); // Exit the process after logging }); diff --git a/packages/client-discord/src/actions/download_media.ts b/packages/client-discord/src/actions/download_media.ts index 0b535139cd..8c68ea4467 100644 --- a/packages/client-discord/src/actions/download_media.ts +++ b/packages/client-discord/src/actions/download_media.ts @@ -86,8 +86,8 @@ export default { callback: HandlerCallback ) => { const videoService = runtime - .getService(ServiceType.VIDEO) - .getInstance(); + .getService(ServiceType.VIDEO) + .getInstance(); if (!state) { state = (await runtime.composeState(message)) as State; } diff --git a/packages/client-discord/src/attachments.ts b/packages/client-discord/src/attachments.ts index ffe67bea15..7746beda4e 100644 --- a/packages/client-discord/src/attachments.ts +++ b/packages/client-discord/src/attachments.ts @@ -104,8 +104,7 @@ export class AttachmentManager { } else if ( attachment.contentType?.startsWith("video/") || this.runtime - .getService(ServiceType.VIDEO) - .getInstance() + .getService(ServiceType.VIDEO) .isVideoUrl(attachment.url) ) { media = await this.processVideoAttachment(attachment); @@ -137,10 +136,16 @@ export class AttachmentManager { throw new Error("Unsupported audio/video format"); } - const transcription = await this.runtime - .getService(ServiceType.TRANSCRIPTION) - .getInstance() - .transcribeAttachment(audioBuffer); + const transcriptionService = + this.runtime.getService( + ServiceType.TRANSCRIPTION + ); + if (!transcriptionService) { + throw new Error("Transcription service not found"); + } + + const transcription = + await transcriptionService.transcribeAttachment(audioBuffer); const { title, description } = await generateSummary( this.runtime, transcription @@ -220,8 +225,7 @@ export class AttachmentManager { const response = await fetch(attachment.url); const pdfBuffer = await response.arrayBuffer(); const text = await this.runtime - .getService(ServiceType.PDF) - .getInstance() + .getService(ServiceType.PDF) .convertPdfToText(Buffer.from(pdfBuffer)); const { title, description } = await generateSummary( this.runtime, @@ -289,8 +293,9 @@ export class AttachmentManager { ): Promise { try { const { description, title } = await this.runtime - .getService(ServiceType.IMAGE_DESCRIPTION) - .getInstance() + .getService( + ServiceType.IMAGE_DESCRIPTION + ) .describeImage(attachment.url); return { id: attachment.id, @@ -322,16 +327,16 @@ export class AttachmentManager { private async processVideoAttachment( attachment: Attachment ): Promise { - if ( - this.runtime - .getService(ServiceType.VIDEO) - .getInstance() - .isVideoUrl(attachment.url) - ) { - const videoInfo = await this.runtime - .getService(ServiceType.VIDEO) - .getInstance() - .processVideo(attachment.url); + const videoService = this.runtime.getService( + ServiceType.VIDEO + ); + + if (!videoService) { + throw new Error("Video service not found"); + } + + if (videoService.isVideoUrl(attachment.url)) { + const videoInfo = await videoService.processVideo(attachment.url); return { id: attachment.id, url: attachment.url, diff --git a/packages/client-discord/src/voice.ts b/packages/client-discord/src/voice.ts index 3ab11c9cdd..cf03ac45e5 100644 --- a/packages/client-discord/src/voice.ts +++ b/packages/client-discord/src/voice.ts @@ -401,6 +401,10 @@ export class VoiceManager extends EventEmitter { this.runtime.getService( ServiceType.TRANSCRIPTION ); + console.log( + "transcriptionService: ", + transcriptionService + ); const text = await transcriptionService.transcribe(wavBuffer); diff --git a/packages/client-telegram/src/messageManager.ts b/packages/client-telegram/src/messageManager.ts index ca6ceb0494..3b3f53d3bf 100644 --- a/packages/client-telegram/src/messageManager.ts +++ b/packages/client-telegram/src/messageManager.ts @@ -178,9 +178,8 @@ export class MessageManager { } if (imageUrl) { - const { title, description } = await this.imageService - .getInstance() - .describeImage(imageUrl); + const { title, description } = + await this.imageService.describeImage(imageUrl); const fullDescription = `[Image: ${title}\n${description}]`; return { description: fullDescription }; } diff --git a/packages/client-twitter/src/base.ts b/packages/client-twitter/src/base.ts index 54d4e4d9f0..96f3f29a17 100644 --- a/packages/client-twitter/src/base.ts +++ b/packages/client-twitter/src/base.ts @@ -270,7 +270,7 @@ export class ClientBase extends EventEmitter { console.error("Failed to get user ID"); return; } - console.log("Twitter user ID:", userId); + elizaLogger.log("Twitter user ID:", userId); this.twitterUserId = userId; // Initialize Twitter profile diff --git a/packages/client-twitter/src/index.ts b/packages/client-twitter/src/index.ts index 6a3097c524..742b5ac34d 100644 --- a/packages/client-twitter/src/index.ts +++ b/packages/client-twitter/src/index.ts @@ -1,7 +1,7 @@ import { TwitterPostClient } from "./post.ts"; import { TwitterSearchClient } from "./search.ts"; import { TwitterInteractionClient } from "./interactions.ts"; -import { IAgentRuntime, Client } from "@ai16z/eliza"; +import { IAgentRuntime, Client, elizaLogger } from "@ai16z/eliza"; class TwitterAllClient { post: TwitterPostClient; @@ -19,11 +19,11 @@ class TwitterAllClient { export const TwitterClientInterface: Client = { async start(runtime: IAgentRuntime) { - console.log("Twitter client started"); + elizaLogger.log("Twitter client started"); return new TwitterAllClient(runtime); }, async stop(runtime: IAgentRuntime) { - console.warn("Twitter client does not support stopping yet"); + elizaLogger.warn("Twitter client does not support stopping yet"); }, }; diff --git a/packages/client-twitter/src/post.ts b/packages/client-twitter/src/post.ts index d74d34ef2e..02778dae2d 100644 --- a/packages/client-twitter/src/post.ts +++ b/packages/client-twitter/src/post.ts @@ -1,6 +1,6 @@ import { Tweet } from "agent-twitter-client"; import fs from "fs"; -import { composeContext } from "@ai16z/eliza"; +import { composeContext, elizaLogger } from "@ai16z/eliza"; import { generateText } from "@ai16z/eliza"; import { embeddingZeroVector } from "@ai16z/eliza"; import { IAgentRuntime, ModelClass } from "@ai16z/eliza"; @@ -76,7 +76,7 @@ export class TwitterPostClient extends ClientBase { generateNewTweetLoop(); // Set up next iteration }, delay); - console.log(`Next tweet scheduled in ${randomMinutes} minutes`); + elizaLogger.log(`Next tweet scheduled in ${randomMinutes} minutes`); }; if (postImmediately) { @@ -92,7 +92,7 @@ export class TwitterPostClient extends ClientBase { } private async generateNewTweet() { - console.log("Generating new tweet"); + elizaLogger.log("Generating new tweet"); try { await this.runtime.ensureUserExists( this.runtime.agentId, diff --git a/packages/client-twitter/src/search.ts b/packages/client-twitter/src/search.ts index 3ece65fa63..38d8e2d2ec 100644 --- a/packages/client-twitter/src/search.ts +++ b/packages/client-twitter/src/search.ts @@ -234,8 +234,10 @@ export class TwitterSearchClient extends ClientBase { const imageDescriptions = []; for (const photo of selectedTweet.photos) { const description = await this.runtime - .getService(ServiceType.IMAGE_DESCRIPTION) - .getInstance() + .getService( + ServiceType.IMAGE_DESCRIPTION + ) + .getInstance() .describeImage(photo.url); imageDescriptions.push(description); } diff --git a/packages/core/src/generation.ts b/packages/core/src/generation.ts index ba008a0fd7..90e687ff83 100644 --- a/packages/core/src/generation.ts +++ b/packages/core/src/generation.ts @@ -244,10 +244,11 @@ export async function generateText({ elizaLogger.debug( "Using local Llama model for text completion." ); - const textGenerationService = - runtime.getService( + const textGenerationService = runtime + .getService( ServiceType.TEXT_GENERATION - ); + ) + .getInstance(); if (!textGenerationService) { throw new Error("Text generation service not found"); @@ -858,10 +859,9 @@ export const generateCaption = async ( description: string; }> => { const { imageUrl } = data; - const imageDescriptionService = - runtime.getService( - ServiceType.IMAGE_DESCRIPTION - ); + const imageDescriptionService = runtime + .getService(ServiceType.IMAGE_DESCRIPTION) + .getInstance(); if (!imageDescriptionService) { throw new Error("Image description service not found"); diff --git a/packages/core/src/tests/goals.test.ts b/packages/core/src/tests/goals.test.ts index cd845d1220..d931f77663 100644 --- a/packages/core/src/tests/goals.test.ts +++ b/packages/core/src/tests/goals.test.ts @@ -15,18 +15,19 @@ import { Memory, ModelProviderName, Service, + ServiceType, State, } from "../types"; // Mock the database adapter -const mockDatabaseAdapter = { +export const mockDatabaseAdapter = { getGoals: jest.fn(), updateGoal: jest.fn(), createGoal: jest.fn(), }; - +const services = new Map(); // Mock the runtime -const mockRuntime: IAgentRuntime = { +export const mockRuntime: IAgentRuntime = { databaseAdapter: mockDatabaseAdapter as any, agentId: "qweqew-qweqwe-qweqwe-qweqwe-qweeqw", serverUrl: "", @@ -87,8 +88,8 @@ const mockRuntime: IAgentRuntime = { getMemoryManager: function (_name: string): IMemoryManager | null { throw new Error("Function not implemented."); }, - registerService: function (_service: Service): void { - throw new Error("Function not implemented."); + registerService: function (service: Service): void { + services.set(service.serviceType, service); }, getSetting: function (_key: string): string | null { throw new Error("Function not implemented."); @@ -155,8 +156,10 @@ const mockRuntime: IAgentRuntime = { updateRecentMessageState: function (_state: State): Promise { throw new Error("Function not implemented."); }, - getService: function (_service: string): typeof Service | null { - throw new Error("Function not implemented."); + getService: function ( + serviceType: ServiceType + ): T | null { + return (services.get(serviceType) as T) || null; }, }; diff --git a/packages/core/src/types.ts b/packages/core/src/types.ts index f5b71e9bd2..1c89a76c28 100644 --- a/packages/core/src/types.ts +++ b/packages/core/src/types.ts @@ -524,15 +524,24 @@ export interface IMemoryManager { export abstract class Service { private static instance: Service | null = null; - serviceType: ServiceType; + + static get serviceType(): ServiceType { + throw new Error("Service must implement static serviceType getter"); + } public static getInstance(): T { if (!Service.instance) { - // Use this.prototype.constructor to instantiate the concrete class Service.instance = new (this as any)(); } return Service.instance as T; } + + get serviceType(): ServiceType { + return (this.constructor as typeof Service).serviceType; + } + + // Add abstract initialize method that must be implemented by derived classes + abstract initialize(runtime: IAgentRuntime): Promise; } export interface IAgentRuntime { @@ -601,7 +610,6 @@ export interface IAgentRuntime { export interface IImageDescriptionService extends Service { getInstance(): IImageDescriptionService; - initialize(modelId?: string | null, device?: string | null): Promise; describeImage( imageUrl: string ): Promise<{ title: string; description: string }>; @@ -648,7 +656,7 @@ export interface ITextGenerationService extends Service { } export interface IBrowserService extends Service { - initialize(): Promise; + getInstance(): IBrowserService; closeBrowser(): Promise; getPageContent( url: string, @@ -657,10 +665,12 @@ export interface IBrowserService extends Service { } export interface ISpeechService extends Service { + getInstance(): ISpeechService; generate(runtime: IAgentRuntime, text: string): Promise; } export interface IPdfService extends Service { + getInstance(): IPdfService; convertPdfToText(pdfBuffer: Buffer): Promise; } diff --git a/packages/plugin-node/src/index.ts b/packages/plugin-node/src/index.ts index 678db6460e..6969a05592 100644 --- a/packages/plugin-node/src/index.ts +++ b/packages/plugin-node/src/index.ts @@ -1,31 +1,28 @@ -export * from "./services/browser.ts"; -export * from "./services/image.ts"; -export * from "./services/llama.ts"; -export * from "./services/pdf.ts"; -export * from "./services/speech.ts"; -export * from "./services/transcription.ts"; -export * from "./services/video.ts"; +export * from "./services/index.ts"; import { Plugin } from "@ai16z/eliza"; -import { BrowserService } from "./services/browser.ts"; -import { ImageDescriptionService } from "./services/image.ts"; -import { LlamaService } from "./services/llama.ts"; -import { PdfService } from "./services/pdf.ts"; -import { SpeechService } from "./services/speech.ts"; -import { TranscriptionService } from "./services/transcription.ts"; -import { VideoService } from "./services/video.ts"; + +import { + BrowserService, + ImageDescriptionService, + LlamaService, + PdfService, + SpeechService, + TranscriptionService, + VideoService, +} from "./services/index.ts"; export const nodePlugin: Plugin = { name: "default", description: "Default plugin, with basic actions and evaluators", services: [ - BrowserService, - ImageDescriptionService, - LlamaService, - PdfService, - SpeechService, - TranscriptionService, - VideoService, + new BrowserService(), + new ImageDescriptionService(), + new LlamaService(), + new PdfService(), + new SpeechService(), + new TranscriptionService(), + new VideoService(), ], }; diff --git a/packages/plugin-node/src/services/image.ts b/packages/plugin-node/src/services/image.ts index e4430c651a..ff0ec46343 100644 --- a/packages/plugin-node/src/services/image.ts +++ b/packages/plugin-node/src/services/image.ts @@ -25,6 +25,7 @@ export class ImageDescriptionService extends Service { private processor: Florence2Processor | null = null; private tokenizer: PreTrainedTokenizer | null = null; private initialized: boolean = false; + private runtime: IAgentRuntime | null = null; static serviceType: ServiceType = ServiceType.IMAGE_DESCRIPTION; @@ -35,14 +36,7 @@ export class ImageDescriptionService extends Service { super(); } - async initialize( - device: string | null = null, - runtime: IAgentRuntime - ): Promise { - if (this.initialized) { - return; - } - + async initialize(runtime: IAgentRuntime): Promise { const model = models[runtime?.character?.modelProvider]; if (model === models[ModelProviderName.LLAMALOCAL]) { @@ -86,14 +80,15 @@ export class ImageDescriptionService extends Service { } async describeImage( - imageUrl: string, - device?: string, - runtime?: IAgentRuntime + imageUrl: string ): Promise<{ title: string; description: string }> { - this.initialize(device, runtime); - if (this.device === "cloud") { - return this.recognizeWithOpenAI(imageUrl, runtime); + if (!this.runtime) { + throw new Error( + "Runtime is required for OpenAI image recognition" + ); + } + return this.recognizeWithOpenAI(imageUrl); } else { this.queue.push(imageUrl); this.processQueue(); @@ -113,9 +108,12 @@ export class ImageDescriptionService extends Service { } private async recognizeWithOpenAI( - imageUrl: string, - runtime + imageUrl: string ): Promise<{ title: string; description: string }> { + if (!this.runtime) { + throw new Error("Runtime is required for OpenAI image recognition"); + } + const isGif = imageUrl.toLowerCase().endsWith(".gif"); let imageData: Buffer | null = null; @@ -147,7 +145,7 @@ export class ImageDescriptionService extends Service { imageData, prompt, isGif, - runtime + this.runtime ); const title = text.split("\n")[0]; const description = text.split("\n").slice(1).join("\n"); diff --git a/packages/plugin-node/src/services/index.ts b/packages/plugin-node/src/services/index.ts new file mode 100644 index 0000000000..95ed3e04ae --- /dev/null +++ b/packages/plugin-node/src/services/index.ts @@ -0,0 +1,17 @@ +import { BrowserService } from "./browser.ts"; +import { ImageDescriptionService } from "./image.ts"; +import { LlamaService } from "./llama.ts"; +import { PdfService } from "./pdf.ts"; +import { SpeechService } from "./speech.ts"; +import { TranscriptionService } from "./transcription.ts"; +import { VideoService } from "./video.ts"; + +export { + BrowserService, + ImageDescriptionService, + LlamaService, + PdfService, + SpeechService, + TranscriptionService, + VideoService, +}; diff --git a/packages/plugin-node/src/services/llama.ts b/packages/plugin-node/src/services/llama.ts index f8bc8be414..720972278f 100644 --- a/packages/plugin-node/src/services/llama.ts +++ b/packages/plugin-node/src/services/llama.ts @@ -1,4 +1,4 @@ -import { elizaLogger, ServiceType } from "@ai16z/eliza"; +import { elizaLogger, IAgentRuntime, ServiceType } from "@ai16z/eliza"; import { Service } from "@ai16z/eliza"; import fs from "fs"; import https from "https"; @@ -180,6 +180,9 @@ export class LlamaService extends Service { const modelName = "model.gguf"; this.modelPath = path.join(__dirname, modelName); } + + async initialize(runtime: IAgentRuntime): Promise {} + private async ensureInitialized() { if (!this.modelInitialized) { await this.initializeModel(); diff --git a/packages/plugin-node/src/services/pdf.ts b/packages/plugin-node/src/services/pdf.ts index ad899672fc..b113881448 100644 --- a/packages/plugin-node/src/services/pdf.ts +++ b/packages/plugin-node/src/services/pdf.ts @@ -1,4 +1,4 @@ -import { Service, ServiceType } from "@ai16z/eliza"; +import { IAgentRuntime, Service, ServiceType } from "@ai16z/eliza"; import { getDocument, PDFDocumentProxy } from "pdfjs-dist"; import { TextItem, TextMarkedContent } from "pdfjs-dist/types/src/display/api"; @@ -9,6 +9,8 @@ export class PdfService extends Service { super(); } + async initialize(runtime: IAgentRuntime): Promise {} + async convertPdfToText(pdfBuffer: Buffer): Promise { // Convert Buffer to Uint8Array const uint8Array = new Uint8Array(pdfBuffer); diff --git a/packages/plugin-node/src/services/speech.ts b/packages/plugin-node/src/services/speech.ts index 66e45a81ed..e5a0beed45 100644 --- a/packages/plugin-node/src/services/speech.ts +++ b/packages/plugin-node/src/services/speech.ts @@ -3,6 +3,7 @@ import { IAgentRuntime, ISpeechService, ServiceType } from "@ai16z/eliza"; import { getWavHeader } from "./audioUtils.ts"; import { synthesize } from "../vendor/vits.ts"; import { Service } from "@ai16z/eliza"; + function prependWavHeader( readable: Readable, audioLength: number, @@ -107,8 +108,11 @@ async function textToSpeech(runtime: IAgentRuntime, text: string) { } } -export class SpeechService extends Service implements ISpeechService { +export class SpeechService extends Service { static serviceType: ServiceType = ServiceType.SPEECH_GENERATION; + + async initialize(runtime: IAgentRuntime): Promise {} + async generate(runtime: IAgentRuntime, text: string): Promise { // check for elevenlabs API key if (runtime.getSetting("ELEVENLABS_XI_API_KEY")) { diff --git a/packages/plugin-node/src/services/transcription.ts b/packages/plugin-node/src/services/transcription.ts index 0360dcec91..dd2da54949 100644 --- a/packages/plugin-node/src/services/transcription.ts +++ b/packages/plugin-node/src/services/transcription.ts @@ -1,4 +1,4 @@ -import { settings } from "@ai16z/eliza"; +import { IAgentRuntime, settings } from "@ai16z/eliza"; import { Service, ServiceType } from "@ai16z/eliza"; import { exec } from "child_process"; import { File } from "formdata-node"; @@ -27,6 +27,8 @@ export class TranscriptionService extends Service { private queue: { audioBuffer: ArrayBuffer; resolve: Function }[] = []; private processing: boolean = false; + async initialize(runtime: IAgentRuntime): Promise {} + constructor() { super(); const rootDir = path.resolve(__dirname, "../../"); diff --git a/packages/plugin-node/src/services/video.ts b/packages/plugin-node/src/services/video.ts index c4c27ea383..bbc16b0973 100644 --- a/packages/plugin-node/src/services/video.ts +++ b/packages/plugin-node/src/services/video.ts @@ -22,6 +22,8 @@ export class VideoService extends Service { this.ensureCacheDirectoryExists(); } + async initialize(runtime: IAgentRuntime): Promise {} + private ensureCacheDirectoryExists() { if (!fs.existsSync(this.CONTENT_CACHE_DIR)) { fs.mkdirSync(this.CONTENT_CACHE_DIR); @@ -327,9 +329,9 @@ export class VideoService extends Service { console.log("Starting transcription..."); const startTime = Date.now(); - const transcriptionService = runtime.getService( - ServiceType.TRANSCRIPTION - ); + const transcriptionService = runtime + .getService(ServiceType.TRANSCRIPTION) + .getInstance(); if (!transcriptionService) { throw new Error("Transcription service not found"); } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index a97848a2ed..692f2dd449 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -3050,8 +3050,8 @@ packages: cpu: [x64] os: [win32] - '@octokit/app@15.1.0': - resolution: {integrity: sha512-TkBr7QgOmE6ORxvIAhDbZsqPkF7RSqTY4pLTtUQCvr6dTXqvi2fFo46q3h1lxlk/sGMQjqyZ0kEahkD/NyzOHg==} + '@octokit/app@15.1.1': + resolution: {integrity: sha512-fk8xrCSPTJGpyBdBNI+DcZ224dm0aApv4vi6X7/zTmANXlegKV2Td+dJ+fd7APPaPN7R+xttUsj2Fm+AFDSfMQ==} engines: {node: '>= 18'} '@octokit/auth-app@7.1.3': @@ -8380,8 +8380,8 @@ packages: resolution: {integrity: sha512-FMJTLMXfCLMLfJxcX9PFqX5qD88Z5MRGaZCVzfuqeZSPsyiBzs+pahDQjbIWz2QIzPZz0NX9Zy4FX3lmK6YHIg==} engines: {node: '>= 12.13.0'} - local-pkg@0.5.0: - resolution: {integrity: sha512-ok6z3qlYyCDS4ZEU27HaU6x/xZa9Whf8jD4ptH5UZTQYZVYeb9bnZ3ojVhiJNLiXK1Hfc0GNbLXcmZ5plLDDBg==} + local-pkg@0.5.1: + resolution: {integrity: sha512-9rrA30MRRP3gBD3HTGnC6cDFpaE1kVDWxWgqWJUN0RvDNAo+Nz/9GxB+nHOH0ifbVFy0hSA1V6vFDvnx54lTEQ==} engines: {node: '>=14'} locate-character@3.0.0: @@ -15032,7 +15032,7 @@ snapshots: '@iconify/types': 2.0.0 debug: 4.3.7(supports-color@5.5.0) kolorist: 1.8.0 - local-pkg: 0.5.0 + local-pkg: 0.5.1 mlly: 1.7.3 transitivePeerDependencies: - supports-color @@ -15720,7 +15720,7 @@ snapshots: '@nx/nx-win32-x64-msvc@20.1.2': optional: true - '@octokit/app@15.1.0': + '@octokit/app@15.1.1': dependencies: '@octokit/auth-app': 7.1.3 '@octokit/auth-unauthenticated': 6.1.0 @@ -22118,7 +22118,7 @@ snapshots: loader-utils@3.3.1: {} - local-pkg@0.5.0: + local-pkg@0.5.1: dependencies: mlly: 1.7.3 pkg-types: 1.2.1 @@ -22169,7 +22169,7 @@ snapshots: log-symbols@4.1.0: dependencies: - chalk: 4.1.0 + chalk: 4.1.2 is-unicode-supported: 0.1.0 log-symbols@6.0.0: @@ -23440,7 +23440,7 @@ snapshots: octokit@4.0.2: dependencies: - '@octokit/app': 15.1.0 + '@octokit/app': 15.1.1 '@octokit/core': 6.1.2 '@octokit/oauth-app': 7.1.3 '@octokit/plugin-paginate-graphql': 5.2.4(@octokit/core@6.1.2) From 5ec69db803e51acefa53bbee5eb0408667ecb2e2 Mon Sep 17 00:00:00 2001 From: ponderingdemocritus Date: Tue, 19 Nov 2024 21:59:40 +1100 Subject: [PATCH 6/8] character --- packages/agent/src/index.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/agent/src/index.ts b/packages/agent/src/index.ts index 76fce33992..9d53a965f3 100644 --- a/packages/agent/src/index.ts +++ b/packages/agent/src/index.ts @@ -21,7 +21,7 @@ import Database from "better-sqlite3"; import fs from "fs"; import readline from "readline"; import yargs from "yargs"; -import blobert from "./blobert.ts"; +import { character } from "./character.ts"; export const wait = (minTime: number = 1000, maxTime: number = 3000) => { const waitTime = @@ -272,7 +272,7 @@ const startAgents = async () => { let charactersArg = args.characters || args.character; - let characters = [blobert]; + let characters = [character]; if (charactersArg) { characters = await loadCharacters(charactersArg); From ac87e38517b02284915397375dfcb7257008c418 Mon Sep 17 00:00:00 2001 From: ponderingdemocritus Date: Tue, 19 Nov 2024 22:25:59 +1100 Subject: [PATCH 7/8] services --- packages/core/src/runtime.ts | 17 +- packages/plugin-node/src/services/image.ts | 248 +++++++++------------ 2 files changed, 116 insertions(+), 149 deletions(-) diff --git a/packages/core/src/runtime.ts b/packages/core/src/runtime.ts index a5bfea5b4a..47de771d08 100644 --- a/packages/core/src/runtime.ts +++ b/packages/core/src/runtime.ts @@ -159,9 +159,10 @@ export class AgentRuntime implements IAgentRuntime { return serviceInstance as T; } - registerService(service: Service): void { + async registerService(service: Service): Promise { const serviceType = service.serviceType; elizaLogger.log("Registering service:", serviceType); + if (this.services.has(serviceType)) { elizaLogger.warn( `Service ${serviceType} is already registered. Skipping registration.` @@ -169,7 +170,19 @@ export class AgentRuntime implements IAgentRuntime { return; } - this.services.set(serviceType, service); + try { + await service.initialize(this); + this.services.set(serviceType, service); + elizaLogger.success( + `Service ${serviceType} initialized successfully` + ); + } catch (error) { + elizaLogger.error( + `Failed to initialize service ${serviceType}:`, + error + ); + throw error; + } } /** diff --git a/packages/plugin-node/src/services/image.ts b/packages/plugin-node/src/services/image.ts index ff0ec46343..ac4333abe3 100644 --- a/packages/plugin-node/src/services/image.ts +++ b/packages/plugin-node/src/services/image.ts @@ -1,5 +1,4 @@ -// Current image recognition service -- local recognition working, no openai recognition -import { models } from "@ai16z/eliza"; +import { elizaLogger, models } from "@ai16z/eliza"; import { Service } from "@ai16z/eliza"; import { IAgentRuntime, ModelProviderName, ServiceType } from "@ai16z/eliza"; import { @@ -19,6 +18,8 @@ import os from "os"; import path from "path"; export class ImageDescriptionService extends Service { + static serviceType: ServiceType = ServiceType.IMAGE_DESCRIPTION; + private modelId: string = "onnx-community/Florence-2-base-ft"; private device: string = "gpu"; private model: PreTrainedModel | null = null; @@ -26,51 +27,15 @@ export class ImageDescriptionService extends Service { private tokenizer: PreTrainedTokenizer | null = null; private initialized: boolean = false; private runtime: IAgentRuntime | null = null; - - static serviceType: ServiceType = ServiceType.IMAGE_DESCRIPTION; - private queue: string[] = []; private processing: boolean = false; - constructor() { - super(); - } - async initialize(runtime: IAgentRuntime): Promise { + this.runtime = runtime; const model = models[runtime?.character?.modelProvider]; if (model === models[ModelProviderName.LLAMALOCAL]) { - this.modelId = "onnx-community/Florence-2-base-ft"; - - env.allowLocalModels = false; - env.allowRemoteModels = true; - env.backends.onnx.logLevel = "fatal"; - env.backends.onnx.wasm.proxy = false; - env.backends.onnx.wasm.numThreads = 1; - - console.log("Downloading model..."); - - this.model = - await Florence2ForConditionalGeneration.from_pretrained( - this.modelId, - { - device: "gpu", - progress_callback: (progress) => { - if (progress.status === "downloading") { - console.log( - `Model download progress: ${JSON.stringify(progress)}` - ); - } - }, - } - ); - - console.log("Model downloaded successfully."); - - this.processor = (await AutoProcessor.from_pretrained( - this.modelId - )) as Florence2Processor; - this.tokenizer = await AutoTokenizer.from_pretrained(this.modelId); + await this.initializeLocalModel(); } else { this.modelId = "gpt-4o-mini"; this.device = "cloud"; @@ -79,9 +44,44 @@ export class ImageDescriptionService extends Service { this.initialized = true; } + private async initializeLocalModel(): Promise { + env.allowLocalModels = false; + env.allowRemoteModels = true; + env.backends.onnx.logLevel = "fatal"; + env.backends.onnx.wasm.proxy = false; + env.backends.onnx.wasm.numThreads = 1; + + elizaLogger.log("Downloading Florence model..."); + + this.model = await Florence2ForConditionalGeneration.from_pretrained( + this.modelId, + { + device: "gpu", + progress_callback: (progress) => { + if (progress.status === "downloading") { + elizaLogger.log( + `Model download progress: ${JSON.stringify(progress)}` + ); + } + }, + } + ); + + elizaLogger.success("Florence model downloaded successfully"); + + this.processor = (await AutoProcessor.from_pretrained( + this.modelId + )) as Florence2Processor; + this.tokenizer = await AutoTokenizer.from_pretrained(this.modelId); + } + async describeImage( imageUrl: string ): Promise<{ title: string; description: string }> { + if (!this.initialized) { + throw new Error("ImageDescriptionService not initialized"); + } + if (this.device === "cloud") { if (!this.runtime) { throw new Error( @@ -89,37 +89,32 @@ export class ImageDescriptionService extends Service { ); } return this.recognizeWithOpenAI(imageUrl); - } else { - this.queue.push(imageUrl); - this.processQueue(); - - return new Promise((resolve, reject) => { - const checkQueue = () => { - const index = this.queue.indexOf(imageUrl); - if (index !== -1) { - setTimeout(checkQueue, 100); - } else { - resolve(this.processImage(imageUrl)); - } - }; - checkQueue(); - }); } + + this.queue.push(imageUrl); + this.processQueue(); + + return new Promise((resolve, reject) => { + const checkQueue = () => { + const index = this.queue.indexOf(imageUrl); + if (index !== -1) { + setTimeout(checkQueue, 100); + } else { + resolve(this.processImage(imageUrl)); + } + }; + checkQueue(); + }); } private async recognizeWithOpenAI( imageUrl: string ): Promise<{ title: string; description: string }> { - if (!this.runtime) { - throw new Error("Runtime is required for OpenAI image recognition"); - } - const isGif = imageUrl.toLowerCase().endsWith(".gif"); let imageData: Buffer | null = null; try { if (isGif) { - console.log("Processing GIF: extracting first frame"); const { filePath } = await this.extractFirstFrameFromGif(imageUrl); imageData = fs.readFileSync(filePath); @@ -139,19 +134,20 @@ export class ImageDescriptionService extends Service { const prompt = "Describe this image and give it a title. The first line should be the title, and then a line break, then a detailed description of the image. Respond with the format 'title\ndescription'"; - const text = await this.requestOpenAI( imageUrl, imageData, prompt, - isGif, - this.runtime + isGif ); - const title = text.split("\n")[0]; - const description = text.split("\n").slice(1).join("\n"); - return { title, description }; + + const [title, ...descriptionParts] = text.split("\n"); + return { + title, + description: descriptionParts.join("\n"), + }; } catch (error) { - console.error("Error in recognizeWithOpenAI:", error); + elizaLogger.error("Error in recognizeWithOpenAI:", error); throw error; } } @@ -160,50 +156,21 @@ export class ImageDescriptionService extends Service { imageUrl: string, imageData: Buffer, prompt: string, - isGif: boolean, - runtime: IAgentRuntime + isGif: boolean ): Promise { - for (let retryAttempts = 0; retryAttempts < 3; retryAttempts++) { + for (let attempt = 0; attempt < 3; attempt++) { try { - let body; - if (isGif) { - const base64Image = imageData.toString("base64"); - body = JSON.stringify({ - model: "gpt-4o-mini", - messages: [ - { - role: "user", - content: [ - { type: "text", text: prompt }, - { - type: "image_url", - image_url: { - url: `data:image/png;base64,${base64Image}`, - }, - }, - ], - }, - ], - max_tokens: 500, - }); - } else { - body = JSON.stringify({ - model: "gpt-4o-mini", - messages: [ - { - role: "user", - content: [ - { type: "text", text: prompt }, - { - type: "image_url", - image_url: { url: imageUrl }, - }, - ], - }, - ], - max_tokens: 300, - }); - } + const content = [ + { type: "text", text: prompt }, + { + type: "image_url", + image_url: { + url: isGif + ? `data:image/png;base64,${imageData.toString("base64")}` + : imageUrl, + }, + }, + ]; const response = await fetch( "https://api.openai.com/v1/chat/completions", @@ -211,9 +178,13 @@ export class ImageDescriptionService extends Service { method: "POST", headers: { "Content-Type": "application/json", - Authorization: `Bearer ${runtime.getSetting("OPENAI_API_KEY")}`, + Authorization: `Bearer ${this.runtime.getSetting("OPENAI_API_KEY")}`, }, - body: body, + body: JSON.stringify({ + model: "gpt-4o-mini", + messages: [{ role: "user", content }], + max_tokens: isGif ? 500 : 300, + }), } ); @@ -224,13 +195,11 @@ export class ImageDescriptionService extends Service { const data = await response.json(); return data.choices[0].message.content; } catch (error) { - console.log( - `Error during OpenAI request (attempt ${retryAttempts + 1}):`, + elizaLogger.error( + `OpenAI request failed (attempt ${attempt + 1}):`, error ); - if (retryAttempts === 2) { - throw error; - } + if (attempt === 2) throw error; } } throw new Error( @@ -239,30 +208,30 @@ export class ImageDescriptionService extends Service { } private async processQueue(): Promise { - if (this.processing || this.queue.length === 0) { - return; - } + if (this.processing || this.queue.length === 0) return; this.processing = true; - while (this.queue.length > 0) { const imageUrl = this.queue.shift(); await this.processImage(imageUrl); } - this.processing = false; } private async processImage( imageUrl: string ): Promise<{ title: string; description: string }> { - console.log("***** PROCESSING IMAGE", imageUrl); + if (!this.model || !this.processor || !this.tokenizer) { + throw new Error("Model components not initialized"); + } + + elizaLogger.log("Processing image:", imageUrl); const isGif = imageUrl.toLowerCase().endsWith(".gif"); let imageToProcess = imageUrl; try { if (isGif) { - console.log("Processing GIF: extracting first frame"); + elizaLogger.log("Extracting first frame from GIF"); const { filePath } = await this.extractFirstFrameFromGif(imageUrl); imageToProcess = filePath; @@ -270,44 +239,32 @@ export class ImageDescriptionService extends Service { const image = await RawImage.fromURL(imageToProcess); const visionInputs = await this.processor(image); - const prompts = this.processor.construct_prompts(""); const textInputs = this.tokenizer(prompts); - console.log("***** GENERATING"); - + elizaLogger.log("Generating image description"); const generatedIds = (await this.model.generate({ ...textInputs, ...visionInputs, max_new_tokens: 256, })) as Tensor; - console.log("***** GENERATED IDS", generatedIds); - const generatedText = this.tokenizer.batch_decode(generatedIds, { skip_special_tokens: false, })[0]; - console.log("***** GENERATED TEXT"); - console.log(generatedText); - const result = this.processor.post_process_generation( generatedText, "", image.size ); - console.log("***** RESULT"); - console.log(result); - const detailedCaption = result[""] as string; - - // TODO: handle this better - return { title: detailedCaption, description: detailedCaption }; } catch (error) { - console.error("Error in processImage:", error); + elizaLogger.error("Error processing image:", error); + throw error; } finally { if (isGif && imageToProcess !== imageUrl) { fs.unlinkSync(imageToProcess); @@ -323,19 +280,16 @@ export class ImageDescriptionService extends Service { frames: 1, outputType: "png", }); - const firstFrame = frameData[0]; - const tempDir = os.tmpdir(); - const tempFilePath = path.join(tempDir, `gif_frame_${Date.now()}.png`); + const tempFilePath = path.join( + os.tmpdir(), + `gif_frame_${Date.now()}.png` + ); return new Promise((resolve, reject) => { const writeStream = fs.createWriteStream(tempFilePath); - firstFrame.getImage().pipe(writeStream); - - writeStream.on("finish", () => { - resolve({ filePath: tempFilePath }); - }); - + frameData[0].getImage().pipe(writeStream); + writeStream.on("finish", () => resolve({ filePath: tempFilePath })); writeStream.on("error", reject); }); } From 48d6fdbc966e11c4b10dfc666fdb3dbfc9dc3cc3 Mon Sep 17 00:00:00 2001 From: ponderingdemocritus Date: Wed, 20 Nov 2024 06:09:35 +1100 Subject: [PATCH 8/8] issues --- packages/client-discord/src/messages.ts | 6 ++++++ packages/client-discord/src/voice.ts | 12 ++++++------ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/packages/client-discord/src/messages.ts b/packages/client-discord/src/messages.ts index cab78a8ad2..25227e3e94 100644 --- a/packages/client-discord/src/messages.ts +++ b/packages/client-discord/src/messages.ts @@ -521,6 +521,12 @@ export class MessageManager { ServiceType.SPEECH_GENERATION ); + if (!speechService) { + throw new Error( + "Speech generation service not found" + ); + } + const audioStream = await speechService.generate( this.runtime, content.text diff --git a/packages/client-discord/src/voice.ts b/packages/client-discord/src/voice.ts index cf03ac45e5..db62844756 100644 --- a/packages/client-discord/src/voice.ts +++ b/packages/client-discord/src/voice.ts @@ -374,7 +374,6 @@ export class VoiceManager extends EventEmitter { audioStream, 10000000, async (buffer) => { - // console.log("buffer: ", buffer); const currentTime = Date.now(); const silenceDuration = currentTime - lastChunkTime; if (!buffer) { @@ -401,15 +400,16 @@ export class VoiceManager extends EventEmitter { this.runtime.getService( ServiceType.TRANSCRIPTION ); - console.log( - "transcriptionService: ", - transcriptionService - ); + + if (!transcriptionService) { + throw new Error( + "Transcription generation service not found" + ); + } const text = await transcriptionService.transcribe(wavBuffer); - console.log("transcribed text: ", text); transcriptionText += text; } catch (error) { console.error("Error processing audio stream:", error);