forked from run-llama/LlamaIndexTS
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add a reader for Discord messages (run-llama#1040)
- Loading branch information
1 parent
b3681bf
commit 9bbbc67
Showing
8 changed files
with
269 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
--- | ||
"llamaindex": patch | ||
--- | ||
|
||
feat: add a reader for Discord messages |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
import CodeBlock from "@theme/CodeBlock"; | ||
import CodeSource from "!raw-loader!../../../../../examples/readers/src/discord"; | ||
|
||
# DiscordReader | ||
|
||
DiscordReader is a simple data loader that reads all messages in a given Discord channel and returns them as Document objects. | ||
It uses the [@discordjs/rest](https://github.com/discordjs/discord.js/tree/main/packages/rest) library to fetch the messages. | ||
|
||
## Usage | ||
|
||
The first step is to create a Discord Application and generate a bot token [here](https://discord.com/developers/applications). | ||
In your Discord Application, go to the `OAuth2` tab and generate an invite URL by selecting `bot` and then checking `Read Messages/View Channels` as well as `Read Message History`. | ||
This will invite the bot with the necessary permissions to read messages. | ||
Copy the URL in your browser and select the server you want your bot to join. | ||
|
||
<CodeBlock language="ts">{CodeSource}</CodeBlock> | ||
|
||
### Params | ||
|
||
#### DiscordReader() | ||
|
||
- `discordToken?`: The Discord bot token. | ||
- `makeRequest?`: Optionally provide a custom request function for edge environments, e.g. `fetch`. See discord.js for more info. | ||
|
||
#### DiscordReader.loadData | ||
|
||
- `channelIds`: The ID(s) of the Discord channels to read, as an array of strings. | ||
- `limit?`: Optionally limit the number of messages to read | ||
- `additionalInfo?`: An optional flag to include embedded messages and attachment urls in the document. | ||
- `oldestFirst?`: An optional flag to return the oldest messages first. | ||
|
||
## API Reference | ||
|
||
- [DiscordReader](../../api/classes/DiscordReader.md) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import { DiscordReader } from "llamaindex"; | ||
|
||
/**
 * Example: load messages from two Discord channels and print the resulting
 * documents.
 */
async function main() {
  // The bot token is passed to the constructor or read from the DISCORD_TOKEN environment variable.
  const reader = new DiscordReader();

  // Channel IDs are given as an array of strings.
  const channels = ["721374320794009630", "719596376261918720"];

  // Number of messages to fetch from each channel.
  const perChannelLimit = 10;

  // The third argument asks for embed content and attachment URLs to be included in the text.
  const documents = await reader.loadData(channels, perChannelLimit, true);

  // Print out the loaded documents.
  console.log(documents);
}

main().catch((error) => console.error(error));
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
import { REST, type RESTOptions } from "@discordjs/rest"; | ||
import { Document } from "@llamaindex/core/schema"; | ||
import { getEnv } from "@llamaindex/env"; | ||
import { Routes, type APIEmbed, type APIMessage } from "discord-api-types/v10"; | ||
|
||
/**
 * Reads messages from Discord channels with the REST client from
 * `@discordjs/rest` and converts each message into a `Document`.
 *
 * See https://github.com/discordjs/discord.js/tree/main/packages/rest
 */
export class DiscordReader {
  private readonly client: REST;

  /**
   * @param discordToken - Bot token. When omitted, the `DISCORD_TOKEN`
   *   environment variable is used instead.
   * @param requestHandler - Optional replacement for the REST client's
   *   `makeRequest` function.
   * @throws Error when no token is passed and `DISCORD_TOKEN` is not set.
   */
  constructor(
    discordToken?: string,
    requestHandler?: RESTOptions["makeRequest"],
  ) {
    const token = discordToken ?? getEnv("DISCORD_TOKEN");
    if (!token) {
      throw new Error(
        "Must specify `discordToken` or set environment variable `DISCORD_TOKEN`.",
      );
    }

    const restOptions: Partial<RESTOptions> = { version: "10" };

    // Use the provided request handler if specified
    if (requestHandler) {
      restOptions.makeRequest = requestHandler;
    }

    this.client = new REST(restOptions).setToken(token);
  }

  /**
   * Orders two snowflake IDs numerically without parsing them.
   * For decimal strings without leading zeros, the shorter one is smaller and
   * equal-length ones compare lexicographically.
   */
  private static compareSnowflakes(a: string, b: string): number {
    if (a.length !== b.length) return a.length - b.length;
    if (a === b) return 0;
    return a < b ? -1 : 1;
  }

  /**
   * Fetches one page of messages from a single channel.
   *
   * Failures are logged and reported as an empty result so that one
   * unreadable channel does not abort a multi-channel load.
   */
  private async readChannel(
    channelId: string,
    limit?: number,
    additionalInfo?: boolean,
    oldestFirst?: boolean,
  ): Promise<Document[]> {
    const query = new URLSearchParams();
    if (limit) query.append("limit", limit.toString());
    // `after=0` anchors the page at the very beginning of the channel history.
    if (oldestFirst) query.append("after", "0");

    try {
      // Pass the parameters through the `query` option so the REST client
      // builds the URL; no dangling "?" and no route type assertion needed.
      const messages = (await this.client.get(
        Routes.channelMessages(channelId),
        { query },
      )) as APIMessage[];

      // Discord returns a page newest-first even when `after` is used, so sort
      // ascending by ID to actually deliver the oldest message first.
      const ordered = oldestFirst
        ? [...messages].sort((a, b) =>
            DiscordReader.compareSnowflakes(a.id, b.id),
          )
        : messages;

      return ordered.map((msg) =>
        this.createDocumentFromMessage(msg, additionalInfo),
      );
    } catch (err) {
      console.error(`Failed to read Discord channel ${channelId}:`, err);
      return [];
    }
  }

  /**
   * Converts a message into a `Document` whose ID is the message ID.
   *
   * With `additionalInfo`, embed text and attachment URLs are appended after
   * the message content, one entry per line.
   */
  private createDocumentFromMessage(
    msg: APIMessage,
    additionalInfo?: boolean,
  ): Document {
    // Collect only non-empty parts so embed-only or attachment-only messages
    // do not start with a blank line.
    const parts: string[] = [];
    if (msg.content) parts.push(msg.content);

    if (additionalInfo) {
      for (const embed of msg.embeds) {
        parts.push(this.embedToString(embed));
      }
      for (const attachment of msg.attachments) {
        parts.push(`Attachment: ${attachment.url}`);
      }
    }

    return new Document({
      text: parts.join("\n"),
      id_: msg.id,
      metadata: {
        messageId: msg.id,
        username: msg.author.username,
        createdAt: new Date(msg.timestamp).toISOString(),
        editedAt: msg.edited_timestamp
          ? new Date(msg.edited_timestamp).toISOString()
          : undefined,
      },
    });
  }

  /** Creates a string representation of an embedded message. */
  private embedToString(embed: APIEmbed): string {
    const lines = ["***Embedded Message***"];
    if (embed.title) lines.push(`**${embed.title}**`);
    if (embed.description) lines.push(embed.description);
    if (embed.url) lines.push(embed.url);
    for (const field of embed.fields ?? []) {
      lines.push(`**${field.name}**: ${field.value}`);
    }
    return lines.join("\n").trim();
  }

  /**
   * Loads messages from multiple Discord channels and returns them as
   * documents, grouped in the order the channels were given.
   *
   * @param channelIds - Channel IDs from which to load messages.
   * @param limit - Optional limit on the number of messages to load per channel.
   * @param additionalInfo - Optional flag to include content from embedded
   *   messages and attachment URLs as text.
   * @param oldestFirst - Optional flag to load the oldest messages of each
   *   channel and return them oldest first.
   * @returns The loaded documents. A channel that fails to load contributes
   *   no documents.
   * @throws Error when any channel ID is not a string.
   */
  async loadData(
    channelIds: string[],
    limit?: number,
    additionalInfo?: boolean,
    oldestFirst?: boolean,
  ): Promise<Document[]> {
    // Validate everything before the first request so bad input fails fast.
    for (const channelId of channelIds) {
      if (typeof channelId !== "string") {
        throw new Error(`Channel id ${channelId} must be a string.`);
      }
    }

    const results: Document[] = [];
    // Channels are read one after another to keep the output order stable.
    for (const channelId of channelIds) {
      const channelDocuments = await this.readChannel(
        channelId,
        limit,
        additionalInfo,
        oldestFirst,
      );
      results.push(...channelDocuments);
    }
    return results;
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.