Skip to content

Commit

Permalink
feat: add a reader for Discord messages (run-llama#1040)
Browse files Browse the repository at this point in the history
  • Loading branch information
KindOfAScam authored Jul 16, 2024
1 parent b3681bf commit 9bbbc67
Show file tree
Hide file tree
Showing 8 changed files with 269 additions and 1 deletion.
5 changes: 5 additions & 0 deletions .changeset/five-ladybugs-act.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"llamaindex": patch
---

feat: add a reader for Discord messages
34 changes: 34 additions & 0 deletions apps/docs/docs/modules/data_loaders/discord.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import CodeBlock from "@theme/CodeBlock";
import CodeSource from "!raw-loader!../../../../../examples/readers/src/discord";

# DiscordReader

DiscordReader is a simple data loader that reads all messages in a given Discord channel and returns them as Document objects.
It uses the [@discordjs/rest](https://github.com/discordjs/discord.js/tree/main/packages/rest) library to fetch the messages.

## Usage

The first step is to create a Discord Application and generate a bot token [here](https://discord.com/developers/applications).
In your Discord Application, go to the `OAuth2` tab and generate an invite URL by selecting `bot` and checking `Read Messages/View Channels` as well as `Read Message History`.
This will invite the bot with the necessary permissions to read messages.
Open the URL in your browser and select the server you want your bot to join.

<CodeBlock language="ts">{CodeSource}</CodeBlock>

### Params

#### DiscordReader()

- `discordToken?`: The Discord bot token.
- `requestHandler?`: Optionally provide a custom request function for edge environments, e.g. `fetch`. It is passed to @discordjs/rest as its `makeRequest` option; see discord.js for more info.

#### DiscordReader.loadData

- `channelIds`: The ID(s) of the Discord channels to read, as an array of strings.
- `limit?`: Optionally limit the number of messages to read per channel.
- `additionalInfo?`: An optional flag to include embedded messages and attachment urls in the document.
- `oldestFirst?`: An optional flag to return the oldest messages first.

## API Reference

- [DiscordReader](../../api/classes/DiscordReader.md)
3 changes: 2 additions & 1 deletion examples/readers/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
"start:llamaparse": "node --import tsx ./src/llamaparse.ts",
"start:notion": "node --import tsx ./src/notion.ts",
"start:llamaparse-dir": "node --import tsx ./src/simple-directory-reader-with-llamaparse.ts",
"start:llamaparse-json": "node --import tsx ./src/llamaparse-json.ts"
"start:llamaparse-json": "node --import tsx ./src/llamaparse-json.ts",
"start:discord": "node --import tsx ./src/discord.ts"
},
"dependencies": {
"llamaindex": "*"
Expand Down
20 changes: 20 additions & 0 deletions examples/readers/src/discord.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import { DiscordReader } from "llamaindex";

async function main() {
  // Channels to read from, given as an array of string IDs
  const targetChannels = ["721374320794009630", "719596376261918720"];

  // Maximum number of messages to fetch from each channel
  const messagesPerChannel = 10;

  // The reader takes its bot token from the constructor argument or,
  // as here, from the DISCORD_TOKEN environment variable
  const reader = new DiscordReader();

  // Fetch the messages; the third argument (`additionalInfo`) also adds
  // embedded-message content and attachment URLs to each document's text
  const documents = await reader.loadData(
    targetChannels,
    messagesPerChannel,
    true,
  );

  // Dump the resulting documents to stdout
  console.log(documents);
}

main().catch(console.error);
2 changes: 2 additions & 0 deletions packages/llamaindex/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
"@aws-crypto/sha256-js": "^5.2.0",
"@azure/identity": "^4.2.1",
"@datastax/astra-db-ts": "^1.2.1",
"@discordjs/rest": "^2.3.0",
"@google-cloud/vertexai": "^1.2.0",
"@google/generative-ai": "0.12.0",
"@grpc/grpc-js": "^1.10.11",
Expand All @@ -45,6 +46,7 @@
"assemblyai": "^4.6.0",
"chromadb": "1.8.1",
"cohere-ai": "7.10.6",
"discord-api-types": "^0.37.92",
"groq-sdk": "^0.5.0",
"js-tiktoken": "^1.0.12",
"lodash": "^4.17.21",
Expand Down
137 changes: 137 additions & 0 deletions packages/llamaindex/src/readers/DiscordReader.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import { REST, type RESTOptions } from "@discordjs/rest";
import { Document } from "@llamaindex/core/schema";
import { getEnv } from "@llamaindex/env";
import { Routes, type APIEmbed, type APIMessage } from "discord-api-types/v10";

/**
 * Reader that turns Discord channel messages into `Document` objects using @discordjs/rest.
 * See https://github.com/discordjs/discord.js/tree/main/packages/rest
 *
 * Each channel is read with a single request, so at most one page of messages
 * is returned per channel; no pagination is performed.
 */
export class DiscordReader {
  private readonly client: REST;

  /**
   * @param discordToken - Discord bot token. Falls back to the `DISCORD_TOKEN`
   *   environment variable when omitted.
   * @param requestHandler - Optional custom request function, forwarded to
   *   @discordjs/rest as its `makeRequest` option.
   * @throws Error when no token is passed and `DISCORD_TOKEN` is not set.
   */
  constructor(
    discordToken?: string,
    requestHandler?: RESTOptions["makeRequest"],
  ) {
    const token = discordToken ?? getEnv("DISCORD_TOKEN");
    if (!token) {
      throw new Error(
        "Must specify `discordToken` or set environment variable `DISCORD_TOKEN`.",
      );
    }

    const restOptions: Partial<RESTOptions> = { version: "10" };

    // Use the provided request handler if specified
    if (requestHandler) {
      restOptions.makeRequest = requestHandler;
    }

    this.client = new REST(restOptions).setToken(token);
  }

  /**
   * Reads one page of messages from a single channel.
   *
   * Failures are logged and reported as an empty result so that one
   * unreadable channel does not abort a multi-channel load.
   *
   * @param channelId - ID of the channel to read.
   * @param limit - Optional `limit` query parameter; omitted when falsy so the
   *   API applies its own default.
   * @param additionalInfo - When true, embed content and attachment URLs are
   *   appended to each document's text.
   * @param oldestFirst - When true, sends `after=0`, i.e. requests messages
   *   following snowflake 0 (the start of the channel history).
   * @returns One document per message returned by the API, or `[]` on error.
   */
  private async readChannel(
    channelId: string,
    limit?: number,
    additionalInfo?: boolean,
    oldestFirst?: boolean,
  ): Promise<Document[]> {
    const query = new URLSearchParams();
    if (limit) query.append("limit", limit.toString());
    if (oldestFirst) query.append("after", "0");

    try {
      // Pass the query through the request options instead of gluing it onto
      // the route: the route stays a plain, correctly typed channel path.
      const messages = (await this.client.get(
        Routes.channelMessages(channelId),
        { query },
      )) as APIMessage[];
      return messages.map((msg) =>
        this.createDocumentFromMessage(msg, additionalInfo),
      );
    } catch (err) {
      console.error(
        `Failed to read messages from Discord channel ${channelId}:`,
        err,
      );
      return [];
    }
  }

  /**
   * Converts a single API message into a `Document`.
   *
   * The text is the message content, optionally followed by one section per
   * embed and one `Attachment: <url>` line per attachment, joined by newlines.
   * Empty message content is skipped so the text never starts with a blank line.
   *
   * @param msg - Message as returned by the Discord API.
   * @param additionalInfo - When true, include embeds and attachment URLs.
   * @returns A document whose id is the message id and whose metadata carries
   *   the message id, author username and created/edited timestamps (ISO 8601).
   */
  private createDocumentFromMessage(
    msg: APIMessage,
    additionalInfo?: boolean,
  ): Document {
    const parts: string[] = [];
    if (msg.content) parts.push(msg.content);

    if (additionalInfo) {
      // Include information from embedded messages
      for (const embed of msg.embeds) {
        parts.push(this.embedToString(embed));
      }
      // Include URL from attachments
      for (const attachment of msg.attachments) {
        parts.push(`Attachment: ${attachment.url}`);
      }
    }

    return new Document({
      text: parts.join("\n"),
      id_: msg.id,
      metadata: {
        messageId: msg.id,
        username: msg.author.username,
        createdAt: new Date(msg.timestamp).toISOString(),
        editedAt: msg.edited_timestamp
          ? new Date(msg.edited_timestamp).toISOString()
          : undefined,
      },
    });
  }

  /**
   * Creates a string representation of an embedded message.
   *
   * @param embed - Embed taken from a message.
   * @returns A header line followed by the embed's title, description, URL and
   *   `**name**: value` field lines (each only when present), trimmed.
   */
  private embedToString(embed: APIEmbed): string {
    let result = "***Embedded Message***\n";
    if (embed.title) result += `**${embed.title}**\n`;
    if (embed.description) result += `${embed.description}\n`;
    if (embed.url) result += `${embed.url}\n`;
    if (embed.fields) {
      result += embed.fields
        .map((field) => `**${field.name}**: ${field.value}`)
        .join("\n");
    }
    return result.trim();
  }

  /**
   * Loads messages from multiple discord channels and returns an array of Document Objects.
   *
   * Channels are read one after another, in the order given, and their
   * documents are concatenated in that order.
   *
   * @param channelIds - An array of channel IDs from which to load data.
   * @param limit - An optional limit on the number of messages to load per channel.
   * @param additionalInfo - An optional flag to include content from embedded messages and attachments urls as text.
   * @param oldestFirst - An optional flag to load oldest messages first.
   * @returns A promise that resolves to an array of loaded documents.
   * @throws Error when any entry of `channelIds` is not a string; this is
   *   checked before any request is sent.
   */
  async loadData(
    channelIds: string[],
    limit?: number,
    additionalInfo?: boolean,
    oldestFirst?: boolean,
  ): Promise<Document[]> {
    // Validate everything first so a bad ID fails fast instead of after
    // requests for the preceding channels have already been made.
    for (const channelId of channelIds) {
      if (typeof channelId !== "string") {
        throw new Error(`Channel id ${channelId} must be a string.`);
      }
    }

    const results: Document[] = [];
    for (const channelId of channelIds) {
      const channelDocuments = await this.readChannel(
        channelId,
        limit,
        additionalInfo,
        oldestFirst,
      );
      results.push(...channelDocuments);
    }
    return results;
  }
}
1 change: 1 addition & 0 deletions packages/llamaindex/src/readers/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
export * from "./AssemblyAIReader.js";
export * from "./CSVReader.js";
export * from "./DiscordReader.js";
export * from "./DocxReader.js";
export * from "./HTMLReader.js";
export * from "./ImageReader.js";
Expand Down
68 changes: 68 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 9bbbc67

Please sign in to comment.