Skip to content

Commit

Permalink
⚡️ perf: fix slow delete file sql (lobehub#4738)
Browse files Browse the repository at this point in the history
  • Loading branch information
arvinxx authored Nov 19, 2024
1 parent 41d92d8 commit f711cef
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 5 deletions.
37 changes: 37 additions & 0 deletions src/database/server/models/file.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { asc, count, eq, ilike, inArray, notExists, or, sum } from 'drizzle-orm';
import { and, desc, like } from 'drizzle-orm/expressions';
import type { PgTransaction } from 'drizzle-orm/pg-core';

import { serverDBEnv } from '@/config/db';
import { serverDB } from '@/database/server/core/db';
Expand All @@ -9,6 +10,9 @@ import {
FileItem,
NewFile,
NewGlobalFile,
chunks,
embeddings,
fileChunks,
files,
globalFiles,
knowledgeBaseFiles,
Expand Down Expand Up @@ -68,6 +72,10 @@ export class FileModel {
const fileHash = file.fileHash!;

return await serverDB.transaction(async (trx) => {
// 1. 删除相关的 chunks
await this.deleteFileChunks(trx as any, [id]);

// 2. 删除文件记录
await trx.delete(files).where(and(eq(files.id, id), eq(files.userId, this.userId)));

const result = await trx
Expand Down Expand Up @@ -107,6 +115,9 @@ export class FileModel {
const hashList = fileList.map((file) => file.fileHash!);

return await serverDB.transaction(async (trx) => {
// 1. 删除相关的 chunks
await this.deleteFileChunks(trx as any, ids);

// delete the files
await trx.delete(files).where(and(inArray(files.id, ids), eq(files.userId, this.userId)));

Expand Down Expand Up @@ -289,4 +300,30 @@ export class FileModel {
),
});
}

// 抽象出通用的删除 chunks 方法
private async deleteFileChunks(trx: PgTransaction<any>, fileIds: string[]) {
const BATCH_SIZE = 1000; // 每批处理的数量

// 1. 获取所有关联的 chunk IDs
const relatedChunks = await trx
.select({ chunkId: fileChunks.chunkId })
.from(fileChunks)
.where(inArray(fileChunks.fileId, fileIds));

const chunkIds = relatedChunks.map((c) => c.chunkId).filter(Boolean) as string[];

if (chunkIds.length === 0) return;

// 2. 分批处理删除
for (let i = 0; i < chunkIds.length; i += BATCH_SIZE) {
const batchChunkIds = chunkIds.slice(i, i + BATCH_SIZE);

await trx.delete(embeddings).where(inArray(embeddings.chunkId, batchChunkIds));

await trx.delete(chunks).where(inArray(chunks.id, batchChunkIds));
}

return chunkIds;
}
}
5 changes: 0 additions & 5 deletions src/server/routers/lambda/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,6 @@ export const fileRouter = router({
removeFile: fileProcedure.input(z.object({ id: z.string() })).mutation(async ({ input, ctx }) => {
const file = await ctx.fileModel.delete(input.id);

// delete the orphan chunks
await ctx.chunkModel.deleteOrphanChunks();
if (!file) return;

// delele the file from remove from S3 if it is not used by other files
Expand Down Expand Up @@ -187,9 +185,6 @@ export const fileRouter = router({
.mutation(async ({ input, ctx }) => {
const needToRemoveFileList = await ctx.fileModel.deleteMany(input.ids);

// delete the orphan chunks
await ctx.chunkModel.deleteOrphanChunks();

if (!needToRemoveFileList || needToRemoveFileList.length === 0) return;

// remove from S3
Expand Down

0 comments on commit f711cef

Please sign in to comment.