Skip to content

Commit

Permalink
Feat: Adding file through Notion
Browse files Browse the repository at this point in the history
Signed-off-by: Daishan Peng <daishan@acorn.io>
  • Loading branch information
StrongMonkey committed Sep 3, 2024
1 parent 50bdc85 commit 48cb300
Show file tree
Hide file tree
Showing 11 changed files with 565 additions and 163 deletions.
18 changes: 18 additions & 0 deletions actions/knowledge/filehelper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import fs from 'fs';
import path from 'path';
import { FileDetail } from '@/actions/knowledge/util';

export async function getFileOrFolderSizeInKB(
filePath: string
Expand Down Expand Up @@ -31,3 +32,20 @@ export async function getFileOrFolderSizeInKB(
/**
 * Resolves to the last segment of `filePath`, as computed by `path.basename`.
 *
 * The lookup itself is synchronous; the function is declared `async`, so
 * callers always receive a promise.
 *
 * @param filePath - Path whose final segment is wanted.
 * @returns The base name of `filePath`.
 */
export async function getBasename(filePath: string): Promise<string> {
  const lastSegment = path.basename(filePath);
  return lastSegment;
}

/**
 * Builds a `FileDetail` record for every path in `files`.
 *
 * @param files - Paths to describe; each path becomes a key of the result.
 * @param type - Origin tag copied onto every entry.
 * @returns Map from each given path to its base name, its size as reported by
 *   `fs.stat` (bytes), and `type`. Entries keep the order of `files`.
 * @throws Rejects with the underlying `fs` error if any path cannot be
 *   stat-ed (for example because it does not exist).
 */
export async function importFiles(
  files: string[],
  type: 'local' | 'notion'
): Promise<Map<string, FileDetail>> {
  // Use the promise-based stat so the event loop is not blocked once per file;
  // the lookups are independent, so they are issued together.
  const entries = await Promise.all(
    files.map(async (file): Promise<[string, FileDetail]> => {
      const { size } = await fs.promises.stat(file);
      return [file, { fileName: path.basename(file), size, type }];
    })
  );

  // Promise.all preserves input order, so the map iterates in `files` order.
  return new Map<string, FileDetail>(entries);
}
105 changes: 58 additions & 47 deletions actions/knowledge/knowledge.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import path from 'path';
import { exec } from 'child_process';
import { promisify } from 'util';
import { KNOWLEDGE_DIR } from '@/config/env';
import { FileDetail } from '@/actions/knowledge/util';
import { getFileOrFolderSizeInKB } from '@/actions/knowledge/filehelper';

const execPromise = promisify(exec);

Expand Down Expand Up @@ -39,97 +41,106 @@ export async function deleteDataset(datasetID: string): Promise<void> {

/**
 * Resolves to `true` when the directory
 * `KNOWLEDGE_DIR()/script_data/<scriptId>/data` does not exist yet and `files`
 * is non-empty; otherwise resolves to `false`.
 *
 * NOTE(review): this span is a rendered diff, so two variants of the `files`
 * parameter and of the `return` statement appear back to back — an array
 * checked with `.length` and a `Map<string, FileDetail>` checked with `.size`.
 */
export async function firstIngestion(
scriptId: string,
files: string[]
files: Map<string, FileDetail>
): Promise<boolean> {
const dir = path.join(KNOWLEDGE_DIR(), 'script_data', scriptId, 'data');
return !fs.existsSync(dir) && files.length > 0;
return !fs.existsSync(dir) && files.size > 0;
}

/**
 * Brings `KNOWLEDGE_DIR()/script_data/<scriptId>/data` in line with `files`.
 *
 * NOTE(review): this span is a rendered diff; lines of two variants are
 * interleaved (`ensureFilesIngested`, taking `string[]` plus a `token` and
 * returning an error string, and `ensureFiles`, taking
 * `Map<string, FileDetail>` and returning `Promise<void>`).
 *
 * As far as the `Map`-based lines show:
 * - the data directory is created when it is missing and `files` is non-empty;
 * - each entry is placed under a sub-folder named after its `type`: `local`
 *   entries are copied with `copyFile`, `notion` entries are symlinked;
 * - unless `updateOnly` is true, every file in the `local` and `notion`
 *   sub-folders whose base name matches no key of `files` is unlinked.
 *
 * The array-based lines additionally call `runKnowledgeIngest` and report
 * failures by returning an error message string.
 */
export async function ensureFilesIngested(
files: string[],
updateOnly: boolean,
export async function ensureFiles(
files: Map<string, FileDetail>,
scriptId: string,
token: string
): Promise<string> {
updateOnly: boolean
): Promise<void> {
const dir = path.join(KNOWLEDGE_DIR(), 'script_data', scriptId, 'data');
if (!fs.existsSync(dir) && files.length > 0) {
if (!fs.existsSync(dir) && files.size > 0) {
fs.mkdirSync(dir, { recursive: true });
} else if (!fs.existsSync(dir) && files.length === 0) {
// if there are no files in the directory and no dropped files, do nothing
return '';
}

for (const file of files) {
const filePath = path.join(dir, path.basename(file));
try {
if (!fs.existsSync(filePath)) {
await fs.promises.copyFile(file, filePath);
// Each entry is a [sourcePath, FileDetail] pair: file[0] is the map key, file[1] the detail.
for (const file of Array.from(files.entries())) {
if (!fs.existsSync(path.join(dir, file[1].type))) {
fs.mkdirSync(path.join(dir, file[1].type), { recursive: true });
}
// The destination name is only the base name of the source path, inside the per-type sub-folder.
const filePath = path.join(dir, file[1].type, path.basename(file[0]));
if (!fs.existsSync(filePath)) {
if (file[1].type === 'local') {
await fs.promises.copyFile(file[0], filePath);
} else if (file[1].type === 'notion') {
// NOTE(review): this check appears to sit inside the enclosing
// `!fs.existsSync(filePath)` branch, so `fs.existsSync(filePath)` cannot be
// true here and the `continue` looks unreachable — confirm. Presumably a
// dangling symlink would still make `symlink` below fail; verify.
if (
fs.existsSync(filePath) &&
fs.lstatSync(filePath).isSymbolicLink()
) {
continue;
}
await fs.promises.symlink(file[0], filePath);
}
} catch (error) {
return `Error copying file ${file}: ${error}`;
}
}

if (!updateOnly) {
try {
const filesInDir = await fs.promises.readdir(dir);
for (const type of ['local', 'notion']) {
if (!fs.existsSync(path.join(dir, type))) {
continue;
}
const filesInDir = await fs.promises.readdir(path.join(dir, type));
for (const fileName of filesInDir) {
const fullPath = path.join(dir, fileName);
const fileInDroppedFiles = files.find(
const fullPath = path.join(dir, type, fileName);
// Matching is by base name only and ignores the entry's type.
const fileInDroppedFiles = Array.from(files.keys()).find(
(file) => path.basename(file) === path.basename(fullPath)
);
if (!fileInDroppedFiles || !files || files.length === 0) {
if (!fileInDroppedFiles || !files || files.size === 0) {
await fs.promises.unlink(fullPath);
}
}
} catch (error) {
return `Error deleting files: ${error}`;
}
}

try {
await runKnowledgeIngest(
scriptId,
path.join(KNOWLEDGE_DIR(), 'script_data', scriptId),
token
);
} catch (error) {
return `Error running knowledge ingestion: ${error}`;
}

return '';
return;
}

/**
 * Runs `${process.env.KNOWLEDGE_BIN} ingest --prune --dataset <id> ./data`
 * through `exec`, passing `token` to the child process as
 * `GPTSCRIPT_GATEWAY_API_KEY` on top of the current environment.
 *
 * Returns without running anything when
 * `KNOWLEDGE_DIR()/script_data/<id>/data` does not exist.
 *
 * NOTE(review): this span is a rendered diff; it shows both a non-exported
 * and an exported declaration line, and two `cwd` values (`knowledgePath` and
 * a path derived from `id`). With the derived `cwd`, `knowledgePath` is not
 * read anywhere in the visible body.
 *
 * NOTE(review): `id` is interpolated into the shell command string unquoted —
 * confirm it can never contain shell metacharacters.
 */
async function runKnowledgeIngest(
export async function runKnowledgeIngest(
id: string,
knowledgePath: string,
token: string
): Promise<void> {
if (!fs.existsSync(path.join(KNOWLEDGE_DIR(), 'script_data', id, 'data'))) {
return;
}
await execPromise(
`${process.env.KNOWLEDGE_BIN} ingest --prune --dataset ${id} ./data`,
{
cwd: knowledgePath,
cwd: path.join(KNOWLEDGE_DIR(), 'script_data', id),
env: { ...process.env, GPTSCRIPT_GATEWAY_API_KEY: token },
}
);

return;
}

/**
 * Lists the files stored for `scriptId` under
 * `KNOWLEDGE_DIR()/script_data/<scriptId>/data`.
 *
 * NOTE(review): this span is a rendered diff; lines of two variants are
 * interleaved. One returns the joined paths of the data directory's entries
 * as `string[]`. The other walks the `local` and `notion` sub-folders and
 * returns a `Map` keyed by full path whose values carry the sub-folder name as
 * `type`, the entry name as `fileName`, and the result of
 * `getFileOrFolderSizeInKB` as `size`.
 *
 * Both variants return an empty result when the data directory is missing.
 */
export async function getFiles(scriptId: string): Promise<string[]> {
export async function getFiles(
scriptId: string
): Promise<Map<string, FileDetail>> {
const result = new Map<string, FileDetail>();
const dir = path.join(KNOWLEDGE_DIR(), 'script_data', scriptId, 'data');
if (!fs.existsSync(dir)) {
return [];
return result;
}
const files = await fs.promises.readdir(dir);
return files.map((file) => path.join(dir, file));
for (const type of ['local', 'notion']) {
// A sub-folder that was never created simply contributes no entries.
if (!fs.existsSync(path.join(dir, type))) {
continue;
}
const files = await fs.promises.readdir(path.join(dir, type));
for (const file of files) {
result.set(path.join(dir, type, file), {
type: type as 'local' | 'notion',
fileName: file,
size: await getFileOrFolderSizeInKB(path.join(dir, type, file)),
});
}
}
return result;
}

/**
 * Resolves to whether the directory
 * `KNOWLEDGE_DIR()/script_data/<scriptId>/data` currently exists.
 *
 * @param scriptId - Identifier used as the folder name under `script_data`.
 */
export async function datasetExists(scriptId: string): Promise<boolean> {
  const segments = [KNOWLEDGE_DIR(), 'script_data', scriptId, 'data'];
  const dataDir = path.join(...segments);
  return fs.existsSync(dataDir);
}

/**
 * Resolves to the value of the `KNOWLEDGE_BIN` environment variable, or to
 * `'knowledge'` when that variable is unset or empty.
 */
export async function getKnowledgeBinaryPath(): Promise<string> {
  const configured = process.env.KNOWLEDGE_BIN;
  // Truthiness check on purpose: an empty string also falls back to the default.
  if (configured) {
    return configured;
  }
  return 'knowledge';
}
90 changes: 90 additions & 0 deletions actions/knowledge/notion.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
'use server';

import fs from 'fs';
import path from 'path';
import { WORKSPACE_DIR } from '@/config/env';
import {
GPTScript,
PromptFrame,
Run,
RunEventType,
} from '@gptscript-ai/gptscript';

/**
 * Resolves to whether
 * `WORKSPACE_DIR()/knowledge/integrations/notion/metadata.json` exists.
 */
export async function isNotionConfigured() {
  const metadataPath = path.join(
    WORKSPACE_DIR(),
    'knowledge',
    'integrations',
    'notion',
    'metadata.json'
  );
  const configured = fs.existsSync(metadataPath);
  return configured;
}

/**
 * Collects the paths of all non-directory entries beneath `dir`, depth-first,
 * in the order `fs.readdirSync` reports them.
 *
 * Entries named `metadata.json` are left out at every level.
 *
 * @param dir - Directory to walk.
 * @returns Paths built by joining `dir` with each entry name.
 */
function readFilesRecursive(dir: string): string[] {
  const collected: string[] = [];

  for (const entry of fs.readdirSync(dir)) {
    if (entry === 'metadata.json') {
      continue;
    }

    const entryPath = path.join(dir, entry);
    // statSync follows symlinks, so a link to a directory is descended into.
    if (fs.statSync(entryPath).isDirectory()) {
      for (const nested of readFilesRecursive(entryPath)) {
        collected.push(nested);
      }
    } else {
      collected.push(entryPath);
    }
  }

  return collected;
}

/**
 * Lists the files found under
 * `WORKSPACE_DIR()/knowledge/integrations/notion`, pairing each with the URL
 * recorded for it in that folder's `metadata.json`.
 *
 * The metadata entry for a file is looked up by the name of the file's parent
 * directory. Files whose parent directory has no entry with a string `url`
 * are skipped, so they no longer abort the whole listing with a `TypeError`.
 *
 * @returns Map from file path to `{ url, fileName }`.
 * @throws If the notion folder or its `metadata.json` cannot be read, or if
 *   `metadata.json` is not valid JSON.
 */
export async function getNotionFiles(): Promise<
  Map<string, { url: string; fileName: string }>
> {
  const dir = path.join(WORKSPACE_DIR(), 'knowledge', 'integrations', 'notion');
  const filePaths = readFilesRecursive(dir);
  const metadata: unknown = JSON.parse(
    fs.readFileSync(path.join(dir, 'metadata.json'), 'utf8')
  );
  const result = new Map<string, { url: string; fileName: string }>();

  // Anything other than a JSON object cannot hold per-page entries.
  if (typeof metadata !== 'object' || metadata === null) {
    return result;
  }
  const pages = metadata as Record<string, unknown>;

  for (const filePath of filePaths) {
    const pageID = path.basename(path.dirname(filePath));
    const page = pages[pageID];
    const url =
      typeof page === 'object' && page !== null
        ? (page as { url?: unknown }).url
        : undefined;
    // No usable metadata for this file's folder: leave it out rather than crash.
    if (typeof url !== 'string') {
      continue;
    }
    result.set(filePath, { url, fileName: path.basename(filePath) });
  }

  return result;
}

/**
 * Runs the `github.com/gptscript-ai/knowledge-notion-integration` tool with
 * prompting enabled and waits for its text output.
 *
 * When `authed` is false, the function first waits for a `Prompt` event from
 * the run and answers it with an empty set of responses (presumably the
 * authentication prompt — confirm against the tool).
 *
 * @param authed - Whether to skip waiting for and answering a prompt.
 */
export async function runNotionSync(authed: boolean): Promise<void> {
  const client = new GPTScript({
    DefaultModelProvider: 'github.com/gptscript-ai/gateway-provider',
  });
  const syncRun = await client.run(
    'github.com/gptscript-ai/knowledge-notion-integration',
    { prompt: true }
  );

  if (!authed) {
    // Resolves with the id of the first prompt frame the run emits.
    const firstPromptId = (run: Run): Promise<string> =>
      new Promise<string>((resolve) => {
        run.on(RunEventType.Prompt, (frame: PromptFrame) => {
          resolve(frame.id);
        });
      });

    const promptId = await firstPromptId(syncRun);
    await client.promptResponse({ id: promptId, responses: {} });
  }

  await syncRun.text();
}
6 changes: 6 additions & 0 deletions actions/knowledge/util.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
/** Describes one knowledge file tracked for a script. */
export interface FileDetail {
// Base name of the file (no directory part).
fileName: string;
// NOTE(review): the unit is not fixed by this declaration; callers appear to
// store both raw byte counts and KB values here — confirm and unify.
size: number;
// Origin of the file; also used as a sub-folder name by the knowledge actions.
type: 'local' | 'notion';
}

/** Returns the pinned reference of the knowledge gateway tool. */
export function gatewayTool(): string {
  const toolRef = 'github.com/gptscript-ai/knowledge/gateway@v0.4.12';
  return toolRef;
}
Expand Down
Loading

0 comments on commit 48cb300

Please sign in to comment.