Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: Add multiple data sources #10

Closed
wants to merge 11 commits into from
4 changes: 2 additions & 2 deletions create-app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ export async function createApp({
vectorDb,
externalPort,
postInstallAction,
dataSource,
dataSources,
tools,
observability,
}: InstallAppArgs): Promise<void> {
Expand Down Expand Up @@ -89,7 +89,7 @@ export async function createApp({
vectorDb,
externalPort,
postInstallAction,
dataSource,
dataSources,
tools,
observability,
};
Expand Down
53 changes: 31 additions & 22 deletions helpers/env-variables.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
TemplateDataSource,
TemplateFramework,
TemplateVectorDB,
WebSourceConfig,
} from "./types";

type EnvVar = {
Expand Down Expand Up @@ -99,26 +100,32 @@ const getVectorDBEnvs = (vectorDb: TemplateVectorDB) => {
}
};

const getDataSourceEnvs = (dataSource: TemplateDataSource) => {
switch (dataSource.type) {
case "web":
return [
{
name: "BASE_URL",
description: "The base URL to start web scraping.",
},
{
name: "URL_PREFIX",
description: "The prefix of the URL to start web scraping.",
},
{
name: "MAX_DEPTH",
description: "The maximum depth to scrape.",
},
];
default:
return [];
/**
 * Build the environment-variable entries contributed by the selected data sources.
 *
 * Only "web" sources contribute variables (BASE_URL, URL_PREFIX, MAX_DEPTH);
 * every other source type adds nothing.
 *
 * The variable names are fixed, so emitting them once per web source would
 * repeat the same names in the env file (the concern raised in review:
 * "might we get duplicates with that approach?"). Each name is therefore
 * emitted exactly once, taken from the first web source in the list.
 * NOTE(review): "first web source wins" is a choice made here — confirm it is
 * the desired precedence when several web sources are configured.
 *
 * @param dataSources - Data sources selected for the generated app.
 * @returns The env entries for the web source, or an empty array when there is none.
 */
const getDataSourceEnvs = (dataSources: TemplateDataSource[]) => {
  const envs: { name: string; description: string; value?: string }[] = [];

  const webSource = dataSources.find((source) => source.type === "web");
  if (webSource !== undefined) {
    const config = webSource.config as WebSourceConfig;
    envs.push(
      {
        name: "BASE_URL",
        description: "The base URL to start web scraping.",
        value: config.baseUrl,
      },
      {
        name: "URL_PREFIX",
        description: "The prefix of the URL to start web scraping.",
        // NOTE(review): populated from baseUrl, same as BASE_URL — confirm
        // this is intended rather than a dedicated prefix setting.
        value: config.baseUrl,
      },
      {
        name: "MAX_DEPTH",
        description: "The maximum depth to scrape.",
        // Left undefined when no depth is configured.
        value: config.depth?.toString(),
      },
    );
  }

  return envs;
};

export const createBackendEnvFile = async (
Expand All @@ -130,7 +137,7 @@ export const createBackendEnvFile = async (
model?: string;
embeddingModel?: string;
framework?: TemplateFramework;
dataSource?: TemplateDataSource;
dataSources?: TemplateDataSource[];
port?: number;
},
) => {
Expand All @@ -152,7 +159,7 @@ export const createBackendEnvFile = async (
// Add vector database environment variables
...(opts.vectorDb ? getVectorDBEnvs(opts.vectorDb) : []),
// Add data source environment variables
...(opts.dataSource ? getDataSourceEnvs(opts.dataSource) : []),
...(opts.dataSources ? getDataSourceEnvs(opts.dataSources) : []),
];
let envVars: EnvVar[] = [];
if (opts.framework === "fastapi") {
Expand Down Expand Up @@ -204,7 +211,9 @@ We have provided context information below.
Given this information, please answer the question: {query_str}
"`,
},
(opts?.dataSource?.config as FileSourceConfig).useLlamaParse
opts?.dataSources?.some(
(ds) => (ds.config as FileSourceConfig).useLlamaParse,
)
? {
name: "LLAMA_CLOUD_API_KEY",
description: `The Llama Cloud API key.`,
Expand Down
55 changes: 36 additions & 19 deletions helpers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ async function generateContextData(
packageManager?: PackageManager,
openAiKey?: string,
vectorDb?: TemplateVectorDB,
dataSource?: TemplateDataSource,
llamaCloudKey?: string,
useLlamaParse?: boolean,
) {
if (packageManager) {
const runGenerate = `${cyan(
Expand All @@ -37,8 +37,7 @@ async function generateContextData(
: `${packageManager} run generate`,
)}`;
const openAiKeyConfigured = openAiKey || process.env["OPENAI_API_KEY"];
const llamaCloudKeyConfigured = (dataSource?.config as FileSourceConfig)
?.useLlamaParse
const llamaCloudKeyConfigured = useLlamaParse
? llamaCloudKey || process.env["LLAMA_CLOUD_API_KEY"]
: true;
const hasVectorDb = vectorDb && vectorDb !== "none";
Expand Down Expand Up @@ -82,18 +81,18 @@ const copyContextData = async (
dataSource?: TemplateDataSource,
) => {
const destPath = path.join(root, "data");

const dataSourceConfig = dataSource?.config as FileSourceConfig;

// Copy file
if (dataSource?.type === "file") {
if (dataSourceConfig.path) {
console.log(`\nCopying file to ${cyan(destPath)}\n`);
await fs.mkdir(destPath, { recursive: true });
await fs.copyFile(
dataSourceConfig.path,
path.join(destPath, path.basename(dataSourceConfig.path)),
);
// Split and strip beginning and trailing blank spaces
const paths = dataSourceConfig.path.split(",").map((p) => p.trim());
console.log("Copying data from files:", paths);
for (const p of paths) {
await fs.copyFile(p, path.join(destPath, path.basename(p)));
}
} else {
console.log("Missing file path in config");
process.exit(1);
Expand All @@ -103,13 +102,20 @@ const copyContextData = async (

// Copy folder
if (dataSource?.type === "folder") {
const srcPath =
dataSourceConfig.path ?? path.join(templatesDir, "components", "data");
console.log(`\nCopying data to ${cyan(destPath)}\n`);
await copy("**", destPath, {
parents: true,
cwd: srcPath,
});
// Example data does not have path config, set the default path
const srcPaths = dataSourceConfig.path?.split(",").map((p) => p.trim()) ?? [
path.join(templatesDir, "components", "data"),
];
console.log("Copying data from folders: ", srcPaths);
for (const p of srcPaths) {
const folderName = path.basename(p);
const destFolderPath = path.join(destPath, folderName);
await fs.mkdir(destFolderPath, { recursive: true });
await copy("**", destFolderPath, {
parents: true,
cwd: p,
});
}
return;
}
};
Expand Down Expand Up @@ -160,12 +166,19 @@ export const installTemplate = async (
model: props.model,
embeddingModel: props.embeddingModel,
framework: props.framework,
dataSource: props.dataSource,
dataSources: props.dataSources,
port: props.externalPort,
});

if (props.engine === "context") {
console.log("\nGenerating context data...\n");

// Copy the data of every file/folder source into the project, one at a time.
// A for...of loop is used instead of forEach(async ...) because forEach does
// not wait for the returned promises: the following steps (e.g. generating
// the context data) could start before the files exist, and a failed copy
// would surface as an unhandled rejection instead of failing the install.
for (const ds of props.dataSources) {
  if (ds.type === "file" || ds.type === "folder") {
    await copyContextData(props.root, ds);
  }
}
if (
props.postInstallAction === "runApp" ||
props.postInstallAction === "dependencies"
Expand All @@ -175,8 +188,12 @@ export const installTemplate = async (
props.packageManager,
props.openAiKey,
props.vectorDb,
props.dataSource,
props.llamaCloudKey,
props.dataSources.some(
(ds) =>
(ds.type === "file" || ds.type === "folder") &&
(ds.config as FileSourceConfig).useLlamaParse,
),
);
}
}
Expand Down
63 changes: 45 additions & 18 deletions helpers/python.ts
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ export const installPythonTemplate = async ({
framework,
engine,
vectorDb,
dataSource,
dataSources,
tools,
postInstallAction,
}: Pick<
Expand All @@ -185,7 +185,7 @@ export const installPythonTemplate = async ({
| "template"
| "engine"
| "vectorDb"
| "dataSource"
| "dataSources"
| "tools"
| "postInstallAction"
>) => {
Expand Down Expand Up @@ -250,27 +250,54 @@ export const installPythonTemplate = async ({
});
}

const dataSourceType = dataSource?.type;
if (dataSourceType !== undefined && dataSourceType !== "none") {
let loaderFolder: string;
if (dataSourceType === "file" || dataSourceType === "folder") {
const dataSourceConfig = dataSource?.config as FileSourceConfig;
loaderFolder = dataSourceConfig.useLlamaParse ? "llama_parse" : "file";
} else {
loaderFolder = dataSourceType;
}
await copy("**", enginePath, {
// Install the Python loader code for every real data source.
// Sources of type "none" carry no loader, so they are filtered out up front;
// this also makes an empty list safe (no indexing into dataSources[0]).
const activeSources = dataSources.filter((source) => source.type !== "none");
if (activeSources.length > 0) {
  // Copy loader.py file to enginePath
  await copy("loader.py", enginePath, {
    parents: true,
    cwd: path.join(compPath, "loaders", "python"),
  });

  // Work out which loader folders are needed. File and folder sources share
  // a loader ("llama_parse" when LlamaParse is enabled, otherwise "file");
  // any other source type uses the folder named after the type.
  const loaderFolders = new Set<string>();
  for (const source of activeSources) {
    if (source.type === "file" || source.type === "folder") {
      const sourceConfig = source.config as FileSourceConfig;
      loaderFolders.add(sourceConfig.useLlamaParse ? "llama_parse" : "file");
    } else {
      loaderFolders.add(source.type);
    }
  }

  // Copy data source loaders, each folder once even if several sources use it
  const loaderPath = path.join(enginePath, "loaders");
  for (const loaderFolder of loaderFolders) {
    await copy("**", loaderPath, {
      parents: true,
      cwd: path.join(compPath, "loaders", "python", loaderFolder),
    });
  }
}

// const dataSourceType = dataSource?.type;
// if (dataSourceType !== undefined && dataSourceType !== "none") {
// let loaderFolder: string;
// if (dataSourceType === "file" || dataSourceType === "folder") {
// const dataSourceConfig = dataSource?.config as FileSourceConfig;
// loaderFolder = dataSourceConfig.useLlamaParse ? "llama_parse" : "file";
// } else {
// loaderFolder = dataSourceType;
// }
// await copy("**", enginePath, {
// parents: true,
// cwd: path.join(compPath, "loaders", "python", loaderFolder),
// });
// }
}

const addOnDependencies = getAdditionalDependencies(
vectorDb,
dataSource,
tools,
);
const addOnDependencies = dataSources
.map((ds) => getAdditionalDependencies(vectorDb, ds, tools))
.flat();
await addDependencies(root, addOnDependencies);

if (postInstallAction === "runApp" || postInstallAction === "dependencies") {
Expand Down
2 changes: 1 addition & 1 deletion helpers/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ export interface InstallTemplateArgs {
framework: TemplateFramework;
engine: TemplateEngine;
ui: TemplateUI;
dataSource?: TemplateDataSource;
dataSources: TemplateDataSource[];
eslint: boolean;
customApiPath?: string;
openAiKey?: string;
Expand Down
2 changes: 1 addition & 1 deletion index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ async function run(): Promise<void> {
vectorDb: program.vectorDb,
externalPort: program.externalPort,
postInstallAction: program.postInstallAction,
dataSource: program.dataSource,
dataSources: program.dataSources,
tools: program.tools,
observability: program.observability,
});
Expand Down
Loading