Skip to content

Commit

Permalink
refactor: remove defaultFS from parameters (run-llama#841)
Browse files Browse the repository at this point in the history
  • Loading branch information
himself65 authored May 16, 2024
1 parent ba217ee commit 9e133ac
Show file tree
Hide file tree
Showing 37 changed files with 12,823 additions and 294 deletions.
10 changes: 10 additions & 0 deletions .changeset/new-shirts-reply.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
"llamaindex": patch
"@llamaindex/env": patch
---

refactor: remove `defaultFS` from parameters

We no longer accept `fs` as a parameter, since it is unnecessary once the JS environment is determined.

Passing `fs` was originally a way to polyfill file-system access in non-Node.js environments, but we now polyfill those APIs another way.
4 changes: 2 additions & 2 deletions packages/core/src/embeddings/utils.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { defaultFS } from "@llamaindex/env";
import { fs } from "@llamaindex/env";
import _ from "lodash";
import { filetypemime } from "magic-bytes.js";
import type { ImageType } from "../Node.js";
Expand Down Expand Up @@ -243,7 +243,7 @@ export async function imageToDataUrl(input: ImageType): Promise<string> {
_.isString(input)
) {
// string or file URL
const dataBuffer = await defaultFS.readFile(
const dataBuffer = await fs.readFile(
input instanceof URL ? input.pathname : input,
);
input = new Blob([dataBuffer]);
Expand Down
10 changes: 3 additions & 7 deletions packages/core/src/readers/CSVReader.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import type { GenericFileSystem } from "@llamaindex/env";
import { defaultFS } from "@llamaindex/env";
import { fs } from "@llamaindex/env";
import type { ParseConfig } from "papaparse";
import Papa from "papaparse";
import { Document } from "../Node.js";
Expand Down Expand Up @@ -40,11 +39,8 @@ export class PapaCSVReader implements FileReader {
* @param {GenericFileSystem} [fs=DEFAULT_FS] - The file system to use for reading the file.
* @returns {Promise<Document[]>}
*/
async loadData(
file: string,
fs: GenericFileSystem = defaultFS,
): Promise<Document[]> {
const fileContent = await fs.readFile(file);
async loadData(file: string): Promise<Document[]> {
const fileContent = await fs.readFile(file, "utf-8");
const result = Papa.parse(fileContent, this.papaConfig);
const textList = result.data.map((row: any) => {
// Compatible with header row mode
Expand Down
10 changes: 3 additions & 7 deletions packages/core/src/readers/DocxReader.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
import type { GenericFileSystem } from "@llamaindex/env";
import { defaultFS } from "@llamaindex/env";
import { fs } from "@llamaindex/env";
import mammoth from "mammoth";
import { Document } from "../Node.js";
import type { FileReader } from "./type.js";

export class DocxReader implements FileReader {
/** DocxParser */
async loadData(
file: string,
fs: GenericFileSystem = defaultFS,
): Promise<Document[]> {
const dataBuffer = await fs.readRawFile(file);
async loadData(file: string): Promise<Document[]> {
const dataBuffer = await fs.readFile(file);
const { value } = await mammoth.extractRawText({ buffer: dataBuffer });
return [new Document({ text: value, id_: file })];
}
Expand Down
11 changes: 3 additions & 8 deletions packages/core/src/readers/HTMLReader.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import type { GenericFileSystem } from "@llamaindex/env";
import { defaultFS } from "@llamaindex/env";
import { fs } from "@llamaindex/env";
import { Document } from "../Node.js";
import type { FileReader } from "./type.js";

Expand All @@ -15,14 +14,10 @@ export class HTMLReader implements FileReader {
* Public method for this reader.
* Required by BaseReader interface.
* @param file Path/name of the file to be loaded.
* @param fs fs wrapper interface for getting the file content.
* @returns Promise<Document[]> A Promise object, eventually yielding zero or one Document parsed from the HTML content of the specified file.
*/
async loadData(
file: string,
fs: GenericFileSystem = defaultFS,
): Promise<Document[]> {
const dataBuffer = await fs.readFile(file);
async loadData(file: string): Promise<Document[]> {
const dataBuffer = await fs.readFile(file, "utf-8");
const htmlOptions = this.getOptions();
const content = await this.parseContent(dataBuffer, htmlOptions);
return [new Document({ text: content, id_: file })];
Expand Down
10 changes: 3 additions & 7 deletions packages/core/src/readers/ImageReader.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import type { GenericFileSystem } from "@llamaindex/env";
import { defaultFS } from "@llamaindex/env";
import { fs } from "@llamaindex/env";
import type { Document } from "../Node.js";
import { ImageDocument } from "../Node.js";
import type { FileReader } from "./type.js";
Expand All @@ -15,11 +14,8 @@ export class ImageReader implements FileReader {
* @param fs fs wrapper interface for getting the file content.
* @returns Promise<Document[]> A Promise object, eventually yielding zero or one ImageDocument of the specified file.
*/
async loadData(
file: string,
fs: GenericFileSystem = defaultFS,
): Promise<Document[]> {
const dataBuffer = await fs.readRawFile(file);
async loadData(file: string): Promise<Document[]> {
const dataBuffer = await fs.readFile(file);
const blob = new Blob([dataBuffer]);
return [new ImageDocument({ image: blob, id_: file })];
}
Expand Down
9 changes: 3 additions & 6 deletions packages/core/src/readers/LlamaParseReader.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { defaultFS, getEnv, type GenericFileSystem } from "@llamaindex/env";
import { fs, getEnv } from "@llamaindex/env";
import { filetypemime } from "magic-bytes.js";
import { Document } from "../Node.js";
import type { FileReader, Language, ResultType } from "./type.js";
Expand Down Expand Up @@ -79,14 +79,11 @@ export class LlamaParseReader implements FileReader {
this.apiKey = params.apiKey;
}

async loadData(
file: string,
fs: GenericFileSystem = defaultFS,
): Promise<Document[]> {
async loadData(file: string): Promise<Document[]> {
const metadata = { file_path: file };

// Load data, set the mime type
const data = await fs.readRawFile(file);
const data = await fs.readFile(file);
const mimeType = await this.getMimeType(data);

const body = new FormData();
Expand Down
10 changes: 3 additions & 7 deletions packages/core/src/readers/MarkdownReader.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import type { GenericFileSystem } from "@llamaindex/env";
import { defaultFS } from "@llamaindex/env";
import { fs } from "@llamaindex/env";
import { Document } from "../Node.js";
import type { FileReader } from "./type.js";

Expand Down Expand Up @@ -88,11 +87,8 @@ export class MarkdownReader implements FileReader {
return this.markdownToTups(modifiedContent);
}

async loadData(
file: string,
fs: GenericFileSystem = defaultFS,
): Promise<Document[]> {
const content = await fs.readFile(file);
async loadData(file: string): Promise<Document[]> {
const content = await fs.readFile(file, "utf-8");
const tups = this.parseTups(content);
const results: Document[] = [];
let counter = 0;
Expand Down
10 changes: 3 additions & 7 deletions packages/core/src/readers/PDFReader.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
import type { GenericFileSystem } from "@llamaindex/env";
import { defaultFS } from "@llamaindex/env";
import { fs } from "@llamaindex/env";
import { Document } from "../Node.js";
import type { BaseReader } from "./type.js";

/**
* Read the text of a PDF
*/
export class PDFReader implements BaseReader {
async loadData(
file: string,
fs: GenericFileSystem = defaultFS,
): Promise<Document[]> {
const content = await fs.readRawFile(file);
async loadData(file: string): Promise<Document[]> {
const content = await fs.readFile(file);
const pages = await readPDF(content);
return pages.map((text, page) => {
const id_ = `${file}_${page + 1}`;
Expand Down
7 changes: 2 additions & 5 deletions packages/core/src/readers/SimpleDirectoryReader.edge.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import type { CompleteFileSystem } from "@llamaindex/env";
import { defaultFS, path } from "@llamaindex/env";
import { fs, path } from "@llamaindex/env";
import { Document, type Metadata } from "../Node.js";
import { walk } from "../storage/FileSystem.js";
import { TextFileReader } from "./TextFileReader.js";
Expand All @@ -19,7 +18,6 @@ enum ReaderStatus {

export type SimpleDirectoryReaderLoadDataParams = {
directoryPath: string;
fs?: CompleteFileSystem;
defaultReader?: BaseReader | null;
fileExtToReader?: Record<string, BaseReader>;
};
Expand All @@ -45,7 +43,6 @@ export class SimpleDirectoryReader implements BaseReader {

const {
directoryPath,
fs = defaultFS,
defaultReader = new TextFileReader(),
fileExtToReader,
} = params;
Expand All @@ -58,7 +55,7 @@ export class SimpleDirectoryReader implements BaseReader {
}

const docs: Document[] = [];
for await (const filePath of walk(fs, directoryPath)) {
for await (const filePath of walk(directoryPath)) {
try {
const fileExt = path.extname(filePath).slice(1).toLowerCase();

Expand Down
10 changes: 3 additions & 7 deletions packages/core/src/readers/TextFileReader.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import type { CompleteFileSystem } from "@llamaindex/env";
import { defaultFS } from "@llamaindex/env";
import { fs } from "@llamaindex/env";
import { Document } from "../Node.js";
import type { BaseReader } from "./type.js";

Expand All @@ -8,11 +7,8 @@ import type { BaseReader } from "./type.js";
*/

export class TextFileReader implements BaseReader {
async loadData(
file: string,
fs: CompleteFileSystem = defaultFS,
): Promise<Document[]> {
const dataBuffer = await fs.readFile(file);
async loadData(file: string): Promise<Document[]> {
const dataBuffer = await fs.readFile(file, "utf-8");
return [new Document({ text: dataBuffer, id_: file })];
}
}
3 changes: 1 addition & 2 deletions packages/core/src/readers/type.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import type { CompleteFileSystem } from "@llamaindex/env";
import type { Document } from "../Node.js";

/**
Expand All @@ -12,7 +11,7 @@ export interface BaseReader {
* A reader takes file paths and imports data into Document objects.
*/
export interface FileReader extends BaseReader {
loadData(filePath: string, fs?: CompleteFileSystem): Promise<Document[]>;
loadData(filePath: string): Promise<Document[]>;
}

// For LlamaParseReader.ts
Expand Down
18 changes: 6 additions & 12 deletions packages/core/src/storage/FileSystem.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import type { GenericFileSystem, WalkableFileSystem } from "@llamaindex/env";
// FS utility functions
// FS utility helpers

import { fs } from "@llamaindex/env";

/**
* Checks if a file exists.
Expand All @@ -8,10 +9,7 @@ import type { GenericFileSystem, WalkableFileSystem } from "@llamaindex/env";
* @param path The path to the file to check.
* @returns A promise that resolves to true if the file exists, false otherwise.
*/
export async function exists(
fs: GenericFileSystem,
path: string,
): Promise<boolean> {
export async function exists(path: string): Promise<boolean> {
try {
await fs.access(path);
return true;
Expand All @@ -22,19 +20,15 @@ export async function exists(

/**
* Recursively traverses a directory and yields all the paths to the files in it.
* @param fs The filesystem to use.
* @param dirPath The path to the directory to traverse.
*/
export async function* walk(
fs: WalkableFileSystem,
dirPath: string,
): AsyncIterable<string> {
export async function* walk(dirPath: string): AsyncIterable<string> {
const entries = await fs.readdir(dirPath);
for (const entry of entries) {
const fullPath = `${dirPath}/${entry}`;
const stats = await fs.stat(fullPath);
if (stats.isDirectory()) {
yield* walk(fs, fullPath);
yield* walk(fullPath);
} else {
yield fullPath;
}
Expand Down
17 changes: 4 additions & 13 deletions packages/core/src/storage/StorageContext.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import type { GenericFileSystem } from "@llamaindex/env";
import { defaultFS, path } from "@llamaindex/env";
import { path } from "@llamaindex/env";
import {
DEFAULT_IMAGE_VECTOR_NAMESPACE,
DEFAULT_NAMESPACE,
Expand All @@ -25,7 +24,6 @@ export type BuilderParams = {
imageVectorStore: VectorStore;
storeImages: boolean;
persistDir: string;
fs: GenericFileSystem;
};

export async function storageContextFromDefaults({
Expand All @@ -35,30 +33,23 @@ export async function storageContextFromDefaults({
imageVectorStore,
storeImages,
persistDir,
fs,
}: Partial<BuilderParams>): Promise<StorageContext> {
if (!persistDir) {
docStore = docStore || new SimpleDocumentStore();
indexStore = indexStore || new SimpleIndexStore();
vectorStore = vectorStore || new SimpleVectorStore();
imageVectorStore = storeImages ? new SimpleVectorStore() : imageVectorStore;
} else {
fs = fs || defaultFS;
docStore =
docStore ||
(await SimpleDocumentStore.fromPersistDir(
persistDir,
DEFAULT_NAMESPACE,
fs,
));
(await SimpleDocumentStore.fromPersistDir(persistDir, DEFAULT_NAMESPACE));
indexStore =
indexStore || (await SimpleIndexStore.fromPersistDir(persistDir, fs));
indexStore || (await SimpleIndexStore.fromPersistDir(persistDir));
vectorStore =
vectorStore || (await SimpleVectorStore.fromPersistDir(persistDir, fs));
vectorStore || (await SimpleVectorStore.fromPersistDir(persistDir));
imageVectorStore = storeImages
? await SimpleVectorStore.fromPersistDir(
path.join(persistDir, DEFAULT_IMAGE_VECTOR_NAMESPACE),
fs,
)
: imageVectorStore;
}
Expand Down
18 changes: 4 additions & 14 deletions packages/core/src/storage/docStore/SimpleDocumentStore.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import type { GenericFileSystem } from "@llamaindex/env";
import { defaultFS, path } from "@llamaindex/env";
import { path } from "@llamaindex/env";
import _ from "lodash";
import {
DEFAULT_DOC_STORE_PERSIST_FILENAME,
Expand All @@ -25,26 +24,19 @@ export class SimpleDocumentStore extends KVDocumentStore {
static async fromPersistDir(
persistDir: string = DEFAULT_PERSIST_DIR,
namespace?: string,
fsModule?: GenericFileSystem,
): Promise<SimpleDocumentStore> {
const persistPath = path.join(
persistDir,
DEFAULT_DOC_STORE_PERSIST_FILENAME,
);
return await SimpleDocumentStore.fromPersistPath(
persistPath,
namespace,
fsModule,
);
return await SimpleDocumentStore.fromPersistPath(persistPath, namespace);
}

static async fromPersistPath(
persistPath: string,
namespace?: string,
fs?: GenericFileSystem,
): Promise<SimpleDocumentStore> {
fs = fs || defaultFS;
const simpleKVStore = await SimpleKVStore.fromPersistPath(persistPath, fs);
const simpleKVStore = await SimpleKVStore.fromPersistPath(persistPath);
return new SimpleDocumentStore(simpleKVStore, namespace);
}

Expand All @@ -53,14 +45,12 @@ export class SimpleDocumentStore extends KVDocumentStore {
DEFAULT_PERSIST_DIR,
DEFAULT_DOC_STORE_PERSIST_FILENAME,
),
fs?: GenericFileSystem,
): Promise<void> {
fs = fs || defaultFS;
if (
_.isObject(this.kvStore) &&
this.kvStore instanceof BaseInMemoryKVStore
) {
await this.kvStore.persist(persistPath, fs);
await this.kvStore.persist(persistPath);
}
}

Expand Down
Loading

0 comments on commit 9e133ac

Please sign in to comment.