From 9214b0669de6ab2b085855584920994ec6f8e566 Mon Sep 17 00:00:00 2001 From: Yi Ding Date: Fri, 4 Aug 2023 23:33:16 -0700 Subject: [PATCH] fix persistence bug --- .changeset/short-boats-confess.md | 5 ++++ apps/simple/persist.ts | 36 -------------------------- apps/simple/storageContext.ts | 26 ++++++++++++++----- examples/storageContext.ts | 26 ++++++++++++++----- packages/core/src/Node.ts | 25 ++++++++++++++++++ packages/core/src/indices/BaseIndex.ts | 9 +++++-- packages/core/src/storage/constants.ts | 2 +- 7 files changed, 76 insertions(+), 53 deletions(-) create mode 100644 .changeset/short-boats-confess.md delete mode 100644 apps/simple/persist.ts diff --git a/.changeset/short-boats-confess.md b/.changeset/short-boats-confess.md new file mode 100644 index 0000000000..ca16235328 --- /dev/null +++ b/.changeset/short-boats-confess.md @@ -0,0 +1,5 @@ +--- +"llamaindex": patch +--- + +Fix persistence bug (thanks @HenryHengZJ) diff --git a/apps/simple/persist.ts b/apps/simple/persist.ts deleted file mode 100644 index 5412a48029..0000000000 --- a/apps/simple/persist.ts +++ /dev/null @@ -1,36 +0,0 @@ -import fs from "fs/promises"; -import { - Document, - VectorStoreIndex, - storageContextFromDefaults, -} from "llamaindex"; - -async function main() { - // Load essay from abramov.txt in Node - const essay = await fs.readFile( - "node_modules/llamaindex/examples/abramov.txt", - "utf-8" - ); - - // Create Document object with essay - const document = new Document({ text: essay }); - - // Split text and create embeddings. Store them in a VectorStoreIndex with persistence - const storageContext = await storageContextFromDefaults({ - persistDir: "./storage", - }); - const index = await VectorStoreIndex.fromDocuments([document], { - storageContext, - }); - - // Query the index - const queryEngine = index.asQueryEngine(); - const response = await queryEngine.query( - "What did the author do in college?" 
- ); - - // Output response - console.log(response.toString()); -} - -main().catch(console.error); diff --git a/apps/simple/storageContext.ts b/apps/simple/storageContext.ts index 56be8010a6..cb9c7102bc 100644 --- a/apps/simple/storageContext.ts +++ b/apps/simple/storageContext.ts @@ -1,15 +1,22 @@ -import { Document, VectorStoreIndex, storageContextFromDefaults } from "llamaindex"; +import { + Document, + VectorStoreIndex, + storageContextFromDefaults, +} from "llamaindex"; import essay from "./essay"; - async function main() { // Create Document object with essay const document = new Document({ text: essay }); // Split text and create embeddings. Store them in a VectorStoreIndex // persist the vector store automatically with the storage context - const storageContext = await storageContextFromDefaults({ persistDir: "./storage" }); - const index = await VectorStoreIndex.fromDocuments([document], { storageContext }); + const storageContext = await storageContextFromDefaults({ + persistDir: "./storage", + }); + const index = await VectorStoreIndex.fromDocuments([document], { + storageContext, + }); // Query the index const queryEngine = index.asQueryEngine(); @@ -21,9 +28,14 @@ async function main() { console.log(response.toString()); // load the index - const loadedIndex = await VectorStoreIndex.init({ storageContext }); - const laodedQueryEngine = loadedIndex.asQueryEngine(); - const loadedResponse = await laodedQueryEngine.query( + const secondStorageContext = await storageContextFromDefaults({ + persistDir: "./storage", + }); + const loadedIndex = await VectorStoreIndex.init({ + storageContext: secondStorageContext, + }); + const loadedQueryEngine = loadedIndex.asQueryEngine(); + const loadedResponse = await loadedQueryEngine.query( "What did the author do growing up?" 
); console.log(loadedResponse.toString()); diff --git a/examples/storageContext.ts b/examples/storageContext.ts index 56be8010a6..cb9c7102bc 100644 --- a/examples/storageContext.ts +++ b/examples/storageContext.ts @@ -1,15 +1,22 @@ -import { Document, VectorStoreIndex, storageContextFromDefaults } from "llamaindex"; +import { + Document, + VectorStoreIndex, + storageContextFromDefaults, +} from "llamaindex"; import essay from "./essay"; - async function main() { // Create Document object with essay const document = new Document({ text: essay }); // Split text and create embeddings. Store them in a VectorStoreIndex // persist the vector store automatically with the storage context - const storageContext = await storageContextFromDefaults({ persistDir: "./storage" }); - const index = await VectorStoreIndex.fromDocuments([document], { storageContext }); + const storageContext = await storageContextFromDefaults({ + persistDir: "./storage", + }); + const index = await VectorStoreIndex.fromDocuments([document], { + storageContext, + }); // Query the index const queryEngine = index.asQueryEngine(); @@ -21,9 +28,14 @@ async function main() { console.log(response.toString()); // load the index - const loadedIndex = await VectorStoreIndex.init({ storageContext }); - const laodedQueryEngine = loadedIndex.asQueryEngine(); - const loadedResponse = await laodedQueryEngine.query( + const secondStorageContext = await storageContextFromDefaults({ + persistDir: "./storage", + }); + const loadedIndex = await VectorStoreIndex.init({ + storageContext: secondStorageContext, + }); + const loadedQueryEngine = loadedIndex.asQueryEngine(); + const loadedResponse = await loadedQueryEngine.query( "What did the author do growing up?" 
); console.log(loadedResponse.toString()); diff --git a/packages/core/src/Node.ts b/packages/core/src/Node.ts index abbc58a6d5..a775990157 100644 --- a/packages/core/src/Node.ts +++ b/packages/core/src/Node.ts @@ -128,6 +128,14 @@ export abstract class BaseNode { hash: this.hash, }; } + + /** + * Used with built in JSON.stringify + * @returns + */ + toJSON(): Record<string, any> { + return { ...this, type: this.getType() }; + } } /** @@ -232,6 +240,23 @@ export class Document extends TextNode { } } +export function jsonToNode(json: any) { + if (!json.type) { + throw new Error("Node type not found"); + } + + switch (json.type) { + case ObjectType.TEXT: + return new TextNode(json); + case ObjectType.INDEX: + return new IndexNode(json); + case ObjectType.DOCUMENT: + return new Document(json); + default: + throw new Error(`Invalid node type: ${json.type}`); + } +} + // export class ImageDocument extends Document { // image?: string; // } diff --git a/packages/core/src/indices/BaseIndex.ts b/packages/core/src/indices/BaseIndex.ts index f19bfcc88d..27c4e17da3 100644 --- a/packages/core/src/indices/BaseIndex.ts +++ b/packages/core/src/indices/BaseIndex.ts @@ -1,4 +1,4 @@ -import { Document, BaseNode } from "../Node"; +import { Document, BaseNode, jsonToNode } from "../Node"; import { v4 as uuidv4 } from "uuid"; import { BaseRetriever } from "../Retriever"; import { ServiceContext } from "../ServiceContext"; @@ -74,7 +74,12 @@ export function jsonToIndexStruct(json: any): IndexStruct { return indexList; } else if (json.type === IndexStructType.SIMPLE_DICT) { const indexDict = new IndexDict(json.indexId, json.summary); - indexDict.nodesDict = json.nodesDict; + indexDict.nodesDict = Object.entries(json.nodesDict).reduce< + Record<string, BaseNode> + >((acc, [key, value]) => { + acc[key] = jsonToNode(value); + return acc; + }, {}); return indexDict; } else { throw new Error(`Unknown index struct type: ${json.type}`); diff --git a/packages/core/src/storage/constants.ts 
b/packages/core/src/storage/constants.ts index a0ded7c10f..15e87613c7 100644 --- a/packages/core/src/storage/constants.ts +++ b/packages/core/src/storage/constants.ts @@ -1,7 +1,7 @@ export const DEFAULT_COLLECTION = "data"; export const DEFAULT_PERSIST_DIR = "./storage"; export const DEFAULT_INDEX_STORE_PERSIST_FILENAME = "index_store.json"; -export const DEFAULT_DOC_STORE_PERSIST_FILENAME = "docstore.json"; +export const DEFAULT_DOC_STORE_PERSIST_FILENAME = "doc_store.json"; export const DEFAULT_VECTOR_STORE_PERSIST_FILENAME = "vector_store.json"; export const DEFAULT_GRAPH_STORE_PERSIST_FILENAME = "graph_store.json"; export const DEFAULT_NAMESPACE = "docstore";