From dffdd8134514d417121aa589e87f66d8d3236277 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Sun, 1 Dec 2024 22:37:57 -0800 Subject: [PATCH] Do not wrap filepath in Path to fix indexing markdown files on Windows Issue - Paths with / are converted to \\ on Windows using the Path operator. - The markdown to entries method for some reason was doing this. This would store the file paths in DB entry differently than the file to entries map, resulting in a KeyError when trying to look up the entry file path from file_to_text_map in the text_to_entries:update_embeddings() function. Fix - Removing the unnecessary OS-dependent Path normalization in markdown_to_entries should keep the file path storage consistent across file_to_text_map var, FileObjectAdaptor, Entry DB tables on Windows for Markdown files as well. This issue would only affect users hosting Khoj server on Windows and attempting to index markdown files. Resolves #984 --- src/khoj/processor/content/markdown/markdown_to_entries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/khoj/processor/content/markdown/markdown_to_entries.py b/src/khoj/processor/content/markdown/markdown_to_entries.py index c4ee03ef4..c7ed690c8 100644 --- a/src/khoj/processor/content/markdown/markdown_to_entries.py +++ b/src/khoj/processor/content/markdown/markdown_to_entries.py @@ -139,7 +139,7 @@ def convert_markdown_entries_to_maps(parsed_entries: List[str], entry_to_file_ma # Escape the URL to avoid issues with special characters entry_filename = urllib3.util.parse_url(raw_filename).url else: - entry_filename = str(Path(raw_filename)) + entry_filename = raw_filename heading = parsed_entry.splitlines()[0] if re.search(r"^#+\s", parsed_entry) else "" # Append base filename to compiled entry for context to model