From 138f5223bd5f44ae162b3dd006d6dea3e3437cc0 Mon Sep 17 00:00:00 2001 From: sabaimran <65192171+sabaimran@users.noreply.github.com> Date: Tue, 20 Feb 2024 13:46:56 -0800 Subject: [PATCH] Fix process for generating embeddings for Notion entries (#648) * Fix process for generating embeddings for Notion entries * If no title field found, just log a warning and set the title to None --- src/khoj/processor/content/notion/notion_to_entries.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/khoj/processor/content/notion/notion_to_entries.py b/src/khoj/processor/content/notion/notion_to_entries.py index bf9fa0fd6..2ed62fd3f 100644 --- a/src/khoj/processor/content/notion/notion_to_entries.py +++ b/src/khoj/processor/content/notion/notion_to_entries.py @@ -112,7 +112,9 @@ def process( page_entries = self.process_page(p_or_d) current_entries.extend(page_entries) - return self.update_entries_with_ids(current_entries, user) + current_entries = TextToEntries.split_entries_by_max_tokens(current_entries, max_tokens=256) + + return self.update_entries_with_ids(current_entries, user=user) def process_page(self, page): page_id = page["id"] @@ -232,8 +234,9 @@ def get_page_content(self, page_id): elif "Event" in properties: title_field = "Event" elif title_field not in properties: - logger.error(f"Page {page_id} does not have a title field") - return None, None + logger.warning(f"Title field not found for page {page_id}. Setting title as None...") + title = None + return title, content try: title = page["properties"][title_field]["title"][0]["text"]["content"] except Exception as e: