From 857d6aaa8368296036f73883cf9f2c82d216afd6 Mon Sep 17 00:00:00 2001 From: John Cordeiro Date: Mon, 8 Apr 2024 20:24:42 -0300 Subject: [PATCH] Remove strings lowering process --- app/handlers/content_bases.py | 2 +- app/indexer/content_bases.py | 2 +- app/loaders/loaders.py | 14 +++++++------- app/tests/test_document_loader.py | 8 ++++---- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/app/handlers/content_bases.py b/app/handlers/content_bases.py index 9155403..e493175 100644 --- a/app/handlers/content_bases.py +++ b/app/handlers/content_bases.py @@ -89,7 +89,7 @@ def delete_batch(self): def search(self, request: ContentBaseSearchRequest, Authorization: Annotated[str | None, Header()] = None): token_verification(Authorization) response = self.content_base_indexer.search( - search=request.search.lower(), + search=request.search, threshold=request.threshold, filter=request.filter ) diff --git a/app/indexer/content_bases.py b/app/indexer/content_bases.py index c14f7bf..a55511a 100644 --- a/app/indexer/content_bases.py +++ b/app/indexer/content_bases.py @@ -36,7 +36,7 @@ def index(self, texts: List, metadatas: dict): self.storage.delete(ids=ids) docs = [ - Document(page_content=text.lower(), metadata=metadatas) + Document(page_content=text, metadata=metadatas) for text in texts ] diff --git a/app/loaders/loaders.py b/app/loaders/loaders.py index 4d14af3..80644fa 100644 --- a/app/loaders/loaders.py +++ b/app/loaders/loaders.py @@ -54,7 +54,7 @@ def raw_text(self) -> str: for i, page in enumerate(pages): text = page.page_content if text: - raw_text += text.lower() + raw_text += text return raw_text @@ -85,7 +85,7 @@ def load_and_split_text(self, text_splitter: ITextSplitter) -> List[Document]: pages = self.load() split_pages = [] for page in pages: - page_content = page.page_content.lower() + page_content = page.page_content metadatas = page.metadata metadatas.update({"full_page": page_content}) @@ -118,7 +118,7 @@ def load_and_split_text(self, text_splitter: ITextSplitter) -> List[Document]: split_pages = [] for page in pages: - page_content = page.page_content.lower() + page_content = page.page_content metadatas = page.metadata metadatas.update({"full_page": page_content}) @@ -134,7 +134,7 @@ def raw_text(self) -> str: for i, page in enumerate(pages): text = page.page_content if text: - raw_text += text.lower() + raw_text += text return raw_text @@ -155,7 +155,7 @@ def load_and_split_text(self, text_splitter: ITextSplitter) -> List[Document]: pages = self.load() split_pages = [] for page in pages: - page_content = page.page_content.lower() + page_content = page.page_content metadatas = page.metadata metadatas.update({"full_page": page_content}) @@ -202,7 +202,7 @@ def load_and_split_text(self, text_splitter: ITextSplitter) -> List[Document]: pages = self.load() split_pages = [] for page in pages: - page_content = page.page_content.lower() + page_content = page.page_content metadatas = page.metadata metadatas.update({"full_page": page_content}) @@ -230,7 +230,7 @@ def load_and_split_text(self, text_splitter: ITextSplitter) -> List[Document]: pages = self.loader.load_and_split() for page in pages: - page_content = page.page_content.lower() + page_content = page.page_content metadatas = page.metadata metadatas.update({"full_page": page_content}) diff --git a/app/tests/test_document_loader.py b/app/tests/test_document_loader.py index 469fb18..5683cf9 100644 --- a/app/tests/test_document_loader.py +++ b/app/tests/test_document_loader.py @@ -98,25 +98,25 @@ def test_load_pdf(self): file_path = f'{self.path}/{self.file_name}.pdf' data_loader = DataLoader(pdf_loader, file_path) raw_text = data_loader.raw_text() - self.assertEqual(raw_text, self.text_string.lower()) + self.assertEqual(raw_text, self.text_string) def test_load_txt(self): file_path = f'{self.path}/{self.file_name}.txt' data_loader = DataLoader(txt_loader, file_path) raw_text = data_loader.raw_text() - self.assertEqual(raw_text, self.text_string.lower()) + self.assertEqual(raw_text, self.text_string) def test_load_udocx(self): file_path = f'{self.path}/{self.file_name}.docx' data_loader = DataLoader(u_docx_loader, file_path) raw_text = data_loader.raw_text() - self.assertEqual(raw_text, self.text_string.lower()) + self.assertEqual(raw_text, self.text_string) def test_load_docx(self): file_path = f'{self.path}/{self.file_name}.docx' data_loader = DataLoader(docx_loader, file_path) raw_text = data_loader.raw_text() - self.assertEqual(raw_text, self.text_string.lower()) + self.assertEqual(raw_text, self.text_string) def test_load_xlsx(self): file_path = f'{self.path}/{self.file_name}.xlsx'