From ecee090f2737e6aa902f85947e140b93bf35be57 Mon Sep 17 00:00:00 2001 From: devketanpro Date: Wed, 2 Aug 2023 16:59:03 +0530 Subject: [PATCH 1/4] update regex for boolean operators --- server/belga/search_providers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/belga/search_providers.py b/server/belga/search_providers.py index bddbfeee..56378a55 100644 --- a/server/belga/search_providers.py +++ b/server/belga/search_providers.py @@ -336,7 +336,7 @@ def get_search_text(self, query): return searchText def set_highlight(self, search_text, docs): - search_text = "|".join(search_text.split()) + search_text = "|".join(re.escape(term.strip()) for term in search_text.split()) fields = ("body_html", "headline", "slugline") for doc in docs: for field in fields: From b777cc6fe164ee39d6d453da2fc75fbb79eaaa9d Mon Sep 17 00:00:00 2001 From: devketanpro Date: Wed, 2 Aug 2023 16:59:50 +0530 Subject: [PATCH 2/4] add unit testcases --- .../fixtures/belga-360archive-search.json | 2 +- .../belga_360_archive_test.py | 20 +++++++++++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/server/tests/fixtures/belga-360archive-search.json b/server/tests/fixtures/belga-360archive-search.json index 6daf6cba..31674983 100644 --- a/server/tests/fixtures/belga-360archive-search.json +++ b/server/tests/fixtures/belga-360archive-search.json @@ -7,7 +7,7 @@ "name": "", "credit": "BELGA", "topic": "Belga 360 slugline", - "headLine": "Lorem ipsum dolor sit amet, consectetur adipiscing elit.", + "headLine": "(Lorem ipsum) dolor sit amet, consectetur adipiscing elit.", "keywords": ["BRIEF", "#CORONAVIRUS", "SPORTS", "INTERNET"], "authors": [ { diff --git a/server/tests/search_providers/belga_360_archive_test.py b/server/tests/search_providers/belga_360_archive_test.py index 94df00a3..abeb3d1b 100644 --- a/server/tests/search_providers/belga_360_archive_test.py +++ b/server/tests/search_providers/belga_360_archive_test.py @@ -400,7 +400,7 @@ def test_get_highlighted_text(self): { "headline": [ ( - 'Lorem ipsum dolor ' + '(Lorem ipsum) dolor ' "sit amet, consectetur adipiscing elit." ) ] @@ -415,7 +415,23 @@ def test_get_highlighted_text(self): { "headline": [ ( - 'Lorem ipsum dolor ' + '(Lorem ipsum) dolor ' + "sit amet, consectetur adipiscing elit." + ) + ] + }, + ) + + query["query"]["filtered"]["query"]["query_string"]["query"] = "(Lorem ipsum)" + items = self.provider.find(query) + highlighted_item = items[0] + + self.assertEqual( + highlighted_item["es_highlight"], + { + "headline": [ + ( + '(Lorem ipsum) dolor ' "sit amet, consectetur adipiscing elit." ) ] From 8c0e5614edf47849fd0241ac02af70211f30e0f7 Mon Sep 17 00:00:00 2001 From: devketanpro Date: Wed, 2 Aug 2023 17:00:18 +0530 Subject: [PATCH 3/4] fix black --- server/tests/search_providers/belga_360_archive_test.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/server/tests/search_providers/belga_360_archive_test.py b/server/tests/search_providers/belga_360_archive_test.py index abeb3d1b..6110c145 100644 --- a/server/tests/search_providers/belga_360_archive_test.py +++ b/server/tests/search_providers/belga_360_archive_test.py @@ -421,11 +421,13 @@ def test_get_highlighted_text(self): ] }, ) - - query["query"]["filtered"]["query"]["query_string"]["query"] = "(Lorem ipsum)" + + query["query"]["filtered"]["query"]["query_string"][ + "query" + ] = "(Lorem ipsum)" items = self.provider.find(query) highlighted_item = items[0] - + self.assertEqual( highlighted_item["es_highlight"], { From e0255cd4d35b31c4868428b9d0eb6636292135e9 Mon Sep 17 00:00:00 2001 From: devketanpro Date: Wed, 2 Aug 2023 17:13:12 +0530 Subject: [PATCH 4/4] fix testcases --- server/tests/search_providers/belga_360_archive_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/tests/search_providers/belga_360_archive_test.py b/server/tests/search_providers/belga_360_archive_test.py index 6110c145..b7031ebc 100644 --- a/server/tests/search_providers/belga_360_archive_test.py +++ b/server/tests/search_providers/belga_360_archive_test.py @@ -173,7 +173,8 @@ def test_format_list_item(self): self.assertEqual(item["guid"], guid) self.assertEqual(item["extra"]["city"], "Bruxelles") self.assertEqual( - item["headline"], "Lorem ipsum dolor sit amet, consectetur adipiscing elit." + item["headline"], + "(Lorem ipsum) dolor sit amet, consectetur adipiscing elit.", ) self.assertEqual(item["name"], "") self.assertEqual(item["slugline"], "Belga 360 slugline")