diff --git a/server/belga/search_providers.py b/server/belga/search_providers.py index bddbfeee..56378a55 100644 --- a/server/belga/search_providers.py +++ b/server/belga/search_providers.py @@ -336,7 +336,7 @@ def get_search_text(self, query): return searchText def set_highlight(self, search_text, docs): - search_text = "|".join(search_text.split()) + search_text = "|".join(re.escape(term.strip()) for term in search_text.split()) fields = ("body_html", "headline", "slugline") for doc in docs: for field in fields: diff --git a/server/tests/fixtures/belga-360archive-search.json b/server/tests/fixtures/belga-360archive-search.json index 6daf6cba..31674983 100644 --- a/server/tests/fixtures/belga-360archive-search.json +++ b/server/tests/fixtures/belga-360archive-search.json @@ -7,7 +7,7 @@ "name": "", "credit": "BELGA", "topic": "Belga 360 slugline", - "headLine": "Lorem ipsum dolor sit amet, consectetur adipiscing elit.", + "headLine": "(Lorem ipsum) dolor sit amet, consectetur adipiscing elit.", "keywords": ["BRIEF", "#CORONAVIRUS", "SPORTS", "INTERNET"], "authors": [ { diff --git a/server/tests/search_providers/belga_360_archive_test.py b/server/tests/search_providers/belga_360_archive_test.py index 94df00a3..b7031ebc 100644 --- a/server/tests/search_providers/belga_360_archive_test.py +++ b/server/tests/search_providers/belga_360_archive_test.py @@ -173,7 +173,8 @@ def test_format_list_item(self): self.assertEqual(item["guid"], guid) self.assertEqual(item["extra"]["city"], "Bruxelles") self.assertEqual( - item["headline"], "Lorem ipsum dolor sit amet, consectetur adipiscing elit." + item["headline"], + "(Lorem ipsum) dolor sit amet, consectetur adipiscing elit.", ) self.assertEqual(item["name"], "") self.assertEqual(item["slugline"], "Belga 360 slugline") @@ -400,7 +401,7 @@ def test_get_highlighted_text(self): { "headline": [ ( - 'Lorem ipsum dolor ' + '(Lorem ipsum) dolor ' "sit amet, consectetur adipiscing elit." ) ] @@ -415,7 +416,25 @@ def test_get_highlighted_text(self): { "headline": [ ( - 'Lorem ipsum dolor ' + '(Lorem ipsum) dolor ' + "sit amet, consectetur adipiscing elit." + ) + ] + }, + ) + + query["query"]["filtered"]["query"]["query_string"][ + "query" + ] = "(Lorem ipsum)" + items = self.provider.find(query) + highlighted_item = items[0] + + self.assertEqual( + highlighted_item["es_highlight"], + { + "headline": [ + ( + '(Lorem ipsum) dolor ' "sit amet, consectetur adipiscing elit." ) ]