From 90dd1cb3f0981883ff3f40847c3120c61f996710 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Wed, 25 Sep 2024 16:12:37 +0400 Subject: [PATCH 1/3] Enhanced test book descriptions with xapian keywords Added to test book descriptions words that serve as keywords for query syntax with boolean operators (or, and, not, xor, near, adj) enabled. Note that the change in indexed text has led to a change in the order of returned results. --- test/data/library.xml | 6 +++--- test/library_server.cpp | 26 +++++++++++++------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/test/data/library.xml b/test/data/library.xml index 1c1fe4886..8508b97f3 100644 --- a/test/data/library.xml +++ b/test/data/library.xml @@ -4,7 +4,7 @@ path="./zimfile_raycharles.zim" url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile_raycharles.zim" title="Ray Charles" - description="Wikipedia articles about Ray Charles" + description="Wikipedia articles about Ray Charles (not all of them but near to what an average newborn may find more than enough)" language="eng" creator="Wikipedia" publisher="Kiwix" @@ -22,7 +22,7 @@ path="./zimfile_raycharles_uncategorized.zim" url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile_raycharles_uncategorized.zim" title="Ray (uncategorized) Charles" - description="No category is assigned to this library entry." 
+ description="No category is assigned to this library entry (neither adj nor xor was considered a good option)" language="rus,eng" creator="Wikipedia" publisher="Kiwix" @@ -39,7 +39,7 @@ path="./zimfile&other.zim" url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile%26other.zim" title="Charles, Ray" - description="Wikipedia articles about Ray Charles" + description="Wikipedia articles about Ray Charles or why and when one should go to library" language="fra" creator="Wikipedia" publisher="Kiwix" diff --git a/test/library_server.cpp b/test/library_server.cpp index 2d52e5c15..aa5400c32 100644 --- a/test/library_server.cpp +++ b/test/library_server.cpp @@ -103,7 +103,7 @@ std::string maskVariableOPDSFeedData(std::string s) #define _CHARLES_RAY_CATALOG_ENTRY(CONTENT_NAME) CATALOG_ENTRY( \ "charlesray", \ "Charles, Ray", \ - "Wikipedia articles about Ray Charles", \ + "Wikipedia articles about Ray Charles or why and when one should go to library", \ "fra", \ "wikipedia_fr_ray_charles",\ "jazz",\ @@ -120,7 +120,7 @@ std::string maskVariableOPDSFeedData(std::string s) #define _RAY_CHARLES_CATALOG_ENTRY(CONTENT_NAME) CATALOG_ENTRY(\ "raycharles",\ "Ray Charles",\ - "Wikipedia articles about Ray Charles",\ + "Wikipedia articles about Ray Charles (not all of them but near to what an average newborn may find more than enough)",\ "eng",\ "wikipedia_en_ray_charles",\ "wikipedia",\ @@ -139,7 +139,7 @@ std::string maskVariableOPDSFeedData(std::string s) #define UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY CATALOG_ENTRY(\ "raycharles_uncategorized",\ "Ray (uncategorized) Charles",\ - "No category is assigned to this library entry.",\ + "No category is assigned to this library entry (neither adj nor xor was considered a good option)",\ "rus,eng",\ "wikipedia_ru_ray_charles",\ "",\ @@ -199,8 +199,8 @@ TEST_F(LibraryServerTest, catalog_search_by_phrase) " 0\n" " 2\n" CATALOG_LINK_TAGS - RAY_CHARLES_CATALOG_ENTRY CHARLES_RAY_CATALOG_ENTRY + RAY_CHARLES_CATALOG_ENTRY "\n" ); 
} @@ -218,8 +218,8 @@ TEST_F(LibraryServerTest, catalog_search_by_words) " 0\n" " 3\n" CATALOG_LINK_TAGS - RAY_CHARLES_CATALOG_ENTRY CHARLES_RAY_CATALOG_ENTRY + RAY_CHARLES_CATALOG_ENTRY UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY "\n" ); @@ -239,8 +239,8 @@ TEST_F(LibraryServerTest, catalog_prefix_search) " 0\n" " 2\n" CATALOG_LINK_TAGS - RAY_CHARLES_CATALOG_ENTRY CHARLES_RAY_CATALOG_ENTRY + RAY_CHARLES_CATALOG_ENTRY "\n" ); } @@ -275,8 +275,8 @@ TEST_F(LibraryServerTest, catalog_search_with_word_exclusion) " 0\n" " 2\n" CATALOG_LINK_TAGS - RAY_CHARLES_CATALOG_ENTRY CHARLES_RAY_CATALOG_ENTRY + RAY_CHARLES_CATALOG_ENTRY "\n" ); } @@ -331,8 +331,8 @@ TEST_F(LibraryServerTest, catalog_search_by_category) " 0\n" " 2\n" CATALOG_LINK_TAGS - RAY_CHARLES_CATALOG_ENTRY CHARLES_RAY_CATALOG_ENTRY + RAY_CHARLES_CATALOG_ENTRY "\n" ); } @@ -772,8 +772,8 @@ TEST_F(LibraryServerTest, catalog_v2_entries_filtered_by_search_terms) " 2\n" " 0\n" " 2\n" - RAY_CHARLES_CATALOG_ENTRY CHARLES_RAY_CATALOG_ENTRY + RAY_CHARLES_CATALOG_ENTRY "\n" ); } @@ -841,8 +841,8 @@ TEST_F(LibraryServerTest, catalog_v2_entries_filtered_by_category) " 2\n" " 0\n" " 2\n" - RAY_CHARLES_CATALOG_ENTRY CHARLES_RAY_CATALOG_ENTRY + RAY_CHARLES_CATALOG_ENTRY "\n" ); } @@ -1086,7 +1086,7 @@ TEST_F(LibraryServerTest, no_name_mapper_catalog_v2_individual_entry_access) "
\n" \ "
Charles, Ray
\n" \ "
\n" \ - "
Wikipedia articles about Ray Charles
\n" \ + "
Wikipedia articles about Ray Charles or why and when one should go to library
\n" \ " \n" \ " \n" \ "
\n" \ @@ -1113,7 +1113,7 @@ TEST_F(LibraryServerTest, no_name_mapper_catalog_v2_individual_entry_access) "
\n" \ "
Ray Charles
\n" \ "
\n" \ - "
Wikipedia articles about Ray Charles
\n" \ + "
Wikipedia articles about Ray Charles (not all of them but near to what an average newborn may find more than enough)
\n" \ "
\n" \ " \n" \ "
\n" \ @@ -1140,7 +1140,7 @@ TEST_F(LibraryServerTest, no_name_mapper_catalog_v2_individual_entry_access) "
\n" \ "
Ray (uncategorized) Charles
\n" \ "
\n" \ - "
No category is assigned to this library entry.
\n" \ + "
No category is assigned to this library entry (neither adj nor xor was considered a good option)
\n" \ "
\n" \ " \n" \ "
\n" \ From de64a5a724bafeb7d0b6d23ccfa64b14f5ffab84 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Wed, 25 Sep 2024 17:25:17 +0400 Subject: [PATCH 2/3] Testing of Xapian query operators in catalog search --- test/library_server.cpp | 124 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) diff --git a/test/library_server.cpp b/test/library_server.cpp index aa5400c32..e5cc2bd22 100644 --- a/test/library_server.cpp +++ b/test/library_server.cpp @@ -778,6 +778,130 @@ TEST_F(LibraryServerTest, catalog_v2_entries_filtered_by_search_terms) ); } +TEST_F(LibraryServerTest, catalog_v2_entries_filtering_special_queries) +{ + { + // 'or' acts as a Xapian boolean operator, resulting in malformed query + const auto r = zfs1_->GET("/ROOT%23%3F/catalog/v2/entries?q=Or"); + EXPECT_EQ(r->status, 500); + } + + { + // 'and' acts as a Xapian boolean operator, resulting in malformed query + const auto r = zfs1_->GET("/ROOT%23%3F/catalog/v2/entries?q=and"); + EXPECT_EQ(r->status, 500); + } + + { + // 'not' acts as a Xapian boolean operator, resulting in malformed query + const auto r = zfs1_->GET("/ROOT%23%3F/catalog/v2/entries?q=not"); + EXPECT_EQ(r->status, 500); + } + + { + // 'xor' acts as a Xapian boolean operator, resulting in malformed query + const auto r = zfs1_->GET("/ROOT%23%3F/catalog/v2/entries?q=xor"); + EXPECT_EQ(r->status, 500); + } + + { + // 'or' acts as a Xapian boolean operator + const auto r = zfs1_->GET("/ROOT%23%3F/catalog/v2/entries?q=wikipedia%20or%20library"); + EXPECT_EQ(r->status, 200); + EXPECT_EQ(maskVariableOPDSFeedData(r->body), + CATALOG_V2_ENTRIES_PREAMBLE("?q=wikipedia%20or%20library") + " Filtered Entries (q=wikipedia%20or%20library)\n" + " YYYY-MM-DDThh:mm:ssZ\n" + " 3\n" + " 0\n" + " 3\n" + UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY + CHARLES_RAY_CATALOG_ENTRY + RAY_CHARLES_CATALOG_ENTRY + "\n" + ); + } + + { + // 'and' acts as a Xapian boolean operator + const auto r = 
zfs1_->GET("/ROOT%23%3F/catalog/v2/entries?q=wikipedia%20and%20articles"); + EXPECT_EQ(r->status, 200); + EXPECT_EQ(maskVariableOPDSFeedData(r->body), + CATALOG_V2_ENTRIES_PREAMBLE("?q=wikipedia%20and%20articles") + " Filtered Entries (q=wikipedia%20and%20articles)\n" + " YYYY-MM-DDThh:mm:ssZ\n" + " 2\n" + " 0\n" + " 2\n" + CHARLES_RAY_CATALOG_ENTRY + RAY_CHARLES_CATALOG_ENTRY + "\n" + ); + } + + { + // 'near' doesn't act as a Xapian query operator + const auto r = zfs1_->GET("/ROOT%23%3F/catalog/v2/entries?q=near"); + EXPECT_EQ(r->status, 200); + EXPECT_EQ(maskVariableOPDSFeedData(r->body), + CATALOG_V2_ENTRIES_PREAMBLE("?q=near") + " Filtered Entries (q=near)\n" + " YYYY-MM-DDThh:mm:ssZ\n" + " 1\n" + " 0\n" + " 1\n" + RAY_CHARLES_CATALOG_ENTRY + "\n" + ); + } + + { + // 'adj' doesn't act as a Xapian query operator + const auto r = zfs1_->GET("/ROOT%23%3F/catalog/v2/entries?q=adj"); + EXPECT_EQ(r->status, 200); + EXPECT_EQ(maskVariableOPDSFeedData(r->body), + CATALOG_V2_ENTRIES_PREAMBLE("?q=adj") + " Filtered Entries (q=adj)\n" + " YYYY-MM-DDThh:mm:ssZ\n" + " 1\n" + " 0\n" + " 1\n" + UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY + "\n" + ); + } + + { + // 'near' doesn't act as a Xapian query operator + const auto r = zfs1_->GET("/ROOT%23%3F/catalog/v2/entries?q=charles%20near%20why"); + EXPECT_EQ(r->status, 200); + EXPECT_EQ(maskVariableOPDSFeedData(r->body), + CATALOG_V2_ENTRIES_PREAMBLE("?q=charles%20near%20why") + " Filtered Entries (q=charles%20near%20why)\n" + " YYYY-MM-DDThh:mm:ssZ\n" + " 0\n" + " 0\n" + " 0\n" + "\n" + ); + } + + { + // 'adj' doesn't act as a Xapian query operator + const auto r = zfs1_->GET("/ROOT%23%3F/catalog/v2/entries?q=charles%20adj%20why"); + EXPECT_EQ(r->status, 200); + EXPECT_EQ(maskVariableOPDSFeedData(r->body), + CATALOG_V2_ENTRIES_PREAMBLE("?q=charles%20adj%20why") + " Filtered Entries (q=charles%20adj%20why)\n" + " YYYY-MM-DDThh:mm:ssZ\n" + " 0\n" + " 0\n" + " 0\n" + "\n" + ); + } +} + TEST_F(LibraryServerTest, 
catalog_v2_entries_filtered_by_language) { { From 01bda6b2c0818becef3819aa6333344bbb8ee9fb Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Wed, 25 Sep 2024 17:32:39 +0400 Subject: [PATCH 3/3] Disabled Xapian boolean operators in catalog query --- src/library.cpp | 2 -- test/library_server.cpp | 67 ++++++++++++++++++++++++++++++++--------- 2 files changed, 52 insertions(+), 17 deletions(-) diff --git a/src/library.cpp b/src/library.cpp index 038052a36..d3e6333e1 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -645,8 +645,6 @@ Xapian::Query buildXapianQueryFromFilterQuery(const Filter& filter) //queryParser.set_stemmer(Xapian::Stem(iso639_3ToXapian(???))); //queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME); const auto flags = Xapian::QueryParser::FLAG_PHRASE - | Xapian::QueryParser::FLAG_BOOLEAN - | Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE | Xapian::QueryParser::FLAG_LOVEHATE | Xapian::QueryParser::FLAG_WILDCARD | partialQueryFlag; diff --git a/test/library_server.cpp b/test/library_server.cpp index e5cc2bd22..5159ffb3a 100644 --- a/test/library_server.cpp +++ b/test/library_server.cpp @@ -781,27 +781,67 @@ TEST_F(LibraryServerTest, catalog_v2_entries_filtered_by_search_terms) TEST_F(LibraryServerTest, catalog_v2_entries_filtering_special_queries) { { - // 'or' acts as a Xapian boolean operator, resulting in malformed query + // 'or' doesn't act as a Xapian boolean operator const auto r = zfs1_->GET("/ROOT%23%3F/catalog/v2/entries?q=Or"); - EXPECT_EQ(r->status, 500); + EXPECT_EQ(r->status, 200); + EXPECT_EQ(maskVariableOPDSFeedData(r->body), + CATALOG_V2_ENTRIES_PREAMBLE("?q=Or") + " Filtered Entries (q=Or)\n" + " YYYY-MM-DDThh:mm:ssZ\n" + " 1\n" + " 0\n" + " 1\n" + CHARLES_RAY_CATALOG_ENTRY + "\n" + ); } { - // 'and' acts as a Xapian boolean operator, resulting in malformed query + // 'and' doesn't act as a Xapian boolean operator const auto r = zfs1_->GET("/ROOT%23%3F/catalog/v2/entries?q=and"); - EXPECT_EQ(r->status, 500); + 
EXPECT_EQ(r->status, 200); + EXPECT_EQ(maskVariableOPDSFeedData(r->body), + CATALOG_V2_ENTRIES_PREAMBLE("?q=and") + " Filtered Entries (q=and)\n" + " YYYY-MM-DDThh:mm:ssZ\n" + " 1\n" + " 0\n" + " 1\n" + CHARLES_RAY_CATALOG_ENTRY + "\n" + ); } { - // 'not' acts as a Xapian boolean operator, resulting in malformed query + // 'not' doesn't act as a Xapian boolean operator const auto r = zfs1_->GET("/ROOT%23%3F/catalog/v2/entries?q=not"); - EXPECT_EQ(r->status, 500); + EXPECT_EQ(r->status, 200); + EXPECT_EQ(maskVariableOPDSFeedData(r->body), + CATALOG_V2_ENTRIES_PREAMBLE("?q=not") + " Filtered Entries (q=not)\n" + " YYYY-MM-DDThh:mm:ssZ\n" + " 1\n" + " 0\n" + " 1\n" + RAY_CHARLES_CATALOG_ENTRY + "\n" + ); } { - // 'xor' acts as a Xapian boolean operator, resulting in malformed query + // 'xor' doesn't act as a Xapian boolean operator const auto r = zfs1_->GET("/ROOT%23%3F/catalog/v2/entries?q=xor"); - EXPECT_EQ(r->status, 500); + EXPECT_EQ(r->status, 200); + EXPECT_EQ(maskVariableOPDSFeedData(r->body), + CATALOG_V2_ENTRIES_PREAMBLE("?q=xor") + " Filtered Entries (q=xor)\n" + " YYYY-MM-DDThh:mm:ssZ\n" + " 1\n" + " 0\n" + " 1\n" + UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY + "\n" + ); } { @@ -812,12 +852,10 @@ TEST_F(LibraryServerTest, catalog_v2_entries_filtering_special_queries) CATALOG_V2_ENTRIES_PREAMBLE("?q=wikipedia%20or%20library") " Filtered Entries (q=wikipedia%20or%20library)\n" " YYYY-MM-DDThh:mm:ssZ\n" - " 3\n" + " 1\n" " 0\n" - " 3\n" - UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY + " 1\n" CHARLES_RAY_CATALOG_ENTRY - RAY_CHARLES_CATALOG_ENTRY "\n" ); } @@ -830,11 +868,10 @@ TEST_F(LibraryServerTest, catalog_v2_entries_filtering_special_queries) CATALOG_V2_ENTRIES_PREAMBLE("?q=wikipedia%20and%20articles") " Filtered Entries (q=wikipedia%20and%20articles)\n" " YYYY-MM-DDThh:mm:ssZ\n" - " 2\n" + " 1\n" " 0\n" - " 2\n" + " 1\n" CHARLES_RAY_CATALOG_ENTRY - RAY_CHARLES_CATALOG_ENTRY "\n" ); }