Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integration tests for match_phrase_prefix in PPL #78

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,9 @@
* SPDX-License-Identifier: Apache-2.0
*/


package org.opensearch.sql.ppl;

import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT;
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK;
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES;
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_PHRASE;
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BEER;
import static org.opensearch.sql.util.MatcherUtils.rows;
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;

Expand All @@ -21,21 +17,100 @@ public class MatchPhrasePrefixWhereCommandIT extends PPLIntegTestCase {

@Override
public void init() throws IOException {
  // Load each index queried by the tests in this class, in a fixed order.
  for (Index index : new Index[] {Index.BANK, Index.BEER}) {
    loadIndex(index);
  }
}

@Test
public void required_parameters() throws IOException {
  // Only the two mandatory arguments (field and query text) are passed.
  JSONObject result = executeQuery(String.format(
      "source = %s | WHERE match_phrase_prefix(Title, 'champagne be') | fields Title",
      TEST_INDEX_BEER));

  // Both expected titles contain 'champagne' followed by a word starting with 'be'.
  verifyDataRows(
      result,
      rows("Can old flat champagne be used for vinegar?"),
      rows("Elder flower champagne best to use natural yeast or add a wine yeast?"));
}


@Test
public void all_optional_parameters() throws IOException {
  // Exercises every optional argument at once; the chosen values are valid
  // but otherwise arbitrary.
  final String template =
      "source = %s "
          + "| WHERE match_phrase_prefix(Title, 'flat champ', boost = 1.0, "
          + "zero_terms_query='ALL', max_expansions = 2, analyzer=standard, slop=0) "
          + "| fields Title";
  final String ppl = String.format(template, TEST_INDEX_BEER);

  JSONObject result = executeQuery(ppl);
  verifyDataRows(result, rows("Can old flat champagne be used for vinegar?"));
}


@Test
public void max_expansions_is_3() throws IOException {
  // max_expansions limits how many terms the trailing prefix ('bottl') may
  // expand to. With a limit of 3, OpenSearch considers only the first three
  // terms starting with 'bottl'; in this dataset those are
  // 'bottle-conditioning', 'bottling' and 'bottles'.
  final String ppl = String.format(
      "source = %s "
          + "| WHERE match_phrase_prefix(Tags, 'draught bottl', max_expansions=3) | fields Tags",
      TEST_INDEX_BEER);

  JSONObject result = executeQuery(ppl);
  verifyDataRows(
      result,
      rows("brewing draught bottling"),
      rows("draught bottles"));
}

@Test
public void analyzer_english() throws IOException {
  // 'in' and 'to' are common words that the English analyzer drops, so the
  // query ends up with no terms at all and nothing is expected to match.
  final String ppl = String.format(
      "source = %s "
          + "| WHERE match_phrase_prefix(Title, 'in to', analyzer=english)"
          + "| fields Title",
      TEST_INDEX_BEER);

  JSONObject result = executeQuery(ppl);
  int total = result.getInt("total");
  assertTrue("Expect English analyzer to filter out common words 'in' and 'to'",
      total == 0);
}

@Test
public void analyzer_standard() throws IOException {
  // The standard analyzer keeps 'in' and 'to' as ordinary terms, so 'to'
  // acts as the phrase prefix and matches 'Tokyo'.
  final String ppl = String.format(
      "source = %s "
          + "| WHERE match_phrase_prefix(Title, 'in to', analyzer=standard)"
          + "| fields Title",
      TEST_INDEX_BEER);

  JSONObject result = executeQuery(ppl);
  verifyDataRows(result, rows("Local microbreweries and craft beer in Tokyo"));
}

@Test
public void zero_term_query_all() throws IOException {
  // The English analyzer drops 'in' and 'to', leaving no query terms.
  // zero_terms_query='ALL' then makes every row match.
  // 'sort -Title | head 1' narrows the output to a single row so the
  // expectation stays readable.
  final String ppl = String.format(
      "source = %s"
          + "| WHERE match_phrase_prefix(Title, 'in to', analyzer=english, zero_terms_query='ALL') "
          + "| sort -Title | head 1 | fields Title",
      TEST_INDEX_BEER);

  JSONObject result = executeQuery(ppl);
  verifyDataRows(result, rows("was working great, now all foam"));
}


@Test
public void match_phrase_prefix_required_parameters() throws IOException {
JSONObject result = executeQuery("source=" + TEST_INDEX_BANK
+ "| where match_phrase_prefix(address, 'Quentin Str') | fields lastname");
verifyDataRows(result, rows("Mcpherson"));
public void slop_is_2() throws IOException {
// When slop is 0, the terms are matched exactly in the order specified.
// 'open' is used to match prefix of the next term.
String query = "source = %s" +
"| where match_phrase_prefix(Tags, 'gas ta', slop=2) " +
"| fields Tags";
JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER));
verifyDataRows(result, rows("taste gas"));
}

@Test
public void match_phrase_prefix_all_parameters() throws IOException {
JSONObject result = executeQuery("source=" + TEST_INDEX_BANK
+ "| where match_phrase_prefix(address, 'Quentin Str', "
+ "boost = 1.0, zero_terms_query='ALL',max_expansions = 3, analyzer='standard') "
+ "| fields lastname");
verifyDataRows(result, rows("Mcpherson"));
public void slop_is_3() throws IOException {
// When slop is 2, results will include phrases where the query terms are transposed.
String query = "source = %s" +
"| where match_phrase_prefix(Tags, 'gas ta', slop=3)" +
"| fields Tags";
JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER));
verifyDataRows(result,
rows("taste draught gas"),
rows("taste gas"));
}
}