diff --git a/query/autocomplete.js b/query/autocomplete.js index f7ec02fdf..1ee4cb99e 100644 --- a/query/autocomplete.js +++ b/query/autocomplete.js @@ -10,12 +10,15 @@ const toSingleField = require('./view/helper').toSingleField; var views = { custom_boosts: require('./view/boost_sources_and_layers'), ngrams_strict: require('./view/ngrams_strict'), + ngrams_fuzzy: require('./view/ngrams_fuzzy'), ngrams_last_token_only: require('./view/ngrams_last_token_only'), ngrams_last_token_only_multi: require('./view/ngrams_last_token_only_multi'), admin_multi_match_first: require('./view/admin_multi_match_first'), admin_multi_match_last: require('./view/admin_multi_match_last'), phrase_first_tokens_only: require('./view/phrase_first_tokens_only'), boost_exact_matches: require('./view/boost_exact_matches'), + boost_exact_match_first_tokens_only: require('./view/boost_exact_match_first_tokens_only'), + boost_exact_match_last_tokens_only: require('./view/boost_exact_match_last_tokens_only'), max_character_count_layer_filter: require('./view/max_character_count_layer_filter'), focus_point_filter: require('./view/focus_point_distance_filter') }; @@ -49,6 +52,8 @@ query.score( views.admin_multi_match_first( adminFields ), 'must'); query.score( views.admin_multi_match_last( adminFields ), 'must'); // scoring boost +query.score( views.boost_exact_match_first_tokens_only ); +query.score( views.boost_exact_match_last_tokens_only ); query.score( peliasQuery.view.focus( peliasQuery.view.leaf.match_all ) ); query.score( peliasQuery.view.popularity( peliasQuery.view.leaf.match_all ) ); query.score( peliasQuery.view.population( peliasQuery.view.leaf.match_all ) ); @@ -175,6 +180,14 @@ function generateQuery( clean ){ textParser( clean, vs ); } + if (clean.fuzziness) { + vs.var('fuzzy:fuzziness', clean.fuzziness); + + if (clean.max_expansions) { + vs.var('fuzzy:max_expansions', clean.max_expansions); + } + } + // set the 'add_name_to_multimatch' variable only in the case where one // or 
more of the admin variables are set. // the value 'enabled' is not relevant, it just needs to be any non-empty diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js index 3c634c891..55636566b 100644 --- a/query/autocomplete_defaults.js +++ b/query/autocomplete_defaults.js @@ -60,8 +60,10 @@ module.exports = _.merge({}, peliasQuery.defaults, { // generic multi_match config 'multi_match:type': 'cross_fields', 'multi_match:ngrams_strict:type': 'phrase', + 'multi_match:ngrams_fuzzy:operator': 'and', 'multi_match:first_tokens_only:type': 'phrase', 'multi_match:boost_exact_matches:type': 'phrase', + 'multi_match:first_tokens_only_fuzzy:operator': 'and', // setting 'cutoff_frequency' will result in very common // terms such as country not scoring at all @@ -150,5 +152,9 @@ module.exports = _.merge({}, peliasQuery.defaults, { 'custom:boosting:boost': 5, // multiply score by this number to increase the strength of the boost 'custom:boosting:max_boost': 50, // maximum boosting which can be applied (max_boost/boost = max_score) 'custom:boosting:score_mode': 'sum', // sum all function scores before multiplying the boost - 'custom:boosting:boost_mode': 'multiply' // this mode is not relevant because there is no query section + 'custom:boosting:boost_mode': 'multiply', // this mode is not relevant because there is no query section + + 'fuzzy:fuzziness': 0, + 'fuzzy:max_expansions': 10, + 'fuzzy:prefix_length': 1 }); diff --git a/query/view/boost_exact_match_first_tokens_only.js b/query/view/boost_exact_match_first_tokens_only.js new file mode 100644 index 000000000..634800df9 --- /dev/null +++ b/query/view/boost_exact_match_first_tokens_only.js @@ -0,0 +1,17 @@ +const peliasQuery = require('pelias-query'); +const phrase_first_tokens_only = require('../view/phrase_first_tokens_only'); + +/** + This view is used to boost "exact" matches on first tokens when doing fuzzy queries. 
+ **/ +module.exports = function (vs) { + const fuzziness = vs.var('fuzzy:fuzziness').get(); + if (!fuzziness) { + return null; + } + + var vsCopy = new peliasQuery.Vars( vs.export() ); + vsCopy.var('fuzzy:fuzziness', 0); + + return phrase_first_tokens_only(vsCopy); +}; diff --git a/query/view/boost_exact_match_last_tokens_only.js b/query/view/boost_exact_match_last_tokens_only.js new file mode 100644 index 000000000..8e16c4fa8 --- /dev/null +++ b/query/view/boost_exact_match_last_tokens_only.js @@ -0,0 +1,34 @@ +const peliasQuery = require('pelias-query'); +const ngrams_last_token_only = require('./ngrams_last_token_only'); + +/** + This view is used to boost "exact" matches on last tokens when doing fuzzy queries. + **/ +module.exports = function (vs) { + const fuzziness = vs.var('fuzzy:fuzziness').get(); + if (!fuzziness) { + return null; + } + + var vsCopy = new peliasQuery.Vars( vs.export() ); + vsCopy.var('fuzzy:fuzziness', 0); + + // return the simple view for address queries + if( vsCopy.isset('input:street') ){ return ngrams_last_token_only(vsCopy); } + + // get a copy of the *tokens_incomplete* tokens produced from the input:name + var tokens = vsCopy.var('input:name:tokens_incomplete').get(); + + // no valid tokens to use, fail now, don't render this view. 
+ if (!tokens || tokens.length < 1) { return null; } + + // return the simple view for queries with no complete tokens + var complete_tokens = vsCopy.var('input:name:tokens_complete').get(); + if (!complete_tokens || complete_tokens.length < 1) { return ngrams_last_token_only(vsCopy); } + + // return the simple view when every complete token is numeric + var all_complete_tokens_numeric = complete_tokens.every(token => !token.replace(/[0-9]/g, '').length); + if (all_complete_tokens_numeric) { return ngrams_last_token_only(vsCopy); } + + return null; +}; diff --git a/query/view/boost_exact_matches.js b/query/view/boost_exact_matches.js index 39759fe77..71f4e08e7 100644 --- a/query/view/boost_exact_matches.js +++ b/query/view/boost_exact_matches.js @@ -28,11 +28,11 @@ module.exports = function( vs ){ // set 'input' to be only the fully completed characters vs.var(`multi_match:${view_name}:input`).set( tokens.join(' ') ); - vs.var(`multi_match:${view_name}:fields`).set(toMultiFields(searchDefaults['phrase:field'], vs.var('lang').get())); + vs.var(`multi_match:${view_name}:fields`).set(toMultiFields(vs.var('phrase:field').get(), vs.var('lang').get())); - vs.var(`multi_match:${view_name}:analyzer`).set(searchDefaults['phrase:analyzer']); + vs.var(`multi_match:${view_name}:analyzer`).set(vs.var('phrase:analyzer').get()); vs.var(`multi_match:${view_name}:boost`).set(vs.var('phrase:boost').get()); vs.var(`multi_match:${view_name}:slop`).set(vs.var('phrase:slop').get()); - return peliasQuery.view.leaf.match_phrase(view_name)( vs ); + return peliasQuery.view.leaf.multi_match(view_name) (vs); }; diff --git a/query/view/ngrams_fuzzy.js b/query/view/ngrams_fuzzy.js new file mode 100644 index 000000000..f27031aad --- /dev/null +++ b/query/view/ngrams_fuzzy.js @@ -0,0 +1,20 @@ +const peliasQuery = require('pelias-query'); +const toMultiFields = require('./helper').toMultiFields; + +/** + Ngrams view with fuzziness + **/ + +module.exports = function (vs) { + 
vs.var('multi_match:ngrams_fuzzy:input', vs.var('input:name').get()); + vs.var('multi_match:ngrams_fuzzy:fields', toMultiFields(vs.var('ngram:field').get(), vs.var('lang').get())); + + vs.var('multi_match:ngrams_fuzzy:analyzer', vs.var('ngram:analyzer').get()); + vs.var('multi_match:ngrams_fuzzy:boost', vs.var('ngram:boost').get()); + + vs.var('multi_match:ngrams_fuzzy:fuzziness', vs.var('fuzzy:fuzziness').get()); + vs.var('multi_match:ngrams_fuzzy:max_expansions', vs.var('fuzzy:max_expansions').get()); + vs.var('multi_match:ngrams_fuzzy:prefix_length', vs.var('fuzzy:prefix_length').get()); + + return peliasQuery.view.leaf.multi_match('ngrams_fuzzy')(vs); +}; diff --git a/query/view/ngrams_last_token_only.js b/query/view/ngrams_last_token_only.js index 331418d95..98b86d111 100644 --- a/query/view/ngrams_last_token_only.js +++ b/query/view/ngrams_last_token_only.js @@ -1,5 +1,6 @@ var peliasQuery = require('pelias-query'), - ngrams_strict = require('./ngrams_strict'); + ngrams_strict = require('./ngrams_strict'), + ngrams_fuzzy = require('./ngrams_fuzzy'); /** Ngrams view which trims the 'input:name' and only uses the LAST TOKEN. @@ -25,10 +26,12 @@ module.exports = function( vs ){ // set the 'name' variable in the copy to only the last token vsCopy.var('input:name').set( tokens.join(' ') ); + const fuzziness = vs.var('fuzzy:fuzziness').get(); + // return the view rendered using the copy return { 'constant_score': { - 'filter': ngrams_strict( vsCopy ) + 'filter': fuzziness === 0 ? 
ngrams_strict( vsCopy ) : ngrams_fuzzy( vsCopy ) } }; }; diff --git a/query/view/phrase_first_tokens_only.js b/query/view/phrase_first_tokens_only.js index f68f15aaa..ed6e5fc3b 100644 --- a/query/view/phrase_first_tokens_only.js +++ b/query/view/phrase_first_tokens_only.js @@ -9,7 +9,9 @@ const toMultiFields = require('./helper').toMultiFields; **/ module.exports = function( vs ){ - const view_name = 'first_tokens_only'; + const fuzziness = vs.var('fuzzy:fuzziness').get(); + + const view_name = fuzziness ? 'first_tokens_only_fuzzy' : 'first_tokens_only'; // get a copy of the *complete* tokens produced from the input:name const tokens = vs.var('input:name:tokens_complete').get(); @@ -22,7 +24,14 @@ module.exports = function( vs ){ vs.var(`multi_match:${view_name}:analyzer`).set(vs.var('phrase:analyzer').get()); vs.var(`multi_match:${view_name}:boost`).set(vs.var('phrase:boost').get()); - vs.var(`multi_match:${view_name}:slop`).set(vs.var('phrase:slop').get()); + + if (fuzziness === 0) { + vs.var(`multi_match:${view_name}:slop`).set(vs.var('phrase:slop').get()); + } else { + vs.var(`multi_match:${view_name}:fuzziness`).set(fuzziness); + vs.var(`multi_match:${view_name}:max_expansions`).set(vs.var('fuzzy:max_expansions').get()); + vs.var(`multi_match:${view_name}:prefix_length`).set(vs.var('fuzzy:prefix_length').get()); + } return peliasQuery.view.leaf.multi_match(view_name)( vs ); }; diff --git a/sanitizer/_fuzziness.js b/sanitizer/_fuzziness.js new file mode 100644 index 000000000..e407fa984 --- /dev/null +++ b/sanitizer/_fuzziness.js @@ -0,0 +1,46 @@ +const _ = require('lodash'); + +function _sanitize( raw, clean ){ + const IS_NUMERIC_REGEXP = /^\d+$/; + + // error & warning messages + var messages = { errors: [], warnings: [] }; + + if (_.isUndefined(raw)) { + return messages; + } + + if (_.has(raw, 'fuzziness')) { + if (raw.fuzziness === 'AUTO') { + clean.fuzziness = raw.fuzziness; + } else if (IS_NUMERIC_REGEXP.test(raw.fuzziness) && parseInt(raw.fuzziness) > 0 
&& parseInt(raw.fuzziness) <= 2) { + clean.fuzziness = parseInt(raw.fuzziness); + } else { + messages.errors.push('invalid value for fuzziness; valid values are 1, 2 and AUTO'); + } + + if (_.has(raw, 'max_expansions')) { + if (IS_NUMERIC_REGEXP.test(raw.max_expansions) && + parseInt(raw.max_expansions) >= 0 && + parseInt(raw.max_expansions) <= 50) { + + clean.max_expansions = parseInt(raw.max_expansions); + } else { + messages.errors.push('invalid value for max_expansions; valid values are between 0 and 50'); + } + } + } + + return messages; +} + +function _expected() { + return [ + { name: 'fuzziness' }, + { name: 'max_expansions' }]; +} + +module.exports = () => ({ + sanitize: _sanitize, + expected: _expected +}); diff --git a/sanitizer/autocomplete.js b/sanitizer/autocomplete.js index 9a6395c72..a1514efbc 100644 --- a/sanitizer/autocomplete.js +++ b/sanitizer/autocomplete.js @@ -20,8 +20,9 @@ module.exports.middleware = (_api_pelias_config) => { boundary_country: require('../sanitizer/_boundary_country')(), categories: require('../sanitizer/_categories')(), request_language: require('../sanitizer/_request_language')(), - boundary_gid: require('../sanitizer/_boundary_gid')() - }; + boundary_gid: require('../sanitizer/_boundary_gid')(), + fuzziness: require('../sanitizer/_fuzziness')() + }; return ( req, res, next ) => { sanitizeAll.runAllChecks(req, sanitizers); diff --git a/test/unit/fixture/autocomplete_boundary_country_fuzzy.js b/test/unit/fixture/autocomplete_boundary_country_fuzzy.js new file mode 100644 index 000000000..b3b1d2827 --- /dev/null +++ b/test/unit/fixture/autocomplete_boundary_country_fuzzy.js @@ -0,0 +1,69 @@ +module.exports = { + 'query': { + 'bool': { + 'must': [{ + 'constant_score': { + 'filter': { + 'multi_match': { + 'fields': ['name.default', 'name.en'], + 'analyzer': 'peliasQuery', + 'query': 'test', + 'boost': 100, + 'type': 'best_fields', + 'operator': 'and', + 'fuzziness': 1, + 'max_expansions': 40, + 'prefix_length': 1 + } + } + } + 
}], + 'should':[{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }] + } + },{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 3 + }] + } + }], + 'filter': [{ + 'match': { + 'parent.country_a.ngram': { + 'analyzer': 'standard', + 'query': 'ABC' + } + } + }] + } + }, + 'sort': [ '_score' ], + 'size': 20, + 'track_scores': true +}; diff --git a/test/unit/fixture/autocomplete_boundary_gid_fuzzy.js b/test/unit/fixture/autocomplete_boundary_gid_fuzzy.js new file mode 100644 index 000000000..b614c63a9 --- /dev/null +++ b/test/unit/fixture/autocomplete_boundary_gid_fuzzy.js @@ -0,0 +1,67 @@ +module.exports = { + 'query': { + 'bool': { + 'must': [{ + 'constant_score': { + 'filter': { + 'multi_match': { + 'fields': ['name.default', 'name.en'], + 'analyzer': 'peliasQuery', + 'query': 'test', + 'boost': 100, + 'type': 'best_fields', + 'operator': 'and', + 'fuzziness': 1, + 'max_expansions': 40, + 'prefix_length': 1 + } + } + } + }], + 'should':[{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }] + } + },{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 3 + }] + } + }], + 'filter': [{ + 'multi_match': { + 'fields': ['parent.*_id'], + 'query': '123' + } + }] + } + 
}, + 'sort': [ '_score' ], + 'size': 20, + 'track_scores': true +}; diff --git a/test/unit/fixture/autocomplete_linguistic_bbox_san_francisco_fuzzy.js b/test/unit/fixture/autocomplete_linguistic_bbox_san_francisco_fuzzy.js new file mode 100644 index 000000000..484616153 --- /dev/null +++ b/test/unit/fixture/autocomplete_linguistic_bbox_san_francisco_fuzzy.js @@ -0,0 +1,72 @@ +module.exports = { + 'query': { + 'bool': { + 'must': [{ + 'constant_score': { + 'filter': { + 'multi_match': { + 'fields': ['name.default', 'name.en'], + 'analyzer': 'peliasQuery', + 'query': 'test', + 'boost': 100, + 'type': 'best_fields', + 'operator': 'and', + 'fuzziness': 1, + 'max_expansions': 40, + 'prefix_length': 1 + } + } + } + }], + 'should':[{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }] + } + },{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 3 + }] + } + }], + 'filter': [{ + 'geo_bounding_box': { + 'type': 'indexed', + 'center_point': { + 'top': 37.83239, + 'right': -122.35698, + 'bottom': 37.70808, + 'left': -122.51489 + } + } + }] + } + }, + 'sort': [ '_score' ], + 'size': 20, + 'track_scores': true +}; diff --git a/test/unit/fixture/autocomplete_linguistic_circle_san_francisco_fuzzy.js b/test/unit/fixture/autocomplete_linguistic_circle_san_francisco_fuzzy.js new file mode 100644 index 000000000..c6b1df6ef --- /dev/null +++ b/test/unit/fixture/autocomplete_linguistic_circle_san_francisco_fuzzy.js @@ -0,0 +1,88 @@ +module.exports = { + 'query': { + 'bool': { + 'must': [ + { + 'constant_score': { + 'filter': { + 'multi_match': { + 'fields': ['name.default', 
'name.en'], + 'analyzer': 'peliasQuery', + 'query': 'test', + 'boost': 100, + 'type': 'best_fields', + 'operator': 'and', + 'fuzziness': 1, + 'max_expansions': 40, + 'prefix_length': 1 + } + } + } + } + ], + 'should': [ + { + 'function_score': { + 'query': { + 'match_all': { + + } + }, + 'max_boost': 20, + 'functions': [ + { + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + } + ], + 'score_mode': 'first', + 'boost_mode': 'replace' + } + }, + { + 'function_score': { + 'query': { + 'match_all': { + + } + }, + 'max_boost': 20, + 'functions': [ + { + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 3 + } + ], + 'score_mode': 'first', + 'boost_mode': 'replace' + } + } + ], + 'filter': [ + { + 'geo_distance': { + 'distance': '20km', + 'distance_type': 'plane', + 'center_point': { + 'lat': 37.83239, + 'lon': -122.35698 + } + } + } + ] + } + }, + 'size': 20, + 'track_scores': true, + 'sort': [ + '_score' + ] +}; diff --git a/test/unit/fixture/autocomplete_linguistic_final_token_fuzzy.js b/test/unit/fixture/autocomplete_linguistic_final_token_fuzzy.js new file mode 100644 index 000000000..5998a29e3 --- /dev/null +++ b/test/unit/fixture/autocomplete_linguistic_final_token_fuzzy.js @@ -0,0 +1,71 @@ +module.exports = { + 'query': { + 'bool': { + 'must': [{ + 'multi_match': { + 'fields': ['phrase.default', 'phrase.en'], + 'analyzer': 'peliasQuery', + 'query': 'one', + 'boost': 1, + 'type': 'best_fields', + 'operator': 'and', + 'fuzziness': 1, + 'max_expansions': 40, + 'prefix_length': 1, + } + }], + 'should':[ + { + 'multi_match': { + 'type': 'phrase', + 'query': 'one', + 'fields': [ + 'phrase.default', + 'phrase.en' + ], + 'analyzer': 'peliasQuery', + 'boost': 1, + 'slop': 3 + } + }, + { + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 
'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }] + } + },{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 3 + }] + } + }] + } + }, + 'sort': [ '_score' ], + 'size': 20, + 'track_scores': true +}; diff --git a/test/unit/fixture/autocomplete_linguistic_focus_fuzzy.js b/test/unit/fixture/autocomplete_linguistic_focus_fuzzy.js new file mode 100644 index 000000000..8602f78f2 --- /dev/null +++ b/test/unit/fixture/autocomplete_linguistic_focus_fuzzy.js @@ -0,0 +1,125 @@ +module.exports = { + 'query': { + 'bool': { + 'must': [{ + 'constant_score': { + 'filter': { + 'multi_match': { + 'fields': ['name.default', 'name.en'], + 'analyzer': 'peliasQuery', + 'query': 'test', + 'boost': 100, + 'type': 'best_fields', + 'operator': 'and', + 'fuzziness': 1, + 'max_expansions': 40, + 'prefix_length': 1 + } + } + } + }], + 'should': [{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'functions': [{ + 'exp': { + 'center_point': { + 'origin': { + 'lat': 29.49136, + 'lon': -82.50622 + }, + 'offset': '0km', + 'scale': '50km', + 'decay': 0.5 + } + }, + 'weight': 15 + }], + 'score_mode': 'avg', + 'boost_mode': 'replace' + } + },{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }] + } + },{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 3 + }] + } + }], + 'filter': [{ + 'bool': { + 'minimum_should_match': 1, + 'should': [ + { + 
'terms': { + 'layer': [ + 'venue', + 'country', + 'macroregion', + 'region', + 'county', + 'localadmin', + 'locality', + 'borough', + 'neighbourhood', + 'continent', + 'empire', + 'dependency', + 'macrocounty', + 'macrohood', + 'microhood', + 'disputed', + 'postalcode', + 'ocean', + 'marinearea' + ] + } + }, + { + 'geo_distance': { + 'distance': '600km', + 'distance_type': 'plane', + 'center_point': { + 'lat': 29.49136, + 'lon': -82.50622 + } + } + } + ] + } + }] + } + }, + 'sort': [ '_score' ], + 'size': 20, + 'track_scores': true +}; diff --git a/test/unit/fixture/autocomplete_linguistic_focus_null_island_fuzzy.js b/test/unit/fixture/autocomplete_linguistic_focus_null_island_fuzzy.js new file mode 100644 index 000000000..137cad3ed --- /dev/null +++ b/test/unit/fixture/autocomplete_linguistic_focus_null_island_fuzzy.js @@ -0,0 +1,123 @@ +module.exports = { + 'query': { + 'bool': { + 'must': [{ + 'constant_score': { + 'filter': { + 'multi_match': { + 'fields': ['name.default', 'name.en'], + 'analyzer': 'peliasQuery', + 'query': 'test', + 'boost': 100, + 'type': 'best_fields', + 'operator': 'and', + 'fuzziness': 1, + 'max_expansions': 40, + 'prefix_length': 1 + } + } + } + }], + 'should': [{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'functions': [{ + 'exp': { + 'center_point': { + 'origin': { + 'lat': 0, + 'lon': 0 + }, + 'offset': '0km', + 'scale': '50km', + 'decay': 0.5 + } + }, + 'weight': 15 + }], + 'score_mode': 'avg', + 'boost_mode': 'replace' + } + },{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }] + } + },{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 
'missing': 1 + }, + 'weight': 3 + }] + } + }], + 'filter': [{ + 'bool': { + 'minimum_should_match': 1, + 'should': [{ + 'terms': { + 'layer': [ + 'venue', + 'country', + 'macroregion', + 'region', + 'county', + 'localadmin', + 'locality', + 'borough', + 'neighbourhood', + 'continent', + 'empire', + 'dependency', + 'macrocounty', + 'macrohood', + 'microhood', + 'disputed', + 'postalcode', + 'ocean', + 'marinearea' + ] + } + }, + { + 'geo_distance': { + 'distance': '600km', + 'distance_type': 'plane', + 'center_point': { + 'lat': 0, + 'lon': 0 + } + } + }] + } + }] + } + }, + 'sort': [ '_score' ], + 'size': 20, + 'track_scores': true +}; diff --git a/test/unit/fixture/autocomplete_linguistic_multiple_tokens_complete_numeric_fuzzy.js b/test/unit/fixture/autocomplete_linguistic_multiple_tokens_complete_numeric_fuzzy.js new file mode 100644 index 000000000..74290d7f4 --- /dev/null +++ b/test/unit/fixture/autocomplete_linguistic_multiple_tokens_complete_numeric_fuzzy.js @@ -0,0 +1,88 @@ +module.exports = { + 'query': { + 'bool': { + 'must': [{ + 'multi_match': { + 'fields': ['phrase.default', 'phrase.en'], + 'analyzer': 'peliasQuery', + 'type': 'best_fields', + 'operator': 'and', + 'fuzziness': 2, + 'max_expansions': 30, + 'prefix_length': 1, + 'boost': 1, + 'query': '1 2' + } + }, + { + 'constant_score': { + 'filter': { + 'multi_match': { + 'fields': ['name.default', 'name.en'], + 'analyzer': 'peliasQuery', + 'query': 'three', + 'boost': 100, + 'type': 'best_fields', + 'operator': 'and', + 'fuzziness': 2, + 'max_expansions': 30, + 'prefix_length': 1 + } + } + } + }], + 'should': [ + { + 'multi_match': { + 'type': 'phrase', + 'query': '1 2', + 'fields': [ + 'phrase.default', + 'phrase.en' + ], + 'analyzer': 'peliasQuery', + 'boost': 1, + 'slop': 3 + } + }, + { + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 
'popularity', + 'missing': 1 + }, + 'weight': 1 + }] + } + }, { + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 3 + }] + } + }] + } + }, + 'sort': ['_score'], + 'size': 20, + 'track_scores': true +}; diff --git a/test/unit/fixture/autocomplete_linguistic_multiple_tokens_fuzzy.js b/test/unit/fixture/autocomplete_linguistic_multiple_tokens_fuzzy.js new file mode 100644 index 000000000..e6cd4a7cb --- /dev/null +++ b/test/unit/fixture/autocomplete_linguistic_multiple_tokens_fuzzy.js @@ -0,0 +1,98 @@ +module.exports = { + 'query': { + 'bool': { + 'must': [{ + 'multi_match': { + 'fields': ['phrase.default', 'phrase.en'], + 'analyzer': 'peliasQuery', + 'query': 'one two', + 'boost': 1, + 'type': 'best_fields', + 'operator': 'and', + 'fuzziness': 'auto', + 'max_expansions': 10, + 'prefix_length': 1 + } + }, + { + 'constant_score': { + 'filter': { + 'multi_match': { + 'fields': [ + 'parent.country.ngram^1', + 'parent.dependency.ngram^1', + 'parent.macroregion.ngram^1', + 'parent.region.ngram^1', + 'parent.county.ngram^1', + 'parent.localadmin.ngram^1', + 'parent.locality.ngram^1', + 'parent.borough.ngram^1', + 'parent.neighbourhood.ngram^1', + 'parent.locality_a.ngram^1', + 'parent.region_a.ngram^1', + 'parent.country_a.ngram^1', + 'name.default^1.5', + 'name.en^1.5' + ], + 'query': 'three', + 'analyzer': 'peliasQuery', + 'type': 'cross_fields' + } + } + } + }], + 'should':[ + { + 'multi_match': { + 'type': 'phrase', + 'query': 'one two', + 'fields': [ + 'phrase.default', + 'phrase.en' + ], + 'analyzer': 'peliasQuery', + 'boost': 1, + 'slop': 3 + } + }, + { + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 
'popularity', + 'missing': 1 + }, + 'weight': 1 + }] + } + },{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 3 + }] + } + }] + } + }, + 'sort': [ '_score' ], + 'size': 20, + 'track_scores': true +}; diff --git a/test/unit/fixture/autocomplete_linguistic_one_char_token_fuzzy.js b/test/unit/fixture/autocomplete_linguistic_one_char_token_fuzzy.js new file mode 100644 index 000000000..6dec7f413 --- /dev/null +++ b/test/unit/fixture/autocomplete_linguistic_one_char_token_fuzzy.js @@ -0,0 +1,87 @@ +module.exports = { + 'query': { + 'bool': { + 'must': [{ + 'constant_score': { + 'filter': { + 'multi_match': { + 'fields': ['name.default', 'name.en'], + 'analyzer': 'peliasQuery', + 'query': 't', + 'boost': 100, + 'type': 'best_fields', + 'operator': 'and', + 'fuzziness': 1, + 'max_expansions': 40, + 'prefix_length': 1 + } + } + } + }], + 'should':[{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }] + } + },{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 3 + }] + } + }], + 'filter': [{ + 'terms': { + 'layer': [ + 'venue', + 'street', + 'country', + 'macroregion', + 'region', + 'county', + 'localadmin', + 'locality', + 'borough', + 'neighbourhood', + 'continent', + 'empire', + 'dependency', + 'macrocounty', + 'macrohood', + 'microhood', + 'disputed', + 'postalcode', + 'ocean', + 'marinearea' + ] + } + }] + } + }, + 'sort': [ '_score' ], + 'size': 20, + 
'track_scores': true +}; diff --git a/test/unit/fixture/autocomplete_linguistic_only_fuzzy.js b/test/unit/fixture/autocomplete_linguistic_only_fuzzy.js new file mode 100644 index 000000000..550b07aa8 --- /dev/null +++ b/test/unit/fixture/autocomplete_linguistic_only_fuzzy.js @@ -0,0 +1,61 @@ +module.exports = { + 'query': { + 'bool': { + 'must': [{ + 'constant_score': { + 'filter': { + 'multi_match': { + 'fields': ['name.default', 'name.en'], + 'analyzer': 'peliasQuery', + 'query': 'test', + 'boost': 100, + 'type': 'best_fields', + 'fuzziness': 1, + 'max_expansions': 20, + 'prefix_length': 1, + 'operator': 'and' + } + } + } + }], + 'should':[{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }] + } + },{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 3 + }] + } + }] + } + }, + 'sort': [ '_score' ], + 'size': 20, + 'track_scores': true +}; diff --git a/test/unit/fixture/autocomplete_linguistic_three_char_token_fuzzy.js b/test/unit/fixture/autocomplete_linguistic_three_char_token_fuzzy.js new file mode 100644 index 000000000..49fd45041 --- /dev/null +++ b/test/unit/fixture/autocomplete_linguistic_three_char_token_fuzzy.js @@ -0,0 +1,61 @@ +module.exports = { + 'query': { + 'bool': { + 'must': [{ + 'constant_score': { + 'filter': { + 'multi_match': { + 'fields': ['name.default', 'name.en'], + 'analyzer': 'peliasQuery', + 'query': 'tes', + 'boost': 100, + 'type': 'best_fields', + 'operator': 'and', + 'fuzziness': 1, + 'max_expansions': 40, + 'prefix_length': 1 + } + } + } + }], + 'should':[{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 
20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }] + } + },{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 3 + }] + } + }] + } + }, + 'sort': [ '_score' ], + 'size': 20, + 'track_scores': true +}; diff --git a/test/unit/fixture/autocomplete_linguistic_two_char_token_fuzzy.js b/test/unit/fixture/autocomplete_linguistic_two_char_token_fuzzy.js new file mode 100644 index 000000000..d7b636948 --- /dev/null +++ b/test/unit/fixture/autocomplete_linguistic_two_char_token_fuzzy.js @@ -0,0 +1,87 @@ +module.exports = { + 'query': { + 'bool': { + 'must': [{ + 'constant_score': { + 'filter': { + 'multi_match': { + 'fields': ['name.default', 'name.en'], + 'analyzer': 'peliasQuery', + 'query': 'te', + 'boost': 100, + 'type': 'best_fields', + 'operator': 'and', + 'fuzziness': 1, + 'max_expansions': 40, + 'prefix_length': 1 + } + } + } + }], + 'should':[{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }] + } + },{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 3 + }] + } + }], + 'filter': [{ + 'terms': { + 'layer': [ + 'venue', + 'street', + 'country', + 'macroregion', + 'region', + 'county', + 'localadmin', + 'locality', + 'borough', + 'neighbourhood', + 'continent', + 'empire', + 'dependency', + 'macrocounty', + 'macrohood', + 
'microhood', + 'disputed', + 'postalcode', + 'ocean', + 'marinearea' + ] + } + }] + } + }, + 'sort': [ '_score' ], + 'size': 20, + 'track_scores': true +}; diff --git a/test/unit/fixture/autocomplete_linguistic_with_admin_fuzzy.js b/test/unit/fixture/autocomplete_linguistic_with_admin_fuzzy.js new file mode 100644 index 000000000..87209c2b1 --- /dev/null +++ b/test/unit/fixture/autocomplete_linguistic_with_admin_fuzzy.js @@ -0,0 +1,104 @@ +module.exports = { + 'query': { + 'bool': { + 'must': [ + { + 'multi_match': { + 'fields': ['phrase.default', 'phrase.en'], + 'analyzer': 'peliasQuery', + 'type': 'best_fields', + 'operator': 'and', + 'fuzziness': 1, + 'max_expansions': 40, + 'prefix_length': 1, + 'boost': 1, + 'query': 'one two' + } + }, + { + 'multi_match': { + 'fields': [ + 'parent.country.ngram^1', + 'parent.dependency.ngram^1', + 'parent.macroregion.ngram^1', + 'parent.region.ngram^1', + 'parent.county.ngram^1', + 'parent.localadmin.ngram^1', + 'parent.locality.ngram^1', + 'parent.borough.ngram^1', + 'parent.neighbourhood.ngram^1', + 'parent.locality_a.ngram^1', + 'parent.region_a.ngram^1', + 'parent.country_a.ngram^1', + 'name.default^1.5', + 'name.en^1.5' + ], + 'query': 'three', + 'analyzer': 'peliasAdmin', + 'type': 'cross_fields' + } + } + ], + 'should': [ + { + 'multi_match': { + 'type': 'phrase', + 'query': 'one two', + 'fields': [ + 'phrase.default', + 'phrase.en' + ], + 'analyzer': 'peliasQuery', + 'boost': 1, + 'slop': 3 + } + }, + { + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'functions': [ + { + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + } + ], + 'score_mode': 'first', + 'boost_mode': 'replace' + } + }, + { + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'functions': [ + { + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 3 + } + ], + 'score_mode': 'first', + 
'boost_mode': 'replace' + } + } + ] + } + }, + 'size': 20, + 'track_scores': true, + 'sort': [ + '_score' + ] +}; diff --git a/test/unit/fixture/autocomplete_single_character_street_fuzzy.js b/test/unit/fixture/autocomplete_single_character_street_fuzzy.js new file mode 100644 index 000000000..78a3a7e79 --- /dev/null +++ b/test/unit/fixture/autocomplete_single_character_street_fuzzy.js @@ -0,0 +1,91 @@ +module.exports = { + 'query': { + 'bool': { + 'must': [{ + 'multi_match': { + 'fields': ['phrase.default', 'phrase.en'], + 'analyzer': 'peliasQuery', + 'query': 'k road', + 'boost': 1, + 'type': 'best_fields', + 'operator': 'and', + 'fuzziness': 1, + 'max_expansions': 40, + 'prefix_length': 1 + } + }, { + 'multi_match': { + 'fields': [ + 'parent.country.ngram^1', + 'parent.dependency.ngram^1', + 'parent.macroregion.ngram^1', + 'parent.region.ngram^1', + 'parent.county.ngram^1', + 'parent.localadmin.ngram^1', + 'parent.locality.ngram^1', + 'parent.borough.ngram^1', + 'parent.neighbourhood.ngram^1', + 'parent.locality_a.ngram^1', + 'parent.region_a.ngram^1', + 'parent.country_a.ngram^1', + 'name.default^1.5', + 'name.en^1.5' + ], + 'query': 'laird', + 'analyzer': 'peliasAdmin', + 'type': 'cross_fields' + } + }], + 'should':[{ + 'multi_match': { + 'type': 'phrase', + 'query': 'k road', + 'fields': [ + 'phrase.default', + 'phrase.en' + ], + 'analyzer': 'peliasQuery', + 'boost': 1, + 'slop': 3 + } + },{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }] + } + },{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 3 + }] + } + }] + } + }, + 'sort': [ '_score' ], + 
'size': 20, + 'track_scores': true +}; diff --git a/test/unit/fixture/autocomplete_with_category_filtering_fuzzy.js b/test/unit/fixture/autocomplete_with_category_filtering_fuzzy.js new file mode 100644 index 000000000..83dd984b2 --- /dev/null +++ b/test/unit/fixture/autocomplete_with_category_filtering_fuzzy.js @@ -0,0 +1,66 @@ +module.exports = { + 'query': { + 'bool': { + 'must': [{ + 'constant_score': { + 'filter': { + 'multi_match': { + 'fields': ['name.default', 'name.en'], + 'analyzer': 'peliasQuery', + 'query': 'test', + 'boost': 100, + 'type': 'best_fields', + 'operator': 'and', + 'fuzziness': 1, + 'max_expansions': 40, + 'prefix_length': 1 + } + } + } + }], + 'should': [{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }] + } + }, { + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 3 + }] + } + }], + 'filter': [{ + 'terms': { + 'category': ['retail', 'food'] + } + }] + } + }, + 'sort': ['_score'], + 'size': 20, + 'track_scores': true +}; diff --git a/test/unit/fixture/autocomplete_with_layer_filtering_fuzzy.js b/test/unit/fixture/autocomplete_with_layer_filtering_fuzzy.js new file mode 100644 index 000000000..cbc8a1f75 --- /dev/null +++ b/test/unit/fixture/autocomplete_with_layer_filtering_fuzzy.js @@ -0,0 +1,66 @@ +module.exports = { + 'query': { + 'bool': { + 'must': [{ + 'constant_score': { + 'filter': { + 'multi_match': { + 'fields': ['name.default', 'name.en'], + 'analyzer': 'peliasQuery', + 'query': 'test', + 'boost': 100, + 'type': 'best_fields', + 'operator': 'and', + 'fuzziness': 1, + 'max_expansions': 40, + 'prefix_length': 1 + } + } + } + 
}], + 'should':[{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }] + } + },{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 3 + }] + } + }], + 'filter': [{ + 'terms': { + 'layer': ['country'] + } + }] + } + }, + 'sort': [ '_score' ], + 'size': 20, + 'track_scores': true +}; diff --git a/test/unit/fixture/autocomplete_with_source_filtering_fuzzy.js b/test/unit/fixture/autocomplete_with_source_filtering_fuzzy.js new file mode 100644 index 000000000..5f6bc8180 --- /dev/null +++ b/test/unit/fixture/autocomplete_with_source_filtering_fuzzy.js @@ -0,0 +1,66 @@ +module.exports = { + 'query': { + 'bool': { + 'must': [{ + 'constant_score': { + 'filter': { + 'multi_match': { + 'fields': ['name.default', 'name.en'], + 'analyzer': 'peliasQuery', + 'query': 'test', + 'boost': 100, + 'type': 'best_fields', + 'operator': 'and', + 'fuzziness': 1, + 'max_expansions': 40, + 'prefix_length': 1 + } + } + } + }], + 'should':[{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'popularity', + 'missing': 1 + }, + 'weight': 1 + }] + } + },{ + 'function_score': { + 'query': { + 'match_all': {} + }, + 'max_boost': 20, + 'score_mode': 'first', + 'boost_mode': 'replace', + 'functions': [{ + 'field_value_factor': { + 'modifier': 'log1p', + 'field': 'population', + 'missing': 1 + }, + 'weight': 3 + }] + } + }], + 'filter': [{ + 'terms': { + 'source': ['test_source'] + } + }] + } + }, + 'sort': [ '_score' ], + 'size': 20, + 
'track_scores': true +}; diff --git a/test/unit/query/autocomplete_fuzzy.js b/test/unit/query/autocomplete_fuzzy.js new file mode 100644 index 000000000..6b2db7f38 --- /dev/null +++ b/test/unit/query/autocomplete_fuzzy.js @@ -0,0 +1,414 @@ +const proxyquire = require('proxyquire').noCallThru(); +const realPeliasConfig = require('pelias-config'); +const defaultPeliasConfig = { + generate: function() { + return realPeliasConfig.generateDefaults(); + } +}; + +var generate = proxyquire('../../../query/autocomplete', { + 'pelias-config': defaultPeliasConfig +}); + +module.exports.tests = {}; + +module.exports.tests.interface = function(test, common) { + test('valid interface', function(t) { + t.equal(typeof generate, 'function', 'valid function'); + t.end(); + }); +}; + +module.exports.tests.query = function(test, common) { + test('valid lingustic-only fuzzy autocomplete', function(t) { + var query = generate({ + text: 'test', + tokens: ['test'], + tokens_complete: [], + tokens_incomplete: ['test'], + fuzziness: 1, + max_expansions: 20 + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/autocomplete_linguistic_only_fuzzy'); + + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'autocomplete_linguistic_only_fuzzy'); + t.end(); + }); + + test('valid lingustic fuzzy autocomplete with 3 tokens', function(t) { + var query = generate({ + text: 'one two three', + tokens: ['one','two','three'], + tokens_complete: ['one','two'], + tokens_incomplete: ['three'], + fuzziness: 'auto', + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/autocomplete_linguistic_multiple_tokens_fuzzy'); + + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'autocomplete_linguistic_multiple_tokens_fuzzy'); + t.end(); + }); + + // This is to prevent a query like '30 west' from considering the 'west' part as 
an admin component + test('valid lingustic fuzzy autocomplete with 3 tokens - first two are numeric', function (t) { + var query = generate({ + text: '1 1 three', + tokens: ['1', '2', 'three'], + tokens_complete: ['1', '2'], + tokens_incomplete: ['three'], + fuzziness: 2, + max_expansions: 30 + }); + + var compiled = JSON.parse(JSON.stringify(query)); + var expected = require('../fixture/autocomplete_linguistic_multiple_tokens_complete_numeric_fuzzy'); + + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'autocomplete_linguistic_multiple_tokens_complete_numeric_fuzzy'); + t.end(); + }); + + test('valid lingustic fuzzy autocomplete with comma delimited admin section', function(t) { + var query = generate({ + text: 'one two, three', + parsed_text: { + subject: 'one two', + name: 'one two', + admin: 'three' + }, + tokens: ['one','two'], + tokens_complete: ['one','two'], + tokens_incomplete: [], + fuzziness: 1, + max_expansions: 40 + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/autocomplete_linguistic_with_admin_fuzzy'); + + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'autocomplete_linguistic_with_admin_fuzzy'); + t.end(); + }); + + // if the final token is less than 2 chars we need to remove it from the string. + // note: this behaviour is tied to having a min_gram size of 2. + // note: if 1 grams are enabled at a later date, remove this behaviour. 
+ test('valid lingustic autocomplete final token', function(t) { + var query = generate({ + text: 'one t', + tokens: ['one','t'], + tokens_complete: ['one'], + tokens_incomplete: [], + fuzziness: 1, + max_expansions: 40 + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/autocomplete_linguistic_final_token_fuzzy'); + + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'autocomplete_linguistic_final_token_fuzzy'); + t.end(); + }); + + + /* + * Custom pelias.json settings used by the next 3 tests + */ + const customConfig = { + api: { + autocomplete: { + exclude_address_length: 2 + } + } + }; + + const configWithCustomSettings = { + generate: function() { + return realPeliasConfig.generateCustom(customConfig); + } + }; + + const generate_custom = proxyquire('../../../query/autocomplete', { + 'pelias-config': configWithCustomSettings + }); + + test('valid lingustic fuzzy autocomplete one character token', function(t) { + var query = generate_custom({ + text: 't', + tokens: ['t'], + tokens_complete: [], + tokens_incomplete: ['t'], + fuzziness: 1, + max_expansions: 40 + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/autocomplete_linguistic_one_char_token_fuzzy'); + + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'autocomplete_linguistic_one_char_token_fuzzy'); + t.end(); + }); + + test('valid linguistic fuzzy autocomplete two character token', function(t) { + // generate_custom runs with customConfig (api.autocomplete.exclude_address_length = 2) + var query = generate_custom({ + text: 'te', + tokens: ['te'], + tokens_complete: [], + tokens_incomplete: ['te'], + fuzziness: 1, + max_expansions: 40 + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected =
require('../fixture/autocomplete_linguistic_two_char_token_fuzzy'); + + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'autocomplete_linguistic_two_char_token_fuzzy'); + t.end(); + }); + + test('valid linguistic fuzzy autocomplete three character token', function(t) { + var query = generate_custom({ + text: 'tes', + tokens: ['tes'], + tokens_complete: [], + tokens_incomplete: ['tes'], + fuzziness: 1, + max_expansions: 40 + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/autocomplete_linguistic_three_char_token_fuzzy'); + + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'autocomplete_linguistic_three_char_token_fuzzy'); + t.end(); + }); + + // end tests with custom pelias.json settings + + test('fuzzy autocomplete + focus', function(t) { + var query = generate({ + text: 'test', + 'focus.point.lat': 29.49136, + 'focus.point.lon': -82.50622, + tokens: ['test'], + tokens_complete: [], + tokens_incomplete: ['test'], + fuzziness: 1, + max_expansions: 40 + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/autocomplete_linguistic_focus_fuzzy'); + + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'autocomplete_linguistic_focus_fuzzy'); + t.end(); + }); + + test('fuzzy autocomplete + focus on null island', function(t) { + var query = generate({ + text: 'test', + 'focus.point.lat': 0, + 'focus.point.lon': 0, + tokens: ['test'], + tokens_complete: [], + tokens_incomplete: ['test'], + fuzziness: 1, + max_expansions: 40 + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/autocomplete_linguistic_focus_null_island_fuzzy'); + + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'autocomplete_linguistic_focus_null_island_fuzzy'); 
+ t.end(); + }); + + test('fuzzy autocomplete + valid sources filter', function(t) { + var query = generate({ + 'text': 'test', + 'sources': ['test_source'], + tokens: ['test'], + tokens_complete: [], + tokens_incomplete: ['test'], + fuzziness: 1, + max_expansions: 40 + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/autocomplete_with_source_filtering_fuzzy'); + + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'valid fuzzy autocomplete query with source filtering'); + t.end(); + }); + + test('fuzzy autocomplete + valid layers filter', function(t) { + var query = generate({ + 'text': 'test', + 'layers': ['country'], + tokens: ['test'], + tokens_complete: [], + tokens_incomplete: ['test'], + fuzziness: 1, + max_expansions: 40 + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/autocomplete_with_layer_filtering_fuzzy'); + + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'valid fuzzy autocomplete query with layer filtering'); + t.end(); + }); + + test('fuzzy autocomplete + valid categories filter', function (t) { + var clean = { + text: 'test', + tokens: ['test'], + tokens_complete: [], + tokens_incomplete: ['test'], + categories: ['retail', 'food'], + fuzziness: 1, + max_expansions: 40 + }; + + var query = generate(clean); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/autocomplete_with_category_filtering_fuzzy'); + + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'valid fuzzy autocomplete query with category filtering'); + t.end(); + }); + + test('fuzzy autocomplete + single character street address', function(t) { + var query = generate({ + text: 'k road, laird', + parsed_text: { + subject: 'k road', + street: 'k road', + locality: 'laird', + admin: 'laird' + }, 
+ tokens: ['k', 'road'], + tokens_complete: ['k', 'road'], + tokens_incomplete: [], + fuzziness: 1, + max_expansions: 40 + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/autocomplete_single_character_street_fuzzy'); + + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'autocomplete_single_character_street_fuzzy'); + t.end(); + }); + + test('fuzzy autocomplete + valid boundary.country search', function(t) { + var query = generate({ + text: 'test', + tokens: ['test'], + tokens_complete: [], + tokens_incomplete: ['test'], + 'boundary.country': ['ABC'], + fuzziness: 1, + max_expansions: 40 + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/autocomplete_boundary_country_fuzzy'); + + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'fuzzy autocomplete: valid boundary.country query'); + t.end(); + }); + + test('fuzzy autocomplete + bbox around San Francisco', function(t) { + var query = generate({ + text: 'test', + 'boundary.rect.max_lat': 37.83239, + 'boundary.rect.max_lon': -122.35698, + 'boundary.rect.min_lat': 37.70808, + 'boundary.rect.min_lon': -122.51489, + tokens: ['test'], + tokens_complete: [], + tokens_incomplete: ['test'], + fuzziness: 1, + max_expansions: 40 + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/autocomplete_linguistic_bbox_san_francisco_fuzzy'); + + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'autocomplete_linguistic_bbox_san_francisco_fuzzy'); + t.end(); + }); + + test('autocomplete + circle around San Francisco', function(t) { + var query = generate({ + text: 'test', + 'boundary.circle.lat': 37.83239, + 'boundary.circle.lon': -122.35698, + 'boundary.circle.radius': 20, + tokens: ['test'], + tokens_complete: [], + tokens_incomplete: 
['test'], + fuzziness: 1, + max_expansions: 40 + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/autocomplete_linguistic_circle_san_francisco_fuzzy'); + + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'query matches autocomplete_linguistic_circle_san_francisco_fuzzy fixture'); + t.end(); + }); + + test('valid boundary.gid search', function(t) { + var query = generate({ + text: 'test', + tokens: ['test'], + tokens_complete: [], + tokens_incomplete: ['test'], + 'boundary.gid': '123', + fuzziness: 1, + max_expansions: 40 + }); + + var compiled = JSON.parse( JSON.stringify( query ) ); + var expected = require('../fixture/autocomplete_boundary_gid_fuzzy'); + + t.deepEqual(compiled.type, 'autocomplete', 'query type set'); + t.deepEqual(compiled.body, expected, 'fuzzy autocomplete: valid boundary.gid query'); + t.end(); + }); +}; + +module.exports.all = function (tape, common) { + + function test(name, testFunction) { + return tape('autocomplete fuzzy query ' + name, testFunction); + } + + for( var testCase in module.exports.tests ){ + module.exports.tests[testCase](test, common); + } +}; diff --git a/test/unit/run.js b/test/unit/run.js index d6da58903..a2c552431 100644 --- a/test/unit/run.js +++ b/test/unit/run.js @@ -65,10 +65,11 @@ var tests = [ require('./middleware/trimByGranularityStructured'), require('./middleware/requestLanguage'), require('./query/address_search_using_ids'), require('./query/autocomplete'), + require('./query/autocomplete_fuzzy'), require('./query/autocomplete_token_matching_permutations'), require('./query/autocomplete_defaults'), require('./query/autocomplete_with_custom_boosts'), require('./query/reverse'),
require('./query/reverse_defaults'), require('./query/search'),