Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parameterised fuzzy autocomplete #1510

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions query/autocomplete.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,15 @@ const toSingleField = require('./view/helper').toSingleField;
var views = {
custom_boosts: require('./view/boost_sources_and_layers'),
ngrams_strict: require('./view/ngrams_strict'),
ngrams_fuzzy: require('./view/ngrams_fuzzy'),
ngrams_last_token_only: require('./view/ngrams_last_token_only'),
ngrams_last_token_only_multi: require('./view/ngrams_last_token_only_multi'),
admin_multi_match_first: require('./view/admin_multi_match_first'),
admin_multi_match_last: require('./view/admin_multi_match_last'),
phrase_first_tokens_only: require('./view/phrase_first_tokens_only'),
boost_exact_matches: require('./view/boost_exact_matches'),
boost_exact_match_first_tokens_only: require('./view/boost_exact_match_first_tokens_only'),
boost_exact_match_last_tokens_only: require('./view/boost_exact_match_last_tokens_only'),
max_character_count_layer_filter: require('./view/max_character_count_layer_filter'),
focus_point_filter: require('./view/focus_point_distance_filter')
};
Expand Down Expand Up @@ -49,6 +52,8 @@ query.score( views.admin_multi_match_first( adminFields ), 'must');
query.score( views.admin_multi_match_last( adminFields ), 'must');

// scoring boost
query.score( views.boost_exact_match_first_tokens_only );
query.score( views.boost_exact_match_last_tokens_only );
query.score( peliasQuery.view.focus( peliasQuery.view.leaf.match_all ) );
query.score( peliasQuery.view.popularity( peliasQuery.view.leaf.match_all ) );
query.score( peliasQuery.view.population( peliasQuery.view.leaf.match_all ) );
Expand Down Expand Up @@ -175,6 +180,14 @@ function generateQuery( clean ){
textParser( clean, vs );
}

if (clean.fuzziness) {
vs.var('fuzzy:fuzziness', clean.fuzziness);

if (clean.max_expansions) {
vs.var('fuzzy:max_expansions', clean.max_expansions);
}
}

// set the 'add_name_to_multimatch' variable only in the case where one
// or more of the admin variables are set.
// the value 'enabled' is not relevant, it just needs to be any non-empty
Expand Down
8 changes: 7 additions & 1 deletion query/autocomplete_defaults.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,10 @@ module.exports = _.merge({}, peliasQuery.defaults, {
// generic multi_match config
'multi_match:type': 'cross_fields',
'multi_match:ngrams_strict:type': 'phrase',
'multi_match:ngrams_fuzzy:operator': 'and',
'multi_match:first_tokens_only:type': 'phrase',
'multi_match:boost_exact_matches:type': 'phrase',
'multi_match:first_tokens_only_fuzzy:operator': 'and',

// setting 'cutoff_frequency' will result in very common
// terms such as country not scoring at all
Expand Down Expand Up @@ -150,5 +152,9 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'custom:boosting:boost': 5, // multiply score by this number to increase the strength of the boost
'custom:boosting:max_boost': 50, // maximum boosting which can be applied (max_boost/boost = max_score)
'custom:boosting:score_mode': 'sum', // sum all function scores before multiplying the boost
'custom:boosting:boost_mode': 'multiply' // this mode is not relevant because there is no query section
'custom:boosting:boost_mode': 'multiply', // this mode is not relevant because there is no query section

'fuzzy:fuzziness': 0,
'fuzzy:max_expansions': 10,
'fuzzy:prefix_length': 1
});
17 changes: 17 additions & 0 deletions query/view/boost_exact_match_first_tokens_only.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
const peliasQuery = require('pelias-query');
const phrase_first_tokens_only = require('../view/phrase_first_tokens_only');

/**
This view is used to boost "exact" matches on first tokens when doing fuzzy queries.
**/
module.exports = function (vs) {
const fuzziness = vs.var('fuzzy:fuzziness').get();
if (!fuzziness) {
return null;
}

var vsCopy = new peliasQuery.Vars( vs.export() );
vsCopy.var('fuzzy:fuzziness', 0);

return phrase_first_tokens_only(vsCopy);
};
34 changes: 34 additions & 0 deletions query/view/boost_exact_match_last_tokens_only.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
const peliasQuery = require('pelias-query');
const ngrams_last_token_only = require('./ngrams_last_token_only');

/**
This view is used to boost "exact" matches on last tokens when doing fuzzy queries.
**/
module.exports = function (vs) {
const fuzziness = vs.var('fuzzy:fuzziness').get();
if (!fuzziness) {
return null;
}

var vsCopy = new peliasQuery.Vars( vs.export() );
vsCopy.var('fuzzy:fuzziness', 0);

// return the simple view for address queries
if( vsCopy.isset('input:street') ){ return ngrams_last_token_only(vsCopy); }

// get a copy of the *tokens_incomplete* tokens produced from the input:name
var tokens = vsCopy.var('input:name:tokens_incomplete').get();

// no valid tokens to use, fail now, don't render this view.
if (!tokens || tokens.length < 1) { return null; }

// return the simple view for queries with no complete tokens
var complete_tokens = vsCopy.var('input:name:tokens_complete').get();
if (!complete_tokens || complete_tokens.length < 1) { return ngrams_last_token_only(vsCopy); }

// return the simple view when every complete token is numeric
var all_complete_tokens_numeric = complete_tokens.every(token => !token.replace(/[0-9]/g, '').length);
if (all_complete_tokens_numeric) { return ngrams_last_token_only(vsCopy); }

return null;
};
6 changes: 3 additions & 3 deletions query/view/boost_exact_matches.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ module.exports = function( vs ){

// set 'input' to be only the fully completed characters
vs.var(`multi_match:${view_name}:input`).set( tokens.join(' ') );
vs.var(`multi_match:${view_name}:fields`).set(toMultiFields(searchDefaults['phrase:field'], vs.var('lang').get()));
vs.var(`multi_match:${view_name}:fields`).set(toMultiFields(vs.var('phrase:field').get(), vs.var('lang').get()));

vs.var(`multi_match:${view_name}:analyzer`).set(searchDefaults['phrase:analyzer']);
vs.var(`multi_match:${view_name}:analyzer`).set(vs.var('phrase:analyzer').get());
vs.var(`multi_match:${view_name}:boost`).set(vs.var('phrase:boost').get());
vs.var(`multi_match:${view_name}:slop`).set(vs.var('phrase:slop').get());

return peliasQuery.view.leaf.match_phrase(view_name)( vs );
return peliasQuery.view.leaf.multi_match(view_name) (vs);
};
20 changes: 20 additions & 0 deletions query/view/ngrams_fuzzy.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
const peliasQuery = require('pelias-query');
const toMultiFields = require('./helper').toMultiFields;

/**
Ngrams view with fuzziness
**/

module.exports = function (vs) {
vs.var('multi_match:ngrams_fuzzy:input', vs.var('input:name').get());
vs.var('multi_match:ngrams_fuzzy:fields', toMultiFields(vs.var('ngram:field').get(), vs.var('lang').get()));

vs.var('multi_match:ngrams_fuzzy:analyzer', vs.var('ngram:analyzer').get());
vs.var('multi_match:ngrams_fuzzy:boost', vs.var('ngram:boost').get());

vs.var('multi_match:ngrams_fuzzy:fuzziness', vs.var('fuzzy:fuzziness').get());
vs.var('multi_match:ngrams_fuzzy:max_expansions', vs.var('fuzzy:max_expansions').get());
vs.var('multi_match:ngrams_fuzzy:prefix_length', vs.var('fuzzy:prefix_length').get());

return peliasQuery.view.leaf.multi_match('ngrams_fuzzy')(vs);
};
7 changes: 5 additions & 2 deletions query/view/ngrams_last_token_only.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
var peliasQuery = require('pelias-query'),
ngrams_strict = require('./ngrams_strict');
ngrams_strict = require('./ngrams_strict'),
ngrams_fuzzy = require('./ngrams_fuzzy');
/**
Ngrams view which trims the 'input:name' and only uses the LAST TOKEN.

Expand All @@ -25,10 +26,12 @@ module.exports = function( vs ){
// set the 'name' variable in the copy to only the last token
vsCopy.var('input:name').set( tokens.join(' ') );

const fuzziness = vs.var('fuzzy:fuzziness').get();

// return the view rendered using the copy
return {
'constant_score': {
'filter': ngrams_strict( vsCopy )
'filter': fuzziness === 0 ? ngrams_strict( vsCopy ) : ngrams_fuzzy( vsCopy )
}
};
};
13 changes: 11 additions & 2 deletions query/view/phrase_first_tokens_only.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ const toMultiFields = require('./helper').toMultiFields;
**/

module.exports = function( vs ){
const view_name = 'first_tokens_only';
const fuzziness = vs.var('fuzzy:fuzziness').get();

const view_name = fuzziness ? 'first_tokens_only_fuzzy' : 'first_tokens_only';
// get a copy of the *complete* tokens produced from the input:name
const tokens = vs.var('input:name:tokens_complete').get();

Expand All @@ -22,7 +24,14 @@ module.exports = function( vs ){

vs.var(`multi_match:${view_name}:analyzer`).set(vs.var('phrase:analyzer').get());
vs.var(`multi_match:${view_name}:boost`).set(vs.var('phrase:boost').get());
vs.var(`multi_match:${view_name}:slop`).set(vs.var('phrase:slop').get());

if (fuzziness === 0) {
vs.var(`multi_match:${view_name}:slop`).set(vs.var('phrase:slop').get());
} else {
vs.var(`multi_match:${view_name}:fuzziness`).set(fuzziness);
vs.var(`multi_match:${view_name}:max_expansions`).set(vs.var('fuzzy:max_expansions').get());
vs.var(`multi_match:${view_name}:prefix_length`).set(vs.var('fuzzy:prefix_length').get());
}

return peliasQuery.view.leaf.multi_match(view_name)( vs );
};
46 changes: 46 additions & 0 deletions sanitizer/_fuzziness.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
const _ = require('lodash');

/**
  Validate the 'fuzziness' and 'max_expansions' request parameters.

  @param {object} raw   unsanitized request parameters
  @param {object} clean sanitized parameters; 'fuzziness' and 'max_expansions'
                        are written here when valid
  @returns {{errors: string[], warnings: string[]}} validation messages

  'fuzziness' accepts 1, 2 or the string 'AUTO'.
  'max_expansions' accepts integers from 0 to 50 and is only inspected when
  'fuzziness' is present in the request.
**/
function _sanitize( raw, clean ){
  const IS_NUMERIC_REGEXP = /^\d+$/;

  // parse a digits-only value as a base-10 integer, NaN for anything else;
  // NaN fails every range comparison below and so falls through to the error branch
  const toInteger = (value) => IS_NUMERIC_REGEXP.test(value) ? parseInt(value, 10) : NaN;

  // error & warning messages
  const messages = { errors: [], warnings: [] };

  if (_.isUndefined(raw)) {
    return messages;
  }

  if (_.has(raw, 'fuzziness')) {
    const fuzziness = toInteger(raw.fuzziness);

    if (raw.fuzziness === 'AUTO') {
      clean.fuzziness = raw.fuzziness;
    } else if (fuzziness > 0 && fuzziness <= 2) {
      clean.fuzziness = fuzziness;
    } else {
      messages.errors.push('invalid value for fuzziness; valid values are 1, 2 and AUTO');
    }

    if (_.has(raw, 'max_expansions')) {
      const maxExpansions = toInteger(raw.max_expansions);

      // NOTE(review): 0 passes validation but is falsy, so a consumer that
      // checks `if (clean.max_expansions)` will ignore it -- confirm intended.
      if (maxExpansions >= 0 && maxExpansions <= 50) {
        clean.max_expansions = maxExpansions;
      } else {
        messages.errors.push('invalid value for max_expansions; valid values are between 0 and 50');
      }
    }
  }

  return messages;
}

// list the request parameters this sanitizer recognises
function _expected() {
  const parameterNames = ['fuzziness', 'max_expansions'];
  return parameterNames.map((name) => ({ name }));
}

module.exports = () => ({
sanitize: _sanitize,
expected: _expected
});
5 changes: 3 additions & 2 deletions sanitizer/autocomplete.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ module.exports.middleware = (_api_pelias_config) => {
boundary_country: require('../sanitizer/_boundary_country')(),
categories: require('../sanitizer/_categories')(),
request_language: require('../sanitizer/_request_language')(),
boundary_gid: require('../sanitizer/_boundary_gid')()
};
boundary_gid: require('../sanitizer/_boundary_gid')(),
fuzziness: require('../sanitizer/_fuzziness')()
};

return ( req, res, next ) => {
sanitizeAll.runAllChecks(req, sanitizers);
Expand Down
69 changes: 69 additions & 0 deletions test/unit/fixture/autocomplete_boundary_country_fuzzy.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Test fixture: a query body combining a fuzzy multi_match on the name fields
// with a country filter.
// NOTE(review): presumably the expected autocomplete query for text 'test',
// boundary.country 'ABC', fuzziness 1 and max_expansions 40 -- confirm against the test.
module.exports = {
'query': {
'bool': {
// required clause: fuzzy match on the name fields, wrapped in constant_score
'must': [{
'constant_score': {
'filter': {
'multi_match': {
'fields': ['name.default', 'name.en'],
'analyzer': 'peliasQuery',
'query': 'test',
'boost': 100,
'type': 'best_fields',
'operator': 'and',
'fuzziness': 1,
'max_expansions': 40,
'prefix_length': 1
}
}
}
}],
// optional scoring clauses: function_score on 'popularity', then on 'population'
'should':[{
'function_score': {
'query': {
'match_all': {}
},
'max_boost': 20,
'score_mode': 'first',
'boost_mode': 'replace',
'functions': [{
'field_value_factor': {
'modifier': 'log1p',
'field': 'popularity',
'missing': 1
},
'weight': 1
}]
}
},{
'function_score': {
'query': {
'match_all': {}
},
'max_boost': 20,
'score_mode': 'first',
'boost_mode': 'replace',
'functions': [{
'field_value_factor': {
'modifier': 'log1p',
'field': 'population',
'missing': 1
},
'weight': 3
}]
}
}],
// filter clause: match 'ABC' against parent.country_a.ngram
'filter': [{
'match': {
'parent.country_a.ngram': {
'analyzer': 'standard',
'query': 'ABC'
}
}
}]
}
},
'sort': [ '_score' ],
'size': 20,
'track_scores': true
};
67 changes: 67 additions & 0 deletions test/unit/fixture/autocomplete_boundary_gid_fuzzy.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Test fixture: a query body combining a fuzzy multi_match on the name fields
// with a filter on the parent id fields.
// NOTE(review): presumably the expected autocomplete query for text 'test',
// boundary.gid '123', fuzziness 1 and max_expansions 40 -- confirm against the test.
module.exports = {
'query': {
'bool': {
// required clause: fuzzy match on the name fields, wrapped in constant_score
'must': [{
'constant_score': {
'filter': {
'multi_match': {
'fields': ['name.default', 'name.en'],
'analyzer': 'peliasQuery',
'query': 'test',
'boost': 100,
'type': 'best_fields',
'operator': 'and',
'fuzziness': 1,
'max_expansions': 40,
'prefix_length': 1
}
}
}
}],
// optional scoring clauses: function_score on 'popularity', then on 'population'
'should':[{
'function_score': {
'query': {
'match_all': {}
},
'max_boost': 20,
'score_mode': 'first',
'boost_mode': 'replace',
'functions': [{
'field_value_factor': {
'modifier': 'log1p',
'field': 'popularity',
'missing': 1
},
'weight': 1
}]
}
},{
'function_score': {
'query': {
'match_all': {}
},
'max_boost': 20,
'score_mode': 'first',
'boost_mode': 'replace',
'functions': [{
'field_value_factor': {
'modifier': 'log1p',
'field': 'population',
'missing': 1
},
'weight': 3
}]
}
}],
// filter clause: match '123' against every field matching parent.*_id
'filter': [{
'multi_match': {
'fields': ['parent.*_id'],
'query': '123'
}
}]
}
},
'sort': [ '_score' ],
'size': 20,
'track_scores': true
};
Loading