From cba6b62b506fe40662033ebcd75a09bbfd656022 Mon Sep 17 00:00:00 2001 From: Joxit Date: Wed, 4 Dec 2019 17:24:10 +0100 Subject: [PATCH 1/3] fix(peliasQueryFullToken): remove unused analyzer --- integration/address_matching.js | 2 +- integration/analyzer_peliasQueryFullToken.js | 250 ------------------ .../autocomplete_abbreviated_street_names.js | 8 +- ...ocomplete_directional_synonym_expansion.js | 16 +- .../autocomplete_street_synonym_expansion.js | 16 +- integration/run.js | 1 - settings.js | 17 -- test/fixtures/expected.json | 20 -- test/settings.js | 29 -- 9 files changed, 21 insertions(+), 338 deletions(-) delete mode 100644 integration/analyzer_peliasQueryFullToken.js diff --git a/integration/address_matching.js b/integration/address_matching.js index 8499159c..74accd66 100644 --- a/integration/address_matching.js +++ b/integration/address_matching.js @@ -273,7 +273,7 @@ module.exports.tests.venue_vs_address = function(test, common){ { 'match_phrase': { 'name.default': { - 'analyzer': 'peliasQueryFullToken', + 'analyzer': 'peliasQuery', 'boost': 1, 'slop': 3, 'query': 'union square' diff --git a/integration/analyzer_peliasQueryFullToken.js b/integration/analyzer_peliasQueryFullToken.js deleted file mode 100644 index ce82dd47..00000000 --- a/integration/analyzer_peliasQueryFullToken.js +++ /dev/null @@ -1,250 +0,0 @@ -// validate analyzer is behaving as expected - -const elastictest = require('elastictest'); -const schema = require('../schema'); -const punctuation = require('../punctuation'); -const config = require('pelias-config').generate(); - -module.exports.tests = {}; - -module.exports.tests.analyze = function(test, common){ - test( 'analyze', function(t){ - - var suite = new elastictest.Suite( common.clientOpts, { schema: schema } ); - var assertAnalysis = common.analyze.bind( null, suite, t, 'peliasQueryFullToken' ); - suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up - - assertAnalysis( 'lowercase', 'F', ['f']); - assertAnalysis( 'asciifolding', 'á', ['a']); - assertAnalysis( 'asciifolding', 'ß', ['ss']); - assertAnalysis( 'asciifolding', 'æ', ['ae']); - assertAnalysis( 'asciifolding', 'ł', ['l']); - assertAnalysis( 'asciifolding', 'ɰ', ['m']); - assertAnalysis( 'trim', ' f ', ['f'] ); - assertAnalysis('ampersand', 'a and b', ['0:a', '1:and', '1:&', '2:b']); - assertAnalysis('ampersand', 'a & b', ['0:a', '1:&', '1:and', '1:und', '2:b']); - assertAnalysis('ampersand', 'a and & and b', [ - '0:a', - '1:and', '1:&', - '2:&', '2:and', '2:und', - '3:and', '3:&', - '4:b' - ]); - assertAnalysis('ampersand', 'land', ['land'] ); // should not replace inside tokens - - assertAnalysis('keyword_street_suffix', 'foo Street', ['0:foo', '1:street', '1:st'] ); - assertAnalysis('keyword_street_suffix', 'foo Road', ['0:foo', '1:road', '1:rd'] ); - assertAnalysis('keyword_street_suffix', 'foo Crescent', ['0:foo', '1:crescent', '1:cres'] ); - assertAnalysis('keyword_compass', 'north foo', ['0:north', '0:n', '1:foo'] ); - assertAnalysis('keyword_compass', 'SouthWest foo', ['0:southwest', '0:sw', '1:foo'] ); - assertAnalysis('keyword_compass', 'foo SouthWest', ['0:foo', '1:southwest', '1:sw'] ); - - assertAnalysis( 'peliasQueryFullTokenFilter', '1 a ab abc abcdefghij', ['1','a','ab','abc','abcdefghij'] ); - assertAnalysis( 'removeAllZeroNumericPrefix', '00001', ['1'] ); - assertAnalysis( 'unique', '1 1 1', ['1','1','1'] ); - assertAnalysis( 'notnull', ' / / ', [] ); - - assertAnalysis( 'no kstem', 'mcdonalds', ['mcdonalds'] ); - assertAnalysis( 'no kstem', 'McDonald\'s', ['mcdonalds'] ); - assertAnalysis( 'no kstem', 'peoples', ['peoples'] ); - - // remove punctuation (handled by the char_filter) - assertAnalysis( 'punctuation', punctuation.all.join(''), ['0:&', '0:and', '0:und'] ); - - // ensure that very large tokens are created - assertAnalysis( 'largeGrams', 'grolmanstrasse', [ 'grolmanstrasse' ]); - - suite.run( t.end ); - }); -}; - -module.exports.tests.address_suffix_expansions = function(test, common){ - test( 'address suffix expansions', function(t){ - - var suite = new elastictest.Suite( common.clientOpts, { schema: schema } ); - var assertAnalysis = common.analyze.bind( null, suite, t, 'peliasQueryFullToken' ); - suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up - - assertAnalysis('safe expansions', 'aly', ['0:aly', '0:alley' ]); - - assertAnalysis('safe expansions', 'xing', ['0:xing', '0:crossing' ]); - - assertAnalysis('safe expansions', 'rd', ['0:rd', '0:road' ]); - - assertAnalysis('safe expansion', 'ct st', ['0:ct', '0:court', '1:st', '1:street' ]); - - suite.run( t.end ); - }); -}; - -// stop words should be disabled so that the entire token is used -module.exports.tests.stop_words = function(test, common){ - test( 'stop words', function(t){ - - var suite = new elastictest.Suite( common.clientOpts, { schema: schema } ); - var assertAnalysis = common.analyze.bind( null, suite, t, 'peliasQueryFullToken' ); - suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up - - assertAnalysis( 'street suffix', 'AB street', [ '0:ab', '1:street', '1:st' ]); - - assertAnalysis( 'street suffix (abbreviation)', 'AB st', [ '0:ab', '1:st', '1:street' ]); - - suite.run( t.end ); - }); -}; - -module.exports.tests.functional = function(test, common){ - test( 'functional', function(t){ - - var suite = new elastictest.Suite( common.clientOpts, { schema: schema } ); - var assertAnalysis = common.analyze.bind( null, suite, t, 'peliasQueryFullToken' ); - suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up - - assertAnalysis( 'country', 'Trinidad and Tobago', [ - '0:trinidad', '1:and', '1:&', '2:tobago' - ]); - - assertAnalysis( 'place', 'Toys "R" Us!', [ - '0:toys', '1:r', '2:us' - ]); - - assertAnalysis( 'address', '101 mapzen place', [ - '0:101', '1:mapzen', '2:place', '2:pl' - ]); - - suite.run( t.end ); - }); -}; - -module.exports.tests.tokenizer = function(test, common){ - test( 'tokenizer', function(t){ - - var suite = new elastictest.Suite( common.clientOpts, { schema: schema } ); - var assertAnalysis = common.analyze.bind( null, suite, t, 'peliasQueryFullToken' ); - suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up - - const expected = ['0:bedell', '1:street', '1:st', '2:133', '3:avenue', '3:ave', '3:av']; - - // specify 2 streets with a delimeter - assertAnalysis( 'forward slash', 'Bedell Street/133rd Avenue', expected); - assertAnalysis( 'forward slash', 'Bedell Street /133rd Avenue', expected); - assertAnalysis( 'forward slash', 'Bedell Street/ 133rd Avenue', expected); - assertAnalysis( 'back slash', 'Bedell Street\\133rd Avenue', expected); - assertAnalysis( 'back slash', 'Bedell Street \\133rd Avenue', expected); - assertAnalysis( 'back slash', 'Bedell Street\\ 133rd Avenue', expected); - assertAnalysis( 'comma', 'Bedell Street,133rd Avenue', expected); - assertAnalysis( 'comma', 'Bedell Street ,133rd Avenue', expected); - assertAnalysis( 'comma', 'Bedell Street, 133rd Avenue', expected); - - suite.run( t.end ); - }); -}; - -// test the minimum amount of slop required to retrieve address documents -module.exports.tests.slop = function(test, common){ - test( 'slop', function(t){ - - var suite = new elastictest.Suite( common.clientOpts, { schema: schema } ); - suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up - - // index a document - suite.action( function( done ){ - suite.client.index({ - index: suite.props.index, - type: config.schema.typeName, - id: '1', - body: { name: { default: '52 Görlitzer Straße' } } - }, done); - }); - - // search using 'peliasQueryFullToken' - // in this case we require a slop of 3 to return the same - // record with the street number and street name reversed. - // (as is common in European countries, such as Germany). - suite.assert( function( done ){ - suite.client.search({ - index: suite.props.index, - type: config.schema.typeName, - body: { query: { match_phrase: { - 'name.default': { - 'analyzer': 'peliasQueryFullToken', - 'query': 'Görlitzer Straße 52', - 'slop': 3, - } - }}} - }, function( err, res ){ - t.equal( err, undefined ); - t.equal( res.hits.total, 1, 'document found' ); - done(); - }); - }); - - suite.run( t.end ); - }); -}; - -module.exports.tests.address = function(test, common){ - test( 'address', function(t){ - - var suite = new elastictest.Suite( common.clientOpts, { schema: schema } ); - var assertAnalysis = common.analyze.bind( null, suite, t, 'peliasQueryFullToken' ); - suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up - - assertAnalysis( 'address', '101 mapzen place', [ - '0:101', '1:mapzen', '2:place', '2:pl' - ]); - - assertAnalysis( 'address', '30 w 26 st', [ - '0:30', '1:w', '1:west', '2:26', '3:st', '3:street' - ]); - - assertAnalysis( 'address', '4B 921 83 st', [ - '0:4b', '1:921', '2:83', '3:st', '3:street' - ]); - - suite.run( t.end ); - }); -}; - -// @see: https://github.com/pelias/api/issues/600 -module.exports.tests.unicode = function(test, common){ - test( 'normalization', function(t){ - - var suite = new elastictest.Suite( common.clientOpts, { schema: schema } ); - var assertAnalysis = common.analyze.bind( null, suite, t, 'peliasQueryFullToken' ); - suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up - - var latin_large_letter_e_with_acute = String.fromCodePoint(0x00C9); - var latin_small_letter_e_with_acute = String.fromCodePoint(0x00E9); - var combining_acute_accent = String.fromCodePoint(0x0301); - var latin_large_letter_e = String.fromCodePoint(0x0045); - var latin_small_letter_e = String.fromCodePoint(0x0065); - - // Chambéry (both forms appear the same) - var composed = "Chamb" + latin_small_letter_e_with_acute + "ry"; - var decomposed = "Chamb" + combining_acute_accent + latin_small_letter_e + "ry" - - assertAnalysis( 'composed', composed, ['chambery'] ); - assertAnalysis( 'decomposed', decomposed, ['chambery'] ); - - // Één (both forms appear the same) - var composed = latin_large_letter_e_with_acute + latin_small_letter_e_with_acute + "n"; - var decomposed = combining_acute_accent + latin_large_letter_e + combining_acute_accent + latin_small_letter_e + "n" - - assertAnalysis( 'composed', composed, ['een'] ); - assertAnalysis( 'decomposed', decomposed, ['een'] ); - - suite.run( t.end ); - }); -}; - -module.exports.all = function (tape, common) { - - function test(name, testFunction) { - return tape('peliasQueryFullToken: ' + name, testFunction); - } - - for( var testCase in module.exports.tests ){ - module.exports.tests[testCase](test, common); - } -}; diff --git a/integration/autocomplete_abbreviated_street_names.js b/integration/autocomplete_abbreviated_street_names.js index 4e7cfe23..df49c78d 100644 --- a/integration/autocomplete_abbreviated_street_names.js +++ b/integration/autocomplete_abbreviated_street_names.js @@ -45,14 +45,14 @@ module.exports.tests.index_expanded_form_search_contracted = function(test, comm }); }); - // search using 'peliasQueryFullToken' + // search using 'peliasQuery' suite.assert( function( done ){ suite.client.search({ index: suite.props.index, type: config.schema.typeName, body: { query: { match: { 'name.default': { - 'analyzer': 'peliasQueryFullToken', + 'analyzer': 'peliasQuery', 'query': 'Grolmanstr.' } }}} @@ -107,7 +107,7 @@ module.exports.tests.index_expanded_form_search_contracted = function(test, comm // }); // }); -// // search using 'peliasQueryFullToken' +// // search using 'peliasQuery' // // @note: this case is currently not supported. // // Please index your data in the expanded form. @@ -117,7 +117,7 @@ module.exports.tests.index_expanded_form_search_contracted = function(test, comm // type: config.schema.typeName, // body: { query: { match: { // 'name.default': { -// 'analyzer': 'peliasQueryFullToken', +// 'analyzer': 'peliasQuery', // 'query': 'Grolmanstraße' // } // }}} diff --git a/integration/autocomplete_directional_synonym_expansion.js b/integration/autocomplete_directional_synonym_expansion.js index b6a5f552..eef95079 100644 --- a/integration/autocomplete_directional_synonym_expansion.js +++ b/integration/autocomplete_directional_synonym_expansion.js @@ -45,14 +45,14 @@ module.exports.tests.index_and_retrieve_expanded_form = function(test, common){ }); }); - // search using 'peliasQueryFullToken' + // search using 'peliasQuery' suite.assert( function( done ){ suite.client.search({ index: suite.props.index, type: config.schema.typeName, body: { query: { match: { 'name.default': { - 'analyzer': 'peliasQueryFullToken', + 'analyzer': 'peliasQuery', 'query': 'north' } }}} @@ -102,14 +102,14 @@ module.exports.tests.index_and_retrieve_contracted_form = function(test, common) }); }); - // search using 'peliasQueryFullToken' + // search using 'peliasQuery' suite.assert( function( done ){ suite.client.search({ index: suite.props.index, type: config.schema.typeName, body: { query: { match: { 'name.default': { - 'analyzer': 'peliasQueryFullToken', + 'analyzer': 'peliasQuery', 'query': 'n' } }}} @@ -159,14 +159,14 @@ module.exports.tests.index_and_retrieve_mixed_form_1 = function(test, common){ }); }); - // search using 'peliasQueryFullToken' + // search using 'peliasQuery' suite.assert( function( done ){ suite.client.search({ index: suite.props.index, type: config.schema.typeName, body: { query: { match: { 'name.default': { - 'analyzer': 'peliasQueryFullToken', + 'analyzer': 'peliasQuery', 'query': 'north' } }}} @@ -216,14 +216,14 @@ module.exports.tests.index_and_retrieve_mixed_form_2 = function(test, common){ }); }); - // search using 'peliasQueryFullToken' + // search using 'peliasQuery' suite.assert( function( done ){ suite.client.search({ index: suite.props.index, type: config.schema.typeName, body: { query: { match: { 'name.default': { - 'analyzer': 'peliasQueryFullToken', + 'analyzer': 'peliasQuery', 'query': 'n' } }}} diff --git a/integration/autocomplete_street_synonym_expansion.js b/integration/autocomplete_street_synonym_expansion.js index 88ef0de3..20832b5d 100644 --- a/integration/autocomplete_street_synonym_expansion.js +++ b/integration/autocomplete_street_synonym_expansion.js @@ -45,14 +45,14 @@ module.exports.tests.index_and_retrieve_expanded_form = function(test, common){ }); }); - // search using 'peliasQueryFullToken' + // search using 'peliasQuery' suite.assert( function( done ){ suite.client.search({ index: suite.props.index, type: config.schema.typeName, body: { query: { match: { 'name.default': { - 'analyzer': 'peliasQueryFullToken', + 'analyzer': 'peliasQuery', 'query': 'center' } }}} @@ -102,14 +102,14 @@ module.exports.tests.index_and_retrieve_contracted_form = function(test, common) }); }); - // search using 'peliasQueryFullToken' + // search using 'peliasQuery' suite.assert( function( done ){ suite.client.search({ index: suite.props.index, type: config.schema.typeName, body: { query: { match: { 'name.default': { - 'analyzer': 'peliasQueryFullToken', + 'analyzer': 'peliasQuery', 'query': 'ctr' } }}} @@ -159,14 +159,14 @@ module.exports.tests.index_and_retrieve_mixed_form_1 = function(test, common){ }); }); - // search using 'peliasQueryFullToken' + // search using 'peliasQuery' suite.assert( function( done ){ suite.client.search({ index: suite.props.index, type: config.schema.typeName, body: { query: { match: { 'name.default': { - 'analyzer': 'peliasQueryFullToken', + 'analyzer': 'peliasQuery', 'query': 'center' } }}} @@ -216,14 +216,14 @@ module.exports.tests.index_and_retrieve_mixed_form_2 = function(test, common){ }); }); - // search using 'peliasQueryFullToken' + // search using 'peliasQuery' suite.assert( function( done ){ suite.client.search({ index: suite.props.index, type: config.schema.typeName, body: { query: { match: { 'name.default': { - 'analyzer': 'peliasQueryFullToken', + 'analyzer': 'peliasQuery', 'query': 'ctr' } }}} diff --git a/integration/run.js b/integration/run.js index 22e22876..ec8d227e 100644 --- a/integration/run.js +++ b/integration/run.js @@ -72,7 +72,6 @@ var tests = [ require('./analyzer_peliasIndexOneEdgeGram.js'), require('./analyzer_peliasQuery.js'), require('./analyzer_peliasQueryPartialToken.js'), - require('./analyzer_peliasQueryFullToken.js'), require('./analyzer_peliasPhrase.js'), require('./analyzer_peliasAdmin.js'), require('./analyzer_peliasHousenumber.js'), diff --git a/settings.js b/settings.js index 7de859df..417c2a68 100644 --- a/settings.js +++ b/settings.js @@ -100,23 +100,6 @@ function generate(){ "notnull" ] }, - "peliasQueryFullToken" : { - "type": "custom", - "tokenizer" : "peliasNameTokenizer", - "char_filter" : ["punctuation", "nfkc_normalizer"], - "filter": [ - "lowercase", - "icu_folding", - "trim", - "remove_ordinals", - "street_suffix", - "directionals", - "ampersand", - "removeAllZeroNumericPrefix", - "unique_only_same_position", - "notnull" - ] - }, "peliasPhrase": { "type": "custom", "tokenizer":"peliasNameTokenizer", diff --git a/test/fixtures/expected.json b/test/fixtures/expected.json index 2788a92c..ec6d062e 100644 --- a/test/fixtures/expected.json +++ b/test/fixtures/expected.json @@ -89,26 +89,6 @@ "notnull" ] }, - "peliasQueryFullToken": { - "type": "custom", - "tokenizer": "peliasNameTokenizer", - "char_filter": [ - "punctuation", - "nfkc_normalizer" - ], - "filter": [ - "lowercase", - "icu_folding", - "trim", - "remove_ordinals", - "street_suffix", - "directionals", - "ampersand", - "removeAllZeroNumericPrefix", - "unique_only_same_position", - "notnull" - ] - }, "peliasPhrase": { "type": "custom", "tokenizer": "peliasNameTokenizer", diff --git a/test/settings.js b/test/settings.js index 1956e27d..77cbde00 100644 --- a/test/settings.js +++ b/test/settings.js @@ -120,35 +120,6 @@ module.exports.tests.peliasQueryAnalyzer = function (test, common) { }); }; -module.exports.tests.peliasQueryFullTokenAnalyzer = function (test, common) { - test('has peliasQueryFullToken analyzer', function (t) { - var s = settings(); - t.equal(typeof s.analysis.analyzer.peliasQueryFullToken, 'object', 'there is a peliasQueryFullToken analyzer'); - var analyzer = s.analysis.analyzer.peliasQueryFullToken; - t.equal(analyzer.type, 'custom', 'custom analyzer'); - t.equal(typeof analyzer.tokenizer, 'string', 'tokenizer specified'); - t.deepEqual(analyzer.char_filter, ["punctuation", "nfkc_normalizer"], 'character filters specified'); - t.true(Array.isArray(analyzer.filter), 'filters specified'); - t.end(); - }); - test('peliasQueryFullToken token filters', function (t) { - var analyzer = settings().analysis.analyzer.peliasQueryFullToken; - t.deepEqual(analyzer.filter, [ - "lowercase", - "icu_folding", - "trim", - "remove_ordinals", - "street_suffix", - "directionals", - "ampersand", - "removeAllZeroNumericPrefix", - "unique_only_same_position", - "notnull" - ]); - t.end(); - }); -}; - module.exports.tests.peliasQueryPartialTokenAnalyzer = function (test, common) { test('has peliasQueryPartialToken analyzer', function (t) { var s = settings(); From 1f94780465fc9a14e23ddbaf0abe2d04268387b8 Mon Sep 17 00:00:00 2001 From: Joxit Date: Wed, 11 Dec 2019 22:28:18 +0100 Subject: [PATCH 2/3] fix(peliasQueryPartialToken): remove unused analyzer --- .../analyzer_peliasQueryPartialToken.js | 179 ------------------ .../autocomplete_abbreviated_street_names.js | 18 -- ...ocomplete_directional_synonym_expansion.js | 44 +---- .../autocomplete_street_synonym_expansion.js | 44 +---- integration/run.js | 1 - settings.js | 16 -- test/fixtures/expected.json | 19 -- test/settings.js | 28 --- 8 files changed, 8 insertions(+), 341 deletions(-) delete mode 100644 integration/analyzer_peliasQueryPartialToken.js diff --git a/integration/analyzer_peliasQueryPartialToken.js b/integration/analyzer_peliasQueryPartialToken.js deleted file mode 100644 index 480982ec..00000000 --- a/integration/analyzer_peliasQueryPartialToken.js +++ /dev/null @@ -1,179 +0,0 @@ -// validate analyzer is behaving as expected - -var tape = require('tape'), - elastictest = require('elastictest'), - schema = require('../schema'), - punctuation = require('../punctuation'); - -module.exports.tests = {}; - -module.exports.tests.analyze = function(test, common){ - test( 'analyze', function(t){ - - var suite = new elastictest.Suite( common.clientOpts, { schema: schema } ); - var assertAnalysis = common.analyze.bind( null, suite, t, 'peliasQueryPartialToken' ); - suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up - - assertAnalysis( 'lowercase', 'F', ['f']); - assertAnalysis( 'asciifolding', 'é', ['e']); - assertAnalysis( 'asciifolding', 'ß', ['ss']); - assertAnalysis( 'asciifolding', 'æ', ['ae']); - assertAnalysis( 'asciifolding', 'ł', ['l']); - assertAnalysis( 'asciifolding', 'ɰ', ['m']); - assertAnalysis( 'trim', ' f ', ['f'] ); - assertAnalysis('ampersand', 'a and b', [ - '0:a', - '1:and', '1:&', - '2:b' - ]); - assertAnalysis('ampersand', 'a & b', [ - '0:a', - '1:&', '1:and', '1:und', - '2:b' - ]); - assertAnalysis('ampersand', 'a and & and b', [ - '0:a', - '1:and', '1:&', - '2:&', '2:and', '2:und', - '3:and', '3:&', - '4:b' - ]); - assertAnalysis( 'ampersand', 'land', ['land'] ); // should not replace inside tokens - - // partial_token_address_suffix_expansion - assertAnalysis( 'partial_token_address_suffix_expansion', 'rd', ['road'] ); - assertAnalysis( 'partial_token_address_suffix_expansion', 'ctr', ['center'] ); - - assertAnalysis( 'peliasQueryPartialTokenFilter', '1 a ab abc abcdefghij', ['1','a','ab','abc','abcdefghij'] ); - assertAnalysis( 'removeAllZeroNumericPrefix', '00001', ['1'] ); - assertAnalysis( 'unique', '1 1 1', ['1','1','1'] ); - assertAnalysis( 'notnull', ' / / ', [] ); - - assertAnalysis( 'no kstem', 'mcdonalds', ['mcdonalds'] ); - assertAnalysis( 'no kstem', 'McDonald\'s', ['mcdonalds'] ); - assertAnalysis( 'no kstem', 'peoples', ['peoples'] ); - - // remove punctuation (handled by the char_filter) - assertAnalysis( 'punctuation', punctuation.all.join(''), ['0:&', '0:and', '0:und'] ); - - // ensure that very large grams are created - assertAnalysis( 'largeGrams', 'grolmanstrasse', ['grolmanstrasse']); - - suite.run( t.end ); - }); -}; - -// address suffix expansions should only performed in a way that is -// safe for 'partial tokens'. -module.exports.tests.address_suffix_expansions = function(test, common){ - test( 'address suffix expansions', function(t){ - - var suite = new elastictest.Suite( common.clientOpts, { schema: schema } ); - var assertAnalysis = common.analyze.bind( null, suite, t, 'peliasQueryPartialToken' ); - suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up - - assertAnalysis( 'safe expansions', 'aly', [ 'alley' ]); - assertAnalysis( 'safe expansions', 'xing', [ 'crossing' ]); - assertAnalysis( 'safe expansions', 'rd', [ 'road' ]); - - assertAnalysis( 'unsafe expansion', 'ct st', [ 'ct', 'st' ]); - - suite.run( t.end ); - }); -}; - -// stop words should be disabled so that the entire street prefix is indexed as ngrams -module.exports.tests.stop_words = function(test, common){ - test( 'stop words', function(t){ - - var suite = new elastictest.Suite( common.clientOpts, { schema: schema } ); - var assertAnalysis = common.analyze.bind( null, suite, t, 'peliasQueryPartialToken' ); - suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up - - assertAnalysis( 'street suffix', 'AB street', [ 'ab', 'street' ]); - assertAnalysis( 'street suffix (abbreviation)', 'AB st', [ 'ab', 'st' ]); - - suite.run( t.end ); - }); -}; - -module.exports.tests.functional = function(test, common){ - test( 'functional', function(t){ - - var suite = new elastictest.Suite( common.clientOpts, { schema: schema } ); - var assertAnalysis = common.analyze.bind( null, suite, t, 'peliasQueryPartialToken' ); - suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up - - assertAnalysis( 'country', 'Trinidad and Tobago', [ '0:trinidad', '1:and', '1:&', '2:tobago' ]); - assertAnalysis( 'place', 'Toys "R" Us!', [ 'toys', 'r', 'us' ]); - assertAnalysis( 'address', '101 mapzen place', [ '101', 'mapzen', 'place' ]); - - suite.run( t.end ); - }); -}; - -module.exports.tests.address = function(test, common){ - test( 'address', function(t){ - - var suite = new elastictest.Suite( common.clientOpts, { schema: schema } ); - var assertAnalysis = common.analyze.bind( null, suite, t, 'peliasQueryPartialToken' ); - suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up - - assertAnalysis( 'address', '101 mapzen place', [ - '101', 'mapzen', 'place' - ]); - - assertAnalysis( 'address', '30 w 26 st', [ - '30', 'w', '26', 'st' - ]); - - assertAnalysis( 'address', '4B 921 83 st', [ - '4b', '921', '83', 'st' - ]); - - suite.run( t.end ); - }); -}; - -// @see: https://github.com/pelias/api/issues/600 -module.exports.tests.unicode = function(test, common){ - test( 'normalization', function(t){ - - var suite = new elastictest.Suite( common.clientOpts, { schema: schema } ); - var assertAnalysis = common.analyze.bind( null, suite, t, 'peliasQueryPartialToken' ); - suite.action( function( done ){ setTimeout( done, 500 ); }); // wait for es to bring some shards up - - var latin_large_letter_e_with_acute = String.fromCodePoint(0x00C9); - var latin_small_letter_e_with_acute = String.fromCodePoint(0x00E9); - var combining_acute_accent = String.fromCodePoint(0x0301); - var latin_large_letter_e = String.fromCodePoint(0x0045); - var latin_small_letter_e = String.fromCodePoint(0x0065); - - // Chambéry (both forms appear the same) - var composed = "Chamb" + latin_small_letter_e_with_acute + "ry"; - var decomposed = "Chamb" + combining_acute_accent + latin_small_letter_e + "ry" - - assertAnalysis( 'composed', composed, ['chambery'] ); - assertAnalysis( 'decomposed', decomposed, ['chambery'] ); - - // Één (both forms appear the same) - var composed = latin_large_letter_e_with_acute + latin_small_letter_e_with_acute + "n"; - var decomposed = combining_acute_accent + latin_large_letter_e + combining_acute_accent + latin_small_letter_e + "n" - - assertAnalysis( 'composed', composed, ['een'] ); - assertAnalysis( 'decomposed', decomposed, ['een'] ); - - suite.run( t.end ); - }); -}; - -module.exports.all = function (tape, common) { - - function test(name, testFunction) { - return tape('peliasQueryPartialToken: ' + name, testFunction); - } - - for( var testCase in module.exports.tests ){ - module.exports.tests[testCase](test, common); - } -}; diff --git a/integration/autocomplete_abbreviated_street_names.js b/integration/autocomplete_abbreviated_street_names.js index df49c78d..3a7bcde5 100644 --- a/integration/autocomplete_abbreviated_street_names.js +++ b/integration/autocomplete_abbreviated_street_names.js @@ -27,24 +27,6 @@ module.exports.tests.index_expanded_form_search_contracted = function(test, comm }, done); }); - // search using 'peliasQueryPartialToken' - suite.assert( function( done ){ - suite.client.search({ - index: suite.props.index, - type: config.schema.typeName, - body: { query: { match: { - 'name.default': { - 'analyzer': 'peliasQueryPartialToken', - 'query': 'Grolmanstr.' - } - }}} - }, function( err, res ){ - t.equal( err, undefined ); - t.equal( res.hits.total, 1, 'document found' ); - done(); - }); - }); - // search using 'peliasQuery' suite.assert( function( done ){ suite.client.search({ diff --git a/integration/autocomplete_directional_synonym_expansion.js b/integration/autocomplete_directional_synonym_expansion.js index eef95079..37b0d663 100644 --- a/integration/autocomplete_directional_synonym_expansion.js +++ b/integration/autocomplete_directional_synonym_expansion.js @@ -27,14 +27,14 @@ module.exports.tests.index_and_retrieve_expanded_form = function(test, common){ }, done); }); - // search using 'peliasQueryPartialToken' + // search using 'peliasQuery' suite.assert( function( done ){ suite.client.search({ index: suite.props.index, type: config.schema.typeName, body: { query: { match: { 'name.default': { - 'analyzer': 'peliasQueryPartialToken', + 'analyzer': 'peliasQuery', 'query': 'nor' } }}} @@ -84,24 +84,6 @@ module.exports.tests.index_and_retrieve_contracted_form = function(test, common) }, done); }); - // search using 'peliasQueryPartialToken' - suite.assert( function( done ){ - suite.client.search({ - index: suite.props.index, - type: config.schema.typeName, - body: { query: { match: { - 'name.default': { - 'analyzer': 'peliasQueryPartialToken', - 'query': 'n' - } - }}} - }, function( err, res ){ - t.equal( err, undefined ); - t.equal( res.hits.total, 1, 'document found' ); - done(); - }); - }); - // search using 'peliasQuery' suite.assert( function( done ){ suite.client.search({ @@ -141,14 +123,14 @@ module.exports.tests.index_and_retrieve_mixed_form_1 = function(test, common){ }, done); }); - // search using 'peliasQueryPartialToken' + // search using 'peliasQuery' suite.assert( function( done ){ suite.client.search({ index: suite.props.index, type: config.schema.typeName, body: { query: { match: { 'name.default': { - 'analyzer': 'peliasQueryPartialToken', + 'analyzer': 'peliasQuery', 'query': 'nor' } }}} @@ -198,24 +180,6 @@ module.exports.tests.index_and_retrieve_mixed_form_2 = function(test, common){ }, done); }); - // search using 'peliasQueryPartialToken' - suite.assert( function( done ){ - suite.client.search({ - index: suite.props.index, - type: config.schema.typeName, - body: { query: { match: { - 'name.default': { - 'analyzer': 'peliasQueryPartialToken', - 'query': 'n' - } - }}} - }, function( err, res ){ - t.equal( err, undefined ); - t.equal( res.hits.total, 1, 'document found' ); - done(); - }); - }); - // search using 'peliasQuery' suite.assert( function( done ){ suite.client.search({ diff --git a/integration/autocomplete_street_synonym_expansion.js b/integration/autocomplete_street_synonym_expansion.js index 20832b5d..9b6bff4f 100644 --- a/integration/autocomplete_street_synonym_expansion.js +++ b/integration/autocomplete_street_synonym_expansion.js @@ -27,14 +27,14 @@ module.exports.tests.index_and_retrieve_expanded_form = function(test, common){ }, done); }); - // search using 'peliasQueryPartialToken' + // search using 'peliasQuery' suite.assert( function( done ){ suite.client.search({ index: suite.props.index, type: config.schema.typeName, body: { query: { match: { 'name.default': { - 'analyzer': 'peliasQueryPartialToken', + 'analyzer': 'peliasQuery', 'query': 'cent' } }}} @@ -84,24 +84,6 @@ module.exports.tests.index_and_retrieve_contracted_form = function(test, common) }, done); }); - // search using 'peliasQueryPartialToken' - suite.assert( function( done ){ - suite.client.search({ - index: suite.props.index, - type: config.schema.typeName, - body: { query: { match: { - 'name.default': { - 'analyzer': 'peliasQueryPartialToken', - 'query': 'ctr' - } - }}} - }, function( err, res ){ - t.equal( err, undefined ); - t.equal( res.hits.total, 1, 'document found' ); - done(); - }); - }); - // search using 'peliasQuery' suite.assert( function( done ){ suite.client.search({ @@ -141,14 +123,14 @@ module.exports.tests.index_and_retrieve_mixed_form_1 = function(test, common){ }, done); }); - // search using 'peliasQueryPartialToken' + // search using 'peliasQuery' suite.assert( function( done ){ suite.client.search({ index: suite.props.index, type: config.schema.typeName, body: { query: { match: { 'name.default': { - 'analyzer': 'peliasQueryPartialToken', + 'analyzer': 'peliasQuery', 'query': 'cent' } }}} @@ -198,24 +180,6 @@ module.exports.tests.index_and_retrieve_mixed_form_2 = function(test, common){ }, done); }); - // search using 'peliasQueryPartialToken' - suite.assert( function( done ){ - suite.client.search({ - index: suite.props.index, - type: config.schema.typeName, - body: { query: { match: { - 'name.default': { - 'analyzer': 'peliasQueryPartialToken', - 'query': 'ctr' - } - }}} - }, function( err, res ){ - t.equal( err, undefined ); - t.equal( res.hits.total, 1, 'document found' ); - done(); - }); - }); - // search using 'peliasQuery' suite.assert( function( done ){ suite.client.search({ diff --git a/integration/run.js b/integration/run.js index ec8d227e..b5222499 100644 --- a/integration/run.js +++ b/integration/run.js @@ -71,7 +71,6 @@ var tests = [ require('./dynamic_templates.js'), require('./analyzer_peliasIndexOneEdgeGram.js'), require('./analyzer_peliasQuery.js'), - require('./analyzer_peliasQueryPartialToken.js'), require('./analyzer_peliasPhrase.js'), require('./analyzer_peliasAdmin.js'), require('./analyzer_peliasHousenumber.js'), diff --git a/settings.js b/settings.js index 417c2a68..f55c41b9 100644 --- a/settings.js +++ b/settings.js @@ -84,22 +84,6 @@ function generate(){ "notnull" ] }, - "peliasQueryPartialToken" : { - "type": "custom", - "tokenizer" : "peliasNameTokenizer", - "char_filter" : ["punctuation", "nfkc_normalizer"], - "filter": [ - "lowercase", - "icu_folding", - "trim", - "partial_token_address_suffix_expansion", - "ampersand", - "remove_ordinals", - "removeAllZeroNumericPrefix", - "unique_only_same_position", - "notnull" - ] - }, "peliasPhrase": { "type": "custom", "tokenizer":"peliasNameTokenizer", diff --git a/test/fixtures/expected.json b/test/fixtures/expected.json index ec6d062e..48d0c5ec 100644 --- a/test/fixtures/expected.json +++ b/test/fixtures/expected.json @@ -70,25 +70,6 @@ "notnull" ] }, - "peliasQueryPartialToken": { - "type": "custom", - "tokenizer": "peliasNameTokenizer", - "char_filter": [ - "punctuation", - "nfkc_normalizer" - ], - "filter": [ - "lowercase", - "icu_folding", - "trim", - "partial_token_address_suffix_expansion", - "ampersand", - "remove_ordinals", - "removeAllZeroNumericPrefix", - "unique_only_same_position", - "notnull" - ] - }, "peliasPhrase": { "type": "custom", "tokenizer": "peliasNameTokenizer", diff --git a/test/settings.js b/test/settings.js index 77cbde00..360370af 100644 --- a/test/settings.js +++ b/test/settings.js @@ -120,34 +120,6 @@ module.exports.tests.peliasQueryAnalyzer = function (test, common) { }); }; -module.exports.tests.peliasQueryPartialTokenAnalyzer = function (test, common) { - test('has peliasQueryPartialToken analyzer', function (t) { - var s = settings(); - t.equal(typeof s.analysis.analyzer.peliasQueryPartialToken, 'object', 'there is a peliasQueryPartialToken analyzer'); - var analyzer = s.analysis.analyzer.peliasQueryPartialToken; - t.equal(analyzer.type, 'custom', 'custom analyzer'); - t.equal(typeof analyzer.tokenizer, 'string', 'tokenizer specified'); - t.deepEqual(analyzer.char_filter, ["punctuation", "nfkc_normalizer"], 'character filters specified'); - t.true(Array.isArray(analyzer.filter), 'filters specified'); - t.end(); - }); - test('peliasQueryPartialToken token filters', function (t) { - var analyzer = settings().analysis.analyzer.peliasQueryPartialToken; - t.deepEqual(analyzer.filter, [ - "lowercase", - "icu_folding", - "trim", - "partial_token_address_suffix_expansion", - "ampersand", - "remove_ordinals", - "removeAllZeroNumericPrefix", - "unique_only_same_position", - "notnull" - ]); - t.end(); - }); -}; - module.exports.tests.peliasPhraseAnalyzer = function(test, common) { test('has peliasPhrase analyzer', function(t) { var s = settings(); From 4fe9a7fa77246eb56f37a228018a10b1a5b165dd Mon Sep 17 00:00:00 2001 From: Joxit Date: Thu, 12 Dec 2019 10:32:39 +0100 Subject: [PATCH 3/3] fix(partial_token_address_suffix_expansion): remove unused synonym --- ...partial_token_address_suffix_expansion.txt | 120 ------------------ test/fixtures/expected.json | 101 --------------- 2 files changed, 221 deletions(-) delete mode 100644 synonyms/partial_token_address_suffix_expansion.txt diff --git a/synonyms/partial_token_address_suffix_expansion.txt b/synonyms/partial_token_address_suffix_expansion.txt deleted file mode 100644 index 20a97095..00000000 --- a/synonyms/partial_token_address_suffix_expansion.txt +++ /dev/null @@ -1,120 +0,0 @@ -# a list of 'safe' street suffix expansions. -# -# this list should NOT include any values where the abbreviation is a prefix of -# the expanded form. -# -# EG. 'st' is a prefix of 'street' so it is not included here. -# EG. 'rd' is NOT a prefix of 'road' so it IS included here. -# -# the term 'safe' refers to whether the token may be expanded without causing -# other issues; in general ask yourself "if i expand `byu => bayou` will this cause -# issues with other tokens which *begin with byu?" -# -# EG. 'pr' is disabled as it would cause jitter when autocompleting any place -# name beginning with 'pr' such as 'princeton', on the second keypress the -# results would likely all contain names which are/begin with "pier". -# -# EG. 'ct' is disabled as expanding it to 'court' would possibly conflict with -# the state abbreviation for 'Connecticut'. -# -# please use judgement when adding new expansions as it may cause the 'jitter' -# behaviour as outlined in https://github.com/pelias/schema/pull/83 - -aly => alley -anx => annex -byu => bayou -bch => beach -bnd => bend -blf => bluff -blfs => bluffs -btm => bottom -blvd => boulevard -brg => bridge -brk => brook -cyn => canyon -cp => cape -cswy => causeway -ctr => center -chnnl => channel -clf => cliff -clb => club -cmn => common -cmns => commons -crse => course -# ct => court -cv => cove -crk => creek -crst => crest -xing => crossing -xrd => crossroad -xrds => crossroads -dl => dale -dm => dam -expy => expressway -fls => falls -fry => ferry -fld => field -flds => fields -flt => flat -flts => flats -frd => ford -frst => forest -frg => forge -frk => fork -frks => forks -fwy => freeway -gdn => garden -gdns => gardens -gtwy => gateway -gln => glenn -grn => green -grv => grove -hbr => harbor -hvn => haven -hts => heights -hwy => highway -hl => hill -hls => hills -holw => hollow -jct => junction -ky => key -kys => keys -knl => knoll -knls => knolls -lndg => landing -ln => lane -lgt => light -lgts => lights -lck => lock -lcks => locks -mnr => manor -mdw => meadow -mdws => meadows -ml => mill -mls => mills -mnt => mountain -mtwy => motorway -nck => neck -pkwy => parkway -psge => pasage -# pr => pier -pne => pine -pnes => pines -plz => plaza -rnch => ranch -rdg => ridge -rdgs => ridges -rd => road -rte => route -shr => shore -shrs => shores -skwy => skyway -spg => spring -spgs => springs -ste => suite -trfy => trafficway -tunl => tunnel -tpke => turnpike -vly => valley -vlg => village -wy => way diff --git a/test/fixtures/expected.json b/test/fixtures/expected.json index 48d0c5ec..31e1638c 100644 --- a/test/fixtures/expected.json +++ b/test/fixtures/expected.json @@ -361,107 +361,6 @@ "west,w" ] }, - "partial_token_address_suffix_expansion": { - "type": "synonym", - "synonyms": [ - "aly => alley", - "anx => annex", - "byu => bayou", - "bch => beach", - "bnd => bend", - "blf => bluff", - "blfs => bluffs", - "btm => bottom", - "blvd => boulevard", - "brg => bridge", - "brk => brook", - "cyn => canyon", - "cp => cape", - "cswy => causeway", - "ctr => center", - "chnnl => channel", - "clf => cliff", - "clb => club", - "cmn => common", - "cmns => commons", - "crse => course", - "cv => cove", - "crk => creek", - "crst => crest", - "xing => crossing", - "xrd => crossroad", - "xrds => crossroads", - "dl => dale", - "dm => dam", - "expy => expressway", - "fls => falls", - "fry => ferry", - "fld => field", - "flds => fields", - "flt => flat", - "flts => flats", - "frd => ford", - "frst => forest", - "frg => forge", - "frk => fork", - "frks => forks", - "fwy => freeway", - "gdn => garden", - "gdns => gardens", - "gtwy => gateway", - "gln => glenn", - "grn => green", - "grv => grove", - "hbr => harbor", - "hvn => haven", - "hts => heights", - "hwy => highway", - "hl => hill", - "hls => hills", - "holw => hollow", - "jct => junction", - "ky => key", - "kys => keys", - "knl => knoll", - "knls => knolls", - "lndg => landing", - "ln => lane", - "lgt => light", - "lgts => lights", - "lck => lock", - "lcks => locks", - "mnr => manor", - "mdw => meadow", - "mdws => meadows", - "ml => mill", - "mls => mills", - "mnt => mountain", - "mtwy => motorway", - "nck => neck", - "pkwy => parkway", - "psge => pasage", - "pne => pine", - "pnes => pines", - "plz => plaza", - "rnch => ranch", - "rdg => ridge", - "rdgs => ridges", - "rd => road", - "rte => route", - "shr => shore", - "shrs => shores", - "skwy => skyway", - "spg => spring", - "spgs => springs", - "ste => suite", - "trfy => trafficway", - "tunl => tunnel", - "tpke => turnpike", - "vly => valley", - "vlg => village", - "wy => way" - ] - }, "street_suffix": { "type": "synonym", "synonyms": [