diff --git a/classification/StreetProperNameClassification.js b/classification/StreetProperNameClassification.js
new file mode 100644
index 00000000..5c248999
--- /dev/null
+++ b/classification/StreetProperNameClassification.js
@@ -0,0 +1,15 @@
+const Classification = require('./Classification')
+
+/**
+  Classification assigned to spans recognised as a street proper name.
+  The constructor forwards confidence and meta to the base Classification
+  and sets the label to 'street_proper_name'.
+**/
+class StreetProperNameClassification extends Classification {
+  constructor (confidence, meta) {
+    super(confidence, meta)
+    this.label = 'street_proper_name'
+  }
+}
+
+module.exports = StreetProperNameClassification
diff --git a/classification/StreetProperNameClassification.test.js b/classification/StreetProperNameClassification.test.js
new file mode 100644
index 00000000..052a7081
--- /dev/null
+++ b/classification/StreetProperNameClassification.test.js
@@ -0,0 +1,24 @@
+const Classification = require('./StreetProperNameClassification')
+
+module.exports.tests = {}
+
+module.exports.tests.constructor = (test) => {
+  test('constructor', (t) => {
+    let c = new Classification()
+    t.false(c.public)
+    t.equals(c.label, 'street_proper_name')
+    t.equals(c.confidence, 1.0)
+    t.deepEqual(c.meta, {})
+    t.end()
+  })
+}
+
+module.exports.all = (tape, common) => {
+  function test (name, testFunction) {
+    return tape(`StreetProperNameClassification: ${name}`, testFunction)
+  }
+
+  for (var testCase in module.exports.tests) {
+    module.exports.tests[testCase](test, common)
+  }
+}
diff --git a/classifier/CompositeClassifier.js b/classifier/CompositeClassifier.js
index cbb70d98..0af45883 100644
--- a/classifier/CompositeClassifier.js
+++ b/classifier/CompositeClassifier.js
@@ -106,8 +106,9 @@ class CompositeClassifier extends SectionClassifier {
     // find phrases which equal the composites
     let superPhrases = []
     composites.forEach(c => {
-      let start = c[0].start
-      let end = c[c.length - 1].end
+      const carr = Array.isArray(c) ? c : [c] // cast to array
+      let start = carr[0].start
+      let end = carr[carr.length - 1].end
       superPhrases = superPhrases.concat(phrases.filter(p => p.start === start && p.end === end))
     })
 
diff --git a/classifier/StreetProperNameClassifier.js b/classifier/StreetProperNameClassifier.js
new file mode 100644
index 00000000..892f9425
--- /dev/null
+++ b/classifier/StreetProperNameClassifier.js
@@ -0,0 +1,31 @@
+const WordClassifier = require('./super/WordClassifier')
+const StreetProperNameClassification = require('../classification/StreetProperNameClassification')
+
+/**
+  Special handling of streets with no suffix
+
+  see: https://github.com/pelias/parser/issues/140
+**/
+
+class StreetProperNameClassifier extends WordClassifier {
+  // build the lookup of normalised tokens treated as street proper names
+  setup () {
+    this.index = {
+      'broadway': true,
+      'esplanade': true
+    }
+  }
+
+  // classify a single span; spans whose norm is not in the index are left untouched
+  each (span) {
+    // skip spans which contain numbers
+    if (span.contains.numerals) { return }
+
+    // classify tokens in the index as 'street_proper_name'
+    if (this.index[span.norm] === true) {
+      span.classify(new StreetProperNameClassification(0.7))
+    }
+  }
+}
+
+module.exports = StreetProperNameClassifier
diff --git a/classifier/StreetProperNameClassifier.test.js b/classifier/StreetProperNameClassifier.test.js
new file mode 100644
index 00000000..9cfd520f
--- /dev/null
+++ b/classifier/StreetProperNameClassifier.test.js
@@ -0,0 +1,49 @@
+const StreetProperNameClassifier = require('./StreetProperNameClassifier')
+const StreetProperNameClassification = require('../classification/StreetProperNameClassification')
+const Span = require('../tokenization/Span')
+const classifier = new StreetProperNameClassifier()
+
+module.exports.tests = {}
+
+function classify (body) {
+  let s = new Span(body)
+  classifier.each(s, null, 1)
+  return s
+}
+
+module.exports.tests.contains_numerals = (test) => {
+  test('contains numerals: honours contains.numerals boolean', (t) => {
+    let s = new Span('example')
+    s.contains.numerals = true
+    classifier.each(s, null, 1)
+    t.deepEqual(s.classifications, {})
+    t.end()
+  })
+}
+
+module.exports.tests.street_proper_names = (test) => {
+  let valid = [
+    'broadway',
+    'esplanade'
+  ]
+
+  valid.forEach(token => {
+    test(`street_proper_names: ${token}`, (t) => {
+      let s = classify(token)
+      t.deepEqual(s.classifications, {
+        StreetProperNameClassification: new StreetProperNameClassification(0.7)
+      })
+      t.end()
+    })
+  })
+}
+
+module.exports.all = (tape, common) => {
+  function test (name, testFunction) {
+    return tape(`StreetProperNameClassifier: ${name}`, testFunction)
+  }
+
+  for (var testCase in module.exports.tests) {
+    module.exports.tests[testCase](test, common)
+  }
+}
diff --git a/classifier/scheme/street.js b/classifier/scheme/street.js
index 6e135e7f..9df7ae43 100644
--- a/classifier/scheme/street.js
+++ b/classifier/scheme/street.js
@@ -258,6 +258,32 @@ module.exports = [
       }
     ]
   },
+  {
+    // Broadway Market
+    confidence: 0.80,
+    Class: StreetClassification,
+    scheme: [
+      {
+        is: ['StreetProperNameClassification'],
+        not: ['StreetClassification', 'IntersectionClassification']
+      },
+      {
+        is: ['StreetSuffixClassification'],
+        not: ['StreetClassification', 'IntersectionClassification']
+      }
+    ]
+  },
+  {
+    // Broadway
+    confidence: 0.82,
+    Class: StreetClassification,
+    scheme: [
+      {
+        is: ['StreetProperNameClassification'],
+        not: ['StreetClassification', 'IntersectionClassification']
+      }
+    ]
+  },
   {
     // +++ Main Street
     confidence: 0.84,
diff --git a/parser/AddressParser.js b/parser/AddressParser.js
index 3b5663e6..95a1d821 100644
--- a/parser/AddressParser.js
+++ b/parser/AddressParser.js
@@ -8,6 +8,7 @@ const UnitTypeUnitClassifier = require('../classifier/UnitTypeUnitClassifier')
 const PostcodeClassifier = require('../classifier/PostcodeClassifier')
 const StreetPrefixClassifier = require('../classifier/StreetPrefixClassifier')
 const StreetSuffixClassifier = require('../classifier/StreetSuffixClassifier')
+const StreetProperNameClassifier = require('../classifier/StreetProperNameClassifier')
 const RoadTypeClassifier = require('../classifier/RoadTypeClassifier')
 const ToponymClassifier = require('../classifier/ToponymClassifier')
 const CompoundStreetClassifier = require('../classifier/CompoundStreetClassifier')
@@ -56,6 +57,7 @@ class AddressParser extends Parser {
         new PostcodeClassifier(),
         new StreetPrefixClassifier(),
         new StreetSuffixClassifier(),
+        new StreetProperNameClassifier(),
         new RoadTypeClassifier(),
         new ToponymClassifier(),
         new CompoundStreetClassifier(),
diff --git a/resources/pelias/dictionaries/libpostal/en/street_types.txt b/resources/pelias/dictionaries/libpostal/en/street_types.txt
index e132e643..30ecf9d9 100644
--- a/resources/pelias/dictionaries/libpostal/en/street_types.txt
+++ b/resources/pelias/dictionaries/libpostal/en/street_types.txt
@@ -10,3 +10,7 @@ furlong
 
 # 1384 Cambridge beltway, Cambridge, MD 21613, USA
 beltway
+# https://github.com/pelias/parser/issues/140
+!broadway|bdwy|bway|bwy|brdway
+!esplanade|esp|espl
+market
diff --git a/resources/pelias/dictionaries/libpostal/fr/street_types.txt b/resources/pelias/dictionaries/libpostal/fr/street_types.txt
index 1f880099..2d24dfa4 100644
--- a/resources/pelias/dictionaries/libpostal/fr/street_types.txt
+++ b/resources/pelias/dictionaries/libpostal/fr/street_types.txt
@@ -1,2 +1,5 @@
 cité|cite
 cités|cites
+
+# https://github.com/pelias/parser/pull/141#issuecomment-895230721
+!esplanades|esps
diff --git a/resources/pelias/dictionaries/whosonfirst/locality/name:eng_x_preferred.txt b/resources/pelias/dictionaries/whosonfirst/locality/name:eng_x_preferred.txt
index 7928124a..3ec0fe78 100644
--- a/resources/pelias/dictionaries/whosonfirst/locality/name:eng_x_preferred.txt
+++ b/resources/pelias/dictionaries/whosonfirst/locality/name:eng_x_preferred.txt
@@ -75,3 +75,7 @@ bronx
 !wisconsin
 !wyoming
 
+# https://github.com/pelias/parser/issues/140
+!broadway
+!esplanade
+!market
diff --git a/resources/pelias/dictionaries/whosonfirst/locality/name:fra_x_preferred.txt b/resources/pelias/dictionaries/whosonfirst/locality/name:fra_x_preferred.txt
new file mode 100644
index 00000000..71ae9c5e
--- /dev/null
+++ b/resources/pelias/dictionaries/whosonfirst/locality/name:fra_x_preferred.txt
@@ -0,0 +1,2 @@
+# https://github.com/pelias/parser/issues/140
+!broadway
diff --git a/test/address.deu.test.js b/test/address.deu.test.js
index a5067548..959ed1b5 100644
--- a/test/address.deu.test.js
+++ b/test/address.deu.test.js
@@ -27,11 +27,21 @@ const testcase = (test, common) => {
     { locality: 'Munich' }, { country: 'Germany' }
   ])
 
+  assert('Esplanade, Berlin', [
+    { street: 'Esplanade' },
+    { locality: 'Berlin' }
+  ])
+
   assert('Esplanade 17, Berlin', [
     { street: 'Esplanade' }, { housenumber: '17' },
     { locality: 'Berlin' }
   ])
 
+  assert('17 Esplanade, Berlin', [
+    { housenumber: '17' }, { street: 'Esplanade' },
+    { locality: 'Berlin' }
+  ])
+
   assert('Königsallee Düsseldorf', [
     { street: 'Königsallee' },
     { locality: 'Düsseldorf' }
diff --git a/test/address.fra.test.js b/test/address.fra.test.js
index 894fdd28..2544b78a 100644
--- a/test/address.fra.test.js
+++ b/test/address.fra.test.js
@@ -112,6 +112,11 @@ const testcase = (test, common) => {
   assert(`Paris 75000, France`, [
     { locality: 'Paris' }, { postcode: '75000' }, { country: 'France' }
   ])
+
+  // https://github.com/pelias/parser/pull/141#issuecomment-895230721
+  assert(`Esplanade de la Liberté`, [{ street: 'Esplanade de la Liberté' }])
+  assert(`Esplanade du Géneral de Gaulle`, [{ street: 'Esplanade du Géneral de Gaulle' }])
+  assert(`Esplanade Méditerranée`, [{ street: 'Esplanade Méditerranée' }])
 }
 
 module.exports.all = (tape, common) => {
diff --git a/test/address.gbr.test.js b/test/address.gbr.test.js
index 79db2ac1..c51d83b7 100644
--- a/test/address.gbr.test.js
+++ b/test/address.gbr.test.js
@@ -4,6 +4,19 @@ const testcase = (test, common) => {
   assert('Rushendon Furlong', [
     { street: 'Rushendon Furlong' }
   ])
+
+  // Valid street name in London
+  assert('Broadway Market, London', [
+    { street: 'Broadway Market' },
+    { locality: 'London' }
+  ])
+
+  // 'The Dove', a pub on Broadway Market
+  assert('24-28 Broadway Market, London', [
+    { housenumber: '24-28' },
+    { street: 'Broadway Market' },
+    { locality: 'London' }
+  ])
 }
 
 module.exports.all = (tape, common) => {
diff --git a/test/address.usa.test.js b/test/address.usa.test.js
index 979f7c99..4ea034b3 100644
--- a/test/address.usa.test.js
+++ b/test/address.usa.test.js
@@ -249,6 +249,36 @@ const testcase = (test, common) => {
     { locality: 'boston' },
     { region: 'ma' }
   ])
+
+  // https://github.com/pelias/parser/issues/140
+  assert('Broadway, Manhattan', [
+    { street: 'Broadway' },
+    { locality: 'Manhattan' }
+  ])
+  assert('24 Broadway, Manhattan', [
+    { housenumber: '24' }, { street: 'Broadway' },
+    { locality: 'Manhattan' }
+  ])
+  assert('Broadway 24, Manhattan', [
+    { street: 'Broadway' }, { housenumber: '24' },
+    { locality: 'Manhattan' }
+  ])
+  assert('East Broadway, Manhattan', [
+    { street: 'East Broadway' },
+    { locality: 'Manhattan' }
+  ])
+  assert('24 East Broadway, Manhattan', [
+    { housenumber: '24' }, { street: 'East Broadway' },
+    { locality: 'Manhattan' }
+  ])
+  assert('West Broadway, Manhattan', [
+    { street: 'West Broadway' },
+    { locality: 'Manhattan' }
+  ])
+  assert('24 West Broadway, Manhattan', [
+    { housenumber: '24' }, { street: 'West Broadway' },
+    { locality: 'Manhattan' }
+  ])
 }
 
 module.exports.all = (tape, common) => {