Skip to content

Commit

Permalink
feat(nl): improve support of NL addresses (#126)
Browse files Browse the repository at this point in the history
* Examples of correct addresses in NL.
* Recognise 6-character postcodes in NL, addressing #127.
* Check for -plein as street type.
* Adjustments for typo 'plain' instead of 'plein' in libpostal resources. Addressing #128.
* Put 'test' back in as per #126 (comment).
* Prevent 'St' as a suffix in NL street names. Add 'Jr' and 'Sr'. Addressing #126.
* The first of the 4 digits of an NL postcode cannot be 0, and the letter pairs 'SA', 'SD' and 'SS' are not allowed. Addressing #127.
* Adjective 'korte' no longer a personal title. Addressing #129.
* Tests and fix for typo 'BurgeRmeester'. Addressing #130.
* Tests, code, and configs for '-daal', '-burg', '-baan'. Addressing #131 and #133.
* Config for '-burg' not separable. Addressing #131.
* Remove test for NL postal codes WITH spaces. Regex ok. #134
* Fix formatting (spaces at the end of the line).
* Also use directionals to parse Dutch street addresses. #137
* Add Dutch titulature for street name recognition. (#130)
* Add 'plantsoen' as a street type. (#128).
* Remove test for NL locality with stopword.
  • Loading branch information
emacgillavry authored Sep 7, 2021
1 parent e92f96c commit a1af69b
Show file tree
Hide file tree
Showing 13 changed files with 125 additions and 5 deletions.
6 changes: 6 additions & 0 deletions classifier/CompoundStreetClassifier.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ class CompoundStreetClassifier extends WordClassifier {
// this removes suffixes such as 'r.' which can be ambiguous
minlength: 3
})

// additionally load the German and Dutch entries of
// 'concatenated_suffixes_inseparable.txt' into this.suffixes
libpostal.load(this.suffixes, ['de', 'nl'], 'concatenated_suffixes_inseparable.txt', {
// remove any suffixes which contain fewer than 3 characters (excluding a period);
// this removes suffixes such as 'r.' which can be ambiguous
minlength: 3
})
}

each (span) {
Expand Down
2 changes: 1 addition & 1 deletion classifier/DirectionalClassifier.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ const libpostal = require('../resources/libpostal/libpostal')

// optionally control which languages are included
// note: reducing the languages will have a considerable performance benefit
const languages = ['en', 'es', 'de', 'fr']
const languages = ['en', 'es', 'de', 'fr', 'nl']

class DirectionalClassifier extends WordClassifier {
setup () {
Expand Down
2 changes: 1 addition & 1 deletion classifier/PersonalTitleClassifier.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ module.exports.tests.contains_numerals = (test) => {
module.exports.tests.classify = (test) => {
let valid = [
'Général', 'General', 'gal',
'Saint', 'st', 'cdt', 'l\'Amiral'
'Saint', 'st', 'cdt', 'l\'Amiral', 'Burgemeester'
]

valid.forEach(token => {
Expand Down
2 changes: 1 addition & 1 deletion classifier/PostcodeClassifier.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ const dictPath = path.join(__dirname, `../resources/chromium-i18n/ssl-address`)
// const countryCodes = fs.readdirSync(dictPath)
// .filter(p => p.endsWith('.json'))
// .map(p => p.split('.')[0])
const countryCodes = ['us', 'gb', 'fr', 'de', 'es', 'pt', 'au', 'nz', 'kr', 'jp', 'in', 'ru', 'br']
const countryCodes = ['us', 'gb', 'fr', 'de', 'es', 'pt', 'au', 'nz', 'kr', 'jp', 'in', 'ru', 'br', 'nl']

class PostcodeClassifier extends WordClassifier {
setup () {
Expand Down
5 changes: 5 additions & 0 deletions classifier/PostcodeClassifier.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@ module.exports.tests.classify = (test) => {
t.deepEqual(s.classifications, { PostcodeClassification: new PostcodeClassification(1.0) })
t.end()
})
// NLD: a Dutch postcode written as four digits directly followed by two
// letters (no space) is expected to be classified as a postcode with confidence 1.0
test('classify: NLD', (t) => {
let s = classify('7512EC')
t.deepEqual(s.classifications, { PostcodeClassification: new PostcodeClassification(1.0) })
t.end()
})
}

module.exports.all = (tape, common) => {
Expand Down
2 changes: 1 addition & 1 deletion resources/chromium-i18n/ssl-address/NL.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"zipex":"1234 AB,2490 AA","key":"NL","name":"NETHERLANDS","fmt":"%O%n%N%n%A%n%Z %C","require":"ACZ","zip":"\\d{4} ?[A-Z]{2}","posturl":"http://www.postnl.nl/voorthuis/","id":"data/NL"}
{"zipex":"1234 AB,2490 AA","key":"NL","name":"NETHERLANDS","fmt":"%O%n%N%n%A%n%Z %C","require":"ACZ","zip":"[1-9][0-9]{3} ?(?!SA|SD|SS)[A-Z]{2}","posturl":"http://www.postnl.nl/voorthuis/","id":"data/NL"}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
!kort|k
!korte|kte
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
burg|brg|bg
Original file line number Diff line number Diff line change
@@ -1 +1,6 @@
dijk
baan
daal
dijk
!plain|pln.
plein|pln
plantsoen|plnts
4 changes: 4 additions & 0 deletions resources/pelias/dictionaries/libpostal/nl/directionals.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
noordzijde|nz|n.z.|n.z|n z
oostzijde|oz|o.z.|o.z|o z
westzijde|wz|w.z.|w.z|w z
zuidzijde|zz|z.z.|z.z|z z
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
junior|jr|jnr
senior|sr|snr
# st is not a personal suffix in Dutch
!st
43 changes: 43 additions & 0 deletions resources/pelias/dictionaries/libpostal/nl/personal_titles.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
!kort|k
!korte|kte
aalmoezenier
admiraal|adm
bisschop|biss
# 'burgermeester' is a typo in the libpostal resource; the correct spelling is 'burgemeester'
!burgermeester|burg|bgm
burgemeester|burg|bgm
commissaris|comm
deken|dkn
directeur|dir
frater|fr
graaf
gravin
goeverneur|goev
gouverneur|gouv
heer|hr
jonker|jkr
juffrouw|juffr
hertog|htg
kanunnik|kan
kapelaan|kap
kapitein|kapt
keizer
luitenant generaal|lt gen
!mevrouw|mevr
mevrouw|mevr|mw
madame|mad
majoor|maj
notaris|not
overste|ov
pater|ptr
prelaat|prlt
rector|rect
schepen|sch
schout|sch
schout bij nacht|sbn
secretaris|secr
sekretaris|sekr
veldmaarschalk|veldm
vicaris|vic
wethouder|weth
zusters|zr
50 changes: 50 additions & 0 deletions test/address.nld.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,56 @@ const testcase = (test, common) => {
assert('Bosserdijk, Hoogland', [
{ street: 'Bosserdijk' }, { locality: 'Hoogland' }
])

// a leading 'St' is expected to be left out of the parsed street name
assert('St Ludgerusstraat, Utrecht', [
{ street: 'Ludgerusstraat' }, { locality: 'Utrecht' }
])

// street name ending in '-daal', preceded by an adjective
assert('Lange Groenendaal, Gouda', [
{ street: 'Lange Groenendaal' }, { locality: 'Gouda' }
])

// street name ending in '-burg', preceded by a preposition
assert('Achter Clarenburg, Utrecht', [
{ street: 'Achter Clarenburg' }, { locality: 'Utrecht' }
])

// a bare name ending in '-burg' lists both a locality and a street solution
// NOTE(review): the meaning of the trailing `false` argument is defined by the
// assert helper, which is outside this view — confirm before relying on it
assert('Rozenburg', [
[{ locality: 'Rozenburg' }],
[{ street: 'Rozenburg' }]
], false)

// same expectation as above for a bare name ending in '-daal'
assert('Bloemendaal', [
[{ locality: 'Bloemendaal' }],
[{ street: 'Bloemendaal' }]
], false)

// full address with a 6-character postcode written without a space
assert('Brinkstraat 87, 7512EC, Enschede', [
{ street: 'Brinkstraat' }, { housenumber: '87' }, { postcode: '7512EC' }, { locality: 'Enschede' }
])

// abbreviated directional ('O.Z.') is expected to stay part of the street name
assert('Weerdsingel O.Z., Utrecht', [
{ street: 'Weerdsingel O.Z.' }, { locality: 'Utrecht' }
])

// directional written in full ('Westzijde') followed by a house number
assert('Oranjelaan Westzijde 41, Puttershoek', [
{ street: 'Oranjelaan Westzijde' }, { housenumber: '41' }, { locality: 'Puttershoek' }
])

// street name ending in '-plein'
assert('Rembrandtplein, Amsterdam', [
{ street: 'Rembrandtplein' }, { locality: 'Amsterdam' }
])

// the adjective 'Korte' is expected to be kept as part of the street name
assert('Korte Tiendeweg, Gouda', [
{ street: 'Korte Tiendeweg' }, { locality: 'Gouda' }
])

// the title 'Burgemeester' is expected to be kept as part of the street name
assert('Burgemeester Martenssingel, Gouda', [
{ street: 'Burgemeester Martenssingel' }, { locality: 'Gouda' }
])

// street name ending in '-baan'
assert('Agorabaan, Lelystad', [
{ street: 'Agorabaan' }, { locality: 'Lelystad' }
])
}

module.exports.all = (tape, common) => {
Expand Down

0 comments on commit a1af69b

Please sign in to comment.