diff --git a/bin/downloadPostal b/bin/downloadPostal
new file mode 100755
index 00000000..9e486654
--- /dev/null
+++ b/bin/downloadPostal
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+exec node ./bin/downloadPostalData.js
diff --git a/bin/downloadPostalData.js b/bin/downloadPostalData.js
new file mode 100644
index 00000000..8606833a
--- /dev/null
+++ b/bin/downloadPostalData.js
@@ -0,0 +1,15 @@
+'use strict';
+
+// Downloads the GeoNames postal code archive(s) for the country set in
+// imports.geonames.countryCode. Started by bin/downloadPostal.
+
+const config = require('pelias-config').generate();
+const validateISOCode = require('../lib/validateISOCode');
+
+const countryCode = validateISOCode(config.imports.geonames.countryCode);
+
+// The archive covering every country is named 'allCountries'.
+const filename = countryCode === 'ALL' ? 'allCountries' : countryCode;
+
+const task = require('../lib/tasks/downloadPostal');
+task(filename);
diff --git a/bin/startPostal b/bin/startPostal
new file mode 100755
index 00000000..89d33b21
--- /dev/null
+++ b/bin/startPostal
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+exec node --max_old_space_size=8000 importPostal.js
diff --git a/importPostal.js b/importPostal.js
new file mode 100644
index 00000000..15e6b2fd
--- /dev/null
+++ b/importPostal.js
@@ -0,0 +1,24 @@
+// Runs the postal code import task for every source file resolved for the
+// country configured in imports.geonames.countryCode.
+
+const config = require('pelias-config').generate();
+const _ = require('lodash');
+const logger = require('pelias-logger').get('geonames');
+
+if (_.has(config, 'imports.geonames.adminLookup')) {
+  logger.info('imports.geonames.adminLookup has been deprecated, ' +
+              'enable adminLookup using imports.adminLookup.enabled = true');
+}
+
+const resolvers = require( './lib/tasks/resolvers' );
+const task = require('./lib/tasks/importPostal');
+const validateISOCode = require('./lib/validateISOCode');
+
+const isocode = validateISOCode( config.imports.geonames.countryCode );
+const filename = isocode === 'ALL' ? 'allCountries' : isocode;
+const sources = resolvers.selectPostalSources( filename );
+
+// No end stream is passed, so the task creates its own for each source.
+for (const source of sources) {
+  task( source );
+}
diff --git a/lib/streams/featureCountryFilterPostalStream.js b/lib/streams/featureCountryFilterPostalStream.js
new file mode 100644
index 00000000..0fbc7382
--- /dev/null
+++ b/lib/streams/featureCountryFilterPostalStream.js
@@ -0,0 +1,38 @@
+const filter = require('through2-filter');
+const _ = require('lodash');
+const config = require('pelias-config').generate();
+
+// Countries whose short postal codes are skipped when importing 'ALL'.
+// NOTE(review): presumably these are the countries with separate *_full
+// archives, whose short codes in allCountries are truncated - confirm.
+const restrictedCountryCodes = ['CA', 'GB', 'NL'];
+
+/**
+ * Decide whether a postal code row is kept.
+ *
+ * @param {object} data row object with `country_code` and `postal_code`
+ * @returns {boolean} false only when imports.geonames.countryCode is 'ALL',
+ *   the row belongs to a restricted country and its code has 4 characters
+ *   or fewer; true otherwise
+ */
+function filterRecord(data) {
+  // NOTE(review): compares the raw config value; a differently cased 'all'
+  // would not match - confirm whether validateISOCode should be applied here.
+  const importingAll = config.imports.geonames.countryCode === 'ALL';
+  if (importingAll && _.includes(restrictedCountryCodes, data.country_code)) {
+    return data.postal_code.length > 4;
+  }
+  return true;
+}
+
+/**
+ * Create an object-mode stream that passes only rows accepted by filterRecord.
+ */
+function create() {
+  return filter.obj(filterRecord);
+}
+
+module.exports = {
+  filterRecord: filterRecord,
+  create: create
+};
diff --git a/lib/streams/peliasPostalDocGenerator.js b/lib/streams/peliasPostalDocGenerator.js
new file mode 100644
index 00000000..f38175c2
--- /dev/null
+++ b/lib/streams/peliasPostalDocGenerator.js
@@ -0,0 +1,67 @@
+const Document = require( 'pelias-model' ).Document;
+const logger = require( 'pelias-logger' ).get( 'geonames' );
+const through2 = require('through2');
+
+/**
+ * Remove a leading country code, and the hyphen that may follow it, from a
+ * postal code: 'LV-1001' -> '1001', 'AD100' -> '100'. Codes that do not
+ * start with the country code are returned unchanged.
+ *
+ * @param {string} postal_code
+ * @param {string} country_code
+ * @returns {string}
+ */
+function stripCountryPrefix(postal_code, country_code) {
+  if (!postal_code.startsWith(country_code)) {
+    return postal_code;
+  }
+  return postal_code.slice(country_code.length).replace(/^-/, '');
+}
+
+module.exports = {};
+
+/**
+ * Create an object-mode transform that turns postal code rows into
+ * pelias-model Documents with source 'geonames' and layer 'postalcode'.
+ * A row for which building the Document throws is logged and skipped.
+ */
+module.exports.create = function() {
+  return through2.obj(function(data, enc, next) {
+    let record;
+    try {
+      const country_code = data.country_code;
+      const postal_code = data.postal_code;
+
+      // id: country code + postal code without country prefix and spaces.
+      // Only a real prefix is stripped; cutting three characters whenever the
+      // country code occurred anywhere made distinct codes share one id.
+      const postal_code_clean =
+        stripCountryPrefix(postal_code, country_code).replace(/ /g, '');
+      const id = `${country_code}${postal_code_clean}`;
+
+      // Codes written with spaces also get their compact spelling as alias.
+      const alias = postal_code.includes(' ') ? postal_code.replace(/ /g, '') : null;
+      const name = alias === null ? postal_code : `${postal_code}, ${alias}`;
+
+      record = new Document('geonames', 'postalcode', id)
+        .setName('default', name)
+        .setSource('geonames')
+        .setCentroid({
+          lat: data.latitude,
+          lon: data.longitude
+        })
+        .setPopularity(9000)
+        // NOTE(review): alias is null for codes without a space - confirm
+        // that Document#addParent accepts null as its fourth argument.
+        .addParent('postalcode', postal_code, id, alias, 'geonames');
+    } catch (e) {
+      logger.warn(
+        'Failed to create a Document from:', data, 'Exception:', e
+      );
+    }
+    if (record !== undefined) {
+      this.push(record);
+    }
+    next();
+  });
+};
diff --git a/lib/tasks/downloadPostal.js b/lib/tasks/downloadPostal.js
new file mode 100644
index 00000000..3deba71d
--- /dev/null
+++ b/lib/tasks/downloadPostal.js
@@ -0,0 +1,65 @@
+const child_process = require('child_process');
+const fs = require('fs');
+
+const logger = require('pelias-logger').get('geonames');
+
+// use datapath setting from your config file
+const config = require('pelias-config').generate();
+const basepath = config.imports.geonames.datapath;
+const sourceURL = config.imports.geonames.sourceURL;
+
+const defaultURL = 'http://download.geonames.org/export/zip';
+
+// Archives fetched for 'allCountries': the combined file plus the separate
+// full files for CA, GB and NL.
+const allCountriesFiles = ['allCountries', 'CA_full.csv', 'GB_full.csv', 'NL_full.csv'];
+
+/**
+ * Download `${urlPrefix}/${name}.zip` to `${basepath}/postal/${name}.zip`
+ * with curl. A failed download sets process.exitCode to a non-zero value.
+ *
+ * @param {string} urlPrefix base URL without trailing slash
+ * @param {string} name archive name without the .zip extension
+ */
+function downloadFile(urlPrefix, name) {
+  const remoteFilePath = `${urlPrefix}/${name}.zip`;
+  const localFileName = `${basepath}/postal/${name}.zip`;
+  logger.info('downloading datafile from:', remoteFilePath);
+
+  // curl is started without a shell and with an argument array, so spaces or
+  // shell metacharacters in the configured path/URL cannot change the command.
+  // --fail makes HTTP errors exit non-zero instead of saving the error page.
+  const args = ['--fail', '--output', localFileName, remoteFilePath];
+  const job = child_process.spawn('curl', args, {
+    stdio: ['ignore', 'inherit', 'inherit']
+  });
+
+  job.on('error', (err) => {
+    logger.error(`could not run curl for ${remoteFilePath}:`, err);
+    process.exitCode = 1;
+  });
+
+  job.on('close', (code) => {
+    logger.info(`download of ${remoteFilePath} finished with exit code ${code}`);
+    // Several downloads may run in parallel: only record failures, so that a
+    // later successful download cannot reset the exit code to 0.
+    if (code !== 0) {
+      process.exitCode = code || 1;
+    }
+  });
+}
+
+/**
+ * Download the postal code archive(s) for a country into the postal
+ * directory below the configured datapath.
+ *
+ * @param {string} countryCode archive name: a country code, or 'allCountries'
+ *   to fetch every archive listed in allCountriesFiles
+ */
+module.exports = function (countryCode) {
+  // recursive: true also creates the datapath directory itself if missing
+  fs.mkdirSync(`${basepath}/postal`, {recursive: true});
+
+  const urlPrefix = sourceURL || defaultURL;
+  const names = countryCode === 'allCountries' ? allCountriesFiles : [countryCode];
+  names.forEach((name) => downloadFile(urlPrefix, name));
+};
diff --git a/lib/tasks/import.js b/lib/tasks/import.js
index 16df1c50..37fceb27 100644
--- a/lib/tasks/import.js
+++ b/lib/tasks/import.js
@@ -19,6 +19,6 @@ module.exports = function( sourceStream, endStream ){
     .pipe( blacklistStream() )
     .pipe( adminLookupStream.create() )
     .pipe( overrideLookedUpLocalityAndLocaladmin.create() )
-    .pipe(model.createDocumentMapperStream())
+    .pipe( model.createDocumentMapperStream() )
     .pipe( endStream );
 };
diff --git a/lib/tasks/importPostal.js b/lib/tasks/importPostal.js
new file mode 100644
index 00000000..191be8fd
--- /dev/null
+++ b/lib/tasks/importPostal.js
@@ -0,0 +1,66 @@
+const dbclient = require('pelias-dbclient');
+const unzipper = require('unzipper');
+const csv = require('fast-csv');
+const through2 = require('through2');
+const adminLookupStream = require('pelias-wof-admin-lookup');
+
+const featureCountryFilterPostalStream = require('../streams/featureCountryFilterPostalStream');
+const peliasPostalDocGenerator = require('../streams/peliasPostalDocGenerator');
+const model = require('pelias-model');
+const overrideLookedUpLocalityAndLocaladmin = require('../streams/overrideLookedUpLocalityAndLocaladmin');
+
+// Names given to the columns of a parsed row, by position.
+const columns = [
+  'country_code',
+  'postal_code',
+  'place_name',
+  'admin_name1',
+  'admin_code1',
+  'admin_name2',
+  'admin_code2',
+  'admin_name3',
+  'admin_code3',
+  'latitude',
+  'longitude',
+  'accuracy'
+];
+
+/**
+ * Create an object-mode transform that converts each parsed row (an array of
+ * values) into an object keyed by the names in `columns`.
+ */
+function transformJSON() {
+  return through2.obj(function(row, enc, next) {
+    const record = {};
+    columns.forEach((name, index) => {
+      record[name] = row[index];
+    });
+    next(null, record);
+  });
+}
+
+/**
+ * Run the postal code import pipeline for one zipped source.
+ *
+ * @param {Stream} sourceStream readable stream of a zip archive
+ * @param {Stream} [endStream] final destination of the generated documents;
+ *   defaults to a new pelias-dbclient named 'geonames'
+ */
+module.exports = function (sourceStream, endStream) {
+  const destination = endStream || dbclient({name: 'geonames'});
+
+  sourceStream
+    // unzip only the first entry whose path does not start with 'readme'
+    .pipe( unzipper.ParseOne(/^(?!readme).*$/) )
+    .pipe( csv.parse({delimiter: '\t'}) )
+    .on('finish', () => {
+      console.log('1 file - done');
+    })
+    .pipe( transformJSON() )
+    .pipe( featureCountryFilterPostalStream.create() )
+    .pipe( peliasPostalDocGenerator.create() )
+    .pipe( adminLookupStream.create() )
+    .pipe( overrideLookedUpLocalityAndLocaladmin.create() )
+    .pipe( model.createDocumentMapperStream() )
+    .pipe( destination );
+};
diff --git a/lib/tasks/resolvers.js b/lib/tasks/resolvers.js
index 82b231f8..3fcc0609 100644
--- a/lib/tasks/resolvers.js
+++ b/lib/tasks/resolvers.js
@@ -28,8 +28,61 @@ function selectSource(filename) {
   return getLocalFileStream(filename) || getRemoteFileStream(filename);
 }
 
+// Archives that make up an 'allCountries' postal code import; the same list
+// is fetched by lib/tasks/downloadPostal.js.
+const allCountriesPostalFiles = ['allCountries', 'CA_full.csv', 'GB_full.csv', 'NL_full.csv'];
+
+/**
+ * Open read streams for the local postal code archives of a country.
+ *
+ * @param {string} country archive name: a country code or 'allCountries'
+ * @returns {Array|undefined} one stream per archive found in the postal
+ *   directory below basepath, or undefined when none exists so that callers
+ *   can fall back to the remote source
+ */
+function getLocalPostalFileStreams(country) {
+  const files = country === 'allCountries' ? allCountriesPostalFiles : [country];
+  const streams = [];
+
+  for (const file of files) {
+    const localFileName = util.format('%s/postal/%s.zip', basepath, file);
+    if (fs.existsSync(localFileName)) {
+      logger.info('reading datafile from disk at:', localFileName);
+      streams.push(fs.createReadStream(localFileName));
+    } else {
+      logger.warn(`${localFileName} doesn't exist.`);
+    }
+  }
+
+  // An empty array is truthy and would disable the `||` fallback to the
+  // remote source in selectPostalSources.
+  return streams.length > 0 ? streams : undefined;
+}
+
+/**
+ * Stream a postal code archive straight from download.geonames.org.
+ *
+ * @param {string} country archive name without the .zip extension
+ * @returns {Array} array holding the single request stream
+ */
+function getRemotePostalFileStreams(country) {
+  const remoteFilePath = util.format( 'http://download.geonames.org/export/zip/%s.zip', country );
+
+  logger.info( 'streaming datafile from:', remoteFilePath );
+  return [request.get( remoteFilePath )];
+}
+
+/**
+ * Resolve the zipped postal code sources for a country: local archives when
+ * present, otherwise the remote archive.
+ */
+function selectPostalSources(country) {
+  return getLocalPostalFileStreams(country) || getRemotePostalFileStreams(country);
+}
+
 module.exports = {
   getLocalFileStream: getLocalFileStream,
   getRemoteFileStream: getRemoteFileStream,
-  selectSource: selectSource
+  selectSource: selectSource,
+  selectPostalSources: selectPostalSources
 };
diff --git a/metadata/category_mapping.json b/metadata/category_mapping.json
index b0f46032..caa2d8ad 100644
--- a/metadata/category_mapping.json
+++ b/metadata/category_mapping.json
@@ -597,4 +597,4 @@
   "PNDSI": ["natural:water", "natural"],
   "PNDSF": ["natural:water", "natural"],
   "MTS": ["natural"]
-}
+}
\ No newline at end of file
diff --git a/metadata/popularity_mapping.json b/metadata/popularity_mapping.json
index fec7ec30..4ad69996 100644
--- a/metadata/popularity_mapping.json
+++ b/metadata/popularity_mapping.json
@@ -1,4 +1,4 @@
 {
   "HSTS": 5000,
   "RLG": 5000
-}
+}
\ No newline at end of file
diff --git a/package.json b/package.json
index 10a63326..a3b9e42a 100644
--- a/package.json
+++ b/package.json
@@ -8,12 +8,14 @@
   "scripts": {
     "download_metadata": "mkdirp metadata && node bin/updateMetadata.js",
     "download": "./bin/download",
+    "downloadPostal": "./bin/downloadPostal",
     "countryCodes": "node bin/viewCountryCodes.js",
     "functional": "./bin/functional",
     "import": "./bin/start",
     "lint": "jshint .",
     "postinstall": "npm run download_metadata",
     "start": "./bin/start",
+    "startPostal": "./bin/startPostal",
     "test": "NODE_ENV=test npm run units",
     "travis": "npm test && npm run functional",
     "units": "./bin/units",
@@ -40,6 +42,8 @@
     "JSONStream": "^1.0.7",
     "cli-table": "^0.3.0",
     "csv-parse": "^4.8.2",
+    "etl": "^0.6.12",
+    "fast-csv": "^4.3.6",
     "geonames-stream": "^2.0.3",
     "lodash": "^4.17.15",
     "mkdirp": "^1.0.0",
@@ -50,7 +54,7 @@
     "pelias-model": "^9.0.0",
     "pelias-wof-admin-lookup": "^7.3.0",
     "request": "^2.34.0",
-    "through2": "^3.0.0",
+    "through2": "^3.0.2",
     "through2-filter": "^3.0.0",
     "through2-sink": "^1.0.0",
     "unzipper": "^0.10.0"