Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add geonames postal code - need improvement #396

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions bin/downloadPostal
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash

# Launcher for the postal-code download step.
# `exec` replaces this shell with the Node process, so the exit status of
# downloadPostalData.js becomes the exit status of this script.
# The script path is relative: run this from the repository root.
exec node ./bin/downloadPostalData.js
11 changes: 11 additions & 0 deletions bin/downloadPostalData.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
'use strict';

// Command-line entry point for the postal-code download: read the configured
// country code, validate it, and pass the matching archive name to the
// download task.
const peliasConfig = require('pelias-config').generate();
const validateISOCode = require('../lib/validateISOCode');

const isoCode = validateISOCode(peliasConfig.imports.geonames.countryCode);

// The special value 'ALL' maps onto the archive named 'allCountries';
// any other validated code is used as the archive name unchanged.
let archiveName = isoCode;
if (isoCode === 'ALL') {
  archiveName = 'allCountries';
}

const downloadPostal = require('../lib/tasks/downloadPostal');
downloadPostal(archiveName);
3 changes: 3 additions & 0 deletions bin/startPostal
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash

# Launcher for the postal-code import step.
# `exec` replaces this shell with the Node process running importPostal.js.
# --max_old_space_size=8000 is passed to Node to raise its heap limit
# (the value is in megabytes per the Node/V8 option documentation).
# The script path is relative: run this from the repository root.
exec node --max_old_space_size=8000 importPostal.js
26 changes: 26 additions & 0 deletions importPostal.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Command-line entry point for the Geonames postal-code import.
const peliasConfig = require('pelias-config').generate();
const _ = require('lodash');
const logger = require('pelias-logger').get('geonames');

// Tell users who still set the old per-importer flag where the setting moved.
const usesDeprecatedAdminLookup = _.has(peliasConfig, 'imports.geonames.adminLookup');
if (usesDeprecatedAdminLookup) {
  logger.info('imports.geonames.adminLookup has been deprecated, ' +
    'enable adminLookup using imports.adminLookup.enabled = true');
}

const resolvers = require( './lib/tasks/resolvers' );
const importPostal = require('./lib/tasks/importPostal');
const validateISOCode = require('./lib/validateISOCode');

const isoCode = validateISOCode( peliasConfig.imports.geonames.countryCode );

// The special value 'ALL' maps onto the archive named 'allCountries'.
let archiveName = isoCode;
if (isoCode === 'ALL') {
  archiveName = 'allCountries';
}

// Every resolved source stream is handed to the import task on its own;
// no end stream is passed, so the task falls back to its default.
resolvers.selectPostalSources( archiveName ).forEach((sourceStream) => {
  importPostal( sourceStream );
});



21 changes: 21 additions & 0 deletions lib/streams/featureCountryFilterPostalStream.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
var filter = require('through2-filter');
var _ = require( 'lodash' );
var unwantedFcodes = ['CA','GB','NL'];
const config = require('pelias-config').generate();

/**
 * Decide whether a postal-code record should continue down the pipeline.
 *
 * Records are only ever rejected when the configured country code is 'ALL'
 * and the record's country is in `unwantedFcodes`; in that case the record is
 * kept only if its postal code is longer than four characters.
 * (Presumably the short codes are truncated duplicates of the entries in the
 * per-country "full" archives; confirm against the Geonames readme.)
 *
 * @param {object} data record with `country_code` and `postal_code` fields
 * @returns {boolean} true to keep the record, false to drop it
 */
function filterRecord(data) {
  const importingAllCountries = config.imports.geonames.countryCode === 'ALL';
  if (!importingAllCountries) {
    return true;
  }

  const hasFullArchive = _.includes(unwantedFcodes, data.country_code);
  if (!hasFullArchive) {
    return true;
  }

  return data.postal_code.length > 4;
}


/**
 * Build an object-mode filter stream that keeps only the records
 * accepted by `filterRecord`.
 *
 * @returns {object} the stream produced by `filter.obj`
 */
function create() {
  const filterStream = filter.obj(filterRecord);
  return filterStream;
}

// Public API: the bare predicate and the stream factory built on top of it.
module.exports = { filterRecord, create };
43 changes: 43 additions & 0 deletions lib/streams/peliasPostalDocGenerator.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
var Document = require( 'pelias-model' ).Document;
var logger = require( 'pelias-logger' ).get( 'geonames' );
var through2 = require('through2');
module.exports = {};

/**
 * Remove a leading country code (and one optional '-' or whitespace separator)
 * from a postal code, e.g. 'LV-1001' -> '1001', 'AZ 1000' -> '1000',
 * 'AD100' -> '100'. Codes that merely contain the country letters somewhere
 * else (e.g. 'NG8 1GB' for GB) are returned untouched.
 *
 * @param {string} country_code ISO country code of the record
 * @param {string} postal_code raw postal code of the record
 * @returns {string} the postal code without its country prefix
 */
function stripCountryPrefix(country_code, postal_code) {
  if (!country_code || !postal_code.startsWith(country_code)) {
    return postal_code;
  }
  return postal_code.slice(country_code.length).replace(/^[-\s]/, '');
}

/**
 * Create an object-mode transform stream that turns parsed postal-code rows
 * into pelias-model Documents (source 'geonames', layer 'postalcode').
 *
 * Each input object is expected to carry `country_code`, `postal_code`,
 * `latitude` and `longitude`. Rows that cannot be converted are logged with
 * a warning and skipped; they never abort the stream.
 *
 * @returns {object} a through2 object stream emitting Document instances
 */
module.exports.create = function() {
  return through2.obj(function(data, enc, next) {
    let record;
    try {
      const country_code = data.country_code;
      const postal_code = data.postal_code;

      // The id is "<country><code without prefix and spaces>". Only a genuine
      // leading country prefix is stripped, so ids stay unique per code.
      const postal_code_clean = stripCountryPrefix(country_code, postal_code).replace(/ /g, '');
      const id = `${country_code}${postal_code_clean}`;

      // Codes written with spaces also get a space-free spelling in the name.
      const alias = postal_code.includes(' ') ? postal_code.replace(/ /g, '') : null;
      const displayName = alias === null ? postal_code : `${postal_code}, ${alias}`;

      // NOTE(review): `alias` is passed as the abbreviation argument and is
      // null for codes without a space - confirm pelias-model accepts null here.
      record = new Document('geonames', 'postalcode', id)
        .setName('default', displayName)
        .setSource('geonames')
        .setCentroid({
          lat: data.latitude,
          lon: data.longitude
        })
        .setPopularity(9000)
        .addParent('postalcode', postal_code, id, alias, 'geonames');
    } catch (e) {
      logger.warn(
        'Failed to create a Document from:', data, 'Exception:', e
      );
    }

    if (record !== undefined) {
      this.push(record);
    }
    next();
  });
};
68 changes: 68 additions & 0 deletions lib/tasks/downloadPostal.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
const child_process = require('child_process');
const fs = require('fs');

const logger = require('pelias-logger').get('geonames');

// use datapath setting from your config file
const config = require('pelias-config').generate();
const basepath = config.imports.geonames.datapath;
const sourceURL = config.imports.geonames.sourceURL;

module.exports = function (countryCode) {

fs.mkdirSync(basepath, {recursive: true});
fs.mkdirSync(`${basepath}/postal`, {recursive: true});


const urlPrefix = sourceURL || 'http://download.geonames.org/export/zip';
const remoteFilePath = `${urlPrefix}/${countryCode}.zip`;


const localFileName = `${basepath}/postal/${countryCode}.zip`;
logger.info('downloading datafile from:', remoteFilePath);

const command = `curl ${remoteFilePath} > ${localFileName}`;


if (countryCode === 'allCountries') {
const full_countries = [ 'allCountries','CA_full.csv', 'GB_full.csv', 'NL_full.csv'];
const jobs = [];
for (const countryCode of full_countries) {
const localFileName = `${basepath}/postal/${countryCode}.zip`;
const remoteFilePath = `${urlPrefix}/${countryCode}.zip`;

logger.info('downloading datafile from:', remoteFilePath);
const command = `curl ${remoteFilePath} > ${localFileName}`;
jobs.push(child_process.exec(command));

}
jobs.forEach((job)=>{
job.stdout.on('data', (data) => {
process.stdout.write(data);
});

job.stderr.on('data', (data) => {
process.stderr.write(data);
});
job.on('close', (code) => {
process.exitCode = code;
});
});
} else {
let job = child_process.exec(command);
job.stdout.on('data', (data) => {
process.stdout.write(data);
});

job.stderr.on('data', (data) => {
process.stderr.write(data);
});

job.on('close', (code) => {
console.log(`Postal Codes download finished with exit code ${code}`);
process.exitCode = code;
});
}


};
2 changes: 1 addition & 1 deletion lib/tasks/import.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,6 @@ module.exports = function( sourceStream, endStream ){
.pipe( blacklistStream() )
.pipe( adminLookupStream.create() )
.pipe( overrideLookedUpLocalityAndLocaladmin.create() )
.pipe(model.createDocumentMapperStream())
.pipe( model.createDocumentMapperStream() )
.pipe( endStream );
};
58 changes: 58 additions & 0 deletions lib/tasks/importPostal.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
var dbclient = require('pelias-dbclient');
var unzipper = require('unzipper');
var csv = require('fast-csv');
const through2 = require('through2');
var adminLookupStream = require('pelias-wof-admin-lookup');

var featureCountryFilterPostalStream = require('../streams/featureCountryFilterPostalStream');
var peliasPostalDocGenerator = require('../streams/peliasPostalDocGenerator');
var model = require('pelias-model');
const overrideLookedUpLocalityAndLocaladmin = require('../streams/overrideLookedUpLocalityAndLocaladmin');



/**
 * Create an object-mode transform stream that converts a positional row
 * (as emitted by the CSV parser) into an object with named fields.
 * Column i of the row becomes the property named by entry i of the table
 * below; missing columns yield `undefined` properties.
 *
 * @returns {object} a through2 object stream emitting one object per row
 */
var transformJSON = function() {
  // Property names in the order the columns appear in a row.
  const columnNames = [
    'country_code',
    'postal_code',
    'place_name',
    'admin_name1',
    'admin_code1',
    'admin_name2',
    'admin_code2',
    'admin_name3',
    'admin_code3',
    'latitude',
    'longitude',
    'accuracy'
  ];

  return through2.obj(function(row, enc, next) {
    const record = {};
    columnNames.forEach((columnName, index) => {
      record[columnName] = row[index];
    });
    next(null, record);
  });
};

module.exports = function (sourceStream, endStream) {
endStream = endStream || dbclient({name: 'geonames'});

sourceStream
.pipe(unzipper.ParseOne('^(?!readme).*$'))
.pipe(csv.parse({delimiter:'\t'}))
.on('finish',()=>{
console.log('1 file - done');
})
.pipe(transformJSON())
.pipe(featureCountryFilterPostalStream.create())
.pipe( peliasPostalDocGenerator.create() )
.pipe( adminLookupStream.create() )
.pipe( overrideLookedUpLocalityAndLocaladmin.create() )
.pipe( model.createDocumentMapperStream() )
// .on('data',(data)=>{
// console.log(JSON.stringify(data));
// });
.pipe( endStream );





};
44 changes: 43 additions & 1 deletion lib/tasks/resolvers.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,50 @@ function selectSource(filename) {
return getLocalFileStream(filename) || getRemoteFileStream(filename);
}


/**
 * Open read streams for the postal-code archives already present on disk
 * under `<basepath>/postal/`.
 *
 * For 'allCountries' this covers the main archive plus the CA/GB/NL "full"
 * archives (the same set the download task fetches); archives missing from
 * that set are reported with a warning and skipped. For any other value only
 * `<country>.zip` is looked up.
 *
 * @param {string} country archive name: an ISO code or 'allCountries'
 * @returns {Array|undefined} one read stream per archive found, or undefined
 *   when none exists so that callers can fall back to another source
 */
function getLocalPostalFileStreams(country) {
  const importingAll = country === 'allCountries';
  const archiveNames = importingAll ?
    ['allCountries', 'CA_full.csv', 'GB_full.csv', 'NL_full.csv'] :
    [country];

  const postalFileStreams = [];
  for (const archiveName of archiveNames) {
    const localFileName = util.format('%s/postal/%s.zip', basepath, archiveName);

    if (fs.existsSync(localFileName)) {
      logger.info('reading datafile from disk at:', localFileName);
      postalFileStreams.push(fs.createReadStream(localFileName));
    } else if (importingAll) {
      logger.warn(`${localFileName} doesn't exist.`);
    }
  }

  // An empty array is truthy and would defeat `local || remote` fallbacks,
  // so "nothing found" is reported as undefined.
  return postalFileStreams.length > 0 ? postalFileStreams : undefined;
}

/**
 * Start streaming a postal-code archive straight from download.geonames.org.
 *
 * @param {string} country archive name without the `.zip` extension
 * @returns {Array} a single-element array holding the HTTP request stream
 */
function getRemotePostalFileStreams(country) {
  const archiveUrl = util.format( 'http://download.geonames.org/export/zip/%s.zip', country );
  logger.info( 'streaming datafile from:', archiveUrl );

  const downloadStream = request.get( archiveUrl );
  return [downloadStream];
}

/**
 * Pick the source streams for a postal-code import: whatever the local
 * lookup returns when that value is truthy, otherwise the remote download.
 *
 * @param {string} country archive name: an ISO code or 'allCountries'
 * @returns {Array} the streams to import
 */
function selectPostalSources(country) {
  const localStreams = getLocalPostalFileStreams(country);
  if (localStreams) {
    return localStreams;
  }
  return getRemotePostalFileStreams(country);
}

module.exports = {
getLocalFileStream: getLocalFileStream,
getRemoteFileStream: getRemoteFileStream,
selectSource: selectSource
selectSource: selectSource,
selectPostalSources:selectPostalSources
};
2 changes: 1 addition & 1 deletion metadata/category_mapping.json
Original file line number Diff line number Diff line change
Expand Up @@ -597,4 +597,4 @@
"PNDSI": ["natural:water", "natural"],
"PNDSF": ["natural:water", "natural"],
"MTS": ["natural"]
}
}
2 changes: 1 addition & 1 deletion metadata/popularity_mapping.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"HSTS": 5000,
"RLG": 5000
}
}
6 changes: 5 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@
"scripts": {
"download_metadata": "mkdirp metadata && node bin/updateMetadata.js",
"download": "./bin/download",
"downloadPostal": "./bin/downloadPostal",
"countryCodes": "node bin/viewCountryCodes.js",
"functional": "./bin/functional",
"import": "./bin/start",
"lint": "jshint .",
"postinstall": "npm run download_metadata",
"start": "./bin/start",
"startPostal": "./bin/startPostal",
"test": "NODE_ENV=test npm run units",
"travis": "npm test && npm run functional",
"units": "./bin/units",
Expand All @@ -40,6 +42,8 @@
"JSONStream": "^1.0.7",
"cli-table": "^0.3.0",
"csv-parse": "^4.8.2",
"etl": "^0.6.12",
"fast-csv": "^4.3.6",
"geonames-stream": "^2.0.3",
"lodash": "^4.17.15",
"mkdirp": "^1.0.0",
Expand All @@ -50,7 +54,7 @@
"pelias-model": "^9.0.0",
"pelias-wof-admin-lookup": "^7.3.0",
"request": "^2.34.0",
"through2": "^3.0.0",
"through2": "^3.0.2",
"through2-filter": "^3.0.0",
"through2-sink": "^1.0.0",
"unzipper": "^0.10.0"
Expand Down