Skip to content

Commit

Permalink
feat(data point): data point validation skeleton
Browse files Browse the repository at this point in the history
Closes #26
  • Loading branch information
buchslava committed Apr 4, 2016
1 parent dac7be0 commit ab24c4a
Show file tree
Hide file tree
Showing 13 changed files with 233 additions and 13 deletions.
44 changes: 38 additions & 6 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
'use strict';

const path = require('path');
const lodash = require('lodash');
const async = require('async');
const _ = require('lodash');
const utils = require('./lib/utils');
const fileUtils = require('./lib/utils/file');
const DdfIndexGenerator = require('./lib/ddf-definitions/ddf-index-generator');
const DdfData = require('./lib/ddf-definitions/ddf-data');
const ddfRules = require('./lib/ddf-rules');
const ddfDataPointRules = require('./lib/ddf-rules/data-point-rules');
const logger = utils.logger;

if (utils.settings.isIndexGenerationMode === true) {
Expand Down Expand Up @@ -35,27 +37,57 @@ if (utils.settings.isIndexGenerationMode === true) {

if (utils.settings.isIndexGenerationMode === false) {
const ddfData = new DdfData(utils.ddfRootFolder);
const out = [];

let out = [];

ddfData.load(() => {
ddfRules.forEach(ruleSet => {
Object.getOwnPropertySymbols(ruleSet).forEach(key => {
const result = ruleSet[key](ddfData);

if (!lodash.isArray(result) && !lodash.isEmpty(result)) {
if (!_.isArray(result) && !_.isEmpty(result)) {
out.push(result.view());
}

if (lodash.isArray(result) && !lodash.isEmpty(result)) {
if (_.isArray(result) && !_.isEmpty(result)) {
result.forEach(resultRecord => {
out.push(resultRecord.view());
});
}
});
});

logger.notice(JSON.stringify(out));
/**
 * Builds an asynchronous task that validates one data point file.
 *
 * The returned task loads the content described by `detail`, applies every
 * rule exposed (under a symbol key) by `ddfDataPointRules`, appends each
 * non-empty result to the shared `out` list, drops the loaded content and
 * then calls its completion callback without arguments.
 *
 * @param {Object} detail - data point detail handed to `loadDetail` and to each rule
 * @returns {Function} task taking a single completion callback
 */
function prepareDataPointProcessor(detail) {
  // Runs all data point rules against the currently loaded content.
  const applyRules = () => {
    for (const ruleKey of Object.getOwnPropertySymbols(ddfDataPointRules)) {
      const issues = ddfDataPointRules[ruleKey](ddfData, detail);

      if (_.isEmpty(issues)) {
        continue;
      }

      out = out.concat(issues);
    }
  };

  return done => {
    ddfData.getDataPoint().loadDetail(detail, () => {
      applyRules();

      // Release the file content before the next task loads its own.
      ddfData.getDataPoint().removeAllData();
      done();
    });
  };
}

const dataPointActions = [];

ddfData.getDataPoint().details.forEach(detail => {
dataPointActions.push(prepareDataPointProcessor(detail));
});

async.waterfall(dataPointActions, err => {
if (err) {
throw err;
}

logger.notice(JSON.stringify(out));

ddfData.dismiss();
ddfData.dismiss();
});
});
}
8 changes: 8 additions & 0 deletions lib/data/file-descriptor.js
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,14 @@ class FileDescriptor {
});
}

is(type) {
if (!_.isArray(type)) {
return type === this.type;
}

return _.includes(type, this.type);
}

check(cb) {
async.parallel(getIssueCases(this), (err, results) => {
if (err) {
Expand Down
27 changes: 22 additions & 5 deletions lib/ddf-definitions/data-point.js
Original file line number Diff line number Diff line change
@@ -1,18 +1,35 @@
'use strict';

const fileUtils = require('../utils/file');

class DataPoint {
constructor(db, fileDescriptor) {
this.db = db;
this.fileDescriptor = fileDescriptor;
constructor() {
this.details = [];
this.header = [];
this.content = [];
}

getAllData() {
return this.collection.find();
}

addDetail(fileDescriptor, header) {
this.details.push({fileDescriptor, header});
removeAllData() {
this.content = [];
}

addDetail(fileDescriptor) {
this.details.push({fileDescriptor, header: this.header});
}

loadDetail(detail, cb) {
fileUtils.readFile(detail.fileDescriptor.fullPath, (err, content) => {
if (err) {
throw err;
}

this.content = content;
cb();
});
}
}

Expand Down
12 changes: 11 additions & 1 deletion lib/ddf-definitions/ddf-data.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,16 @@ class DdfData {
function processDirectoryDescriptor(directoriesDescriptor) {
directoriesDescriptor.fileDescriptors
.forEach(fileDescriptor => {
if (fileDescriptor.type) {
if (fileDescriptor.is(constants.DATA_POINT)) {
loaders.push(_cb => {
fileDescriptor.fillHeaders(() => {
that.expectedClass[fileDescriptor.type].addDetail(fileDescriptor);
_cb();
});
});
}

if (fileDescriptor.is([constants.CONCEPT, constants.ENTITY])) {
loaders.push(_cb => {
that.db
.fillCollection(
Expand Down Expand Up @@ -59,6 +68,7 @@ class DdfData {
});
}

// todo: provide a default value once it is supported
dismiss(_cb) {
let cb = _cb;

Expand Down
30 changes: 30 additions & 0 deletions lib/ddf-rules/data-point-rules.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
'use strict';

const registry = require('./registry');
const Issue = require('./issue');
const LINE_NUM_INCLUDING_HEADER = 2;

module.exports = {
[registry.DATA_POINT_VALUE_NOT_NUMERIC]: (ddfData, dataPointDetail) => {
const result = [];

dataPointDetail.fileDescriptor.details.measures.forEach(measure => {
ddfData.getDataPoint().content.forEach((dataPointRecord, line) => {
if (isNaN(dataPointRecord[measure]) === true) {
result.push(
new Issue(
registry.DATA_POINT_VALUE_NOT_NUMERIC,
dataPointDetail.fileDescriptor.fullPath,
{
measure,
line: line + LINE_NUM_INCLUDING_HEADER,
value: dataPointRecord[measure]
})
);
}
});
});

return result;
}
};
4 changes: 3 additions & 1 deletion lib/ddf-rules/registry.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ exports.INDEX_IS_NOT_FOUND = Symbol.for('INDEX_IS_NOT_FOUND');
exports.INCORRECT_FILE = Symbol.for('INCORRECT_FILE');
exports.CONCEPT_ID_IS_NOT_UNIQUE = Symbol.for('CONCEPT_ID_IS_NOT_UNIQUE');
exports.ENTITY_HEADER_IS_NOT_CONCEPT = Symbol.for('ENTITY_HEADER_IS_NOT_CONCEPT');
exports.DATA_POINT_VALUE_NOT_NUMERIC = Symbol.for('DATA_POINT_VALUE_NOT_NUMERIC');

exports.descriptions = {
[exports.INDEX_IS_NOT_FOUND]: 'Index is not found',
[exports.INCORRECT_FILE]: 'Incorrect file',
[exports.CONCEPT_ID_IS_NOT_UNIQUE]: 'Concept Id is not unique',
[exports.ENTITY_HEADER_IS_NOT_CONCEPT]: 'Entity header is not correct'
[exports.ENTITY_HEADER_IS_NOT_CONCEPT]: 'Entity header is not correct',
[exports.DATA_POINT_VALUE_NOT_NUMERIC]: 'Measure in data point has not numeric type'
};
36 changes: 36 additions & 0 deletions lib/utils/file.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
'use strict';
const path = require('path');
const fs = require('fs');
const csv = require('csv-stream');
const CSV_OPTIONS = {
escapeChar: '"',
enclosedChar: '"'
};

/* eslint-disable */
const homeFolder = process.platform === 'win32' ? process.env.USERPROFILE : process.env.HOME;
/* eslint-disable */
Expand Down Expand Up @@ -105,8 +111,38 @@ function getFileLine(filename, lineNo, callback) {
});
}

/**
 * Reads a CSV file and collects its rows as an array of plain objects.
 *
 * Each row object maps a column name to its value; columns whose value is
 * falsy (for example an empty cell) are omitted from the row.
 *
 * @param {string} filePath - path of the CSV file to read
 * @param {Function} cb - node-style callback, invoked exactly once:
 *   `cb(err)` on the first stream or parser error, `cb(null, rows)` on success
 */
function readFile(filePath, cb) {
  const csvStream = csv.createStream(CSV_OPTIONS);
  const fileStream = fs.createReadStream(filePath);
  const content = [];

  let ddfRecord = {};
  let isFinished = false;

  // Either stream may fail, and events can keep arriving afterwards;
  // make sure the caller is notified only once.
  function finish(err) {
    if (isFinished) {
      return;
    }

    isFinished = true;

    if (err) {
      cb(err);
      return;
    }

    cb(null, content);
  }

  csvStream.on('error', finish);
  csvStream.on('column', (key, value) => {
    if (value) {
      ddfRecord[key] = value;
    }
  });
  csvStream.on('data', () => {
    // The row is assembled from the 'column' events, so only the filtered
    // record is stored and the accumulator is reset for the next row.
    content.push(ddfRecord);
    ddfRecord = {};
  });
  // Complete on the parser's 'end' rather than the file's, so the last row
  // has been flushed before the rows are handed to the caller.
  csvStream.on('end', () => finish());

  fileStream.on('error', finish);

  // Pipe exactly once: piping from inside a 'readable' handler re-attaches
  // the destination every time the event fires.
  fileStream.pipe(csvStream);
}

exports.norm = norm;
exports.walkDir = walk;
exports.readDir = read;
exports.writeFile = writeFile;
exports.getFileLine = getFileLine;
exports.readFile = readFile;
62 changes: 62 additions & 0 deletions test/ddf-data-point-rules.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
'use strict';
const chai = require('chai');
const sinonChai = require('sinon-chai');
const DdfData = require('../lib/ddf-definitions/ddf-data');
const rulesRegistry = require('../lib/ddf-rules/registry');
const dataPointsRules = require('../lib/ddf-rules/data-point-rules');
const expect = chai.expect;

chai.use(sinonChai);

// Specs for the data point rules: each case loads a fixture folder, loads the
// first data point detail and runs the rule against it.
describe('rules for data points', () => {
  let ddfData = null;

  /**
   * Loads `folder`, loads its first data point detail and passes the issues
   * produced by the DATA_POINT_VALUE_NOT_NUMERIC rule to `onIssues`.
   */
  const runNotNumericRule = (folder, onIssues) => {
    ddfData = new DdfData(folder);
    ddfData.load(() => {
      const rule = dataPointsRules[rulesRegistry.DATA_POINT_VALUE_NOT_NUMERIC];
      const detail = ddfData.getDataPoint().details[0];

      ddfData.getDataPoint().loadDetail(detail, () => {
        onIssues(rule(ddfData, detail));
      });
    });
  };

  describe('when "DATA_POINT_VALUE_NOT_NUMERIC" rule', () => {
    // Dismiss the data created by the test that has just run.
    afterEach(done => {
      ddfData.dismiss(() => {
        done();
      });
    });

    it('any issue should NOT be found for folder without the problem (fixtures/good-folder)', done => {
      runNotNumericRule('./test/fixtures/good-folder', issues => {
        expect(issues.length).to.equal(0);

        done();
      });
    });

    it(`an issue should be found for folder with the problem
(fixtures/rules-cases/data-point-value-not-num)`, done => {
      const expected = {
        fileName: 'ddf--datapoints--pop--by--country--year.csv',
        measure: 'pop',
        line: 2,
        value: 'huge'
      };

      runNotNumericRule('./test/fixtures/rules-cases/data-point-value-not-num', issues => {
        expect(issues.length).to.equal(1);

        const issue = issues[0];

        expect(issue.type).to.equal(rulesRegistry.DATA_POINT_VALUE_NOT_NUMERIC);
        expect(issue.path.endsWith(expected.fileName)).to.be.true;
        expect(!!issue.data).to.be.true;
        expect(issue.data.measure).to.equal(expected.measure);
        expect(issue.data.line).to.equal(expected.line);
        expect(issue.data.value).to.equal(expected.value);

        done();
      });
    });
  });
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
country,year,pop
usa,1960,100000
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
concept,type,domain,name
lat,measure,,Latitude
lng,measure,,Longitude
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
concept,type,domain,name
name,string,,
geo,entity domain,,
region,entity set,geo,Region
country,entity set,geoCountry
capital,entity set,geo,Capital
pop,measure,geo,Population
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
country,year,pop
usa,1960,huge
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
geo,name,lat,lng,is--region,is--country,is--capital
and,Andorra,,,0,1,0
afg,Afghanistan,,,0,1,0
dza,Algeria,,,0,1,0
africa,Africa,,,1,0,0
europe,Europe,,,1,0,0
americas,Americas,,,1,0,0
asia,Asia,,,1,0,0
vat,Vatican,,,0,1,1

0 comments on commit ab24c4a

Please sign in to comment.