From d2a6924dca5d610ad557aa2bc58475b0ae6c3caf Mon Sep 17 00:00:00 2001 From: buchslava Date: Thu, 12 May 2016 11:00:32 +0300 Subject: [PATCH] feat(rules): WRONG_ENTITY_IS_HEADER, NON_CONCEPT_HEADER and WRONG_ENTITY_IS_VALUE rules Closes #81 #82 #83 #56 --- doc/rules/NON_CONCEPT_HEADER.md | 50 ++++++++ doc/rules/WRONG_ENTITY_IS_HEADER.md | 78 +++++++++++++ doc/rules/WRONG_ENTITY_IS_VALUE.md | 33 ++++++ lib/ddf-definitions/concept.js | 9 ++ lib/ddf-definitions/constants.js | 2 + lib/ddf-definitions/ddf-json-corrector.js | 5 +- lib/ddf-rules/concept-rules.js | 65 +++++++++++ lib/ddf-rules/data-point-rules.js | 12 +- lib/ddf-rules/entity-rules.js | 107 +++++++++++++----- lib/ddf-rules/registry.js | 10 +- test/concept-rules.spec.js | 47 ++++++++ test/entry-rules.spec.js | 82 ++++++++++++-- .../concept-is-not-unique/ddf--concepts.csv | 12 +- .../ddf--concepts.csv | 8 +- .../ddf--concepts.csv | 9 -- .../ddf--entities--geo--country.csv | 2 - .../ddf--concepts--measures.csv | 3 + .../non-concept-header/ddf--concepts.csv | 8 ++ ...df--datapoints--pop--by--country--year.csv | 2 + .../ddf--entities--geo--country.csv | 9 ++ .../wrong-entity-is-header/ddf--concepts.csv | 9 ++ .../ddf--entities--geo--country.csv | 2 + .../wrong-entity-is-value/ddf--concepts.csv | 4 + .../ddf--entities--geo--country.csv | 4 + 24 files changed, 500 insertions(+), 72 deletions(-) create mode 100644 doc/rules/NON_CONCEPT_HEADER.md create mode 100644 doc/rules/WRONG_ENTITY_IS_HEADER.md create mode 100644 doc/rules/WRONG_ENTITY_IS_VALUE.md delete mode 100644 test/fixtures/rules-cases/entity-header-is-no-concept/ddf--concepts.csv delete mode 100644 test/fixtures/rules-cases/entity-header-is-no-concept/ddf--entities--geo--country.csv create mode 100644 test/fixtures/rules-cases/non-concept-header/ddf--concepts--measures.csv create mode 100644 test/fixtures/rules-cases/non-concept-header/ddf--concepts.csv create mode 100644 
test/fixtures/rules-cases/non-concept-header/ddf--datapoints--pop--by--country--year.csv create mode 100644 test/fixtures/rules-cases/non-concept-header/ddf--entities--geo--country.csv create mode 100644 test/fixtures/rules-cases/wrong-entity-is-header/ddf--concepts.csv create mode 100644 test/fixtures/rules-cases/wrong-entity-is-header/ddf--entities--geo--country.csv create mode 100644 test/fixtures/rules-cases/wrong-entity-is-value/ddf--concepts.csv create mode 100644 test/fixtures/rules-cases/wrong-entity-is-value/ddf--entities--geo--country.csv diff --git a/doc/rules/NON_CONCEPT_HEADER.md b/doc/rules/NON_CONCEPT_HEADER.md new file mode 100644 index 0000000..3cf6f45 --- /dev/null +++ b/doc/rules/NON_CONCEPT_HEADER.md @@ -0,0 +1,50 @@ +# NON_CONCEPT_HEADER + +## Rule test folder + +`test/fixtures/rules-cases/non-concept-header` + +## Description +Each part of any header should be concept (is-- fields are excluded in this case) + +## Examples of correct data + +ddf--concepts.csv +``` +concept,concept_type,domain,name +name,string,, +geo,entity_domain,, +country,entity_set,geo,Country +pop,measure,geo,Population +year,time,,year +``` + +ddf--datapoints--pop--by--country--year.csv +``` +country,year,pop +vat,1960,100000 +``` + +## Examples of incorrect data + +ddf--concepts.csv +``` +concept,concept_type,domain,name +name,string,, +geo,entity_domain,, +country,entity_set,geo,Country +pop,measure,geo,Population +year,time,,year +``` + +ddf--datapoints--pop--by--country--year.csv +``` +countryFOO,year,pop +vat,1960,100000 +``` + +## Output data format + +Should be included next information: + +incorrect header value diff --git a/doc/rules/WRONG_ENTITY_IS_HEADER.md b/doc/rules/WRONG_ENTITY_IS_HEADER.md new file mode 100644 index 0000000..dab5229 --- /dev/null +++ b/doc/rules/WRONG_ENTITY_IS_HEADER.md @@ -0,0 +1,78 @@ +# WRONG_ENTITY_IS_HEADER + +## Rule test folder + +`test/fixtures/rules-cases/wrong-entity-is-header` + +## Description +An issue according to this rule 
will be fired when `is--header` in concept is defined and not valid: not a concept with `entity_set` type + +## Examples of correct data + +`ddf--concepts.csv` +``` +"concept","name","concept_type","domain", +"income_groups","Income groups","entity_set","geo", +"geo","Geographic location","entity_domain",, +``` +and +`ddf--entities--geo--income_groups.csv` +``` +income_groups,name,gwid,is--income_groups +high_income,High income,i268,TRUE +lower_middle_income,Lower middle income,i269,TRUE +low_income,Low income,i266,TRUE +upper_middle_income,Upper middle income,i267,TRUE +``` + +## Examples of incorrect data + +`ddf--concepts.csv` +``` +"concept","name","concept_type","domain", +"income_groups","Income groups","entity_set","geo", +"geo","Geographic location","entity_domain",, +``` +and +`ddf--entities--geo--income_groups.csv` +``` +income_groups,name,gwid,is--foo_groups +high_income,High income,i268,TRUE +lower_middle_income,Lower middle income,i269,TRUE +low_income,Low income,i266,TRUE +upper_middle_income,Upper middle income,i267,TRUE +``` + +## Output data format + +* `message` - kind of issue. It should be `Not a concept` or `Wrong concept type` +* `header name` - csv's column name + +### Additional information + +is--header is not mandatory anywhere, absence just means all entities have value false for that is--header. + +only error when: `is--xxx` is used when `xxx` is not defined in concepts as an entity_set. No other case should give an error. 
+ +So the following is also valid (though the `is--country` is nonsensical): +`ddf--concepts.csv` +``` +"concept","name","concept_type","domain", +"income_groups","Income groups","entity_set","geo", +"geo","Geographic location","entity_domain",, +"country","Country","entity_set","geo" +``` +`incomegroups.csv` +``` +income_groups,name,gwid,is--income_groups,is--country +high_income,High income,i268,TRUE,FALSE +lower_middle_income,Lower middle income,i269,TRUE,FALSE +low_income,Low income,i266,TRUE,TRUE +upper_middle_income,Upper middle income,i267,TRUE,TRUE +``` +`ddf--index.csv` +``` +"key","value","file" +"income_groups","name","incomegroups.csv" +"income_groups","gwid","incomegroups.csv" +``` diff --git a/doc/rules/WRONG_ENTITY_IS_VALUE.md b/doc/rules/WRONG_ENTITY_IS_VALUE.md new file mode 100644 index 0000000..05c72de --- /dev/null +++ b/doc/rules/WRONG_ENTITY_IS_VALUE.md @@ -0,0 +1,33 @@ +# WRONG_ENTITY_IS_VALUE + +## Rule test folder + +`test/fixtures/rules-cases/wrong-entity-is-value` + +## Description +An issue according to this rule will be fired when a value under an `is--` header doesn't look like a boolean + +## Examples of correct data + +`ddf--entities--geo--income_groups.csv` +``` +income_groups,name,gwid,is--income_groups +high_income,High income,i268,TRUE +lower_middle_income,Lower middle income,i269,TRUE +low_income,Low income,i266,TRUE +upper_middle_income,Upper middle income,i267,TRUE +``` + +## Examples of incorrect data + +`ddf--entities--geo--income_groups.csv` +``` +income_groups,name,gwid,is--income_groups +high_income,High income,i268,FOO +``` + +## Output data format + +* `header name` - csv's column name +* `header value` +* `line in csv` diff --git a/lib/ddf-definitions/concept.js b/lib/ddf-definitions/concept.js index 36b7852..79a2be3 100644 --- a/lib/ddf-definitions/concept.js +++ b/lib/ddf-definitions/concept.js @@ -14,6 +14,15 @@ class Concept { return this.collection.find(); } + /*eslint camelcase: ["error", {properties: "never"}]*/ + 
getDataIdsByType(type) { + return this.collection.find({concept_type: type}).map(record => record.concept); + } + + getRecordByKey(concept) { + return _.head(this.collection.find({concept})); + } + getDataByFiles() { return _.groupBy(this.getAllData(), record => record.$$source); } diff --git a/lib/ddf-definitions/constants.js b/lib/ddf-definitions/constants.js index f26ab48..8d12020 100644 --- a/lib/ddf-definitions/constants.js +++ b/lib/ddf-definitions/constants.js @@ -3,3 +3,5 @@ exports.CONCEPT = Symbol.for('concepts'); exports.ENTITY = Symbol.for('entities'); exports.DATA_POINT = Symbol.for('datapoints'); + +exports.LINE_NUM_INCLUDING_HEADER = 2; diff --git a/lib/ddf-definitions/ddf-json-corrector.js b/lib/ddf-definitions/ddf-json-corrector.js index 6feae99..ba9fb2b 100644 --- a/lib/ddf-definitions/ddf-json-corrector.js +++ b/lib/ddf-definitions/ddf-json-corrector.js @@ -4,12 +4,11 @@ const _ = require('lodash'); const async = require('async'); const json2csv = require('json2csv'); const DdfDataSet = require('./ddf-data-set'); +const constants = require('../ddf-definitions/constants'); const generalDdfRules = require('../ddf-rules/general-rules'); const rulesRegistry = require('../ddf-rules/registry'); const fileUtils = require('../utils/file'); -const LINE_NUM_INCLUDING_HEADER = 2; - function correctFile(data, cb) { fileUtils.readFile(data.file, (err, content) => { if (err) { @@ -17,7 +16,7 @@ function correctFile(data, cb) { } data.warnings.forEach(issue => { - content[issue.data.line - LINE_NUM_INCLUDING_HEADER][issue.data.column] = + content[issue.data.line - constants.LINE_NUM_INCLUDING_HEADER][issue.data.column] = _.first(issue.suggestions); }); diff --git a/lib/ddf-rules/concept-rules.js b/lib/ddf-rules/concept-rules.js index 7baa335..63448ae 100644 --- a/lib/ddf-rules/concept-rules.js +++ b/lib/ddf-rules/concept-rules.js @@ -3,6 +3,61 @@ const _ = require('lodash'); const registry = require('./registry'); const Issue = require('./issue'); +const 
Levenshtein = require('levenshtein'); +const SUGGEST_TOLERANCE = 3; + +function getDataPointHeaderDetails(ddfDataSet) { + const result = []; + + ddfDataSet.getDataPoint().details.forEach(detail => + detail.fileDescriptor.headers.forEach(header => result.push({header, detail}))); + + return result; +} + +function getEntityHeaderDetails(ddfDataSet) { + const result = []; + + ddfDataSet.getEntity().details.forEach(detail => { + detail.header.forEach(header => { + if (!_.startsWith(header, 'is--')) { + result.push({header, detail}); + } + }); + }); + + return result; +} + +function getHeaderDetailObjects(ddfDataSet) { + return getDataPointHeaderDetails(ddfDataSet) + .concat(getEntityHeaderDetails(ddfDataSet)); +} + +function setNonConceptHeaderIssue(conceptIds, detailObject, result) { + if (conceptIds.indexOf(detailObject.header) < 0) { + const suggestions = _.uniq( + conceptIds + .map(concept => { + const levenshtein = new Levenshtein(concept, detailObject.header); + + return { + concept, + distance: levenshtein.distance + }; + }) + .filter(suggest => suggest.distance < SUGGEST_TOLERANCE) + .map(suggest => suggest.concept) + ); + + const issue = new Issue(registry.NON_CONCEPT_HEADER) + .setPath(detailObject.detail.fileDescriptor.fullPath) + .setData(detailObject.header) + .setSuggestions(suggestions); + + result.push(issue); + } +} module.exports = { [registry.CONCEPT_ID_IS_NOT_UNIQUE]: ddfDataSet => { @@ -28,6 +83,16 @@ module.exports = { .setData(nonUniqueConceptIds); } + return result; + }, + [registry.NON_CONCEPT_HEADER]: ddfDataSet => { + const result = []; + const conceptIds = ddfDataSet.getConcept().getIds(); + + getHeaderDetailObjects(ddfDataSet) + .map(headerDetailObject => + setNonConceptHeaderIssue(conceptIds, headerDetailObject, result)); + return result; } }; diff --git a/lib/ddf-rules/data-point-rules.js b/lib/ddf-rules/data-point-rules.js index d545b20..33f9cce 100644 --- a/lib/ddf-rules/data-point-rules.js +++ b/lib/ddf-rules/data-point-rules.js @@ 
-4,13 +4,13 @@ const _ = require('lodash'); const ddfTimeUtils = require('ddf-time-utils'); const registry = require('./registry'); const Issue = require('./issue'); -const LINE_NUM_INCLUDING_HEADER = 2; +const constants = require('../ddf-definitions/constants'); function constructEntityCondition(entity) { - const espectedKey = `is--${entity}`; + const expectedKey = `is--${entity}`; return { - [espectedKey]: {$in: ['1', 'TRUE', 'true']} + [expectedKey]: {$in: ['1', 'TRUE', 'true']} }; } @@ -30,7 +30,7 @@ module.exports = { if (isNaN(dataPointRecord[measure])) { const data = { measure, - line: line + LINE_NUM_INCLUDING_HEADER, + line: line + constants.LINE_NUM_INCLUDING_HEADER, value: dataPointRecord[measure] }; const issue = new Issue(registry.DATA_POINT_VALUE_NOT_NUMERIC) @@ -63,7 +63,7 @@ module.exports = { if (!_.includes(entityValueHash[entityKey], dataPointRecord[entityKey])) { const data = { concept: entityKey, - line: line + LINE_NUM_INCLUDING_HEADER, + line: line + constants.LINE_NUM_INCLUDING_HEADER, value: dataPointRecord[entityKey] }; const issue = new Issue(registry.DATA_POINT_UNEXPECTED_ENTITY_VALUE) @@ -89,7 +89,7 @@ module.exports = { if (!ddfTimeUtils.detectTimeType(dataPointRecord[timeKey])) { const data = { concept: timeKey, - line: line + LINE_NUM_INCLUDING_HEADER, + line: line + constants.LINE_NUM_INCLUDING_HEADER, value: dataPointRecord[timeKey] }; const issue = new Issue(registry.DATA_POINT_UNEXPECTED_TIME_VALUE) diff --git a/lib/ddf-rules/entity-rules.js b/lib/ddf-rules/entity-rules.js index 4d61ddc..d2edc5f 100644 --- a/lib/ddf-rules/entity-rules.js +++ b/lib/ddf-rules/entity-rules.js @@ -4,46 +4,99 @@ const _ = require('lodash'); const Levenshtein = require('levenshtein'); const registry = require('./registry'); const Issue = require('./issue'); +const constants = require('../ddf-definitions/constants'); const SUGGEST_TOLERANCE = 5; +const IS_HEADER_PREFIX = 'is--'; module.exports = { - [registry.ENTITY_HEADER_IS_NOT_CONCEPT]: ddfDataSet => 
{ + [registry.WRONG_ENTITY_IS_HEADER]: ddfDataSet => { const result = []; const conceptIds = ddfDataSet.getConcept().getIds(); + const entitySetIds = ddfDataSet.getConcept().getDataIdsByType('entity_set'); + + function getInformationAboutNonConcept(actualHeaderDetail, headerDetail) { + return !_.includes(conceptIds, actualHeaderDetail) ? { + message: 'Not a concept', + headerDetail + } : null; + } + + function getInformationAboutWrongConcept(actualHeaderDetail, headerDetail) { + const conceptRecord = ddfDataSet.getConcept().getRecordByKey(actualHeaderDetail); + + return !conceptRecord || conceptRecord.concept_type !== 'entity_set' ? { + message: 'Wrong concept type', + headerDetail + } : null; + } ddfDataSet.getEntity().details.forEach(detail => { - detail.header.forEach(recordParam => { - let record = recordParam; + detail.header.forEach(headerDetail => { + let actualHeaderDetail = headerDetail; - if (_.includes(recordParam, 'is--')) { - record = recordParam.replace(/^is--/, ''); - } + if (_.startsWith(headerDetail, IS_HEADER_PREFIX)) { + actualHeaderDetail = headerDetail.replace(IS_HEADER_PREFIX, ''); + + const data = getInformationAboutNonConcept(actualHeaderDetail, headerDetail) || + getInformationAboutWrongConcept(actualHeaderDetail, headerDetail); + + if (data) { + const suggestions = + entitySetIds + .map(concept => { + const levenshtein = new Levenshtein(concept, actualHeaderDetail); + + return { + concept, + distance: levenshtein.distance + }; + }) + .filter(suggest => suggest.distance < SUGGEST_TOLERANCE) + .map(suggest => suggest.concept); + const issue = new Issue(registry.WRONG_ENTITY_IS_HEADER) + .setPath(detail.fileDescriptor.fullPath) + .setData(data) + .setSuggestions(suggestions); - if (conceptIds.indexOf(record) < 0) { - const suggestions = _.uniq( - conceptIds - .map(concept => { - const levenshtein = new Levenshtein(concept, record); - - return { - concept, - distance: levenshtein.distance - }; - }) - .filter(suggest => suggest.distance < 
SUGGEST_TOLERANCE) - .map(suggest => suggest.concept) - ); - - const issue = new Issue(registry.ENTITY_HEADER_IS_NOT_CONCEPT) - .setPath(detail.fileDescriptor.fullPath) - .setData(recordParam) - .setSuggestions(suggestions); - - result.push(issue); + result.push(issue); + } } }); }); + return result; + }, + [registry.WRONG_ENTITY_IS_VALUE]: ddfDataSet => { + const result = []; + const entities = ddfDataSet.getEntity().getDataByFiles(); + const entityFiles = _.keys(entities); + const VALUE_TEMPLATE = ['true', 'false', '0', '1']; + + entityFiles.forEach(entityFile => { + if (!_.isEmpty(entities[entityFile])) { + const expectedKeys = _.keys(_.head(entities[entityFile])) + .filter(entityRecordKey => + _.startsWith(entityRecordKey, IS_HEADER_PREFIX)); + + entities[entityFile].forEach(entityRecord => { + expectedKeys.forEach(key => { + if (!_.includes(VALUE_TEMPLATE, _.lowerCase(entityRecord[key]))) { + const data = { + header: key, + line: entityRecord.$$lineNumber + constants.LINE_NUM_INCLUDING_HEADER, + value: entityRecord[key] + }; + const issue = new Issue(registry.WRONG_ENTITY_IS_VALUE) + .setPath(entityFile) + .setData(data); + + result.push(issue); + } + }); + }); + } + }); + return result; } }; diff --git a/lib/ddf-rules/registry.js b/lib/ddf-rules/registry.js index 8500cc4..3e8aae1 100644 --- a/lib/ddf-rules/registry.js +++ b/lib/ddf-rules/registry.js @@ -6,10 +6,12 @@ exports.INDEX_IS_NOT_FOUND = Symbol.for('INDEX_IS_NOT_FOUND'); exports.INCORRECT_FILE = Symbol.for('INCORRECT_FILE'); exports.INCORRECT_JSON_FIELD = Symbol.for('INCORRECT_JSON_FIELD'); exports.CONCEPT_ID_IS_NOT_UNIQUE = Symbol.for('CONCEPT_ID_IS_NOT_UNIQUE'); -exports.ENTITY_HEADER_IS_NOT_CONCEPT = Symbol.for('ENTITY_HEADER_IS_NOT_CONCEPT'); +exports.NON_CONCEPT_HEADER = Symbol.for('NON_CONCEPT_HEADER'); exports.DATA_POINT_VALUE_NOT_NUMERIC = Symbol.for('DATA_POINT_VALUE_NOT_NUMERIC'); exports.DATA_POINT_UNEXPECTED_ENTITY_VALUE = Symbol.for('DATA_POINT_UNEXPECTED_ENTITY_VALUE'); 
exports.DATA_POINT_UNEXPECTED_TIME_VALUE = Symbol.for('DATA_POINT_UNEXPECTED_TIME_VALUE'); +exports.WRONG_ENTITY_IS_HEADER = Symbol.for('WRONG_ENTITY_IS_HEADER'); +exports.WRONG_ENTITY_IS_VALUE = Symbol.for('WRONG_ENTITY_IS_VALUE'); exports.descriptions = { [exports.NON_DDF_DATA_SET]: 'This data set is not DDF', @@ -18,8 +20,10 @@ exports.descriptions = { [exports.INCORRECT_FILE]: 'Incorrect file', [exports.INCORRECT_JSON_FIELD]: 'Incorrect JSON field', [exports.CONCEPT_ID_IS_NOT_UNIQUE]: 'Concept Id is not unique', - [exports.ENTITY_HEADER_IS_NOT_CONCEPT]: 'Entity header is not correct', + [exports.NON_CONCEPT_HEADER]: 'Non concept header', [exports.DATA_POINT_VALUE_NOT_NUMERIC]: 'Measure in data point has not numeric type', [exports.DATA_POINT_UNEXPECTED_ENTITY_VALUE]: 'Unexpected entity value in the data point', - [exports.DATA_POINT_UNEXPECTED_TIME_VALUE]: 'Unexpected time value in the data point' + [exports.DATA_POINT_UNEXPECTED_TIME_VALUE]: 'Unexpected time value in the data point', + [exports.WRONG_ENTITY_IS_HEADER]: 'Wrong "is" header', + [exports.WRONG_ENTITY_IS_VALUE]: 'Wrong value for "is" header' }; diff --git a/test/concept-rules.spec.js b/test/concept-rules.spec.js index de5d755..7d13270 100644 --- a/test/concept-rules.spec.js +++ b/test/concept-rules.spec.js @@ -1,4 +1,5 @@ 'use strict'; +const _ = require('lodash'); const chai = require('chai'); const sinonChai = require('sinon-chai'); const expect = chai.expect; @@ -42,4 +43,50 @@ describe('rules for concept', () => { }); }); }); + + describe('when "NON_CONCEPT_HEADER" rule', () => { + afterEach(done => { + ddfDataSet.dismiss(() => { + done(); + }); + }); + + it('any issue should NOT be found for folder without the problem (fixtures/good-folder)', done => { + ddfDataSet = new DdfDataSet('./test/fixtures/good-folder'); + ddfDataSet.load(() => { + expect(conceptRules[rulesRegistry.NON_CONCEPT_HEADER](ddfDataSet).length).to.equal(0); + + done(); + }); + }); + + it(`issues should be found for folder 
with the problem + (fixtures/rules-cases/non-concept-header)`, done => { + ddfDataSet = new DdfDataSet('./test/fixtures/rules-cases/non-concept-header'); + ddfDataSet.load(() => { + const result = conceptRules[rulesRegistry.NON_CONCEPT_HEADER](ddfDataSet); + const issuesData = [ + { + wrongHeaderDetail: 'wrong-header-1', + suggestions: [] + }, + { + wrongHeaderDetail: 'xgeo', + suggestions: ['geo'] + } + ]; + + expect(result).to.be.not.null; + + issuesData.forEach((issueData, index) => { + expect(result[index].type).to.equal(rulesRegistry.NON_CONCEPT_HEADER); + expect(!!result[index].data).to.be.true; + expect(result[index].data).to.equal(issueData.wrongHeaderDetail); + expect(_.head(result[index].suggestions)).to.equal(_.head(issueData.suggestions)); + }); + + done(); + }); + }); + }); }); diff --git a/test/entry-rules.spec.js b/test/entry-rules.spec.js index bcc3076..d7e94fa 100644 --- a/test/entry-rules.spec.js +++ b/test/entry-rules.spec.js @@ -11,7 +11,7 @@ chai.use(sinonChai); describe('rules for entry', () => { let ddfDataSet = null; - describe('when "ENTITY_HEADER_IS_NOT_CONCEPT" rule', () => { + describe('when "WRONG_ENTITY_IS_HEADER" rule', () => { afterEach(done => { ddfDataSet.dismiss(() => { done(); @@ -21,24 +21,82 @@ describe('rules for entry', () => { it('any issue should NOT be found for folder without the problem (fixtures/good-folder)', done => { ddfDataSet = new DdfDataSet('./test/fixtures/good-folder'); ddfDataSet.load(() => { - expect(entryRules[rulesRegistry.ENTITY_HEADER_IS_NOT_CONCEPT](ddfDataSet).length).to.equal(0); + expect(entryRules[rulesRegistry.WRONG_ENTITY_IS_HEADER](ddfDataSet).length).to.equal(0); done(); }); }); it(`issues should be found for folder with the problem - (fixtures/rules-cases/entity-header-is-no-concept)`, done => { - ddfDataSet = new DdfDataSet('./test/fixtures/rules-cases/entity-header-is-no-concept'); + (fixtures/rules-cases/wrong-entity-is-header)`, done => { + ddfDataSet = new 
DdfDataSet('./test/fixtures/rules-cases/wrong-entity-is-header'); ddfDataSet.load(() => { - const result = entryRules[rulesRegistry.ENTITY_HEADER_IS_NOT_CONCEPT](ddfDataSet); - const EXPECTED_ERROR_COUNT = 2; - - expect(result.length).to.equal(EXPECTED_ERROR_COUNT); - expect(result[0].type).to.equal(rulesRegistry.ENTITY_HEADER_IS_NOT_CONCEPT); - expect(result[0].data).to.equal('foo'); - expect(result[1].type).to.equal(rulesRegistry.ENTITY_HEADER_IS_NOT_CONCEPT); - expect(result[1].data).to.equal('is--bar'); + const result = entryRules[rulesRegistry.WRONG_ENTITY_IS_HEADER](ddfDataSet); + const issuesData = [ + { + message: 'Not a concept', + headerDetail: 'is--bar' + }, + { + message: 'Wrong concept type', + headerDetail: 'is--geo' + } + ]; + + expect(result.length).to.equal(issuesData.length); + + issuesData.forEach((issueData, index) => { + expect(result[index].type).to.equal(rulesRegistry.WRONG_ENTITY_IS_HEADER); + expect(!!result[index].data).to.be.true; + expect(result[index].data.message).to.equal(issueData.message); + expect(result[index].data.headerDetail).to.equal(issueData.headerDetail); + }); + + done(); + }); + }); + }); + + describe('when "WRONG_ENTITY_IS_VALUE" rule', () => { + afterEach(done => { + ddfDataSet.dismiss(() => { + done(); + }); + }); + + it('any issue should NOT be found for folder without the problem (fixtures/good-folder)', done => { + ddfDataSet = new DdfDataSet('./test/fixtures/good-folder'); + ddfDataSet.load(() => { + expect(entryRules[rulesRegistry.WRONG_ENTITY_IS_VALUE](ddfDataSet).length).to.equal(0); + + done(); + }); + }); + + it(`issues should be found for folder with the problem + (fixtures/rules-cases/wrong-entity-is-value)`, done => { + ddfDataSet = new DdfDataSet('./test/fixtures/rules-cases/wrong-entity-is-value'); + ddfDataSet.load(() => { + const result = entryRules[rulesRegistry.WRONG_ENTITY_IS_VALUE](ddfDataSet); + const issuesData = [ + { + header: 'is--country', + value: 'foo' + }, + { + header: 'is--capital', + 
value: 'bar' + } + ]; + + expect(result.length).to.equal(issuesData.length); + + issuesData.forEach((issueData, index) => { + expect(result[index].type).to.equal(rulesRegistry.WRONG_ENTITY_IS_VALUE); + expect(!!result[index].data).to.be.true; + expect(result[index].data.header).to.equal(issueData.header); + expect(result[index].data.value).to.equal(issueData.value); + }); done(); }); diff --git a/test/fixtures/rules-cases/concept-is-not-unique/ddf--concepts.csv b/test/fixtures/rules-cases/concept-is-not-unique/ddf--concepts.csv index bdb9082..05256b7 100644 --- a/test/fixtures/rules-cases/concept-is-not-unique/ddf--concepts.csv +++ b/test/fixtures/rules-cases/concept-is-not-unique/ddf--concepts.csv @@ -1,9 +1,9 @@ concept,type,domain,name name,string,, -geo,entity domain,, -geo,entity domain,, -region,entity set,geo,Region -country,entity set,geoCountry -country,entity set,geoCountry -capital,entity set,geo,Capital +geo,entity_domain,, +geo,entity_domain,, +region,entity_set,geo,Region +country,entity_set,geoCountry +country,entity_set,geoCountry +capital,entity_set,geo,Capital pop,measure,geo,Population diff --git a/test/fixtures/rules-cases/data-point-value-not-num/ddf--concepts.csv b/test/fixtures/rules-cases/data-point-value-not-num/ddf--concepts.csv index 55158f3..80d2cb0 100644 --- a/test/fixtures/rules-cases/data-point-value-not-num/ddf--concepts.csv +++ b/test/fixtures/rules-cases/data-point-value-not-num/ddf--concepts.csv @@ -1,7 +1,7 @@ concept,type,domain,name name,string,, -geo,entity domain,, -region,entity set,geo,Region -country,entity set,geoCountry -capital,entity set,geo,Capital +geo,entity_domain,, +region,entity_set,geo,Region +country,entity_set,geoCountry +capital,entity_set,geo,Capital pop,measure,geo,Population diff --git a/test/fixtures/rules-cases/entity-header-is-no-concept/ddf--concepts.csv b/test/fixtures/rules-cases/entity-header-is-no-concept/ddf--concepts.csv deleted file mode 100644 index 84edf57..0000000 --- 
a/test/fixtures/rules-cases/entity-header-is-no-concept/ddf--concepts.csv +++ /dev/null @@ -1,9 +0,0 @@ -concept,type,domain,name -name,string,, -geo,entity domain,, -region,entity set,geo,Region -country,entity set,geoCountry -capital,entity set,geo,Capital -pop,measure,geo,Population -lat,measure,,Latitude -lng,measure,,Longitude diff --git a/test/fixtures/rules-cases/entity-header-is-no-concept/ddf--entities--geo--country.csv b/test/fixtures/rules-cases/entity-header-is-no-concept/ddf--entities--geo--country.csv deleted file mode 100644 index db93716..0000000 --- a/test/fixtures/rules-cases/entity-header-is-no-concept/ddf--entities--geo--country.csv +++ /dev/null @@ -1,2 +0,0 @@ -geo,foo,lat,lng,is--bar,is--country,is--capital -and,Andorra,,,0,1,0 diff --git a/test/fixtures/rules-cases/non-concept-header/ddf--concepts--measures.csv b/test/fixtures/rules-cases/non-concept-header/ddf--concepts--measures.csv new file mode 100644 index 0000000..718d454 --- /dev/null +++ b/test/fixtures/rules-cases/non-concept-header/ddf--concepts--measures.csv @@ -0,0 +1,3 @@ +concept,type,domain,name +lat,measure,,Latitude +lng,measure,,Longitude diff --git a/test/fixtures/rules-cases/non-concept-header/ddf--concepts.csv b/test/fixtures/rules-cases/non-concept-header/ddf--concepts.csv new file mode 100644 index 0000000..180c032 --- /dev/null +++ b/test/fixtures/rules-cases/non-concept-header/ddf--concepts.csv @@ -0,0 +1,8 @@ +concept,concept_type,domain,name +name,string,, +geo,entity_domain,, +region,entity_set,geo,Region +country,entity_set,geo,Country +capital,entity_set,geo,Capital +pop,measure,geo,Population +year,time,,year diff --git a/test/fixtures/rules-cases/non-concept-header/ddf--datapoints--pop--by--country--year.csv b/test/fixtures/rules-cases/non-concept-header/ddf--datapoints--pop--by--country--year.csv new file mode 100644 index 0000000..bf73b6c --- /dev/null +++ b/test/fixtures/rules-cases/non-concept-header/ddf--datapoints--pop--by--country--year.csv @@ -0,0 +1,2 
@@ +wrong-header-1,year,pop +vat,1960,100000 diff --git a/test/fixtures/rules-cases/non-concept-header/ddf--entities--geo--country.csv b/test/fixtures/rules-cases/non-concept-header/ddf--entities--geo--country.csv new file mode 100644 index 0000000..d163618 --- /dev/null +++ b/test/fixtures/rules-cases/non-concept-header/ddf--entities--geo--country.csv @@ -0,0 +1,9 @@ +xgeo,name,lat,lng,is--region,is--country,is--capital +and,Andorra,,,0,1,0 +afg,Afghanistan,,,0,1,0 +dza,Algeria,,,0,1,0 +africa,Africa,,,1,0,0 +europe,Europe,,,1,0,0 +americas,Americas,,,1,0,0 +asia,Asia,,,1,0,0 +vat,Vatican,,,0,1,1 diff --git a/test/fixtures/rules-cases/wrong-entity-is-header/ddf--concepts.csv b/test/fixtures/rules-cases/wrong-entity-is-header/ddf--concepts.csv new file mode 100644 index 0000000..8f8a105 --- /dev/null +++ b/test/fixtures/rules-cases/wrong-entity-is-header/ddf--concepts.csv @@ -0,0 +1,9 @@ +concept,concept_type,domain,name +name,string,, +geo,entity_domain,, +region,entity_set,geo,Region +country,entity_set,geoCountry +capital,entity_set,geo,Capital +pop,measure,geo,Population +lat,measure,,Latitude +lng,measure,,Longitude diff --git a/test/fixtures/rules-cases/wrong-entity-is-header/ddf--entities--geo--country.csv b/test/fixtures/rules-cases/wrong-entity-is-header/ddf--entities--geo--country.csv new file mode 100644 index 0000000..3213319 --- /dev/null +++ b/test/fixtures/rules-cases/wrong-entity-is-header/ddf--entities--geo--country.csv @@ -0,0 +1,2 @@ +geo,name,lat,lng,is--bar,is--country,is--geo +and,Andorra,,,0,1,0 diff --git a/test/fixtures/rules-cases/wrong-entity-is-value/ddf--concepts.csv b/test/fixtures/rules-cases/wrong-entity-is-value/ddf--concepts.csv new file mode 100644 index 0000000..8878bfb --- /dev/null +++ b/test/fixtures/rules-cases/wrong-entity-is-value/ddf--concepts.csv @@ -0,0 +1,4 @@ +concept,concept_type,domain,name +name,string,, +geo,entity_domain,, +country,entity_set,geo,Country diff --git 
a/test/fixtures/rules-cases/wrong-entity-is-value/ddf--entities--geo--country.csv b/test/fixtures/rules-cases/wrong-entity-is-value/ddf--entities--geo--country.csv new file mode 100644 index 0000000..a20ec2a --- /dev/null +++ b/test/fixtures/rules-cases/wrong-entity-is-value/ddf--entities--geo--country.csv @@ -0,0 +1,4 @@ +geo,name,lat,lng,is--region,is--country,is--capital +and,Andorra,,,0,TRUE,0 +afg,Afghanistan,,,0,foo,0 +dza,Algeria,,,0,TRUE,bar