Skip to content

Commit 76c0659

Browse files
committed
feat(rules): Two new datapackage validation rules
Closes #494
1 parent d7bd477 commit 76c0659

File tree

45 files changed

+1860
-76
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+1860
-76
lines changed

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
"test-travis": "istanbul cover mocha _mocha -- -R spec --timeout 200000 --compilers ts:ts-node/register --recursive test/**/*.spec.ts && codecov",
1313
"changelog": "conventional-changelog -i CHANGELOG.md -s -p angular",
1414
"github-release": "conventional-github-releaser -p angular",
15-
"build": "tsc && touch lib/package.json && echo \\{\\\"version\\\": \\\"1.15.2\\\"\\} > lib/package.json",
15+
"build": "tsc && touch lib/package.json && echo \\{\\\"version\\\": \\\"1.16.0\\\"\\} > lib/package.json",
1616
"prepublish": "npm run build",
1717
"preversion": "npm test",
1818
"version": "npm run changelog && git add CHANGELOG.md",
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import { isArray, includes, compact } from 'lodash';
2+
import { DdfDataSet } from '../../ddf-definitions/ddf-data-set';
3+
import { Issue } from '../issue';
4+
import { DATAPACKAGE_NONEXISTENT_CONCEPT } from '../registry';
5+
import { DATA_PACKAGE_FILE } from '../../data/data-package';
6+
import * as path from 'path';
7+
import { DDFRoot } from '../../data/ddf-root';
8+
9+
/**
 * Normalizes a value to array form: an array is handed back untouched,
 * anything else is wrapped into a single-element array.
 */
const toArray = value => {
  if (isArray(value)) {
    return value;
  }

  return [value];
};
10+
/**
 * Collects the concepts mentioned by the schema section of the datapackage.
 *
 * For every record of the `concepts`, `entities` and `datapoints` sections, each part
 * of its primary key and its `value` are added to `conceptsSet`.
 * A section that is absent from the schema is treated as empty instead of crashing
 * the validator on a malformed datapackage.
 *
 * @param dataPackageSchema schema object holding the `concepts`, `entities` and `datapoints` sections
 * @param conceptsSet set that is filled in place
 */
const fillConceptsSetBySchema = (dataPackageSchema, conceptsSet: Set<string>) => {
  const schemaRecords = [
    ...(dataPackageSchema.concepts || []),
    ...(dataPackageSchema.entities || []),
    ...(dataPackageSchema.datapoints || [])
  ];

  schemaRecords.forEach(record => {
    for (const pk of toArray(record.primaryKey)) {
      conceptsSet.add(pk);
    }

    // `value` may be empty for some records; the rule drops falsy entries before checking them
    conceptsSet.add(record.value);
  });
};
23+
/**
 * Collects the concepts referenced by the resources of the datapackage:
 * for each resource, the parts of its schema primary key and then the names
 * of all its schema fields are added to `conceptsSet`.
 *
 * @param ddfRoot DDF root that provides the datapackage resources
 * @param conceptsSet set that is filled in place
 */
const fillResources = (ddfRoot: DDFRoot, conceptsSet: Set<string>) => {
  const resources = ddfRoot.getDataPackageResources();

  resources.forEach(({schema}) => {
    for (const keyPart of toArray(schema.primaryKey)) {
      conceptsSet.add(keyPart);
    }

    for (const {name} of schema.fields) {
      conceptsSet.add(name);
    }
  });
};
34+
35+
/**
 * DATAPACKAGE_NONEXISTENT_CONCEPT rule.
 *
 * Gathers every concept referenced by the datapackage (schema section and resources)
 * and reports those that are absent from the concepts data of the dataset.
 * `concept` and `concept_type` are never reported.
 *
 * Returns one issue per missing concept; the issue path is the datapackage file
 * and the issue data is the concept name.
 */
export const rule = {
  rule: (ddfDataSet: DdfDataSet) => {
    const ddfRoot = ddfDataSet.ddfRoot;
    const dataPackagePath = path.resolve(ddfRoot.dataPackageDescriptor.rootFolder, DATA_PACKAGE_FILE);
    const conceptsSet = new Set<string>();
    const dataPackageSchema = ddfRoot.getDataPackageSchema();

    if (dataPackageSchema) {
      fillConceptsSetBySchema(dataPackageSchema, conceptsSet);
    }

    fillResources(ddfRoot, conceptsSet);

    const originalConcepts = ddfDataSet.getConcept().getAllData().map(record => record.concept);
    // the `is--` prefix is stripped, so `is--foo` is checked as the concept `foo`
    const normalizedConcepts = compact(Array.from(conceptsSet.values()))
      .map(concept => concept.replace(/^is--/, ''));

    // De-duplicate AFTER normalization: `foo` and `is--foo` collapse to the same concept
    // and would otherwise produce two identical issues. Insertion order is kept.
    return Array.from(new Set(normalizedConcepts))
      .filter(concept => concept !== 'concept' && concept !== 'concept_type' && !includes(originalConcepts, concept))
      .map(concept => new Issue(DATAPACKAGE_NONEXISTENT_CONCEPT).setPath(dataPackagePath).setData(concept));
  }
};
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import * as path from 'path';
2+
import { isEmpty, compact } from 'lodash';
3+
import { DdfDataSet } from '../../ddf-definitions/ddf-data-set';
4+
import { Issue } from '../issue';
5+
import { DATAPACKAGE_NONEXISTENT_RESOURCE } from '../registry';
6+
import { DATA_PACKAGE_FILE } from '../../data/data-package';
7+
import { DDFRoot } from '../../data/ddf-root';
8+
9+
/**
 * Finds schema records that refer to resources unknown to `resourcesMap`.
 *
 * @param ddfRoot DDF root that provides the datapackage schema
 * @param dataPackagePath path of the datapackage file, attached to every issue
 * @param resourcesMap map whose keys are the known resource names
 * @returns one issue per schema record that lists at least one unknown resource;
 * an empty array when the datapackage has no schema
 */
const getNonexistentResourcesIssues = (
  ddfRoot: DDFRoot,
  dataPackagePath: string,
  resourcesMap: Map<string, number>): Issue[] => {
  // read the schema once instead of asking the root again for every section
  const dataPackageSchema = ddfRoot.getDataPackageSchema();

  if (!dataPackageSchema) {
    return [];
  }

  // a section that is absent from the schema is treated as empty
  const schemaRecords = [
    ...(dataPackageSchema.concepts || []),
    ...(dataPackageSchema.entities || []),
    ...(dataPackageSchema.datapoints || [])
  ];

  return compact(schemaRecords.map(record => {
    const nonexistentResources = record.resources.filter(resource => !resourcesMap.has(resource));

    if (isEmpty(nonexistentResources)) {
      // nothing to report for this record; dropped by `compact`
      return null;
    }

    return new Issue(DATAPACKAGE_NONEXISTENT_RESOURCE)
      .setPath(dataPackagePath)
      .setData({
        nonexistentResources, record,
        specific: 'is NOT found in resources, but found in schema section'
      });
  }));
};
36+
37+
/**
 * Counts how many times the schema records refer to each resource registered in `resourcesMap`.
 *
 * Only names that are already keys of `resourcesMap` are counted; unknown names are ignored.
 * The map is updated in place and nothing is returned. When the datapackage has no schema
 * the map is left untouched; a section that is absent from the schema is treated as empty.
 *
 * @param ddfRoot DDF root that provides the datapackage schema
 * @param resourcesMap usage counters keyed by resource name
 */
const fillResourceMapCounters = (ddfRoot: DDFRoot, resourcesMap: Map<string, number>) => {
  // read the schema once instead of asking the root again for every section
  const dataPackageSchema = ddfRoot.getDataPackageSchema();

  if (!dataPackageSchema) {
    return;
  }

  const schemaRecords = [
    ...(dataPackageSchema.concepts || []),
    ...(dataPackageSchema.entities || []),
    ...(dataPackageSchema.datapoints || [])
  ];

  schemaRecords.forEach(record => {
    record.resources.forEach(resource => {
      if (resourcesMap.has(resource)) {
        resourcesMap.set(resource, resourcesMap.get(resource) + 1);
      }
    });
  });
};
56+
57+
/**
 * Reports the entries of `resourcesMap` whose counter is still zero.
 *
 * @param dataPackagePath path of the datapackage file, attached to every issue
 * @param resourcesMap counters keyed by resource name
 * @returns one issue per resource with a zero counter, in map insertion order
 */
const getNonexistentSchemaResourcesIssues = (dataPackagePath: string, resourcesMap: Map<string, number>): Issue[] => {
  const issues: Issue[] = [];

  resourcesMap.forEach((usageCount, resource) => {
    if (usageCount !== 0) {
      return;
    }

    const issue = new Issue(DATAPACKAGE_NONEXISTENT_RESOURCE)
      .setPath(dataPackagePath)
      .setData({resource, specific: 'is NOT found in ddfSchema schema, but found in resources section'});

    issues.push(issue);
  });

  return issues;
};
63+
64+
/**
 * DATAPACKAGE_NONEXISTENT_RESOURCE rule.
 *
 * Registers every datapackage resource name with a zero counter, counts the references
 * made by the schema records, and returns the issues found in both directions:
 * first the schema records that refer to unknown resources, then the resources
 * that no schema record refers to.
 */
export const rule = {
  rule: (ddfDataSet: DdfDataSet) => {
    const ddfRoot = ddfDataSet.ddfRoot;
    const dataPackagePath = path.resolve(ddfRoot.dataPackageDescriptor.rootFolder, DATA_PACKAGE_FILE);
    const resourcesMap = new Map<string, number>();

    ddfRoot.getDataPackageResources().forEach(resource => {
      resourcesMap.set(resource.name, 0);
    });

    fillResourceMapCounters(ddfRoot, resourcesMap);

    const schemaSideIssues = getNonexistentResourcesIssues(ddfRoot, dataPackagePath, resourcesMap);
    const resourceSideIssues = getNonexistentSchemaResourcesIssues(dataPackagePath, resourcesMap);

    return schemaSideIssues.concat(resourceSideIssues);
  }
};

src/ddf-rules/index.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ import { rule as nonUniqueResourceName } from './data-package-rules/non-unique-r
1616
import { rule as nonUniqueResourceFile } from './data-package-rules/non-unique-resource-file';
1717
import { rule as dataPointWithoutIndicator } from './data-package-rules/datapoint-without-indicator';
1818
import { rule as sameKeyValueConcept } from './data-package-rules/same-key-value-concept';
19+
import { rule as nonexistentResource } from './data-package-rules/nonexistent-resource';
20+
import { rule as nonexistentConcept } from './data-package-rules/nonexistent-concept';
1921

2022
// import { rule as measureValueNotNumeric } from './data-point-rules/measure-value-not-numeric';
2123
import { rule as unexpectedEntityValue } from './data-point-rules/unexpected-entity-value';
@@ -62,6 +64,8 @@ export const allRules = {
6264
[registry.DATA_POINT_UNEXPECTED_ENTITY_VALUE]: unexpectedEntityValue,
6365
[registry.DATA_POINT_UNEXPECTED_TIME_VALUE]: unexpectedTimeValue,
6466
[registry.SAME_KEY_VALUE_CONCEPT]: sameKeyValueConcept,
67+
[registry.DATAPACKAGE_NONEXISTENT_RESOURCE]: nonexistentResource,
68+
[registry.DATAPACKAGE_NONEXISTENT_CONCEPT]: nonexistentConcept,
6569
[registry.WRONG_ENTITY_IS_HEADER]: wrongEntityIsHeader,
6670
[registry.WRONG_ENTITY_IS_VALUE]: wrongEntityIsValue,
6771
[registry.NON_UNIQUE_ENTITY_VALUE]: nonUniqueEntityValue,

src/ddf-rules/registry.ts

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ export const DATAPACKAGE_NON_CONCEPT_FIELD = Symbol.for('DATAPACKAGE_NON_CONCEPT
2424
export const DATAPACKAGE_INCORRECT_PRIMARY_KEY = Symbol.for('DATAPACKAGE_INCORRECT_PRIMARY_KEY');
2525
export const DATAPACKAGE_NON_UNIQUE_RESOURCE_NAME = Symbol.for('DATAPACKAGE_NON_UNIQUE_RESOURCE_NAME');
2626
export const DATAPACKAGE_NON_UNIQUE_RESOURCE_FILE = Symbol.for('DATAPACKAGE_NON_UNIQUE_RESOURCE_FILE');
27+
export const DATAPACKAGE_NONEXISTENT_RESOURCE = Symbol.for('DATAPACKAGE_NONEXISTENT_RESOURCE');
28+
export const DATAPACKAGE_NONEXISTENT_CONCEPT = Symbol.for('DATAPACKAGE_NONEXISTENT_CONCEPT');
2729
export const UNEXPECTED_TRANSLATION_HEADER = Symbol.for('UNEXPECTED_TRANSLATION_HEADER');
2830
export const UNEXPECTED_TRANSLATIONS_DATA = Symbol.for('UNEXPECTED_TRANSLATIONS_DATA');
2931
export const UNEXPECTED_DATA_POINT_TRANSLATIONS_DATA = Symbol.for('UNEXPECTED_DATA_POINT_TRANSLATIONS_DATA');
@@ -63,6 +65,8 @@ export const tags: any = {
6365
[DATA_POINT_UNEXPECTED_TIME_VALUE]: [DATAPOINT_TAG],
6466
[WRONG_DATA_POINT_HEADER]: [DATAPOINT_TAG],
6567
[SAME_KEY_VALUE_CONCEPT]: [DATAPACKAGE_TAG],
68+
[DATAPACKAGE_NONEXISTENT_RESOURCE]: [DATAPACKAGE_TAG],
69+
[DATAPACKAGE_NONEXISTENT_CONCEPT]: [DATAPACKAGE_TAG],
6670
[WRONG_ENTITY_IS_HEADER]: [],
6771
[WRONG_ENTITY_IS_VALUE]: [],
6872
[NON_UNIQUE_ENTITY_VALUE]: [],
@@ -113,6 +117,8 @@ export const descriptions = {
113117
[DATAPACKAGE_INCORRECT_PRIMARY_KEY]: 'Datapackage: Fields section does not contain primary key.',
114118
[DATAPACKAGE_NON_UNIQUE_RESOURCE_NAME]: 'Datapackage: Non-unique resource name found in datapackage.json.',
115119
[DATAPACKAGE_NON_UNIQUE_RESOURCE_FILE]: 'Datapackage: Non-unique resource file found in datapackage.json.',
120+
[DATAPACKAGE_NONEXISTENT_RESOURCE]: 'Datapackage: Resource is present in only one of the "resources" and "ddfSchema" sections of datapackage.json.',
121+
[DATAPACKAGE_NONEXISTENT_CONCEPT]: 'Concept found in datapackage but is not listed in concepts table',
116122
[UNEXPECTED_TRANSLATION_HEADER]: 'Translations: Unexpected header in translation files',
117123
[UNEXPECTED_TRANSLATIONS_DATA]: 'Translations: Unexpected translations data: primary key is not consistent.',
118124
[UNEXPECTED_DATA_POINT_TRANSLATIONS_DATA]: 'Translations: Unexpected translations datapoint data: primary key is not consistent.',
@@ -153,8 +159,10 @@ export const howToFix = {
153159
[DATAPACKAGE_INCORRECT_PRIMARY_KEY]: 'Regenerate or update datapackage as described here: https://github.com/Gapminder/ddf-validation#datapackage',
154160
[DATAPACKAGE_NON_UNIQUE_RESOURCE_NAME]: 'Regenerate or update datapackage as described here: https://github.com/Gapminder/ddf-validation#datapackage',
155161
[DATAPACKAGE_NON_UNIQUE_RESOURCE_FILE]: 'Regenerate or update datapackage as described here: https://github.com/Gapminder/ddf-validation#datapackage',
156-
[UNEXPECTED_TRANSLATION_HEADER]: '',
157-
[UNEXPECTED_TRANSLATIONS_DATA]: '',
162+
[DATAPACKAGE_NONEXISTENT_RESOURCE]: 'Regenerate or update datapackage as described here: https://github.com/Gapminder/ddf-validation#datapackage',
163+
[DATAPACKAGE_NONEXISTENT_CONCEPT]: 'Remove the concept from datapackage or add it to the concepts table',
164+
[UNEXPECTED_TRANSLATION_HEADER]: '',
165+
[UNEXPECTED_TRANSLATIONS_DATA]: '',
158166
[UNEXPECTED_DATA_POINT_TRANSLATIONS_DATA]: '',
159167
[DUPLICATED_DATA_POINT_TRANSLATION_KEY]: '',
160168
[DUPLICATED_TRANSLATION_KEY]: '',

test/api.spec.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ describe('api', () => {
215215
describe('and DDF dataset is correct', () => {
216216
it('should dataset is correct', done => {
217217
const issues = [];
218-
const streamValidator = new StreamValidator('./test/fixtures/good-folder', {});
218+
const streamValidator = new StreamValidator('./test/fixtures/good-folder-dp', {});
219219

220220
streamValidator.on('issue', issue => {
221221
issues.push(issue);
@@ -233,7 +233,7 @@ describe('api', () => {
233233

234234
it('should custom settings be processed correctly (excludeDirs as string)', done => {
235235
const issues = [];
236-
const streamValidator = new StreamValidator('./test/fixtures/good-folder', {
236+
const streamValidator = new StreamValidator('./test/fixtures/good-folder-dp', {
237237
excludeRules: 'WRONG_DATA_POINT_HEADER',
238238
excludeDirs: '.gitingore, .git',
239239
isCheckHidden: true
@@ -266,7 +266,7 @@ describe('api', () => {
266266
excludeDirs: ['.gitingore', '.git'],
267267
isCheckHidden: true
268268
};
269-
const streamValidator = new StreamValidator('./test/fixtures/good-folder', EXPECTED_SETTINGS);
269+
const streamValidator = new StreamValidator('./test/fixtures/good-folder-dp', EXPECTED_SETTINGS);
270270

271271
streamValidator.on('issue', issue => {
272272
issues.push(issue);
@@ -320,7 +320,7 @@ describe('api', () => {
320320
const _StreamValidator = require('../lib/index').StreamValidator;
321321

322322
it('should result for generic and multi thread modes be same ', done => {
323-
const EXPECTED_ISSUES_COUNT = 4;
323+
const EXPECTED_ISSUES_COUNT = 15;
324324
const DATA_SET_PATH = './test/fixtures/rules-cases/data-point-constraint-violation';
325325

326326
parallel({

test/data-package-rules.spec.ts

Lines changed: 116 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ import {
88
DATAPACKAGE_NON_UNIQUE_RESOURCE_FILE,
99
DATAPACKAGE_NON_UNIQUE_RESOURCE_NAME,
1010
DATA_POINT_WITHOUT_INDICATOR,
11-
SAME_KEY_VALUE_CONCEPT
11+
SAME_KEY_VALUE_CONCEPT,
12+
DATAPACKAGE_NONEXISTENT_RESOURCE, DATAPACKAGE_NONEXISTENT_CONCEPT
1213
} from '../src/ddf-rules/registry';
1314
import { DdfDataSet } from '../src/ddf-definitions/ddf-data-set';
1415
import { Issue } from '../src/ddf-rules/issue';
@@ -368,4 +369,118 @@ describe('ddf datapackage.json validation', () => {
368369
});
369370
});
370371
});
372+
373+
describe('when "DATAPACKAGE_NONEXISTENT_RESOURCE" rule', () => {
374+
it('any issue should NOT be found for a folder without the problem', done => {
375+
const ddfDataSet = new DdfDataSet('./test/fixtures/dummy-companies-with-dp', null);
376+
377+
ddfDataSet.load(() => {
378+
const issues = allRules[DATAPACKAGE_NONEXISTENT_RESOURCE].rule(ddfDataSet);
379+
380+
expect(issues.length).to.equal(0);
381+
382+
done();
383+
});
384+
});
385+
386+
it('an issue should be found for a folder with the problem', done => {
387+
const ddfDataSet = new DdfDataSet('./test/fixtures/rules-cases/dp-nonexistent-resource', null);
388+
const expectedData = [
389+
{
390+
nonexistentResources: [
391+
'company_size_string--by--company--anno-2'
392+
],
393+
record: {
394+
primaryKey: [
395+
'anno',
396+
'company'
397+
],
398+
value: 'company_size_string',
399+
resources: [
400+
'company_size_string--by--company--anno-2'
401+
]
402+
},
403+
specific: 'is NOT found in resources, but found in schema section'
404+
},
405+
{
406+
nonexistentResources: [
407+
'company_size_string--by--company--anno-2'
408+
],
409+
record: {
410+
primaryKey: [
411+
'anno',
412+
'english_speaking_company'
413+
],
414+
value: 'company_size_string',
415+
resources: [
416+
'company_size_string--by--company--anno-2'
417+
]
418+
},
419+
specific: 'is NOT found in resources, but found in schema section'
420+
},
421+
{
422+
nonexistentResources: [
423+
'company_size_string--by--company--anno-2'
424+
],
425+
record: {
426+
primaryKey: [
427+
'anno',
428+
'foundation'
429+
],
430+
value: 'company_size_string',
431+
resources: [
432+
'company_size_string--by--company--anno-2'
433+
]
434+
},
435+
specific: 'is NOT found in resources, but found in schema section'
436+
},
437+
{
438+
resource: 'company_size-2',
439+
specific: 'is NOT found in ddfSchema schema, but found in resources section'
440+
}
441+
];
442+
443+
ddfDataSet.load(() => {
444+
const issues: Issue[] = allRules[DATAPACKAGE_NONEXISTENT_RESOURCE].rule(ddfDataSet);
445+
446+
expect(issues.length).to.equal(expectedData.length);
447+
448+
issues.forEach((issue, order) => {
449+
expect(endsWith(issue.path, 'datapackage.json')).to.be.true;
450+
expect(issue.data).to.deep.equal(expectedData[order]);
451+
});
452+
453+
done();
454+
});
455+
});
456+
});
457+
458+
describe('when "DATAPACKAGE_NONEXISTENT_CONCEPT" rule', () => {
459+
it('any issue should NOT be found for a folder without the problem', done => {
460+
const ddfDataSet = new DdfDataSet('./test/fixtures/dummy-companies-with-dp', null);
461+
462+
ddfDataSet.load(() => {
463+
const issues = allRules[DATAPACKAGE_NONEXISTENT_CONCEPT].rule(ddfDataSet);
464+
465+
expect(issues.length).to.equal(0);
466+
467+
done();
468+
});
469+
});
470+
471+
it('an issue should be found for a folder with the problem', done => {
472+
const ddfDataSet = new DdfDataSet('./test/fixtures/rules-cases/dp-nonexistent-concept', null);
473+
474+
ddfDataSet.load(() => {
475+
const issues: Issue[] = allRules[DATAPACKAGE_NONEXISTENT_CONCEPT].rule(ddfDataSet);
476+
const issue = head(issues);
477+
478+
expect(issues.length).to.equal(1);
479+
expect(endsWith(issue.path, 'datapackage.json')).to.be.true;
480+
expect(issue.data).to.equal('company_size');
481+
482+
done();
483+
});
484+
});
485+
});
371486
});

0 commit comments

Comments
 (0)