Skip to content

Commit c204e42

Browse files
committed
feat(rules): validate enum values when set in datapackage.json
Closes #258
1 parent 84fe09b commit c204e42

31 files changed

+131038
-50
lines changed

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
"babyparse": "0.4.6",
3939
"ddf-time-utils": "0.1.7",
4040
"deep-diff": "0.3.4",
41-
"fast-csv": "2.4.0",
41+
"fast-csv": "2.3.0",
4242
"fs": "0.0.2",
4343
"json2csv": "3.7.3",
4444
"levenshtein": "1.0.5",

src/data/data-package.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,7 @@ export class DataPackage {
451451
this.fileDescriptors = dataPackage.resources.map(resource => ({
452452
filename: resource.path,
453453
name: resource.name,
454+
schema: resource.schema,
454455
fullPath: resolve(this.rootFolder, resource.path),
455456
type: getTypeByResource(resource)
456457
}));
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import { compact, flattenDeep, includes } from 'lodash';
2+
import { DATA_POINT_CONSTRAINT_VIOLATION } from '../registry';
3+
import { cacheFor, IConstraintDescriptor } from './shared';
4+
import { Issue } from '../issue';
5+
6+
export const rule = {
  isDataPoint: true,
  /**
   * Checks a single data point record against the enum constraints that are
   * declared for its file (as provided by `cacheFor.constraintsByFileDescriptor`).
   *
   * @param dataPointDescriptor descriptor of the record under validation; `record` and `line`
   * are read here, the rest is forwarded to `cacheFor.constraintsByFileDescriptor`
   * @returns one `DATA_POINT_CONSTRAINT_VIOLATION` issue per constrained field whose value
   * is not among the allowed values; an empty array when the record is fine
   */
  recordRule: dataPointDescriptor => {
    const constraints: IConstraintDescriptor[] = cacheFor.constraintsByFileDescriptor(dataPointDescriptor);
    const constraintViolation = (constraint: IConstraintDescriptor) => {
      // A descriptor without an enum list imposes no restriction. Without this guard
      // `includes(undefined, value)` is always false, so every record would be flagged.
      if (!Array.isArray(constraint.constraints)) {
        return false;
      }

      return !includes(constraint.constraints, dataPointDescriptor.record[constraint.fieldName]);
    };
    const toIssue = (constraint: IConstraintDescriptor) =>
      new Issue(DATA_POINT_CONSTRAINT_VIOLATION).setPath(constraint.fullPath).setData({
        constraints: constraint.constraints,
        fieldName: constraint.fieldName,
        fieldValue: dataPointDescriptor.record[constraint.fieldName],
        line: dataPointDescriptor.line
      });

    return compact(flattenDeep(constraints.filter(constraintViolation).map(toIssue)));
  }
};

src/ddf-rules/data-point-rules/shared.ts

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
import { compact, flattenDeep } from 'lodash';
2+
import { DirectoryDescriptor } from '../../data/directory-descriptor';
3+
4+
/**
 * Describes the enum constraint declared for one schema field of a data point file.
 */
export interface IConstraintDescriptor {
  /** Full path of the data point file the constraint belongs to. */
  fullPath: string;
  /** File name of the data point file, as stored on its file descriptor. */
  file: string;
  /** Name of the constrained schema field. */
  fieldName: string;
  /** Allowed values, taken from the field's `constraints.enum`. */
  constraints: string[];
}
10+
111
function constructEntityCondition(entity) {
212
const expectedKey = `is--${entity}`;
313

@@ -64,6 +74,29 @@ export const cacheFor = {
6474
.filter(conceptTypeKey => conceptTypeDictionary[conceptTypeKey] === type);
6575
}
6676

77+
return cache[key];
78+
},
79+
/**
 * Returns the enum constraints declared for the data point file referenced by
 * `dataPointDescriptor`. The result is memoised in `cache` under a key built from
 * the dataset root path and the file name.
 *
 * Every directory descriptor of the dataset is scanned for file descriptors whose
 * `filename` equals the data point's file; each schema field that declares a
 * `constraints.enum` array yields one descriptor.
 *
 * @param dataPointDescriptor descriptor that carries `ddfDataSet` and `fileDescriptor`
 * @returns one descriptor per enum-constrained schema field; empty when there are none
 */
constraintsByFileDescriptor: (dataPointDescriptor): IConstraintDescriptor[] => {
  const expectedFile = dataPointDescriptor.fileDescriptor.file;
  const forExpectedFile = (currentFileDescriptor: any) => expectedFile === currentFileDescriptor.filename;
  // Only `enum` constraints are collected. A field whose `constraints` object has no
  // `enum` array must be skipped: otherwise the descriptor would carry
  // `constraints: undefined`, and a membership test against it can never succeed.
  const hasEnumConstraints = (field: any) =>
    !!field && !!field.constraints && Array.isArray(field.constraints.enum);
  const getSchemaFields = (fileDescriptor: any) =>
    fileDescriptor.schema && fileDescriptor.schema.fields ? fileDescriptor.schema.fields : [];
  const key = `${dataPointDescriptor.ddfDataSet.ddfRoot.path}@Constraints@${expectedFile}`;

  if (!cache[key]) {
    cache[key] = compact(flattenDeep(
      dataPointDescriptor.ddfDataSet.ddfRoot.directoryDescriptors.map((directoryDescriptor: DirectoryDescriptor) =>
        directoryDescriptor.dataPackage.fileDescriptors.filter(forExpectedFile).map((fileDescriptor: any) =>
          getSchemaFields(fileDescriptor).filter(hasEnumConstraints).map((field: any) => ({
            fullPath: dataPointDescriptor.fileDescriptor.fullPath,
            file: expectedFile,
            fieldName: field.name,
            constraints: field.constraints.enum
          }))
        )
      )
    ));
  }

  return cache[key];
}
69102
};
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import { compact, flattenDeep, get as getValue } from 'lodash';
2+
import { UNEXISTING_CONSTRAINT_VALUE } from '../registry';
3+
import { DdfDataSet } from '../../ddf-definitions/ddf-data-set';
4+
import { DirectoryDescriptor } from '../../data/directory-descriptor';
5+
import { DATA_POINT } from '../../ddf-definitions/constants';
6+
import { Issue } from '../issue';
7+
8+
/**
 * Tells whether at least one entity record of the dataset holds `value` in the
 * property called `name` (strict equality).
 *
 * @param ddfDataSet dataset whose entity data is scanned
 * @param name property to look at on every entity record
 * @param value value that has to be present
 * @returns `true` as soon as a matching record is met, `false` otherwise
 */
const checkConstraintValue = (ddfDataSet: DdfDataSet, name: string, value: string): boolean => {
  let isPresent = false;

  for (const entityRecord of ddfDataSet.getEntity().getAllData()) {
    if (entityRecord[name] === value) {
      isPresent = true;
      break;
    }
  }

  return isPresent;
};
19+
/** Keeps only file descriptors whose `type` is `DATA_POINT`. */
const forDataPointType = (descriptor: any) => descriptor.type === DATA_POINT;

/** Truthy when the schema field declares `constraints.enum`. */
const constraintsAreExists = (schemaField: any) => schemaField.constraints && schemaField.constraints.enum;

/** Negation of `checkConstraintValue`: `true` when no entity record carries the value. */
const constraintValueAreNotPresentInEntities = (ddfDataSet: DdfDataSet, name: string, value: string) => {
  const isPresent = checkConstraintValue(ddfDataSet, name, value);

  return !isPresent;
};

/** Reads `constraints.enum` from a schema field; `[]` when that path resolves to `undefined`. */
const getConstraintsByField = (schemaField: any) => getValue<any | string[]>(schemaField, 'constraints.enum', []);

/** Reads `schema.fields` from a file descriptor; `[]` when that path resolves to `undefined`. */
const getSchemaFields = (descriptor: any) => getValue<any | any[]>(descriptor, 'schema.fields', []);
25+
26+
export const rule = {
  /**
   * Reports every enum constraint value, declared on a data point resource schema,
   * that cannot be found among the entity records of the dataset.
   *
   * @param ddfDataSet dataset under validation
   * @returns a flat list of `UNEXISTING_CONSTRAINT_VALUE` issues, one per missing value
   */
  rule: (ddfDataSet: DdfDataSet) => {
    // Issues for a single constrained field of a single data point file.
    const issuesForField = (fileDescriptor: any, field: any) => {
      const missingValues = getConstraintsByField(field)
        .filter(value => constraintValueAreNotPresentInEntities(ddfDataSet, field.name, value));

      return missingValues.map(value =>
        new Issue(UNEXISTING_CONSTRAINT_VALUE).setPath(fileDescriptor.fullPath).setData({constraintEntityValue: value}));
    };

    // Issues for all constrained fields of a single data point file.
    const issuesForFile = (fileDescriptor: any) =>
      getSchemaFields(fileDescriptor)
        .filter(constraintsAreExists)
        .map((field: any) => issuesForField(fileDescriptor, field));

    // Issues for all data point files that are listed in one directory's data package.
    const issuesForDirectory = (directoryDescriptor: DirectoryDescriptor) =>
      directoryDescriptor.dataPackage.fileDescriptors
        .filter(forDataPointType)
        .map((fileDescriptor: any) => issuesForFile(fileDescriptor));

    const nestedIssues = ddfDataSet.ddfRoot.directoryDescriptors
      .map((directoryDescriptor: DirectoryDescriptor) => issuesForDirectory(directoryDescriptor));

    return compact(flattenDeep(nestedIssues));
  }
};

src/ddf-rules/index.ts

Lines changed: 35 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,42 @@
11
import * as registry from './registry';
22

3-
import {rule as conceptIdIsNotUnique} from './concept-rules/concept-id-is-not-unique';
4-
import {rule as emptyConceptId} from './concept-rules/empty-concept-id';
5-
import {rule as nonConceptHeader} from './concept-rules/non-concept-header';
6-
import {rule as conceptMandatoryFieldNotFound} from './concept-rules/concept-mandatory-field-not-found';
7-
import {rule as conceptsNotFound} from './concept-rules/concepts-not-found';
8-
import {rule as invalidDrillUp} from './concept-rules/invalid-drill-up';
3+
import { rule as conceptIdIsNotUnique } from './concept-rules/concept-id-is-not-unique';
4+
import { rule as emptyConceptId } from './concept-rules/empty-concept-id';
5+
import { rule as nonConceptHeader } from './concept-rules/non-concept-header';
6+
import { rule as conceptMandatoryFieldNotFound } from './concept-rules/concept-mandatory-field-not-found';
7+
import { rule as conceptsNotFound } from './concept-rules/concepts-not-found';
8+
import { rule as invalidDrillUp } from './concept-rules/invalid-drill-up';
99

10-
import {rule as incorrectFile} from './data-package-rules/incorrect-file';
11-
import {rule as confusedFields} from './data-package-rules/confused-fields';
12-
import {rule as nonConceptPrimaryKey} from './data-package-rules/non-concept-primary-key';
13-
import {rule as nonUniqueResourceName} from './data-package-rules/non-unique-resource-name';
14-
import {rule as nonUniqueResourceFile} from './data-package-rules/non-unique-resource-file';
15-
import {rule as dataPointWithoutIndicator} from './data-package-rules/datapoint-without-indicator';
10+
import { rule as incorrectFile } from './data-package-rules/incorrect-file';
11+
import { rule as confusedFields } from './data-package-rules/confused-fields';
12+
import { rule as nonConceptPrimaryKey } from './data-package-rules/non-concept-primary-key';
13+
import { rule as nonUniqueResourceName } from './data-package-rules/non-unique-resource-name';
14+
import { rule as nonUniqueResourceFile } from './data-package-rules/non-unique-resource-file';
15+
import { rule as dataPointWithoutIndicator } from './data-package-rules/datapoint-without-indicator';
1616

17-
import {rule as measureValueNotNumeric} from './data-point-rules/measure-value-not-numeric';
18-
import {rule as unexpectedEntityValue} from './data-point-rules/unexpected-entity-value';
19-
import {rule as unexpectedTimeValue} from './data-point-rules/unexpected-time-value';
17+
import { rule as measureValueNotNumeric } from './data-point-rules/measure-value-not-numeric';
18+
import { rule as unexpectedEntityValue } from './data-point-rules/unexpected-entity-value';
19+
import { rule as unexpectedTimeValue } from './data-point-rules/unexpected-time-value';
20+
import { rule as dataPointConstraintViolation } from './data-point-rules/constraint-violation';
2021

21-
import {rule as nonUniqueEntityValue} from './entity-rules/non-unique-entity-value';
22-
import {rule as wrongEntityIsHeader} from './entity-rules/wrong-entity-is-header';
23-
import {rule as wrongEntityIsValue} from './entity-rules/wrong-entity-is-value';
22+
import { rule as nonUniqueEntityValue } from './entity-rules/non-unique-entity-value';
23+
import { rule as wrongEntityIsHeader } from './entity-rules/wrong-entity-is-header';
24+
import { rule as wrongEntityIsValue } from './entity-rules/wrong-entity-is-value';
25+
import { rule as unexistingConstraintValueRule } from './entity-rules/unexisting-constraint-value';
2426

25-
import {rule as emptyData} from './general-rules/empty-data';
26-
import {rule as unexpectedData} from './general-rules/unexpected-data';
27-
import {rule as wrongDataPointHeader} from './general-rules/wrong-data-point-header';
28-
import {rule as incorrectIdentifier} from './general-rules/incorrect-identifier';
29-
import {rule as incorrectJsonField} from './general-rules/incorrect-json-field';
30-
import {rule as nonDdfFolder} from './general-rules/non-ddf-folder';
31-
import {rule as nonDdfDataset} from './general-rules/non-ddf-dataset';
27+
import { rule as emptyData } from './general-rules/empty-data';
28+
import { rule as unexpectedData } from './general-rules/unexpected-data';
29+
import { rule as wrongDataPointHeader } from './general-rules/wrong-data-point-header';
30+
import { rule as incorrectIdentifier } from './general-rules/incorrect-identifier';
31+
import { rule as incorrectJsonField } from './general-rules/incorrect-json-field';
32+
import { rule as nonDdfFolder } from './general-rules/non-ddf-folder';
33+
import { rule as nonDdfDataset } from './general-rules/non-ddf-dataset';
3234

33-
import {rule as unexpectedTranslationHeader} from './translation-rules/unexpected-translation-header';
34-
import {rule as unexpectedTranslationsData} from './translation-rules/unexpected-translations-data';
35-
import {rule as unexpectedDataPointTranslationsData} from './translation-rules/unexpected-data-point-translations-data';
36-
import {rule as duplicatedDataPointTranslationKey} from './translation-rules/duplicated-data-point-translation-key';
37-
import {rule as duplicatedTranslationKey} from './translation-rules/duplicated-translation-key';
35+
import { rule as unexpectedTranslationHeader } from './translation-rules/unexpected-translation-header';
36+
import { rule as unexpectedTranslationsData } from './translation-rules/unexpected-translations-data';
37+
import { rule as unexpectedDataPointTranslationsData } from './translation-rules/unexpected-data-point-translations-data';
38+
import { rule as duplicatedDataPointTranslationKey } from './translation-rules/duplicated-data-point-translation-key';
39+
import { rule as duplicatedTranslationKey } from './translation-rules/duplicated-translation-key';
3840

3941
export const allRules = {
4042
[registry.CONCEPT_ID_IS_NOT_UNIQUE]: conceptIdIsNotUnique,
@@ -66,5 +68,7 @@ export const allRules = {
6668
[registry.UNEXPECTED_TRANSLATIONS_DATA]: unexpectedTranslationsData,
6769
[registry.UNEXPECTED_DATA_POINT_TRANSLATIONS_DATA]: unexpectedDataPointTranslationsData,
6870
[registry.DUPLICATED_DATA_POINT_TRANSLATION_KEY]: duplicatedDataPointTranslationKey,
69-
[registry.DUPLICATED_TRANSLATION_KEY]: duplicatedTranslationKey
71+
[registry.DUPLICATED_TRANSLATION_KEY]: duplicatedTranslationKey,
72+
[registry.UNEXISTING_CONSTRAINT_VALUE]: unexistingConstraintValueRule,
73+
[registry.DATA_POINT_CONSTRAINT_VIOLATION]: dataPointConstraintViolation
7074
};

src/ddf-rules/registry.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,15 @@ export const UNEXPECTED_DATA_POINT_TRANSLATIONS_DATA = Symbol.for('UNEXPECTED_DA
2828
export const DUPLICATED_DATA_POINT_TRANSLATION_KEY = Symbol.for('DUPLICATED_DATA_POINT_TRANSLATION_KEY');
2929
export const DUPLICATED_TRANSLATION_KEY = Symbol.for('DUPLICATED_TRANSLATION_KEY');
3030
export const DATA_POINT_WITHOUT_INDICATOR = Symbol.for('DATA_POINT_WITHOUT_INDICATOR');
31+
export const UNEXISTING_CONSTRAINT_VALUE = Symbol.for('UNEXISTING_CONSTRAINT_VALUE');
32+
export const DATA_POINT_CONSTRAINT_VIOLATION = Symbol.for('DATA_POINT_CONSTRAINT_VIOLATION');
3133

3234
export const WARNING_TAG = Symbol.for('WARNING');
3335
export const FILE_SYSTEM_TAG = Symbol.for('FILE_SYSTEM');
3436
export const DATAPOINT_TAG = Symbol.for('DATAPOINT');
3537
export const TRANSLATION_TAG = Symbol.for('TRANSLATION');
3638

37-
function tagsToString(tags: Array<any>) {
39+
/**
 * Converts tag symbols to their global symbol registry keys.
 *
 * @param tags symbols to convert
 * @returns for every tag the key reported by `Symbol.keyFor`
 * (`undefined` for a symbol that is not in the global registry)
 */
function tagsToString(tags: any[]) {
  const registryKeyOf = (tagSymbol: any) => Symbol.keyFor(tagSymbol);

  return tags.map(registryKeyOf);
}
4042

@@ -68,7 +70,9 @@ export const tags: any = {
6870
[UNEXPECTED_DATA_POINT_TRANSLATIONS_DATA]: [TRANSLATION_TAG, DATAPOINT_TAG],
6971
[DUPLICATED_DATA_POINT_TRANSLATION_KEY]: [TRANSLATION_TAG, DATAPOINT_TAG],
7072
[DUPLICATED_TRANSLATION_KEY]: [TRANSLATION_TAG],
71-
[DATA_POINT_WITHOUT_INDICATOR]: [DATAPOINT_TAG]
73+
[DATA_POINT_WITHOUT_INDICATOR]: [DATAPOINT_TAG],
74+
[UNEXISTING_CONSTRAINT_VALUE]: [],
75+
[DATA_POINT_CONSTRAINT_VIOLATION]: [DATAPOINT_TAG]
7276
};
7377

7478
export const descriptions = {
@@ -125,7 +129,9 @@ export const descriptions = {
125129
primary key is not consistent`,
126130
[DUPLICATED_DATA_POINT_TRANSLATION_KEY]: 'Duplicated data point translation key',
127131
[DUPLICATED_TRANSLATION_KEY]: 'Duplicated translation key',
128-
[DATA_POINT_WITHOUT_INDICATOR]: 'Datapoint without indicator: primary key is equal fields in datapackage.json resource'
132+
[DATA_POINT_WITHOUT_INDICATOR]: 'Datapoint without indicator: primary key is equal fields in datapackage.json resource',
133+
[UNEXISTING_CONSTRAINT_VALUE]: 'Constraint value that described in datapackage.json is not a valid entity value',
134+
[DATA_POINT_CONSTRAINT_VIOLATION]: 'Constraint violation for particular datapoint. See datapackage.json format.'
129135
};
130136

131137
export const getRulesInformation = () => Object.getOwnPropertySymbols(exports.descriptions)

test/data-point-rules.spec.ts

Lines changed: 49 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,21 @@
11
import * as chai from 'chai';
2-
import {head} from 'lodash';
3-
import {DdfDataSet} from '../src/ddf-definitions/ddf-data-set';
2+
import { head, flattenDeep, compact, isEqual } from 'lodash';
3+
import { DdfDataSet } from '../src/ddf-definitions/ddf-data-set';
44
import {
55
MEASURE_VALUE_NOT_NUMERIC,
66
DATA_POINT_UNEXPECTED_ENTITY_VALUE,
7-
DATA_POINT_UNEXPECTED_TIME_VALUE
7+
DATA_POINT_UNEXPECTED_TIME_VALUE,
8+
DATA_POINT_CONSTRAINT_VIOLATION
89
} from '../src/ddf-rules/registry';
9-
import {allRules} from '../src/ddf-rules';
10-
import {Issue} from '../src/ddf-rules/issue';
10+
import { allRules } from '../src/ddf-rules';
11+
import { Issue } from '../src/ddf-rules/issue';
1112

1213
const expect = chai.expect;
1314

1415
describe('rules for data points', () => {
1516
let ddfDataSet = null;
1617

17-
describe('when data set is correct (\'fixtures/good-folder\')', () => {
18+
describe(`when data set is correct ('fixtures/good-folder')`, () => {
1819
ddfDataSet = new DdfDataSet('./test/fixtures/good-folder', null);
1920

2021
Object.getOwnPropertySymbols(allRules).forEach(dataPointRuleKey => {
@@ -41,9 +42,10 @@ describe('rules for data points', () => {
4142
});
4243
});
4344

45+
4446
describe('when data set is NOT correct', () => {
4547
it(`an issue should be found for rule 'DATA_POINT_VALUE_NOT_NUMERIC'
46-
(fixtures/rules-cases/data-point-value-not-num)`, done => {
48+
(fixtures/rules-cases/data-point-value-not-num)`, done => {
4749
ddfDataSet = new DdfDataSet('./test/fixtures/rules-cases/data-point-value-not-num', null);
4850
ddfDataSet.load(() => {
4951
const dataPointValueNotNumRule = allRules[MEASURE_VALUE_NOT_NUMERIC].recordRule;
@@ -72,7 +74,7 @@ describe('rules for data points', () => {
7274
});
7375

7476
it(`an issue should be found for rule 'DATA_POINT_UNEXPECTED_ENTITY_VALUE'
75-
(fixtures/rules-cases/data-point-unexpected-entity-value)`, done => {
77+
(fixtures/rules-cases/data-point-unexpected-entity-value)`, done => {
7678
ddfDataSet = new DdfDataSet('./test/fixtures/rules-cases/data-point-unexpected-entity-value', null);
7779
ddfDataSet.load(() => {
7880
const dataPointUnexpectedConceptRule = allRules[DATA_POINT_UNEXPECTED_ENTITY_VALUE].recordRule;
@@ -102,7 +104,7 @@ describe('rules for data points', () => {
102104
});
103105

104106
it(`an issue should be found for rule 'DATA_POINT_UNEXPECTED_TIME_VALUE'
105-
(fixtures/rules-cases/data-point-unexpected-time-value)`, done => {
107+
(fixtures/rules-cases/data-point-unexpected-time-value)`, done => {
106108
ddfDataSet = new DdfDataSet('./test/fixtures/rules-cases/data-point-unexpected-time-value', null);
107109
ddfDataSet.load(() => {
108110
const dataPointUnexpectedTimeRule = allRules[DATA_POINT_UNEXPECTED_TIME_VALUE].recordRule;
@@ -130,5 +132,43 @@ describe('rules for data points', () => {
130132
);
131133
});
132134
});
135+
136+
it(`an issue should be found for rule 'DATA_POINT_CONSTRAINT_VIOLATION'
137+
(fixtures/rules-cases/data-point-constraint-violation)`, done => {
138+
ddfDataSet = new DdfDataSet('./test/fixtures/rules-cases/data-point-constraint-violation', null);
139+
ddfDataSet.load(() => {
140+
const dataPointConstraintViolationRule = allRules[DATA_POINT_CONSTRAINT_VIOLATION].recordRule;
141+
const fileDescriptor = head(ddfDataSet.getDataPoint().fileDescriptors);
142+
const issuesStorage = [];
143+
const EXPECTED_ISSUES_QUANTITY = 2;
144+
const EXPECTED_ISSUES_DATA = [{
145+
path: 'ddf--datapoints--population--by--country_code-900--year--age.csv',
146+
data: {constraints: ['900'], fieldName: 'country_code', fieldValue: '777', line: 1}
147+
}, {
148+
path: 'ddf--datapoints--population--by--country_code-900--year--age.csv',
149+
data: {constraints: ['900'], fieldName: 'country_code', fieldValue: '901', line: 3}
150+
}];
151+
152+
ddfDataSet.getDataPoint().loadFile(fileDescriptor,
153+
(record, line) => {
154+
issuesStorage.push(dataPointConstraintViolationRule({ddfDataSet, fileDescriptor, record, line}));
155+
},
156+
() => {
157+
const issues = compact(flattenDeep(issuesStorage));
158+
159+
expect(issues.length).to.equal(EXPECTED_ISSUES_QUANTITY);
160+
161+
issues.forEach((issue: Issue, index: number) => {
162+
expect(issue.type).to.equal(DATA_POINT_CONSTRAINT_VIOLATION);
163+
expect(issue.path.endsWith(EXPECTED_ISSUES_DATA[index].path)).to.be.true;
164+
expect(!!issue.data).to.be.true;
165+
expect(isEqual(issue.data, EXPECTED_ISSUES_DATA[index].data)).to.be.true;
166+
});
167+
168+
done();
169+
}
170+
);
171+
});
172+
});
133173
});
134174
});

0 commit comments

Comments
 (0)