Skip to content

Commit

Permalink
feat(rules): validate enum values when set in datapackage.json
Browse files Browse the repository at this point in the history
Closes #258
  • Loading branch information
buchslava committed Apr 21, 2017
1 parent 84fe09b commit c204e42
Show file tree
Hide file tree
Showing 31 changed files with 131,038 additions and 50 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
"babyparse": "0.4.6",
"ddf-time-utils": "0.1.7",
"deep-diff": "0.3.4",
"fast-csv": "2.4.0",
"fast-csv": "2.3.0",
"fs": "0.0.2",
"json2csv": "3.7.3",
"levenshtein": "1.0.5",
Expand Down
1 change: 1 addition & 0 deletions src/data/data-package.ts
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,7 @@ export class DataPackage {
this.fileDescriptors = dataPackage.resources.map(resource => ({
filename: resource.path,
name: resource.name,
schema: resource.schema,
fullPath: resolve(this.rootFolder, resource.path),
type: getTypeByResource(resource)
}));
Expand Down
24 changes: 24 additions & 0 deletions src/ddf-rules/data-point-rules/constraint-violation.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import { compact, flattenDeep, includes } from 'lodash';
import { DATA_POINT_CONSTRAINT_VIOLATION } from '../registry';
import { cacheFor, IConstraintDescriptor } from './shared';
import { Issue } from '../issue';

/**
 * Data point rule that reports records whose value for a constrained field
 * is not among the allowed values listed in the constraint descriptor.
 */
export const rule = {
  isDataPoint: true,
  /**
   * Validates a single data point record.
   *
   * @param dataPointDescriptor descriptor of the record under validation;
   * its `record` and `line` properties are read here and the whole object is
   * handed to `cacheFor.constraintsByFileDescriptor`.
   * @returns one issue per violated constraint (an empty array when none is violated).
   */
  recordRule: dataPointDescriptor => {
    const descriptors: IConstraintDescriptor[] = cacheFor.constraintsByFileDescriptor(dataPointDescriptor);
    const valueFor = (descriptor: IConstraintDescriptor) => dataPointDescriptor.record[descriptor.fieldName];
    const isViolated = (descriptor: IConstraintDescriptor) => !includes(descriptor.constraints, valueFor(descriptor));
    const toIssue = (descriptor: IConstraintDescriptor) => {
      const issue = new Issue(DATA_POINT_CONSTRAINT_VIOLATION).setPath(descriptor.fullPath);

      return issue.setData({
        constraints: descriptor.constraints,
        fieldName: descriptor.fieldName,
        fieldValue: valueFor(descriptor),
        line: dataPointDescriptor.line
      });
    };
    const violated = descriptors.filter(descriptor => isViolated(descriptor));
    const issues = violated.map(descriptor => toIssue(descriptor));

    return compact(flattenDeep(issues));
  }
};
33 changes: 33 additions & 0 deletions src/ddf-rules/data-point-rules/shared.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
import { compact, flattenDeep } from 'lodash';
import { DirectoryDescriptor } from '../../data/directory-descriptor';

/**
 * Describes the enum constraint declared for one field of a data point file.
 */
export interface IConstraintDescriptor {
  /** Full path of the data point file the constraint applies to. */
  fullPath: string;
  /** Name of the data point file the constraint applies to. */
  file: string;
  /** Name of the constrained schema field (column). */
  fieldName: string;
  /** Allowed values, taken from the field's `constraints.enum`. */
  constraints: string[];
}

function constructEntityCondition(entity) {
const expectedKey = `is--${entity}`;

Expand Down Expand Up @@ -64,6 +74,29 @@ export const cacheFor = {
.filter(conceptTypeKey => conceptTypeDictionary[conceptTypeKey] === type);
}

return cache[key];
},
/**
 * Collects the enum constraints declared for the file the given data point
 * belongs to. The result is stored in `cache` under a key built from the
 * DDF root path and the file name, and later calls with the same key return
 * the stored array.
 *
 * @param dataPointDescriptor descriptor carrying `ddfDataSet` and the
 * `fileDescriptor` of the data point file being validated.
 * @returns one descriptor per schema field that declares an enum constraint.
 */
constraintsByFileDescriptor: (dataPointDescriptor): IConstraintDescriptor[] => {
  const expectedFile = dataPointDescriptor.fileDescriptor;
  const key = `${dataPointDescriptor.ddfDataSet.ddfRoot.path}@Constraints@${expectedFile.file}`;

  if (!cache[key]) {
    const forExpectedFile = (currentFileDescriptor: any) => expectedFile.file === currentFileDescriptor.filename;
    // Only `enum` constraints are handled. A field that declares other
    // constraints but no enum list must be skipped: otherwise its descriptor
    // would carry `constraints: undefined` and every record of the file would
    // be reported as a violation.
    const hasEnumConstraint = (field: any) =>
      !!field && !!field.constraints && Array.isArray(field.constraints.enum);
    const getSchemaFields = (fileDescriptor: any) =>
      fileDescriptor.schema && fileDescriptor.schema.fields ? fileDescriptor.schema.fields : [];

    cache[key] = compact(flattenDeep(
      dataPointDescriptor.ddfDataSet.ddfRoot.directoryDescriptors.map((directoryDescriptor: DirectoryDescriptor) =>
        directoryDescriptor.dataPackage.fileDescriptors.filter(forExpectedFile).map((fileDescriptor: any) =>
          getSchemaFields(fileDescriptor).filter(hasEnumConstraint).map((field: any) => ({
            fullPath: expectedFile.fullPath,
            file: expectedFile.file,
            fieldName: field.name,
            constraints: field.constraints.enum
          }))
        )
      )
    ));
  }

  return cache[key];
}
};
42 changes: 42 additions & 0 deletions src/ddf-rules/entity-rules/unexisting-constraint-value.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import { compact, flattenDeep, get as getValue } from 'lodash';
import { UNEXISTING_CONSTRAINT_VALUE } from '../registry';
import { DdfDataSet } from '../../ddf-definitions/ddf-data-set';
import { DirectoryDescriptor } from '../../data/directory-descriptor';
import { DATA_POINT } from '../../ddf-definitions/constants';
import { Issue } from '../issue';

/**
 * Tells whether at least one entity record holds `value` in its `name` field.
 *
 * @param ddfDataSet data set whose entity records are scanned.
 * @param name field (column) name to look at in every entity record.
 * @param value value that is compared strictly against the field content.
 * @returns `true` as soon as a matching record is found, `false` otherwise.
 */
const checkConstraintValue = (ddfDataSet: DdfDataSet, name: string, value: string): boolean => {
  let isFound = false;

  for (const entityRecord of ddfDataSet.getEntity().getAllData()) {
    if (entityRecord[name] === value) {
      isFound = true;
      break;
    }
  }

  return isFound;
};
// Keeps only the file descriptors whose type is DATA_POINT.
const forDataPointType = (descriptor: any) => {
  return descriptor.type === DATA_POINT;
};
// Truthy when the schema field declares an enum constraint.
const constraintsAreExists = (schemaField: any) => {
  return schemaField.constraints && schemaField.constraints.enum;
};
// Negation of checkConstraintValue: true when no entity record holds the value.
const constraintValueAreNotPresentInEntities = (ddfDataSet: DdfDataSet, name: string, value: string) => {
  const isPresent = checkConstraintValue(ddfDataSet, name, value);

  return !isPresent;
};
// Enum values of the field, or an empty array when the path is undefined.
const getConstraintsByField = (schemaField: any) => {
  return getValue<any | string[]>(schemaField, 'constraints.enum', []);
};
// Schema fields of the file descriptor, or an empty array when the path is undefined.
const getSchemaFields = (descriptor: any) => {
  return getValue<any | any[]>(descriptor, 'schema.fields', []);
};

/**
 * Data set rule that reports every enum constraint value declared for a data
 * point file that is not found among the entity records.
 */
export const rule = {
  /**
   * @param ddfDataSet data set whose data packages and entities are inspected.
   * @returns a flat list of issues, one per constraint value without a matching entity record.
   */
  rule: (ddfDataSet: DdfDataSet) => {
    // Issues for a single constrained field of a single file.
    const issuesForField = (fileDescriptor: any, field: any) => {
      const missingValues = getConstraintsByField(field)
        .filter(value => constraintValueAreNotPresentInEntities(ddfDataSet, field.name, value));

      return missingValues.map(value => {
        const issue = new Issue(UNEXISTING_CONSTRAINT_VALUE).setPath(fileDescriptor.fullPath);

        return issue.setData({constraintEntityValue: value});
      });
    };
    // Issues for all constrained fields of a single data point file.
    const issuesForFile = (fileDescriptor: any) => {
      const constrainedFields = getSchemaFields(fileDescriptor).filter(constraintsAreExists);

      return constrainedFields.map((field: any) => issuesForField(fileDescriptor, field));
    };
    // Issues for all data point files of a single directory.
    const issuesForDirectory = (directoryDescriptor: DirectoryDescriptor) => {
      const dataPointFiles = directoryDescriptor.dataPackage.fileDescriptors.filter(forDataPointType);

      return dataPointFiles.map((fileDescriptor: any) => issuesForFile(fileDescriptor));
    };
    const nestedIssues = ddfDataSet.ddfRoot.directoryDescriptors
      .map((directoryDescriptor: DirectoryDescriptor) => issuesForDirectory(directoryDescriptor));

    return compact(flattenDeep(nestedIssues));
  }
};
66 changes: 35 additions & 31 deletions src/ddf-rules/index.ts
Original file line number Diff line number Diff line change
@@ -1,40 +1,42 @@
import * as registry from './registry';

import {rule as conceptIdIsNotUnique} from './concept-rules/concept-id-is-not-unique';
import {rule as emptyConceptId} from './concept-rules/empty-concept-id';
import {rule as nonConceptHeader} from './concept-rules/non-concept-header';
import {rule as conceptMandatoryFieldNotFound} from './concept-rules/concept-mandatory-field-not-found';
import {rule as conceptsNotFound} from './concept-rules/concepts-not-found';
import {rule as invalidDrillUp} from './concept-rules/invalid-drill-up';
import { rule as conceptIdIsNotUnique } from './concept-rules/concept-id-is-not-unique';
import { rule as emptyConceptId } from './concept-rules/empty-concept-id';
import { rule as nonConceptHeader } from './concept-rules/non-concept-header';
import { rule as conceptMandatoryFieldNotFound } from './concept-rules/concept-mandatory-field-not-found';
import { rule as conceptsNotFound } from './concept-rules/concepts-not-found';
import { rule as invalidDrillUp } from './concept-rules/invalid-drill-up';

import {rule as incorrectFile} from './data-package-rules/incorrect-file';
import {rule as confusedFields} from './data-package-rules/confused-fields';
import {rule as nonConceptPrimaryKey} from './data-package-rules/non-concept-primary-key';
import {rule as nonUniqueResourceName} from './data-package-rules/non-unique-resource-name';
import {rule as nonUniqueResourceFile} from './data-package-rules/non-unique-resource-file';
import {rule as dataPointWithoutIndicator} from './data-package-rules/datapoint-without-indicator';
import { rule as incorrectFile } from './data-package-rules/incorrect-file';
import { rule as confusedFields } from './data-package-rules/confused-fields';
import { rule as nonConceptPrimaryKey } from './data-package-rules/non-concept-primary-key';
import { rule as nonUniqueResourceName } from './data-package-rules/non-unique-resource-name';
import { rule as nonUniqueResourceFile } from './data-package-rules/non-unique-resource-file';
import { rule as dataPointWithoutIndicator } from './data-package-rules/datapoint-without-indicator';

import {rule as measureValueNotNumeric} from './data-point-rules/measure-value-not-numeric';
import {rule as unexpectedEntityValue} from './data-point-rules/unexpected-entity-value';
import {rule as unexpectedTimeValue} from './data-point-rules/unexpected-time-value';
import { rule as measureValueNotNumeric } from './data-point-rules/measure-value-not-numeric';
import { rule as unexpectedEntityValue } from './data-point-rules/unexpected-entity-value';
import { rule as unexpectedTimeValue } from './data-point-rules/unexpected-time-value';
import { rule as dataPointConstraintViolation } from './data-point-rules/constraint-violation';

import {rule as nonUniqueEntityValue} from './entity-rules/non-unique-entity-value';
import {rule as wrongEntityIsHeader} from './entity-rules/wrong-entity-is-header';
import {rule as wrongEntityIsValue} from './entity-rules/wrong-entity-is-value';
import { rule as nonUniqueEntityValue } from './entity-rules/non-unique-entity-value';
import { rule as wrongEntityIsHeader } from './entity-rules/wrong-entity-is-header';
import { rule as wrongEntityIsValue } from './entity-rules/wrong-entity-is-value';
import { rule as unexistingConstraintValueRule } from './entity-rules/unexisting-constraint-value';

import {rule as emptyData} from './general-rules/empty-data';
import {rule as unexpectedData} from './general-rules/unexpected-data';
import {rule as wrongDataPointHeader} from './general-rules/wrong-data-point-header';
import {rule as incorrectIdentifier} from './general-rules/incorrect-identifier';
import {rule as incorrectJsonField} from './general-rules/incorrect-json-field';
import {rule as nonDdfFolder} from './general-rules/non-ddf-folder';
import {rule as nonDdfDataset} from './general-rules/non-ddf-dataset';
import { rule as emptyData } from './general-rules/empty-data';
import { rule as unexpectedData } from './general-rules/unexpected-data';
import { rule as wrongDataPointHeader } from './general-rules/wrong-data-point-header';
import { rule as incorrectIdentifier } from './general-rules/incorrect-identifier';
import { rule as incorrectJsonField } from './general-rules/incorrect-json-field';
import { rule as nonDdfFolder } from './general-rules/non-ddf-folder';
import { rule as nonDdfDataset } from './general-rules/non-ddf-dataset';

import {rule as unexpectedTranslationHeader} from './translation-rules/unexpected-translation-header';
import {rule as unexpectedTranslationsData} from './translation-rules/unexpected-translations-data';
import {rule as unexpectedDataPointTranslationsData} from './translation-rules/unexpected-data-point-translations-data';
import {rule as duplicatedDataPointTranslationKey} from './translation-rules/duplicated-data-point-translation-key';
import {rule as duplicatedTranslationKey} from './translation-rules/duplicated-translation-key';
import { rule as unexpectedTranslationHeader } from './translation-rules/unexpected-translation-header';
import { rule as unexpectedTranslationsData } from './translation-rules/unexpected-translations-data';
import { rule as unexpectedDataPointTranslationsData } from './translation-rules/unexpected-data-point-translations-data';
import { rule as duplicatedDataPointTranslationKey } from './translation-rules/duplicated-data-point-translation-key';
import { rule as duplicatedTranslationKey } from './translation-rules/duplicated-translation-key';

export const allRules = {
[registry.CONCEPT_ID_IS_NOT_UNIQUE]: conceptIdIsNotUnique,
Expand Down Expand Up @@ -66,5 +68,7 @@ export const allRules = {
[registry.UNEXPECTED_TRANSLATIONS_DATA]: unexpectedTranslationsData,
[registry.UNEXPECTED_DATA_POINT_TRANSLATIONS_DATA]: unexpectedDataPointTranslationsData,
[registry.DUPLICATED_DATA_POINT_TRANSLATION_KEY]: duplicatedDataPointTranslationKey,
[registry.DUPLICATED_TRANSLATION_KEY]: duplicatedTranslationKey
[registry.DUPLICATED_TRANSLATION_KEY]: duplicatedTranslationKey,
[registry.UNEXISTING_CONSTRAINT_VALUE]: unexistingConstraintValueRule,
[registry.DATA_POINT_CONSTRAINT_VIOLATION]: dataPointConstraintViolation
};
12 changes: 9 additions & 3 deletions src/ddf-rules/registry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,15 @@ export const UNEXPECTED_DATA_POINT_TRANSLATIONS_DATA = Symbol.for('UNEXPECTED_DA
export const DUPLICATED_DATA_POINT_TRANSLATION_KEY = Symbol.for('DUPLICATED_DATA_POINT_TRANSLATION_KEY');
export const DUPLICATED_TRANSLATION_KEY = Symbol.for('DUPLICATED_TRANSLATION_KEY');
export const DATA_POINT_WITHOUT_INDICATOR = Symbol.for('DATA_POINT_WITHOUT_INDICATOR');
export const UNEXISTING_CONSTRAINT_VALUE = Symbol.for('UNEXISTING_CONSTRAINT_VALUE');
export const DATA_POINT_CONSTRAINT_VIOLATION = Symbol.for('DATA_POINT_CONSTRAINT_VIOLATION');

export const WARNING_TAG = Symbol.for('WARNING');
export const FILE_SYSTEM_TAG = Symbol.for('FILE_SYSTEM');
export const DATAPOINT_TAG = Symbol.for('DATAPOINT');
export const TRANSLATION_TAG = Symbol.for('TRANSLATION');

function tagsToString(tags: Array<any>) {
/**
 * Converts tag symbols to the keys they are registered under in the global
 * symbol registry.
 *
 * @param tags symbols to convert.
 * @returns the result of `Symbol.keyFor` for every tag, in the same order.
 */
function tagsToString(tags: any[]) {
  const toRegistryKey = (tag: any) => Symbol.keyFor(tag);

  return tags.map(tag => toRegistryKey(tag));
}

Expand Down Expand Up @@ -68,7 +70,9 @@ export const tags: any = {
[UNEXPECTED_DATA_POINT_TRANSLATIONS_DATA]: [TRANSLATION_TAG, DATAPOINT_TAG],
[DUPLICATED_DATA_POINT_TRANSLATION_KEY]: [TRANSLATION_TAG, DATAPOINT_TAG],
[DUPLICATED_TRANSLATION_KEY]: [TRANSLATION_TAG],
[DATA_POINT_WITHOUT_INDICATOR]: [DATAPOINT_TAG]
[DATA_POINT_WITHOUT_INDICATOR]: [DATAPOINT_TAG],
[UNEXISTING_CONSTRAINT_VALUE]: [],
[DATA_POINT_CONSTRAINT_VIOLATION]: [DATAPOINT_TAG]
};

export const descriptions = {
Expand Down Expand Up @@ -125,7 +129,9 @@ export const descriptions = {
primary key is not consistent`,
[DUPLICATED_DATA_POINT_TRANSLATION_KEY]: 'Duplicated data point translation key',
[DUPLICATED_TRANSLATION_KEY]: 'Duplicated translation key',
[DATA_POINT_WITHOUT_INDICATOR]: 'Datapoint without indicator: primary key is equal fields in datapackage.json resource'
[DATA_POINT_WITHOUT_INDICATOR]: 'Datapoint without indicator: primary key is equal fields in datapackage.json resource',
[UNEXISTING_CONSTRAINT_VALUE]: 'Constraint value that described in datapackage.json is not a valid entity value',
[DATA_POINT_CONSTRAINT_VIOLATION]: 'Constraint violation for particular datapoint. See datapackage.json format.'
};

export const getRulesInformation = () => Object.getOwnPropertySymbols(exports.descriptions)
Expand Down
58 changes: 49 additions & 9 deletions test/data-point-rules.spec.ts
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
import * as chai from 'chai';
import {head} from 'lodash';
import {DdfDataSet} from '../src/ddf-definitions/ddf-data-set';
import { head, flattenDeep, compact, isEqual } from 'lodash';
import { DdfDataSet } from '../src/ddf-definitions/ddf-data-set';
import {
MEASURE_VALUE_NOT_NUMERIC,
DATA_POINT_UNEXPECTED_ENTITY_VALUE,
DATA_POINT_UNEXPECTED_TIME_VALUE
DATA_POINT_UNEXPECTED_TIME_VALUE,
DATA_POINT_CONSTRAINT_VIOLATION
} from '../src/ddf-rules/registry';
import {allRules} from '../src/ddf-rules';
import {Issue} from '../src/ddf-rules/issue';
import { allRules } from '../src/ddf-rules';
import { Issue } from '../src/ddf-rules/issue';

const expect = chai.expect;

describe('rules for data points', () => {
let ddfDataSet = null;

describe('when data set is correct (\'fixtures/good-folder\')', () => {
describe(`when data set is correct ('fixtures/good-folder')`, () => {
ddfDataSet = new DdfDataSet('./test/fixtures/good-folder', null);

Object.getOwnPropertySymbols(allRules).forEach(dataPointRuleKey => {
Expand All @@ -41,9 +42,10 @@ describe('rules for data points', () => {
});
});


describe('when data set is NOT correct', () => {
it(`an issue should be found for rule 'DATA_POINT_VALUE_NOT_NUMERIC'
(fixtures/rules-cases/data-point-value-not-num)`, done => {
(fixtures/rules-cases/data-point-value-not-num)`, done => {
ddfDataSet = new DdfDataSet('./test/fixtures/rules-cases/data-point-value-not-num', null);
ddfDataSet.load(() => {
const dataPointValueNotNumRule = allRules[MEASURE_VALUE_NOT_NUMERIC].recordRule;
Expand Down Expand Up @@ -72,7 +74,7 @@ describe('rules for data points', () => {
});

it(`an issue should be found for rule 'DATA_POINT_UNEXPECTED_ENTITY_VALUE'
(fixtures/rules-cases/data-point-unexpected-entity-value)`, done => {
(fixtures/rules-cases/data-point-unexpected-entity-value)`, done => {
ddfDataSet = new DdfDataSet('./test/fixtures/rules-cases/data-point-unexpected-entity-value', null);
ddfDataSet.load(() => {
const dataPointUnexpectedConceptRule = allRules[DATA_POINT_UNEXPECTED_ENTITY_VALUE].recordRule;
Expand Down Expand Up @@ -102,7 +104,7 @@ describe('rules for data points', () => {
});

it(`an issue should be found for rule 'DATA_POINT_UNEXPECTED_TIME_VALUE'
(fixtures/rules-cases/data-point-unexpected-time-value)`, done => {
(fixtures/rules-cases/data-point-unexpected-time-value)`, done => {
ddfDataSet = new DdfDataSet('./test/fixtures/rules-cases/data-point-unexpected-time-value', null);
ddfDataSet.load(() => {
const dataPointUnexpectedTimeRule = allRules[DATA_POINT_UNEXPECTED_TIME_VALUE].recordRule;
Expand Down Expand Up @@ -130,5 +132,43 @@ describe('rules for data points', () => {
);
});
});

it(`an issue should be found for rule 'DATA_POINT_CONSTRAINT_VIOLATION'
(fixtures/rules-cases/data-point-constraint-violation)`, done => {
  // Both expected issues point at the same fixture file and the same constrained field.
  const FIXTURE_FILE = 'ddf--datapoints--population--by--country_code-900--year--age.csv';
  const expectedViolation = (fieldValue: string, line: number) => ({
    path: FIXTURE_FILE,
    data: {constraints: ['900'], fieldName: 'country_code', fieldValue, line}
  });
  const EXPECTED_ISSUES_DATA = [expectedViolation('777', 1), expectedViolation('901', 3)];
  const EXPECTED_ISSUES_QUANTITY = 2;

  ddfDataSet = new DdfDataSet('./test/fixtures/rules-cases/data-point-constraint-violation', null);
  ddfDataSet.load(() => {
    const dataPointConstraintViolationRule = allRules[DATA_POINT_CONSTRAINT_VIOLATION].recordRule;
    const fileDescriptor = head(ddfDataSet.getDataPoint().fileDescriptors);
    const issuesStorage = [];

    // Runs the rule on every record and keeps whatever it returns.
    const onRecord = (record, line) => {
      issuesStorage.push(dataPointConstraintViolationRule({ddfDataSet, fileDescriptor, record, line}));
    };
    // Once the whole file is read, compares the collected issues with the expectations.
    const onFileLoaded = () => {
      const issues = compact(flattenDeep(issuesStorage));

      expect(issues.length).to.equal(EXPECTED_ISSUES_QUANTITY);

      issues.forEach((issue: Issue, index: number) => {
        const expected = EXPECTED_ISSUES_DATA[index];

        expect(issue.type).to.equal(DATA_POINT_CONSTRAINT_VIOLATION);
        expect(issue.path.endsWith(expected.path)).to.be.true;
        expect(!!issue.data).to.be.true;
        expect(isEqual(issue.data, expected.data)).to.be.true;
      });

      done();
    };

    ddfDataSet.getDataPoint().loadFile(fileDescriptor, onRecord, onFileLoaded);
  });
});
});
});
Loading

0 comments on commit c204e42

Please sign in to comment.