diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index c4444e510..4c8d9e042 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -29,6 +29,9 @@ jobs:
           sudo apt-get install -y -qq libicu-dev
           pip install wheel pyicu
           pip install -e ".[dev]"
+      - name: Run checks for default model
+        run: |
+          python contrib/check_model.py
       - name: Run the tests
         run: |
           make test
diff --git a/contrib/check_model.py b/contrib/check_model.py
new file mode 100644
index 000000000..c086c77a8
--- /dev/null
+++ b/contrib/check_model.py
@@ -0,0 +1,147 @@
+"""Check the default followthemoney model for inconsistent properties.
+
+Properties from every schema are grouped by name and by label, and three
+kinds of inconsistency are reported:
+
+* the same property name declared with different types,
+* the same property name declared with different labels,
+* the same label used by properties with different names.
+
+Accepted exceptions are listed in the ``IGNORE_*`` constants. The script
+exits with status 1 when any issue is found, so it can be used as a CI gate.
+"""
+import sys
+from collections import defaultdict
+from followthemoney import model
+
+
+IGNORE_DIVERGENT_TYPES = [
+    "author",
+    "organization",
+    "classification",
+    "gender",
+    "number",
+    "authority",
+    "duration",
+    "cpvCode",
+    "nutsCode",
+    "area",
+    "subject",
+    "sender",
+]
+
+IGNORE_DIVERGENT_LABELS = [
+    "wikidataId",
+    "parent",
+    "holder",
+    "number",
+    "authority",
+    "title",
+    "cpvCode",
+    "nutsCode",
+    "criteria",
+    "procedure",
+    "callForTenders",
+    "ticker",
+]
+
+IGNORE_LABEL_COLLISIONS = [
+    "Description",
+    "Address",
+    "Notes",
+    "Customs declarations",
+    "Country of origin",
+    "Payments received",
+    "Payments made",
+    "Entity",
+    "Passport number",
+    "The language of the translated text",
+    "Responding to",
+    "Document number",
+    "ISIN",
+]
+
+
+def _find_divergent(groups, attr, ignored):
+    """Return the groups whose members disagree on attribute ``attr``.
+
+    ``groups`` maps a key to a collection of properties. Keys listed in
+    ``ignored`` are skipped; a group is kept when its members carry more
+    than one distinct value for ``attr``.
+    """
+    return {
+        key: props
+        for key, props in groups.items()
+        if key not in ignored and len({getattr(p, attr) for p in props}) > 1
+    }
+
+
+def test_divergent_types(by_name):
+    """Return ``{name: props}`` for names whose properties differ in type."""
+    return _find_divergent(by_name, "type", IGNORE_DIVERGENT_TYPES)
+
+
+def test_divergent_labels(by_name):
+    """Return ``{name: props}`` for names whose properties differ in label."""
+    return _find_divergent(by_name, "label", IGNORE_DIVERGENT_LABELS)
+
+
+def test_label_collisions(by_label):
+    """Return ``{label: props}`` for labels shared by differently named props."""
+    return _find_divergent(by_label, "name", IGNORE_LABEL_COLLISIONS)
+
+
+def _report(heading, found, describe):
+    """Print one report section, sorted so the output is stable across runs."""
+    print(f"{heading}\n")
+    for key, props in sorted(found.items()):
+        print(f"  {key}:")
+        for prop in sorted(props, key=lambda p: p.qname):
+            print(f"    * {describe(prop)}")
+        print()
+    print()
+
+
+def main():
+    """Run every check on the default model and return the exit status."""
+    by_name = defaultdict(set)
+    by_label = defaultdict(set)
+
+    for schema in model:
+        for prop in schema.properties.values():
+            by_name[prop.name].add(prop)
+            by_label[prop.label].add(prop)
+
+    sections = [
+        (
+            "DIVERGENT TYPES",
+            test_divergent_types(by_name),
+            lambda p: f"{p.qname} - {p.type.name}",
+        ),
+        (
+            "DIVERGENT LABELS",
+            test_divergent_labels(by_name),
+            lambda p: f"{p.qname} - {p.label}",
+        ),
+        (
+            "COLLIDING LABELS",
+            test_label_collisions(by_label),
+            lambda p: p.qname,
+        ),
+    ]
+
+    failed = False
+    for heading, found, describe in sections:
+        if found:
+            failed = True
+            _report(heading, found, describe)
+
+    if failed:
+        return 1
+
+    print("No issues found.")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/contrib/collisions.py b/contrib/collisions.py
deleted file mode 100644
index 68525b64e..000000000
--- a/contrib/collisions.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from collections import defaultdict
-from followthemoney import model
-
-by_name = defaultdict(set)
-for schema in model:
-    for prop in schema.properties.values():
-        by_name[prop.name].add(prop)
-
-for props in by_name.values():
-    if len(props) == 1:
-        continue
-
-    types = set([p.type for p in props])
-    if len(types) > 1:
-        print(f"[{props}] divergent types: {types}")
-
-    labels = set([p.label for p in props])
-    if len(labels) > 1:
-        print(f"[{props}] divergent labels: {labels}")
-
-    # print(props)
\ No newline at end of file