-
-
Notifications
You must be signed in to change notification settings - Fork 54
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add script to check for common model issues #1124
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
import sys | ||
from collections import defaultdict | ||
from followthemoney import model | ||
|
||
|
||
# Property names that test_divergent_types() skips even though their
# properties have differing types. These are known issues that can only be
# resolved with breaking changes, so they are ignored for now to keep CI green.
IGNORE_DIVERGENT_TYPES = [
    "author",
    "organization",
    "gender",
    "number",
    "authority",
    "duration",
    "area",
    "subject",
    "sender",
]
|
||
# Property names that test_divergent_labels() skips even though their
# properties use differing labels. These are known issues that can only be
# resolved with breaking changes, so they are ignored for now to keep CI green.
IGNORE_DIVERGENT_LABELS = [
    "parent",
    "holder",
    "number",
    "authority",
    "criteria",
    "procedure",
]
|
||
# Labels that test_label_collisions() skips even though they are shared by
# properties with different names. These are known issues that can only be
# resolved with breaking changes, so they are ignored for now to keep CI green.
IGNORE_LABEL_COLLISIONS = [
    "Address",
    "Notes",
    "Customs declarations",
    "Country of origin",
    "Payments received",
    "Payments made",
    "Entity",
    "The language of the translated text",
    "Responding to",
    "ISIN",
    "Document number",
]
|
||
|
||
def test_divergent_types(by_name):
    """Find property names whose properties do not all share one type.

    Args:
        by_name: Mapping of property name to the collection of property
            objects registered under that name. Each property must expose
            a hashable ``type`` attribute.

    Returns:
        A dict mapping each offending name to its collection of properties.
        Names with exactly one property, and names listed in
        ``IGNORE_DIVERGENT_TYPES``, are never reported.
    """
    divergent = {}

    for name, props in by_name.items():
        # A single property cannot diverge; ignored names are known issues.
        if len(props) == 1 or name in IGNORE_DIVERGENT_TYPES:
            continue

        types = {p.type for p in props}
        if len(types) > 1:
            divergent[name] = props

    return divergent
|
||
|
||
def test_divergent_labels(by_name):
    """Find property names whose properties do not all share one label.

    Args:
        by_name: Mapping of property name to the collection of property
            objects registered under that name. Each property must expose
            a hashable ``label`` attribute.

    Returns:
        A dict mapping each offending name to its collection of properties.
        Names with exactly one property, and names listed in
        ``IGNORE_DIVERGENT_LABELS``, are never reported.
    """
    divergent = {}

    for name, props in by_name.items():
        # A single property cannot diverge; ignored names are known issues.
        if len(props) == 1 or name in IGNORE_DIVERGENT_LABELS:
            continue

        labels = {p.label for p in props}
        if len(labels) > 1:
            divergent[name] = props

    return divergent
|
||
|
||
def test_label_collisions(by_label):
    """Find labels that are shared by properties with different names.

    Args:
        by_label: Mapping of label to the collection of property objects
            that use that label. Each property must expose a hashable
            ``name`` attribute.

    Returns:
        A dict mapping each colliding label to its collection of properties.
        Labels used by exactly one property, and labels listed in
        ``IGNORE_LABEL_COLLISIONS``, are never reported.
    """
    collisions = {}

    for label, props in by_label.items():
        # A single property cannot collide; ignored labels are known issues.
        if len(props) == 1 or label in IGNORE_LABEL_COLLISIONS:
            continue

        names = {p.name for p in props}
        if len(names) > 1:
            collisions[label] = props

    return collisions
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The three functions above (test_divergent_types, test_divergent_labels, and test_label_collisions) all use the same basic pattern. Would it be worth pulling this out into a generic function to reduce duplication? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. While they use the same structure, they do different things, and abstracting the generic structure would probably require passing a bunch of parameters or predicates as lambdas. I’m not sure this would make it easier to understand or maintain, to be honest. |
||
|
||
if __name__ == '__main__':
    # NOTE(review): indentation was lost in the copy this was restored from;
    # the placement of the bare print() calls below is reconstructed and
    # should be confirmed against the upstream script.

    # Index every property of every schema in the model by name and by label.
    by_name = defaultdict(set)
    by_label = defaultdict(set)

    for schema in model:
        for prop in schema.properties.values():
            by_name[prop.name].add(prop)
            by_label[prop.label].add(prop)

    divergent_types = test_divergent_types(by_name)
    divergent_labels = test_divergent_labels(by_name)
    label_collisions = test_label_collisions(by_label)

    # Set as soon as any check reports a problem; drives the exit status.
    failed = False

    if divergent_types:
        failed = True
        print("DIVERGENT TYPES\n")
        for name, props in divergent_types.items():
            print(f" {name}:")
            for prop in props:
                print(f" * {prop.qname} - {prop.type.name}")
            print()
        print()

    if divergent_labels:
        failed = True
        print("DIVERGENT LABELS\n")
        for name, props in divergent_labels.items():
            print(f" {name}:")
            for prop in props:
                print(f" * {prop.qname} - {prop.label}")
            print()
        print()

    if label_collisions:
        failed = True
        print("COLLIDING LABELS\n")
        for label, props in label_collisions.items():
            print(f" {label}:")
            for prop in props:
                print(f" * {prop.qname}")
            print()

    # NOTE(review): a reviewer asked whether it is worth extracting the three
    # report sections above into a function to reduce replication.

    # Exit with a non-zero status so that callers (e.g. CI) see the failure.
    if failed:
        sys.exit(1)

    print("No issues found.")
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Question. Should we be concerned that there is both an authority type, and an authority label?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What "divergent types" means: There are two properties with the name "authority" that have different types (I haven’t checked it, but probably one has the type `entity` and the other `name`, or something like that).
What "divergent labels" means: There are two properties with the same name, but they use different labels in the UI (e.g. `CallForTenders:authority` has the label "Name of contracting authority" while `Sanction:authority` has the label "Authority").
has the label "Authority").We should be concerned about all of these issues. However, there are some issues that can only be resolved with breaking changes, so I’ve added them to the ignore list for now (otherwise it would break CI).