Skip to content

Commit

Permalink
Merge pull request #1097 from globalwordnet/issue-1044
Browse files Browse the repository at this point in the history
Make sure that no instances have hyponyms
  • Loading branch information
jmccrae authored Oct 10, 2024
2 parents b5e81be + 55f17ef commit 7429974
Show file tree
Hide file tree
Showing 9 changed files with 139 additions and 151 deletions.
100 changes: 49 additions & 51 deletions scripts/sense_keys.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,53 +4,53 @@
from sys import exit

lex_filenums = {
"src/xml/wn-adj.all.xml": 0,
"src/xml/wn-adj.pert.xml": 1,
"src/xml/wn-adv.all.xml": 2,
"src/xml/wn-noun.Tops.xml": 3,
"src/xml/wn-noun.act.xml": 4,
"src/xml/wn-noun.animal.xml": 5,
"src/xml/wn-noun.artifact.xml": 6,
"src/xml/wn-noun.attribute.xml": 7,
"src/xml/wn-noun.body.xml": 8,
"src/xml/wn-noun.cognition.xml": 9,
"src/xml/wn-noun.communication.xml": 10,
"src/xml/wn-noun.event.xml": 11,
"src/xml/wn-noun.feeling.xml": 12,
"src/xml/wn-noun.food.xml": 13,
"src/xml/wn-noun.group.xml": 14,
"src/xml/wn-noun.location.xml": 15,
"src/xml/wn-noun.motive.xml": 16,
"src/xml/wn-noun.object.xml": 17,
"src/xml/wn-noun.person.xml": 18,
"src/xml/wn-noun.phenomenon.xml": 19,
"src/xml/wn-noun.plant.xml": 20,
"src/xml/wn-noun.possession.xml": 21,
"src/xml/wn-noun.process.xml": 22,
"src/xml/wn-noun.quantity.xml": 23,
"src/xml/wn-noun.relation.xml": 24,
"src/xml/wn-noun.shape.xml": 25,
"src/xml/wn-noun.state.xml": 26,
"src/xml/wn-noun.substance.xml": 27,
"src/xml/wn-noun.time.xml": 28,
"src/xml/wn-verb.body.xml": 29,
"src/xml/wn-verb.change.xml": 30,
"src/xml/wn-verb.cognition.xml": 31,
"src/xml/wn-verb.communication.xml": 32,
"src/xml/wn-verb.competition.xml": 33,
"src/xml/wn-verb.consumption.xml": 34,
"src/xml/wn-verb.contact.xml": 35,
"src/xml/wn-verb.creation.xml": 36,
"src/xml/wn-verb.emotion.xml": 37,
"src/xml/wn-verb.motion.xml": 38,
"src/xml/wn-verb.perception.xml": 39,
"src/xml/wn-verb.possession.xml": 40,
"src/xml/wn-verb.social.xml": 41,
"src/xml/wn-verb.stative.xml": 42,
"src/xml/wn-verb.weather.xml": 43,
"src/xml/wn-adj.ppl.xml": 44,
"src/xml/wn-contrib.colloq.xml": 50,
"src/xml/wn-contrib.plwn.xml": 51}
"adj.all": 0,
"adj.pert": 1,
"adv.all": 2,
"noun.Tops": 3,
"noun.act": 4,
"noun.animal": 5,
"noun.artifact": 6,
"noun.attribute": 7,
"noun.body": 8,
"noun.cognition": 9,
"noun.communication": 10,
"noun.event": 11,
"noun.feeling": 12,
"noun.food": 13,
"noun.group": 14,
"noun.location": 15,
"noun.motive": 16,
"noun.object": 17,
"noun.person": 18,
"noun.phenomenon": 19,
"noun.plant": 20,
"noun.possession": 21,
"noun.process": 22,
"noun.quantity": 23,
"noun.relation": 24,
"noun.shape": 25,
"noun.state": 26,
"noun.substance": 27,
"noun.time": 28,
"verb.body": 29,
"verb.change": 30,
"verb.cognition": 31,
"verb.communication": 32,
"verb.competition": 33,
"verb.consumption": 34,
"verb.contact": 35,
"verb.creation": 36,
"verb.emotion": 37,
"verb.motion": 38,
"verb.perception": 39,
"verb.possession": 40,
"verb.social": 41,
"verb.stative": 42,
"verb.weather": 43,
"adj.ppl": 44,
"contrib.colloq": 50,
"contrib.plwn": 51}

ss_types = {
PartOfSpeech.NOUN: 1,
Expand Down Expand Up @@ -118,16 +118,14 @@ def get_head_word(wn, s):
exit(-1)


def get_sense_key(wn, e, s, wn_file):
def get_sense_key(wn, e, s):
"""Calculate the sense key for a sense of an entry"""
ss = wn.synset_by_id(s.synset)
lemma = e.lemma.written_form.replace(
" ", "_").replace(
"&apos", "'").lower()
ss_type = ss_types[ss.part_of_speech]
if not wn_file.startswith("src/xml/wn-"):
wn_file = f"src/xml/wn-{wn_file}.xml"
lex_filenum = lex_filenums[wn_file]
lex_filenum = lex_filenums[ss.lex_name]
if s.id:
lex_id = extract_lex_id(unmap_sense_key(s.id))
else:
Expand Down
71 changes: 40 additions & 31 deletions scripts/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,40 +141,29 @@ def check_lex_files(wn, fix):
"adv": PartOfSpeech.ADVERB
}
errors = 0
for f in glob.glob("src/xml/wn-*.xml"):
lexfile = f[11:-4]
lex_pos = pos_map[lexfile[:3]]
swn = parse_wordnet(f)
for synset in swn.synsets:
if synset.lex_name != lexfile:
print("%s declared in %s but listed as %s" %
(synset.id, lexfile, synset.lex_name))
for entry in wn.entries:
for sense in entry.senses:
if not sense.id:
print("%s does not have a sense key" % (sense.id))
errors += 1
if not equal_pos(lex_pos, synset.part_of_speech):
print("%s declared in %s but has wrong POS %s" %
(synset.id, lexfile, synset.part_of_speech))
if not wn.synset_by_id(sense.synset):
print("%s refers to nonexistent synset %s" %
(sense.id, sense.synset))
errors += 1
for entry in swn.entries:
if len(entry.senses) == 0:
print("%s is empty in %s" % (entry.id, lexfile))
continue
calc_sense_key = sense_keys.get_sense_key(
wn, entry, sense)
sense_key = unmap_sense_key(sense.id)
if sense_key != calc_sense_key:
if fix:
print(
"sed -i 's/%s/%s/' src/xml/*" %
(sense_key, calc_sense_key))
else:
print(
"%s has declared key %s but should be %s" %
(sense.id, sense_key, calc_sense_key))
errors += 1
for sense in entry.senses:
if not sense.id:
print("%s does not have a sense key" % (sense.id))
errors += 1
calc_sense_key = sense_keys.get_sense_key(
wn, entry, sense, f)
sense_key = unmap_sense_key(sense.id)
if sense_key != calc_sense_key:
if fix:
print(
"sed -i 's/%s/%s/' src/xml/*" %
(sense_key, calc_sense_key))
else:
print(
"%s has declared key %s but should be %s" %
(sense.id, sense_key, calc_sense_key))
errors += 1

return errors

Expand Down Expand Up @@ -286,6 +275,8 @@ def main():
sense.id, sense2.id, sense.synset))
errors += 1

instances = set()

for synset in wn.synsets:
if synset.id[-1:] != synset.part_of_speech.value:
print(
Expand Down Expand Up @@ -356,6 +347,15 @@ def main():
print("ERROR: noun synset %s has no hypernym" % synset.id)
errors += 1

if any(sr.rel_type == SynsetRelType.INSTANCE_HYPERNYM
for sr in synset.synset_relations):
if any(sr.rel_type == SynsetRelType.HYPERNYM
for sr in synset.synset_relations):
print("Error: synset %s has both hypernym and instance hypernym"
% synset.id)
errors += 1
instances.add(synset.id)

if len(synset.definitions) == 0:
print("ERROR: synset without definition %s" % (synset.id))
errors += 1
Expand All @@ -373,6 +373,15 @@ def main():
(synset.id, item[1], item[0]))
errors += 1

for synset in wn.synsets:
for sr in synset.synset_relations:
if sr.rel_type == SynsetRelType.HYPERNYM:
if sr.target in instances:
print(
"ERROR: Hypernym targets instance %s => %s" %
(synset.id, sr.target))
errors += 1

for error in check_symmetry(wn, fix):
if fix:
print(error)
Expand Down
4 changes: 4 additions & 0 deletions src/deprecations.csv
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,10 @@
"ewn-01798679-v","i30704","ewn-00064841-v","i22102","Duplicate (#1043)"
"ewn-02125976-v","i32364","ewn-00064841-v","i22102","Duplicate (#1043)"
"ewn-00078154-v","i22163","ewn-00064841-v","i22102","Duplicate (#1043)"
"ewn-09369905-n","i85747","ewn-09372942-n","i85763","Duplicate (#1044)"
"ewn-08893374-n","i83377","ewn-08879115-n","i83373","Merged with hypernym (#1044)"
"ewn-90010441-n","","ewn-09067337-n","i84182","Merge with hypernym (#1044)"
"ewn-00596900-v","i24739","ewn-01311849-s","i7131","Not attested (#1060)"
"ewn-10809460-n","i94174","ewn-10146463-n","i90187","Compositional; use 10146463-n instead (#1068)"
"ewn-83000076-n","","ewn-10146463-n","i90187","Compositional (#1068)"
"ewn-10809055-n","i94171","ewn-10146463-n","i90187","Compositional (#1068)"
Expand Down
8 changes: 4 additions & 4 deletions src/yaml/entries-b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6259,11 +6259,11 @@ Blighia sapida:
synset: 12763050-n
Blighty:
n:
pronunciation:
- value: ˈblaɪti
sense:
- id: 'blighty%1:15:00::'
synset: 08893374-n
- exemplifies:
- 'slang%1:10:01::'
id: 'blighty%1:15:01::'
synset: 08879115-n
Blimp:
n:
pronunciation:
Expand Down
10 changes: 6 additions & 4 deletions src/yaml/entries-m.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24334,8 +24334,8 @@ mare liberum:
mare nostrum:
n:
sense:
- id: 'mare_nostrum%1:17:00::'
synset: 09369905-n
- id: 'mare_nostrum%1:17:01::'
synset: 09372942-n
mare's nest:
n:
pronunciation:
Expand Down Expand Up @@ -36913,8 +36913,10 @@ merging:
merica:
n:
sense:
- id: 'merica%1:15:01::'
synset: 90010441-n
- exemplifies:
- 'slang%1:10:01::'
id: 'merica%1:15:01::'
synset: 09067337-n
mericarp:
n:
sense:
Expand Down
3 changes: 1 addition & 2 deletions src/yaml/noun.group.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29276,11 +29276,10 @@
and individualism instead of tradition and established doctrine
example:
- the Enlightenment brought about many humanitarian reforms
hypernym:
- 08490634-n
ili: i81574
instance_hypernym:
- 15278839-n
- 08490634-n
members:
- Age of Enlightenment
- Enlightenment
Expand Down
Loading

0 comments on commit 7429974

Please sign in to comment.