Skip to content

Commit

Permalink
fix missing properties domains/ranges (drop support for python 3.9)
Browse files Browse the repository at this point in the history
  • Loading branch information
Binh Vu committed Mar 24, 2024
1 parent f7a9b25 commit 0b72047
Show file tree
Hide file tree
Showing 8 changed files with 131 additions and 82 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# CHANGE LOG

## [Unreleased]
## [7.0.0] (2024-03-24)

### Added

Expand All @@ -11,6 +11,11 @@
### Changed

- Reuse code: `GenericDB.get_default_props` now calls `ont_property.get_default_props`.
- Drop support for Python 3.9 (Python >= 3.10 is now required) in order to use the `kw_only` and `slots` options of `dataclass`

### Fixed

- Fix missing domains/ranges of ontology properties: `OntologyProperty.subjects` is replaced by `domains` and `ranges`

## [6.5.2] (2024-03-08)

Expand Down
8 changes: 4 additions & 4 deletions kgdata/dbpedia/datasets/properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@
from functools import partial
from urllib.parse import urlparse

from rdflib import OWL, RDF, RDFS, BNode, Literal, URIRef
from sm.misc.funcs import assert_not_null

from kgdata.dataset import Dataset
from kgdata.db import deser_from_dict, ser_to_dict
from kgdata.dbpedia.config import DBpediaDirCfg
Expand All @@ -15,6 +12,8 @@
from kgdata.models.multilingual import MultiLingualString, MultiLingualStringList
from kgdata.models.ont_property import OntologyProperty
from kgdata.splitter import split_a_list
from rdflib import OWL, RDF, RDFS, BNode, Literal, URIRef
from sm.misc.funcs import assert_not_null

rdf_type = str(RDF.type)
rdfs_label = str(RDFS.label)
Expand Down Expand Up @@ -76,7 +75,8 @@ def to_prop(resource: RDFResource, default_lang: str = "en") -> OntologyProperty
equivalent_properties=[
str(term) for term in resource.props.get(str(OWL.equivalentProperty), [])
],
subjects=[str(term) for term in resource.props.get(str(RDFS.domain), [])],
domains=[str(term) for term in resource.props.get(str(RDFS.domain), [])],
ranges=[str(term) for term in resource.props.get(str(RDFS.range), [])],
inverse_properties=[],
instanceof=[str(term) for term in resource.props.get(rdf_type, [])],
ancestors={},
Expand Down
13 changes: 1 addition & 12 deletions kgdata/models/ont_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,8 @@
from rdflib import OWL, RDFS


@dataclass
@dataclass(kw_only=True, slots=True)
class OntologyClass:
__slots__ = (
"id",
"label",
"description",
"aliases",
"parents",
"properties",
"different_froms",
"equivalent_classes",
"ancestors",
)
id: str
label: MultiLingualString
description: MultiLingualString
Expand Down
42 changes: 18 additions & 24 deletions kgdata/models/ont_property.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,30 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import Mapping
from typing import Mapping, Optional

from kgdata.models.multilingual import MultiLingualString, MultiLingualStringList
from rdflib import RDF, RDFS, XSD


@dataclass
@dataclass(kw_only=True, slots=True)
class OntologyProperty:
__slots__ = (
"id",
"label",
"description",
"aliases",
"datatype",
"parents",
"related_properties",
"equivalent_properties",
"subjects",
"inverse_properties",
"instanceof",
"ancestors",
)
id: str
label: MultiLingualString
description: MultiLingualString
aliases: MultiLingualStringList
datatype: str
instanceof: list[str]
parents: list[str]
ancestors: dict[str, int]
inverse_properties: list[str]
related_properties: list[str]
equivalent_properties: list[str]
subjects: list[str]
inverse_properties: list[str]
instanceof: list[str]
ancestors: dict[str, int]

# domains
domains: Optional[list[str]]
# ranges
ranges: Optional[list[str]]

@staticmethod
def empty(id: str):
Expand All @@ -47,7 +37,8 @@ def empty(id: str):
parents=[],
related_properties=[],
equivalent_properties=[],
subjects=[],
domains=None,
ranges=None,
inverse_properties=[],
instanceof=[],
ancestors={},
Expand Down Expand Up @@ -95,7 +86,8 @@ def to_dict(self):
"parents": self.parents,
"related_properties": self.related_properties,
"equivalent_properties": self.equivalent_properties,
"subjects": self.subjects,
"domains": self.domains,
"ranges": self.ranges,
"inverse_properties": self.inverse_properties,
"instanceof": self.instanceof,
"ancestors": self.ancestors,
Expand All @@ -119,7 +111,8 @@ def get_default_props() -> list[OntologyProperty]:
parents=[],
related_properties=[],
equivalent_properties=[],
subjects=[],
domains=None,
ranges=None,
inverse_properties=[],
instanceof=[],
ancestors={},
Expand All @@ -135,7 +128,8 @@ def get_default_props() -> list[OntologyProperty]:
parents=[],
related_properties=[],
equivalent_properties=[],
subjects=[],
domains=None,
ranges=None,
inverse_properties=[],
instanceof=[str(RDF.Property)],
ancestors={},
Expand Down
13 changes: 1 addition & 12 deletions kgdata/wikidata/models/wdentity.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,8 @@
from kgdata.wikidata.models.wdvalue import WDValue


@dataclass
@dataclass(slots=True)
class WDEntity:
__slots__ = (
"id",
"type",
"datatype",
"label",
"description",
"aliases",
"props",
"sitelinks",
)

id: str
# possible values ["item", "property"]
type: Literal["item", "property"]
Expand Down
86 changes: 79 additions & 7 deletions kgdata/wikidata/models/wdproperty.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
from dataclasses import dataclass
from typing import Literal, Mapping

from kgdata.models.multilingual import MultiLingualString, MultiLingualStringList
from kgdata.models.ont_property import OntologyProperty
from kgdata.wikidata.models.wdentity import WDEntity
from kgdata.wikidata.models.wdstatement import WDStatement

# wikibase-lexeme, monolingualtext, wikibase-sense, url, wikibase-property,
# wikibase-form, external-id, time, commonsMedia, quantity, wikibase-item, musical-notation,
Expand All @@ -30,9 +32,10 @@
]


@dataclass
@dataclass(kw_only=True, slots=True)
class WDProperty(OntologyProperty):
datatype: WDDataType
constraints: list[WDStatement]

@staticmethod
def from_entity(ent: WDEntity):
Expand All @@ -56,10 +59,49 @@ def from_entity(ent: WDEntity):
else:
assert False, f"Unknown type: {stmt.value.to_dict()}"

subjects = []
for stmt in ent.props.get("P1629", []):
assert stmt.value.is_entity_id(stmt.value)
subjects.append(stmt.value.as_entity_id())
constraints = ent.props.get("P2302", [])
domains = None
ranges = None
for stmt in constraints:
entid = stmt.value.as_entity_id_safe()
# subject type constraint
if entid == "Q21503250":
try:
# a domain constraint must list its classes (P2308) -- if it does not, it is malformed and we ignore it
if "P2308" not in stmt.qualifiers:
continue
# and the relation must be instanceof or (instanceof or subclassof), or subclassof
assert "P2309" in stmt.qualifiers, (ent.id, stmt)
relations = [
x.as_entity_id_safe() for x in stmt.qualifiers["P2309"]
]
for relation in relations:
assert relation in ["Q21503252", "Q30208840", "Q21514624"], (
ent.id,
stmt,
)
except:
continue
domains = [x.as_entity_id_safe() for x in stmt.qualifiers["P2308"]]

# value-type constraint
if entid == "Q21510865":
try:
# if ranges are classes
assert "P2308" in stmt.qualifiers, (ent.id, stmt)
assert "P2309" in stmt.qualifiers, (ent.id, stmt)
# and the relation must be instanceof or (instanceof or subclassof), or subclassof
relations = [
x.as_entity_id_safe() for x in stmt.qualifiers["P2309"]
]
for relation in relations:
assert relation in ["Q21503252", "Q30208840", "Q21514624"], (
ent.id,
stmt,
)
except:
continue
ranges = [x.as_entity_id_safe() for x in stmt.qualifiers["P2308"]]

inverse_properties = []
for stmt in ent.props.get("P1696", []):
Expand All @@ -80,10 +122,12 @@ def from_entity(ent: WDEntity):
parents=sorted(parents),
related_properties=sorted(related_properties),
equivalent_properties=sorted(equivalent_properties),
subjects=sorted(subjects),
domains=domains,
ranges=ranges,
inverse_properties=sorted(inverse_properties),
instanceof=sorted(instanceof),
ancestors={},
constraints=constraints,
)

def is_object_property(self):
Expand All @@ -110,7 +154,8 @@ def to_base(self):
parents=self.parents,
related_properties=self.related_properties,
equivalent_properties=self.equivalent_properties,
subjects=self.subjects,
domains=self.domains,
ranges=self.ranges,
inverse_properties=self.inverse_properties,
instanceof=self.instanceof,
ancestors=self.ancestors,
Expand All @@ -119,6 +164,33 @@ def to_base(self):
def __str__(self):
return f"{self.label} ({self.id})"

def to_dict(self):
    """Serialize this property into a plain dictionary.

    The multilingual fields (``label``, ``description``, ``aliases``) and
    every statement in ``constraints`` are converted through their own
    ``to_dict`` method; all other fields are stored as-is.
    """
    record = {
        "id": self.id,
        "label": self.label.to_dict(),
        "description": self.description.to_dict(),
        "datatype": self.datatype,
        "aliases": self.aliases.to_dict(),
    }
    # These attributes are emitted unchanged, in this exact key order.
    for name in (
        "parents",
        "related_properties",
        "equivalent_properties",
        "domains",
        "ranges",
        "inverse_properties",
        "instanceof",
        "ancestors",
    ):
        record[name] = getattr(self, name)
    record["constraints"] = [stmt.to_dict() for stmt in self.constraints]
    return record

@classmethod
def from_dict(cls, obj):
    """Build an instance from a dictionary of constructor fields.

    ``label``, ``description`` and ``aliases`` are expanded as keyword
    arguments into ``MultiLingualString`` / ``MultiLingualStringList``, and
    each entry of ``constraints`` is rebuilt with ``WDStatement.from_dict``.
    All remaining keys are passed to the constructor unchanged.
    Presumably ``obj`` has the layout emitted by ``to_dict`` -- confirm
    against the callers.

    The input mapping is not modified.
    """
    # Work on a shallow copy so the caller's dictionary is left untouched.
    fields = dict(obj)
    fields["label"] = MultiLingualString(**fields["label"])
    fields["description"] = MultiLingualString(**fields["description"])
    fields["aliases"] = MultiLingualStringList(**fields["aliases"])
    fields["constraints"] = [
        WDStatement.from_dict(raw) for raw in fields["constraints"]
    ]
    return cls(**fields)


def normalize_wikidata_datatype(datatype: WDDataType) -> str:
if datatype == "wikibase-property" or datatype == "wikibase-item":
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "kgdata"
version = "6.5.2"
version = "7.0.0"
description = "Library to process dumps of knowledge graphs (Wikipedia, DBpedia, Wikidata)"
readme = "README.md"
authors = [{ name = "Binh Vu", email = "binh@toan2.com" }]
Expand All @@ -10,7 +10,7 @@ classifiers = [
"Programming Language :: Python :: Implementation :: CPython",
]

requires-python = ">=3.9"
requires-python = ">=3.10"

dependencies = [
'orjson >= 3.9.0, < 4.0.0',
Expand Down
40 changes: 20 additions & 20 deletions scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,18 +36,18 @@ function wikidata_db {

# dbpedia_dataset generic_extractor_dump
# dbpedia_dataset mapping_extractor_dump
dbpedia_dataset ontology_dump
dbpedia_dataset classes
dbpedia_dataset properties
dbpedia_dataset entities
dbpedia_dataset entity_redirections
dbpedia_dataset entity_labels
dbpedia_dataset entity_metadata
dbpedia_dataset entity_all_types
dbpedia_dataset entity_degrees
dbpedia_dataset entity_types_and_degrees
dbpedia_dataset meta_graph
dbpedia_dataset meta_graph_stats
# dbpedia_dataset ontology_dump
# dbpedia_dataset classes
# dbpedia_dataset properties
# dbpedia_dataset entities
# dbpedia_dataset entity_redirections
# dbpedia_dataset entity_labels
# dbpedia_dataset entity_metadata
# dbpedia_dataset entity_all_types
# dbpedia_dataset entity_degrees
# dbpedia_dataset entity_types_and_degrees
# dbpedia_dataset meta_graph
# dbpedia_dataset meta_graph_stats

# ======================================================================
# WIKIDATA Datasets
Expand All @@ -65,7 +65,7 @@ dbpedia_dataset meta_graph_stats
# wikidata_dataset entity_types

# wikidata_dataset classes
# wikidata_dataset properties
wikidata_dataset properties

# wikidata_dataset class_count
# wikidata_dataset property_count
Expand Down Expand Up @@ -103,18 +103,18 @@ dbpedia_dataset meta_graph_stats
# ======================================================================
# DBpedia Databases

dbpedia_db classes
dbpedia_db properties
dbpedia_db entities
dbpedia_db entity_labels
dbpedia_db entity_metadata
dbpedia_db entity_redirections
# dbpedia_db classes
# dbpedia_db properties
# dbpedia_db entities
# dbpedia_db entity_labels
# dbpedia_db entity_metadata
# dbpedia_db entity_redirections

# ======================================================================
# WIKIDATA Databases

# wikidata_db classes
# wikidata_db properties
wikidata_db properties
# wikidata_db entities
# wikidata_db entity_labels
# wikidata_db entity_metadata
Expand Down

0 comments on commit 0b72047

Please sign in to comment.