Skip to content

Commit

Permalink
refactor drepr & fix code
Browse files Browse the repository at this point in the history
  • Loading branch information
Binh Vu committed May 14, 2024
1 parent c964683 commit dd7f80e
Show file tree
Hide file tree
Showing 24 changed files with 132 additions and 88 deletions.
2 changes: 1 addition & 1 deletion extensions/sand_drepr/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ readme = "README.md"

[tool.poetry.dependencies]
python = "3.9"

drepr-v2 = "^1.3.4"

[build-system]
requires = ["poetry-core"]
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
19 changes: 6 additions & 13 deletions extensions/sand_drepr/sand_drepr/dreprmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
import orjson
import sm.outputs.semantic_model as O
from dependency_injector.wiring import Provide, inject
from drepr.engine import MemoryOutput, OutputFormat, ResourceDataString, execute
from drepr.models import (
from drepr.models.prelude import (
AlignedStep,
Attr,
CSVProp,
Expand All @@ -25,21 +24,15 @@
)
from kgdata.misc.resource import RDFResource
from rdflib import RDF, Graph, URIRef
from sand_drepr.resources import get_entity_resource, get_table_resource
from sand_drepr.semanticmodel import get_drepr_sm, get_entity_data_nodes
from sand_drepr.transformation import get_transformation, has_transformation
from slugify import slugify
from sm.misc.funcs import assert_not_null
from sm.namespaces.prelude import KnowledgeGraphNamespace

from sand.config import AppConfig
from sand.extension_interface.export import IExport
from sand.extensions.export.drepr.resources import (
get_entity_resource,
get_table_resource,
)
from sand.extensions.export.drepr.transformation import (
get_transformation,
has_transformation,
)
from sand.helpers.namespace import NamespaceService
from sand.models.ontology import OntProperty, OntPropertyAR, OntPropertyDataType
from sand.models.table import Table, TableRow
Expand All @@ -49,18 +42,18 @@ def get_drepr_model(
table_columns: list[str],
table_size: int,
sm: O.SemanticModel,
kgns: KnowledgeGraphNamespace,
kgns: NamespaceService,
kgns_prefixes: dict[str, str],
ontprop_ar: Mapping[str, OntProperty],
ident_props: list[str],
ident_props: set[str],
) -> DRepr:
"""Create a D-REPR model of the dataset.
Args:
table_columns: list of column names
table_size: number of rows in the table (exclude header)
sm: the semantic model we want to convert
kgns: the knowledge graph namespace
ns: the graph namespace
kgns_prefixes: the prefixes of the knowledge graph namespace
ontprop_ar: mapping from the id to ontology property
ident_props: set of properties indicating that a data node contains entities (e.g., rdfs:label)
Expand Down
113 changes: 67 additions & 46 deletions extensions/sand_drepr/sand_drepr/main.py
Original file line number Diff line number Diff line change
@@ -1,47 +1,42 @@
from __future__ import annotations

import re
from collections import defaultdict
from io import BytesIO, StringIO
from typing import List, Set
from typing import List, Sequence, Set, cast

import orjson
import sm.outputs.semantic_model as O
from dependency_injector.wiring import Provide, inject
from drepr.engine import MemoryOutput, OutputFormat, ResourceDataString, execute
from drepr.models import (
from drepr.main import convert
from drepr.models.prelude import (
AlignedStep,
Alignment,
Attr,
CSVProp,
DRepr,
IndexExpr,
OutputFormat,
Path,
PMap,
Preprocessing,
PreprocessingType,
RangeAlignment,
RangeExpr,
Resource,
ResourceDataString,
ResourceType,
)
from kgdata.misc.resource import RDFResource
from rdflib import RDF, Graph, URIRef
from sand_drepr.resources import get_entity_resource, get_table_resource
from sand_drepr.semanticmodel import get_drepr_sm, get_entity_data_nodes
from sand_drepr.transformation import get_transformation, has_transformation
from slugify import slugify
from sm.misc.funcs import assert_not_null
from sm.misc.funcs import assert_isinstance, assert_not_null

from sand.config import AppConfig
from sand.extension_interface.export import IExport
from sand.extensions.export.drepr.resources import (
get_entity_resource,
get_table_resource,
)
from sand.extensions.export.drepr.semanticmodel import (
get_drepr_sm,
get_entity_data_nodes,
)
from sand.extensions.export.drepr.transformation import (
get_transformation,
has_transformation,
)
from sand.helpers.namespace import NamespaceService
from sand.models.ontology import OntPropertyAR, OntPropertyDataType
from sand.models.table import Table, TableRow
Expand Down Expand Up @@ -76,16 +71,21 @@ def export_extra_resources(
return {}

ent_columns = {
node.col_index for node in get_entity_data_nodes(self.appcfg, sm)
node.col_index
for node in get_entity_data_nodes(
sm, self.appcfg.semantic_model.identifiers_set
)
}
entresource = get_entity_resource(
self.appcfg, self.namespace, table, rows, ent_columns
)
assert isinstance(entresource, ResourceDataString)
return {
"entity": entresource.value.decode()
if isinstance(entresource.value, bytes)
else entresource.value
"entity": (
entresource.value.decode()
if isinstance(entresource.value, bytes)
else assert_isinstance(entresource.value, str)
)
}

def export_data(
Expand All @@ -101,7 +101,10 @@ def export_data(
return ""

ent_columns = {
node.col_index for node in get_entity_data_nodes(self.appcfg, sm)
node.col_index
for node in get_entity_data_nodes(
sm, self.appcfg.semantic_model.identifiers_set
)
}
resources = {
"table": get_table_resource(table, rows),
Expand All @@ -110,20 +113,34 @@ def export_data(
),
}

content = execute(
ds_model=self.export_drepr_model(table, sm),
content = convert(
repr=self.export_drepr_model(table, sm),
resources=resources,
output=MemoryOutput(output_format),
debug=False,
format=output_format,
)
return self.post_processing(sm, content, output_format)

def export_drepr_model(self, table: Table, sm: O.SemanticModel) -> DRepr:
"""Create a D-REPR model of the dataset."""
columns = [slugify(c).replace("-", "_") for c in table.columns]
get_attr_id = lambda ci: f"{ci}__{columns[ci]}"
get_ent_attr_id = lambda ci: f"{ci}__ent__{columns[ci]}"
ent_dnodes = get_entity_data_nodes(self.appcfg, sm)

existing_attr_names = {}

def get_attr_id(ci):
    """Return a unique attribute id for the column at index `ci`.

    The id is the slugified column name (dashes turned into underscores)
    with leading digits dropped when a non-digit character follows them.
    The first column that requests a name keeps it as is; when the name is
    already owned by a different column, `_<ci>` is appended until a free
    id is found. Ownership is stored in `existing_attr_names`, so calling
    this function again with the same `ci` yields the same id.
    """
    cname = slugify(columns[ci]).replace("-", "_")
    # Strip leading digits, but only when something non-numeric follows them.
    m = re.match(r"\d+([^\d].*)", cname)
    if m is not None:
        cname = m.group(1)

    # An empty slug cannot serve as an id: fall back to the index-suffixed form.
    attr_id = cname if cname != "" else "_" + str(ci)

    # `setdefault` claims the id for this column when it is free and otherwise
    # reports the current owner. Using it (instead of a plain `get`) is what
    # actually registers the name; without the registration every column would
    # always receive the suffixed form.
    while existing_attr_names.setdefault(attr_id, ci) != ci:
        attr_id = attr_id + "_" + str(ci)
    return attr_id

get_ent_attr_id = lambda ci: f"{get_attr_id(ci)}__ent"
ent_dnodes = get_entity_data_nodes(
sm, self.appcfg.semantic_model.identifiers_set
)

attrs = [
Attr(
Expand Down Expand Up @@ -155,12 +172,13 @@ def export_drepr_model(self, table: Table, sm: O.SemanticModel) -> DRepr:
]

dsm = get_drepr_sm(
self.appcfg,
self.namespace,
sm,
self.ontprop_ar,
get_attr_id,
get_ent_attr_id,
sm=sm,
kgns=self.namespace,
kgns_prefixes=self.namespace.kgns_prefixes,
ontprop_ar=self.ontprop_ar,
ident_props=self.appcfg.semantic_model.identifiers_set,
get_attr_id=get_attr_id,
get_ent_attr_id=get_ent_attr_id,
)

datatype_transformations = []
Expand Down Expand Up @@ -192,29 +210,32 @@ def export_drepr_model(self, table: Table, sm: O.SemanticModel) -> DRepr:
)
)

return DRepr(
resources=[
Resource(id="table", type=ResourceType.CSV, prop=CSVProp()),
Resource(id="entity", type=ResourceType.CSV, prop=CSVProp()),
],
preprocessing=datatype_transformations,
attrs=attrs,
aligns=[
aligns: list[Alignment] = []
for ci in range(1, len(table.columns)):
aligns.append(
RangeAlignment(
source=get_attr_id(0),
target=get_attr_id(ci),
aligned_steps=[AlignedStep(source_idx=0, target_idx=0)],
)
for ci in range(1, len(table.columns))
]
+ [
)
for node in ent_dnodes:
aligns.append(
RangeAlignment(
source=get_attr_id(0),
target=get_ent_attr_id(node.col_index),
aligned_steps=[AlignedStep(source_idx=0, target_idx=0)],
)
for node in ent_dnodes
)

return DRepr(
resources=[
Resource(id="table", type=ResourceType.CSV, prop=CSVProp()),
Resource(id="entity", type=ResourceType.CSV, prop=CSVProp()),
],
preprocessing=datatype_transformations,
attrs=attrs,
aligns=aligns,
sm=dsm,
)

Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
try:
value = value.strip()
return float(value)
except:
return value
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
try:
value = value.strip()
return int(value)
except:
return value
2 changes: 1 addition & 1 deletion extensions/sand_drepr/sand_drepr/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import List, Set
from uuid import uuid4

from drepr.models import ResourceData, ResourceDataString
from drepr.models.prelude import ResourceData, ResourceDataString

from sand.config import AppConfig
from sand.helpers.namespace import NamespaceService
Expand Down
35 changes: 21 additions & 14 deletions extensions/sand_drepr/sand_drepr/semanticmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,31 @@
from sm.misc.funcs import assert_not_null
from sm.namespaces.prelude import KnowledgeGraphNamespace

from sand.config import AppConfig
from sand.helpers.namespace import NamespaceService
from sand.models.ontology import OntProperty, OntPropertyDataType

# mapping from predefined datatypes to D-REPR datatype
datatype_mapping: Mapping[OntPropertyDataType, Optional[drepr_sm.DataType]] = {
"globe-coordinate": drepr_sm.DataType.geo_wktLiteral,
"url": drepr_sm.DataType.xsd_anyURI,
"entity": drepr_sm.DataType.xsd_anyURI,
"string": drepr_sm.DataType.xsd_string,
"integer-number": drepr_sm.DataType.xsd_int,
"decimal-number": drepr_sm.DataType.xsd_decimal,
"datetime": drepr_sm.DataType.xsd_dateTime,
"globe-coordinate": drepr_sm.DataType(
"http://www.opengis.net/ont/geosparql#wktLiteral",
{"geo": "http://www.opengis.net/ont/geosparql#"},
),
"url": drepr_sm.PredefinedDataType.xsd_anyURI.value,
"entity": drepr_sm.PredefinedDataType.drepr_uri.value,
"string": drepr_sm.PredefinedDataType.xsd_string.value,
"integer-number": drepr_sm.PredefinedDataType.xsd_int.value,
"decimal-number": drepr_sm.PredefinedDataType.xsd_decimal.value,
"datetime": drepr_sm.PredefinedDataType.xsd_dateTime.value,
}


def get_drepr_sm(
sm: O.SemanticModel,
kgns: KnowledgeGraphNamespace,
kgns: NamespaceService,
kgns_prefixes: dict[str, str],
ontprop_ar: Mapping[str, OntProperty],
ident_props: list[str],
ident_props: set[str],
get_attr_id: Callable[[int], str],
get_ent_attr_id: Callable[[int], str],
) -> drepr_sm.SemanticModel:
Expand Down Expand Up @@ -69,10 +74,10 @@ def get_drepr_sm(
)
elif isinstance(node, O.LiteralNode):
if node.datatype == O.LiteralNodeDataType.Entity:
datatype = drepr_sm.DataType.xsd_anyURI
datatype = drepr_sm.PredefinedDataType.drepr_uri.value
else:
assert node.datatype == O.LiteralNodeDataType.String
datatype = drepr_sm.DataType.xsd_string
datatype = drepr_sm.PredefinedDataType.xsd_string.value

nodes[str(node.id)] = drepr_sm.LiteralNode(
node_id=str(node.id), value=node.value, data_type=datatype
Expand All @@ -98,7 +103,7 @@ def get_drepr_sm(
nodes[new_node_id] = drepr_sm.DataNode(
node_id=new_node_id,
attr_id=get_ent_attr_id(node.col_index),
data_type=drepr_sm.DataType.xsd_anyURI,
data_type=drepr_sm.PredefinedDataType.drepr_uri.value,
)
inedges = [
inedge for inedge in sm.in_edges(node.id) if inedge.abs_uri in ident_props
Expand All @@ -113,15 +118,17 @@ def get_drepr_sm(
label="drepr:uri",
)

prefixes = kgns_prefixes.copy()
prefixes.update(drepr_sm.SemanticModel.get_default_prefixes())
return drepr_sm.SemanticModel(
nodes=nodes,
edges=edges,
prefixes=kgns_prefixes,
prefixes=prefixes,
)


def get_entity_data_nodes(
sm: O.SemanticModel, ident_props: list[str]
sm: O.SemanticModel, ident_props: set[str]
) -> List[O.DataNode]:
ent_dnodes = []
for node in sm.iter_nodes():
Expand Down
3 changes: 2 additions & 1 deletion extensions/sand_drepr/sand_drepr/transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@

datatype2transformation: Mapping[OntPropertyDataType, Path] = {
"globe-coordinate": (transdir / "global_coordinate.py"),
"number": (transdir / "number.py"),
"integer-number": (transdir / "integer-number.py"),
"decimal-number": (transdir / "decimal-number.py"),
}

loaded_transformations: Mapping[OntPropertyDataType, str] = {}
Expand Down
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "web-sand"
version = "4.1.2"
version = "4.1.4"
description = "UI for browsing/editing semantic descriptions"
authors = ["Binh Vu <binh@toan2.com>"]
repository = "https://github.com/usc-isi-i2/sand"
Expand All @@ -23,7 +23,6 @@ tornado = "^6.2"
gena = "^1.7.0"
loguru = "^0.7.0"
orjson = ">= 3.9.0, < 4.0.0"
drepr = "^2.10.0"
rsoup = "^3.1.7"
nh3 = "^0.2.13"

Expand Down
Loading

0 comments on commit dd7f80e

Please sign in to comment.