Skip to content

Commit

Permalink
Changes to ingresses to support external work (#312)
Browse files Browse the repository at this point in the history
* Allow TemplateHandler to handle edge cases in template eval

Adds two flags:
- require_optional
- fill_unused

These allow adjustment of how the template ingress handler deals with
records that don't necessarily have all the parameters for the chosen
template (fill_unused), and how the handler deals with optional
parameters in the chosen template (require_optional)

* more configuration on template ingress

* put limit on xlsx

* cleaning up parameters, adding docstrings

* Update template.py

* Update xlsx.py

* use pathlike
  • Loading branch information
gtfierro authored Jun 26, 2024
1 parent 5633c0b commit 1f14497
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 12 deletions.
49 changes: 43 additions & 6 deletions buildingmotif/ingresses/template.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
from typing import Callable, Optional

from rdflib import Graph, Literal, Namespace, URIRef
from rdflib import BNode, Graph, Literal, Namespace, URIRef
from rdflib.term import Node

from buildingmotif.dataclasses import Template
Expand All @@ -24,7 +25,9 @@ def __init__(
template: Template,
mapper: Optional[Callable[[str], str]],
upstream: RecordIngressHandler,
fill_unused: bool = False,
inline: bool = False,
require_optional_args: bool = True,
):
"""
Create a new TemplateIngress handler
Expand All @@ -41,13 +44,23 @@ def __init__(
:param inline: if True, inline the template before evaluating it on
each row, defaults to False
:type inline: bool, optional
:param require_optional_args: if True, require that optional arguments in the
chosen template be provided by the upstream ingress handler,
defaults to True
:type require_optional_args: bool, optional
:param fill_unused: if True, mint URIs for any unbound parameters in
the template for each input from the upstream ingress handler,
defaults to False
:type fill_unused: bool, optional
"""
self.mapper = mapper if mapper else lambda x: x
self.upstream = upstream
self.require_optional_args = require_optional_args
if inline:
self.template = template.inline_dependencies()
else:
self.template = template
self.fill_unused = fill_unused

def graph(self, ns: Namespace) -> Graph:
g = Graph()
Expand All @@ -60,10 +73,20 @@ def graph(self, ns: Namespace) -> Graph:
assert records is not None
for rec in records:
bindings = {self.mapper(k): _get_term(v, ns) for k, v in rec.fields.items()}
graph = self.template.evaluate(bindings, require_optional_args=True)
if not isinstance(graph, Graph):
bindings, graph = graph.fill(ns, include_optional=True)
g += graph
graph = self.template.evaluate(
bindings, require_optional_args=self.require_optional_args
)
# if it is a graph then all expected params were provided and we are done!
if isinstance(graph, Graph):
g += graph
continue
# here, we know that the 'graph' variable is actually a Template. If fill_unused
# is True, we use 'fill' on the template to generate a new graph
if self.fill_unused:
_, graph = graph.fill(ns, include_optional=self.require_optional_args)
g += graph
continue
raise Exception(f"Paramaters {graph.parameters} are still unused!")
return g


Expand All @@ -82,6 +105,7 @@ def __init__(
mapper: Optional[Callable[[str], str]],
upstream: RecordIngressHandler,
inline=False,
require_optional_args: bool = True,
):
"""
Create a new TemplateIngress handler
Expand All @@ -98,14 +122,20 @@ def __init__(
:type mapper: Optional[Callable[[str], str]]
:param upstream: the ingress handler from which to source records
:type upstream: RecordIngressHandler
:param inline: if True, inline the template before evaluating it on
each row, defaults to False
:type inline: bool, optional
:param require_optional_args: if True, require that optional arguments in the
chosen template be provided by the upstream ingress handler,
defaults to True
:type require_optional_args: bool, optional
"""
self.chooser = chooser
self.mapper = mapper if mapper else lambda x: x
self.upstream = upstream
self.inline = inline
self.require_optional_args = require_optional_args

def graph(self, ns: Namespace) -> Graph:
g = Graph()
Expand All @@ -118,10 +148,15 @@ def graph(self, ns: Namespace) -> Graph:
assert records is not None
for rec in records:
template = self.chooser(rec)
if template is None:
logging.warning(f"Chooser function does not give a template for {rec}")
continue
if self.inline:
template = template.inline_dependencies()
bindings = {self.mapper(k): _get_term(v, ns) for k, v in rec.fields.items()}
graph = template.evaluate(bindings)
graph = template.evaluate(
bindings, require_optional_args=self.require_optional_args
)
if not isinstance(graph, Graph):
_, graph = graph.fill(ns)
g += graph
Expand All @@ -130,6 +165,8 @@ def graph(self, ns: Namespace) -> Graph:

def _get_term(field_value: str, ns: Namespace) -> Node:
assert isinstance(ns, Namespace), f"{ns} must be a rdflib.Namespace instance"
if isinstance(field_value, (URIRef, Literal, BNode)):
return field_value
try:
uri = URIRef(ns[field_value])
uri.n3() # raises an exception if invalid URI
Expand Down
17 changes: 11 additions & 6 deletions buildingmotif/ingresses/xlsx.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
from functools import cached_property
from pathlib import Path
from typing import List
from os import PathLike
from typing import List, Optional

from buildingmotif.ingresses.base import Record, RecordIngressHandler

Expand All @@ -18,15 +18,18 @@ class XLSXIngress(RecordIngressHandler):
field of each Record gives the name of the sheet.
"""

def __init__(self, filename: Path):
def __init__(self, filename: PathLike, limit: Optional[int] = -1):
"""
Path to the .xlsx file to be ingested
:param filename: Path to a .xlsx file
:type filename: Path
:type filename: PathLike
:param limit: The maximum number of rows to read from each sheet. If -1 (default), reads all rows.
:type limit: Optional[int], optional
"""

self.filename = filename
self.limit = limit or -1

@cached_property
def records(self) -> List[Record]:
Expand All @@ -39,11 +42,13 @@ def records(self) -> List[Record]:
:rtype: List[Record]
"""
records = []
wb = load_workbook(self.filename) # noqa
# using data_only means that the cells will contain data, rather than a formula.
wb = load_workbook(self.filename, data_only=True) # noqa
for sheetname in wb.sheetnames:
sheet = wb[sheetname]
columns = [sheet.cell(1, c + 1).value for c in range(sheet.max_column)]
for row in range(2, sheet.max_row + 1):
upper_range = sheet.max_row + 1 if self.limit < 0 else self.limit
for row in range(2, upper_range):
fields = {
columns[c]: sheet.cell(row, c + 1).value
for c in range(sheet.max_column)
Expand Down

0 comments on commit 1f14497

Please sign in to comment.