-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Added Dcat model * Bootstrapped DcatPackage.from_xml * Added more dcat mappings * Updated model methods * Finished dcat package props * Improved model methods * Mapped dcat resource * Added todos * Renamed parsers -> loaders/dumpers * Improved model methods * Added platform * Removed platform * Removed todos * Mapped single value to graph * Added DcatPackage.from/to_graph * Added dcat namespaces * Mapped package lists * Fixed dcat model * Finished dcat model mapping * Sorted DcatResource props * Sorted DcatPackage props * Implemented dcat to dp * Implemented dp to dcat
- Loading branch information
Showing
33 changed files
with
856 additions
and
59 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,8 @@ | ||
# dplib-py | ||
# Data Packaging Library | ||
|
||
[![Build](https://img.shields.io/github/actions/workflow/status/frictionlessdata/dplib-py/general.yaml?branch=main)](https://github.com/frictionlessdata/dplib-py/actions) | ||
[![Coverage](https://img.shields.io/codecov/c/github/frictionlessdata/dplib-py/main)](https://codecov.io/gh/frictionlessdata/dplib-py) | ||
[![Release](https://img.shields.io/pypi/v/dplib-py.svg)](https://pypi.python.org/pypi/dplib-py) | ||
[![Codebase](https://img.shields.io/badge/codebase-github-brightgreen)](https://github.com/frictionlessdata/dplib-py) | ||
|
||
Python implementation of the Data Package standard | ||
Python implementation of the Data Package standard and various models and utils for working with datasets. |
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# from pydantic import BaseModel, ValidationError | ||
# from pydantic_core import ErrorDetails | ||
|
||
# def schema_check(cls, descriptor: Dict[str, Any]): | ||
# errors: List[ErrorDetails] = [] | ||
# try: | ||
# cls.model_validate(descriptor) | ||
# except ValidationError as e: | ||
# errors = e.errors() | ||
# return errors | ||
|
||
|
||
def schema_check():
    """Placeholder for descriptor schema checking; currently does nothing."""
    return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
class Error(Exception):
    """Base exception raised by this package."""
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import os | ||
import shutil | ||
import tempfile | ||
from pathlib import Path | ||
from typing import Any, Optional | ||
|
||
import fsspec # type: ignore | ||
|
||
from ..error import Error | ||
|
||
|
||
def read_file(path: str, *, mode: str = "rt", encoding: str = "utf-8") -> str:
    """Read and return the whole content of ``path``.

    The file is opened through ``fsspec.open``.

    Args:
        path: Location of the file to read.
        mode: Open mode passed to ``fsspec.open``.
        encoding: Text encoding passed to ``fsspec.open``.

    Raises:
        Error: If the file cannot be opened or read; the underlying
            exception is attached as the cause.
    """
    try:
        with fsspec.open(path, mode=mode, encoding=encoding) as file:  # type: ignore
            return file.read()  # type: ignore
    except Exception as exception:
        # Chain the cause so the original traceback is not lost
        raise Error(f'Cannot read file "{path}": {exception}') from exception
|
||
|
||
def write_file(path: str, body: Any, *, mode: str = "wt", encoding: str = "utf-8"):
    """Write ``body`` to ``path`` via a temporary file.

    The content is first written to a named temporary file which is then
    moved to ``path`` using ``move_file`` with permissions ``0o644``.

    Args:
        path: Destination file path.
        body: Content to write (text for text modes, bytes for binary modes).
        mode: Open mode for the temporary file.
        encoding: Text encoding; ignored for binary modes.

    Raises:
        Error: If writing or moving fails; the underlying exception is
            attached as the cause.
    """
    temp_path: Optional[str] = None
    try:
        # Any text mode ("w", "wt", ...) honours the encoding; binary modes must not get one
        eff_enc = None if "b" in mode else encoding
        with tempfile.NamedTemporaryFile(mode, delete=False, encoding=eff_enc) as file:
            temp_path = file.name
            file.write(body)
            file.flush()
        # The temporary file is closed at this point, so it can be moved safely
        move_file(temp_path, path, mode=0o644)
    except Exception as exception:
        # delete=False means nobody else removes the temporary file on failure
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                # Already moved or never created: nothing left to clean up
                pass
        raise Error(f'Cannot write file "{path}": {exception}') from exception
|
||
|
||
def move_file(source: str, target: str, *, mode: Optional[int] = None):
    """Move ``source`` to ``target``, creating parent directories as needed.

    Args:
        source: Path of the existing file.
        target: Destination path.
        mode: Optional permission bits applied to ``target`` after the move.

    Raises:
        Error: If the directories cannot be created, the move fails or the
            permissions cannot be changed.
    """
    try:
        Path(target).parent.mkdir(parents=True, exist_ok=True)
        shutil.move(source, target)
        # Apply the requested mode (not a fixed one); 0 is a valid mode too
        if mode is not None:
            os.chmod(target, mode)
    except Exception as exception:
        raise Error(f'Cannot move file "{source}:{target}": {exception}') from exception
|
||
|
||
def infer_format(path: str) -> Optional[str]:
    """Infer a format name from the file extension of ``path``.

    The extension is lower-cased so that ``data.JSON`` and ``data.json`` give
    the same result, and ``yml`` is normalized to ``yaml``.

    Returns:
        The extension without the leading dot, or ``None`` when the path has
        no extension.
    """
    extension = Path(path).suffix[1:].lower()
    if extension == "yml":
        extension = "yaml"
    return extension or None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,54 +1,67 @@ | ||
from __future__ import annotations | ||
|
||
import json | ||
import pprint | ||
from typing import Any, Dict, List | ||
from importlib import import_module | ||
from typing import Optional | ||
|
||
from pydantic import BaseModel, ValidationError | ||
from pydantic_core import ErrorDetails | ||
from pydantic import BaseModel | ||
from typing_extensions import Self | ||
|
||
from . import types | ||
from .error import Error | ||
from .helpers.file import infer_format, read_file, write_file | ||
|
||
|
||
class Model(BaseModel, extra="forbid", validate_assignment=True): | ||
custom: types.IData = {} | ||
custom: types.IDict = {} | ||
|
||
def __str__(self) -> str: | ||
return repr(self) | ||
|
||
def __repr__(self) -> str: | ||
return pprint.pformat(self.to_dict(), sort_dicts=False) | ||
|
||
# Validators | ||
|
||
# TODO: rebase on validate_yaml/json/dict? | ||
@classmethod | ||
def validate_descriptor(cls, descriptor: Dict[str, Any]): | ||
errors: List[ErrorDetails] = [] | ||
try: | ||
cls.model_validate(descriptor) | ||
except ValidationError as e: | ||
errors = e.errors() | ||
return errors | ||
# Converters | ||
|
||
# Mappers | ||
def to_path(self, path: str, *, format: Optional[str] = None):
    """Serialize the model to text and write it to ``path``.

    When ``format`` is not given it is inferred from ``path``; an ``Error``
    is raised if no format can be determined.
    """
    resolved = format if format else infer_format(path)
    if resolved:
        write_file(path, self.to_text(format=resolved))
        return
    raise Error(f"Cannot infer format from path: {path}")
|
||
@classmethod | ||
def from_yaml(cls, path: str): | ||
pass | ||
def from_path(cls, path: str, *, format: Optional[str] = None) -> Self:
    """Read ``path`` and build a model from its content.

    When ``format`` is not given it is inferred from ``path``; an ``Error``
    is raised, before the file is read, if no format can be determined.
    """
    resolved = format if format else infer_format(path)
    if not resolved:
        raise Error(f"Cannot infer format from path: {path}")
    return cls.from_text(read_file(path), format=resolved)  # type: ignore
|
||
@classmethod | ||
def to_yaml(cls, path: str): | ||
pass | ||
def to_text(self, *, format: str) -> str:
    """Serialize the model's dictionary form as ``json`` or ``yaml`` text.

    Raises ``Error`` for any other format.
    """
    payload = self.to_dict()
    if format == "yaml":
        # Imported on demand, only when YAML output is requested
        return import_module("yaml").dump(payload)
    if format == "json":
        return json.dumps(payload)
    raise Error(f"Cannot convert to text for format: {format}")
|
||
@classmethod | ||
def from_json(cls, path: str): | ||
pass | ||
def from_text(cls, text: str, *, format: str) -> Self:
    """Build a model from ``json`` or ``yaml`` text.

    Args:
        text: Serialized descriptor.
        format: Either ``"json"`` or ``"yaml"``.

    Raises:
        Error: If ``format`` is not supported.
    """
    if format == "json":
        data = json.loads(text)
        return cls.from_dict(data)
    elif format == "yaml":
        yaml = import_module("yaml")
        # safe_load only builds plain data types; a bare yaml.load() needs an
        # explicit Loader and must not be used on untrusted documents
        data = yaml.safe_load(text)
        return cls.from_dict(data)
    raise Error(f"Cannot create from text with format: {format}")
|
||
@classmethod | ||
def to_json(cls, path: str): | ||
pass | ||
def to_dict(self): | ||
return self.model_dump(mode="json", exclude_unset=True, exclude_none=True) | ||
|
||
@classmethod | ||
def from_dict(cls, data: types.IData): | ||
def from_dict(cls, data: types.IDict) -> Self: | ||
return cls(**data) | ||
|
||
def to_dict(self): | ||
return self.model_dump(mode="json", exclude_unset=True, exclude_none=True) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
from .package import DcatPackage | ||
from .resource import DcatResource |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from typing import Any | ||
|
||
from rdflib import Graph, URIRef | ||
|
||
from .helpers import create_node | ||
from .types import ISubject | ||
|
||
|
||
def id(g: Graph, identifier: str, *, predicate: URIRef, object: URIRef):
    """Add the triple ``(URIRef(identifier), predicate, object)`` to ``g``.

    Returns the subject reference created from ``identifier``.
    """
    ref = URIRef(identifier)
    triple = (ref, predicate, object)
    g.add(triple)
    return ref
|
||
|
||
def node(g: Graph, value: Any, *, subject: ISubject, predicate: URIRef):
    """Add ``value`` to ``g`` as the object of ``(subject, predicate)``.

    The value is converted to an RDF node with ``create_node`` first.
    """
    g.add((subject, predicate, create_node(value)))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
from typing import Any, Union | ||
from urllib.parse import quote | ||
|
||
from rdflib import Literal, URIRef | ||
|
||
|
||
# https://github.com/ckan/ckanext-dcat/blob/master/ckanext/dcat/profiles.py | ||
def create_node(value: Any) -> Union[URIRef, Literal]:
    """Convert a value into an RDF node.

    Strings that, once stripped of surrounding whitespace, start with
    ``http://`` or ``https://`` are returned as a ``URIRef``; anything else
    (including non-string values) is returned as a ``Literal`` holding the
    original, unmodified value.
    """
    try:
        candidate = value.strip()
        if candidate.startswith(("http://", "https://")):
            # only encode this limited subset of characters to avoid more complex URL parsing
            # (e.g. valid ? in query string vs. ? as value).
            # can be applied multiple times, as encoded %xy is left untouched. Therefore, no
            # unquote is necessary beforehand.
            for char in " !\"$'()*,;<>[]{|}\\^`":
                candidate = candidate.replace(char, quote(char))
            # although all invalid chars checked by rdflib should have been quoted, try to
            # serialize the object. If it breaks, use Literal instead.
            uri = URIRef(candidate)
            uri.n3()
            return uri
    except Exception:
        # Deliberate best-effort: non-strings and unserializable URIs fall back to Literal
        pass
    # The quoting above works on a copy, so the Literal keeps the original value
    return Literal(value)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
from typing import List, Optional | ||
|
||
from rdflib import Graph, Literal, URIRef | ||
|
||
from .types import IStringNode, ISubject | ||
|
||
|
||
def id(g: Graph, *, predicate: URIRef, object: URIRef) -> Optional[URIRef]:
    """Return the subject of a ``(?, predicate, object)`` triple in ``g``.

    Returns ``None`` when the lookup fails or the subject is not a ``URIRef``.
    """
    try:
        subject = g.value(predicate=predicate, object=object)
    except Exception:
        return None
    return subject if isinstance(subject, URIRef) else None
|
||
|
||
def node(g: Graph, *, subject: ISubject, predicate: URIRef) -> Optional[IStringNode]:
    """Return a single object node for ``(subject, predicate)`` in ``g``.

    A literal tagged with the default language (``en``) wins; otherwise the
    first ``URIRef`` or ``Literal`` object is returned, or ``None`` if there
    is none.
    """
    default_lang = "en"
    candidates = list(g.objects(subject, predicate))

    # Prefer the default language
    preferred = next(
        (
            candidate
            for candidate in candidates
            if isinstance(candidate, Literal) and candidate.language == default_lang
        ),
        None,
    )
    if preferred is not None:
        return preferred

    # Otherwise, return the first item
    return next(
        (
            candidate
            for candidate in candidates
            if isinstance(candidate, (URIRef, Literal))
        ),
        None,
    )
|
||
|
||
def string(g: Graph, *, subject: ISubject, predicate: URIRef) -> Optional[str]:
    """Return the object of ``(subject, predicate)`` as a string.

    Returns ``None`` when there is no such object or its text is empty.
    """
    value = node(g, subject=subject, predicate=predicate)
    # Compare with None explicitly: a typed literal such as 0 or false is
    # falsy but still carries text that must be returned
    if value is None:
        return None
    return str(value) or None
|
||
|
||
def integer(g: Graph, *, subject: ISubject, predicate: URIRef) -> Optional[int]:
    """Return the object of ``(subject, predicate)`` as an integer.

    Returns ``None`` when there is no such object or it cannot be converted
    with ``int``.
    """
    value = node(g, subject=subject, predicate=predicate)
    # Compare with None explicitly: an integer literal 0 is falsy but valid
    if value is None:
        return None
    try:
        return int(value)
    except (TypeError, ValueError):
        # Best-effort: a non-numeric value is reported as missing
        return None
|
||
|
||
def nodes(g: Graph, *, subject: ISubject, predicate: URIRef) -> List[IStringNode]:
    """Return every ``URIRef`` or ``Literal`` object of ``(subject, predicate)``."""
    found: List[IStringNode] = []
    for candidate in g.objects(subject, predicate):
        if isinstance(candidate, (URIRef, Literal)):
            found.append(candidate)
    return found
|
||
|
||
def strings(g: Graph, *, subject: ISubject, predicate: URIRef) -> List[str]:
    """Return the objects of ``(subject, predicate)`` converted to strings."""
    found = nodes(g, subject=subject, predicate=predicate)
    return list(map(str, found))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
from rdflib import Namespace | ||
from rdflib.namespace import FOAF, RDF | ||
|
||
# Namespaces used by the DCAT mappings
ADMS = Namespace("http://www.w3.org/ns/adms#")
DCAT = Namespace("http://www.w3.org/ns/dcat#")
DCT = Namespace("http://purl.org/dc/terms/")
OWL = Namespace("http://www.w3.org/2002/07/owl#")

# Terms referenced by the DCAT mappings
ACCESS_URL = DCAT.accessURL
ACCRUAL_PERIODICITY = DCT.accrualPeriodicity
ALTERNATE_IDENTIFIER = ADMS.identifier
BYTE_SIZE = DCAT.byteSize
CONFORMS_TO = DCT.conformsTo
DATASET = DCAT.Dataset
DESCRIPTION = DCT.description
DISTRIBUTION = DCAT.distribution
DOWNLOAD_URL = DCAT.downloadURL
HAS_VERSION = DCT.hasVersion
HOMEPAGE = FOAF.homepage
IDENTIFIER = DCT.identifier
ISSUED = DCT.issued
IS_VERSION_OF = DCT.isVersionOf
KEYWORD = DCAT.keyword
LANDING_PAGE = DCAT.landingPage
LANGUAGE = DCT.language
LICENSE = DCT.license
MEDIA_TYPE = DCAT.mediaType
MODIFIED = DCT.modified
PAGE = FOAF.page
PROVENANCE = DCT.provenance
RELATED_RESOURCE = DCT.relation
SAMPLE = ADMS.sample
SOURCE = DCT.source
THEME = DCAT.theme
TITLE = DCT.title
TYPE = RDF.type
VERSION = OWL.versionInfo

# Misspelled names from the original module, kept so existing imports keep working
ACCURAL_PERIODICITY = ACCRUAL_PERIODICITY
COMFORMS_TO = CONFORMS_TO

# Prefix -> namespace pairs
BINDINGS = {
    "adms": ADMS,
    "dcat": DCAT,
    "dct": DCT,
    "owl": OWL,
}
Oops, something went wrong.