From 4ad6f9f2974a63b58c641c8c66314d4df6d4fa48 Mon Sep 17 00:00:00 2001
From: roll <roll@users.noreply.github.com>
Date: Thu, 7 Dec 2023 15:50:43 +0000
Subject: [PATCH] Dcat mapper (#4)

* Added Dcat model

* Bootstrapped DcatPackage.from_xml

* Added more dcat mappings

* Updated model methods

* Finished dcat package props

* Improved model methods

* Mapped dcat resource

* Added todos

* Renamed parsers -> loaders/dumpers

* Improved model methods

* Added platform

* Removed platform

* Removed todos

* Mapped single value to graph

* Added DcatPackage.from/to_graph

* Added dcat namespaces

* Mapped package lists

* Fixed dcat model

* Finished dcat model mapping

* Sorted DcatResource props

* Sorted DcatPackage props

* Implemented dcat to dp

* Implemented dp to dcat
---
 README.md                                |   4 +-
 dplib/actions/__init__.py                |   0
 dplib/actions/schema/__init__.py         |   0
 dplib/actions/schema/check.py            |  14 +
 dplib/error.py                           |   2 +
 dplib/helpers/file.py                    |  45 +++
 dplib/helpers/resource.py                |   4 +-
 dplib/model.py                           |  73 +++--
 dplib/models/resource/resource.py        |   8 +-
 dplib/plugins/ckan/models/package.py     |   2 +-
 dplib/plugins/ckan/models/resource.py    |   6 +-
 dplib/plugins/cli/__init__.py            |   0
 dplib/plugins/datacite/models/package.py |   2 +-
 dplib/plugins/dcat/models/__init__.py    |   2 +
 dplib/plugins/dcat/models/dumpers.py     |  17 ++
 dplib/plugins/dcat/models/helpers.py     |  29 ++
 dplib/plugins/dcat/models/loaders.py     |  57 ++++
 dplib/plugins/dcat/models/namespaces.py  |  44 +++
 dplib/plugins/dcat/models/package.py     | 356 +++++++++++++++++++++++
 dplib/plugins/dcat/models/resource.py    | 208 +++++++++++++
 dplib/plugins/dcat/models/types.py       |   6 +
 dplib/plugins/github/models/package.py   |   2 +-
 dplib/plugins/github/models/resource.py  |   6 +-
 dplib/plugins/pandas/models/field.py     |   2 +-
 dplib/plugins/pandas/models/schema.py    |   2 +-
 dplib/plugins/polars/models/field.py     |   2 +-
 dplib/plugins/polars/models/schema.py    |   2 +-
 dplib/plugins/sql/models/field.py        |   2 +-
 dplib/plugins/sql/models/schema.py       |   2 +-
 dplib/plugins/zenodo/models/package.py   |   2 +-
 dplib/plugins/zenodo/models/resource.py  |   6 +-
 dplib/types.py                           |   2 +-
 pyproject.toml                           |   6 +-
 33 files changed, 856 insertions(+), 59 deletions(-)
 create mode 100644 dplib/actions/__init__.py
 create mode 100644 dplib/actions/schema/__init__.py
 create mode 100644 dplib/actions/schema/check.py
 create mode 100644 dplib/error.py
 create mode 100644 dplib/helpers/file.py
 create mode 100644 dplib/plugins/cli/__init__.py
 create mode 100644 dplib/plugins/dcat/models/dumpers.py
 create mode 100644 dplib/plugins/dcat/models/helpers.py
 create mode 100644 dplib/plugins/dcat/models/loaders.py
 create mode 100644 dplib/plugins/dcat/models/namespaces.py
 create mode 100644 dplib/plugins/dcat/models/types.py

diff --git a/README.md b/README.md
index eaec119..415aa92 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,8 @@
-# dplib-py
+# Data Packaging Library
 
 [![Build](https://img.shields.io/github/actions/workflow/status/frictionlessdata/dplib-py/general.yaml?branch=main)](https://github.com/frictionlessdata/dplib-py/actions)
 [![Coverage](https://img.shields.io/codecov/c/github/frictionlessdata/dplib-py/main)](https://codecov.io/gh/frictionlessdata/dplib-py)
 [![Release](https://img.shields.io/pypi/v/dplib-py.svg)](https://pypi.python.org/pypi/dplib-py)
 [![Codebase](https://img.shields.io/badge/codebase-github-brightgreen)](https://github.com/frictionlessdata/dplib-py)
 
-Python implementation of the Data Package standard
+Python implementation of the Data Package standard and various models and utils for working with datasets.
diff --git a/dplib/actions/__init__.py b/dplib/actions/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/dplib/actions/schema/__init__.py b/dplib/actions/schema/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/dplib/actions/schema/check.py b/dplib/actions/schema/check.py
new file mode 100644
index 0000000..1f12771
--- /dev/null
+++ b/dplib/actions/schema/check.py
@@ -0,0 +1,14 @@
+#  from pydantic import BaseModel, ValidationError
+#  from pydantic_core import ErrorDetails
+
+#  def schema_check(cls, descriptor: Dict[str, Any]):
+#  errors: List[ErrorDetails] = []
+#  try:
+#  cls.model_validate(descriptor)
+#  except ValidationError as e:
+#  errors = e.errors()
+#  return errors
+
+
+def schema_check():
+    pass
diff --git a/dplib/error.py b/dplib/error.py
new file mode 100644
index 0000000..9c69c25
--- /dev/null
+++ b/dplib/error.py
@@ -0,0 +1,2 @@
+class Error(Exception):
+    pass
diff --git a/dplib/helpers/file.py b/dplib/helpers/file.py
new file mode 100644
index 0000000..87e6ce7
--- /dev/null
+++ b/dplib/helpers/file.py
@@ -0,0 +1,45 @@
+import os
+import shutil
+import tempfile
+from pathlib import Path
+from typing import Any, Optional
+
+import fsspec  # type: ignore
+
+from ..error import Error
+
+
+def read_file(path: str, *, mode: str = "rt", encoding: str = "utf-8") -> str:
+    try:
+        with fsspec.open(path, mode=mode, encoding=encoding) as file:  # type: ignore
+            return file.read()  # type: ignore
+    except Exception as exception:
+        raise Error(f'Cannot read file "{path}": {exception}')
+
+
+def write_file(path: str, body: Any, *, mode: str = "wt", encoding: str = "utf-8"):
+    try:
+        eff_enc = encoding if mode == "wt" else None
+        with tempfile.NamedTemporaryFile(mode, delete=False, encoding=eff_enc) as file:
+            file.write(body)
+            file.flush()
+        move_file(file.name, path, mode=0o644)
+    except Exception as exception:
+        raise Error(f'Cannot write file "{path}": {exception}')
+
+
+def move_file(source: str, target: str, *, mode: Optional[int] = None):
+    try:
+        Path(target).parent.mkdir(parents=True, exist_ok=True)
+        shutil.move(source, target)
+        if mode:
+            os.chmod(target, 0o644)
+    except Exception as exception:
+        raise Error(f'Cannot move file "{source}:{target}": {exception}')
+
+
+def infer_format(path: str):
+    format = Path(path).suffix[1:]
+    if format == "yml":
+        format = "yaml"
+    return format or None
diff --git a/dplib/helpers/resource.py b/dplib/helpers/resource.py
index 26f2721..89aace4 100644
--- a/dplib/helpers/resource.py
+++ b/dplib/helpers/resource.py
@@ -3,5 +3,5 @@
 from slugify import slugify
 
 
-def path_to_name(path: str) -> str:
-    return slugify(Path(path).stem, separator="_")
+def slugify_name(name: str) -> str:
+    return slugify(Path(name).stem, separator="_")
diff --git a/dplib/model.py b/dplib/model.py
index 9a1e5b2..a4c22da 100644
--- a/dplib/model.py
+++ b/dplib/model.py
@@ -1,14 +1,20 @@
+from __future__ import annotations
+
+import json
 import pprint
-from typing import Any, Dict, List
+from importlib import import_module
+from typing import Optional
 
-from pydantic import BaseModel, ValidationError
-from pydantic_core import ErrorDetails
+from pydantic import BaseModel
+from typing_extensions import Self
 
 from . import types
+from .error import Error
+from .helpers.file import infer_format, read_file, write_file
 
 
 class Model(BaseModel, extra="forbid", validate_assignment=True):
-    custom: types.IData = {}
+    custom: types.IDict = {}
 
     def __str__(self) -> str:
         return repr(self)
@@ -16,39 +22,46 @@ def __str__(self) -> str:
     def __repr__(self) -> str:
         return pprint.pformat(self.to_dict(), sort_dicts=False)
 
-    # Validators
-
-    # TODO: rebase on validate_yaml/json/dict?
-    @classmethod
-    def validate_descriptor(cls, descriptor: Dict[str, Any]):
-        errors: List[ErrorDetails] = []
-        try:
-            cls.model_validate(descriptor)
-        except ValidationError as e:
-            errors = e.errors()
-        return errors
+    # Converters
 
-    # Mappers
+    def to_path(self, path: str, *, format: Optional[str] = None):
+        format = format or infer_format(path)
+        if not format:
+            raise Error(f"Cannot infer format from path: {path}")
+        text = self.to_text(format=format)
+        write_file(path, text)
 
     @classmethod
-    def from_yaml(cls, path: str):
-        pass
+    def from_path(cls, path: str, *, format: Optional[str] = None) -> Self:
+        format = format or infer_format(path)
+        if not format:
+            raise Error(f"Cannot infer format from path: {path}")
+        text = read_file(path)
+        return cls.from_text(text, format=format)  # type: ignore
 
-    @classmethod
-    def to_yaml(cls, path: str):
-        pass
+    def to_text(self, *, format: str) -> str:
+        data = self.to_dict()
+        if format == "json":
+            return json.dumps(data)
+        elif format == "yaml":
+            yaml = import_module("yaml")
+            return yaml.dump(data)
+        raise Error(f"Cannot convert to text for format: {format}")
 
     @classmethod
-    def from_json(cls, path: str):
-        pass
+    def from_text(cls, text: str, *, format: str) -> Self:
+        if format == "json":
+            data = json.loads(text)
+            return cls.from_dict(data)
+        elif format == "yaml":
+            yaml = import_module("yaml")
+            data = yaml.load(text)
+            return cls.from_dict(data)
+        raise Error(f"Cannot create from text with format: {format}")
 
-    @classmethod
-    def to_json(cls, path: str):
-        pass
+    def to_dict(self):
+        return self.model_dump(mode="json", exclude_unset=True, exclude_none=True)
 
     @classmethod
-    def from_dict(cls, data: types.IData):
+    def from_dict(cls, data: types.IDict) -> Self:
         return cls(**data)
-
-    def to_dict(self):
-        return self.model_dump(mode="json", exclude_unset=True, exclude_none=True)
diff --git a/dplib/models/resource/resource.py b/dplib/models/resource/resource.py
index b6f32e1..fb92261 100644
--- a/dplib/models/resource/resource.py
+++ b/dplib/models/resource/resource.py
@@ -17,7 +17,7 @@ class Resource(Model):
     profile: Optional[str] = None
 
     path: Optional[str] = None
-    data: Optional[types.IData] = None
+    data: Optional[types.IDict] = None
 
     dialect: Optional[Dialect] = None
     schema: Optional[Schema] = None  # type: ignore
@@ -29,9 +29,9 @@ class Resource(Model):
     encoding: Optional[str] = None
     bytes: Optional[int] = None
     hash: Optional[str] = None
-    sources: Optional[List[Source]] = None
-    licenses: Optional[List[License]] = None
-    contributors: Optional[List[Contributor]] = None
+    sources: List[Source] = []
+    licenses: List[License] = []
+    contributors: List[Contributor] = []
 
     @property
     def parsed_hash(self) -> Optional[ParsedHash]:
diff --git a/dplib/plugins/ckan/models/package.py b/dplib/plugins/ckan/models/package.py
index 76c9afa..fa89dc1 100644
--- a/dplib/plugins/ckan/models/package.py
+++ b/dplib/plugins/ckan/models/package.py
@@ -35,7 +35,7 @@ class CkanPackage(Model):
     metadata_created: Optional[str] = None
     metadata_modified: Optional[str] = None
 
-    # Mappers
+    # Converters
 
     def to_dp(self):
         package = Package()
diff --git a/dplib/plugins/ckan/models/resource.py b/dplib/plugins/ckan/models/resource.py
index 71054b3..674e7ea 100644
--- a/dplib/plugins/ckan/models/resource.py
+++ b/dplib/plugins/ckan/models/resource.py
@@ -2,7 +2,7 @@
 
 from typing import Optional
 
-from dplib.helpers.resource import path_to_name
+from dplib.helpers.resource import slugify_name
 from dplib.model import Model
 from dplib.models import Resource
 
@@ -19,10 +19,10 @@ class CkanResource(Model):
     mimetype: Optional[str] = None
     size: Optional[int] = None
 
-    # Mappers
+    # Converters
 
     def to_dp(self) -> Resource:
-        resource = Resource(path=self.name, name=path_to_name(self.name))
+        resource = Resource(path=self.name, name=slugify_name(self.name))
 
         # Format
         if self.format:
diff --git a/dplib/plugins/cli/__init__.py b/dplib/plugins/cli/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/dplib/plugins/datacite/models/package.py b/dplib/plugins/datacite/models/package.py
index 13074c3..523b25a 100644
--- a/dplib/plugins/datacite/models/package.py
+++ b/dplib/plugins/datacite/models/package.py
@@ -33,7 +33,7 @@ class DatacitePackage(Model):
     subjects: List[DataciteSubject] = []
     titles: List[DataciteTitle] = []
 
-    # Mappers
+    # Converters
 
     def to_dp(self) -> Package:
         package = Package()
diff --git a/dplib/plugins/dcat/models/__init__.py b/dplib/plugins/dcat/models/__init__.py
index e69de29..9ac46c1 100644
--- a/dplib/plugins/dcat/models/__init__.py
+++ b/dplib/plugins/dcat/models/__init__.py
@@ -0,0 +1,2 @@
+from .package import DcatPackage
+from .resource import DcatResource
diff --git a/dplib/plugins/dcat/models/dumpers.py b/dplib/plugins/dcat/models/dumpers.py
new file mode 100644
index 0000000..b3b158e
--- /dev/null
+++ b/dplib/plugins/dcat/models/dumpers.py
@@ -0,0 +1,17 @@
+from typing import Any
+
+from rdflib import Graph, URIRef
+
+from .helpers import create_node
+from .types import ISubject
+
+
+def id(g: Graph, identifier: str, *, predicate: URIRef, object: URIRef):
+    subject = URIRef(identifier)
+    g.add((subject, predicate, object))
+    return subject
+
+
+def node(g: Graph, value: Any, *, subject: ISubject, predicate: URIRef):
+    object = create_node(value)
+    g.add((subject, predicate, object))
diff --git a/dplib/plugins/dcat/models/helpers.py b/dplib/plugins/dcat/models/helpers.py
new file mode 100644
index 0000000..521e51e
--- /dev/null
+++ b/dplib/plugins/dcat/models/helpers.py
@@ -0,0 +1,29 @@
+from typing import Any, Union
+from urllib.parse import quote
+
+from rdflib import Literal, URIRef
+
+
+# https://github.com/ckan/ckanext-dcat/blob/master/ckanext/dcat/profiles.py
+def create_node(value: Any) -> Union[URIRef, Literal]:
+    try:
+        stripped_value = value.strip()
+        if stripped_value.startswith("http://") or stripped_value.startswith("https://"):
+            # only encode this limited subset of characters to avoid more complex URL parsing
+            # (e.g. valid ? in query string vs. ? as value).
+            # can be applied multiple times, as encoded %xy is left untouched. Therefore, no
+            # unquote is necessary beforehand.
+            quotechars = " !\"$'()*,;<>[]{|}\\^`"
+            for c in quotechars:
+                value = value.replace(c, quote(c))
+            # although all invalid chars checked by rdflib should have been quoted, try to serialize
+            # the object. If it breaks, use Literal instead.
+            value = URIRef(value)
+            value.n3()
+            # URI is fine, return the object
+            return value
+        else:
+            return Literal(value)
+    except Exception:
+        # In case something goes wrong: use Literal
+        return Literal(value)
diff --git a/dplib/plugins/dcat/models/loaders.py b/dplib/plugins/dcat/models/loaders.py
new file mode 100644
index 0000000..261d508
--- /dev/null
+++ b/dplib/plugins/dcat/models/loaders.py
@@ -0,0 +1,57 @@
+from typing import List, Optional
+
+from rdflib import Graph, Literal, URIRef
+
+from .types import IStringNode, ISubject
+
+
+def id(g: Graph, *, predicate: URIRef, object: URIRef) -> Optional[URIRef]:
+    try:
+        id = g.value(predicate=predicate, object=object)
+        if isinstance(id, URIRef):
+            return id
+    except Exception:
+        pass
+
+
+def node(g: Graph, *, subject: ISubject, predicate: URIRef) -> Optional[IStringNode]:
+    default_lang = "en"
+    items = list(g.objects(subject, predicate))
+
+    # Prefer the default language
+    for item in items:
+        if isinstance(item, Literal):
+            if item.language and item.language == default_lang:
+                return item
+
+    # Otherwise, return the first item
+    for item in items:
+        if isinstance(item, (URIRef, Literal)):
+            return item
+
+
+def string(g: Graph, *, subject: ISubject, predicate: URIRef) -> Optional[str]:
+    value = node(g, subject=subject, predicate=predicate)
+    if value:
+        return str(value)
+
+
+def integer(g: Graph, *, subject: ISubject, predicate: URIRef) -> Optional[int]:
+    value = node(g, subject=subject, predicate=predicate)
+    if value:
+        try:
+            return int(value)
+        except Exception:
+            pass
+
+
+def nodes(g: Graph, *, subject: ISubject, predicate: URIRef) -> List[IStringNode]:
+    return [
+        item
+        for item in g.objects(subject, predicate)
+        if isinstance(item, (URIRef, Literal))
+    ]
+
+
+def strings(g: Graph, *, subject: ISubject, predicate: URIRef) -> List[str]:
+    return [str(item) for item in nodes(g, subject=subject, predicate=predicate)]
diff --git a/dplib/plugins/dcat/models/namespaces.py b/dplib/plugins/dcat/models/namespaces.py
new file mode 100644
index 0000000..08e4604
--- /dev/null
+++ b/dplib/plugins/dcat/models/namespaces.py
@@ -0,0 +1,44 @@
+from rdflib import Namespace
+from rdflib.namespace import FOAF, RDF
+
+ADMS = Namespace("http://www.w3.org/ns/adms#")
+DCAT = Namespace("http://www.w3.org/ns/dcat#")
+DCT = Namespace("http://purl.org/dc/terms/")
+OWL = Namespace("http://www.w3.org/2002/07/owl#")
+
+ACCESS_URL = DCAT.accessURL
+ACCURAL_PERIODICITY = DCT.accrualPeriodicity
+ALTERNATE_IDENTIFIER = ADMS.identifier
+BYTE_SIZE = DCAT.byteSize
+COMFORMS_TO = DCT.conformsTo
+DATASET = DCAT.Dataset
+DESCRIPTION = DCT.description
+DISTRIBUTION = DCAT.distribution
+DOWNLOAD_URL = DCAT.downloadURL
+HAS_VERSION = DCT.hasVersion
+HOMEPAGE = FOAF.homepage
+IDENTIFIER = DCT.identifier
+ISSUED = DCT.issued
+IS_VERSION_OF = DCT.isVersionOf
+KEYWORD = DCAT.keyword
+LANDING_PAGE = DCAT.landingPage
+LANGUAGE = DCT.language
+LICENSE = DCT.license
+MEDIA_TYPE = DCAT.mediaType
+MODIFIED = DCT.modified
+PAGE = FOAF.page
+PROVENANCE = DCT.provenance
+RELATED_RESOURCE = DCT.relation
+SAMPLE = ADMS.sample
+SOURCE = DCT.source
+THEME = DCAT.theme
+TITLE = DCT.title
+TYPE = RDF.type
+VERSION = OWL.versionInfo
+
+BINDINGS = {
+    "adms": ADMS,
+    "dcat": DCAT,
+    "dct": DCT,
+    "owl": OWL,
+}
diff --git a/dplib/plugins/dcat/models/package.py b/dplib/plugins/dcat/models/package.py
index e69de29..b4487ea 100644
--- a/dplib/plugins/dcat/models/package.py
+++ b/dplib/plugins/dcat/models/package.py
@@ -0,0 +1,356 @@
+from __future__ import annotations
+
+from typing import List, Optional
+
+from rdflib import BNode, Graph, URIRef
+
+from dplib.error import Error
+from dplib.model import Model
+from dplib.models import Package
+
+from . import dumpers, loaders
+from . import namespaces as ns
+from .resource import DcatResource
+
+# References:
+# - https://www.w3.org/TR/vocab-dcat-2/
+# - https://joinup.ec.europa.eu/asset/dcat_application_profile
+# - https://github.com/ckan/ckanext-dcat/blob/master/ckanext/dcat/profiles.py
+
+
+class DcatPackage(Model):
+    identifier: Optional[str] = None
+    distributions: List[DcatResource] = []
+
+    accural_periodicity: Optional[str] = None
+    alternate_identifiers: List[str] = []
+    comforms_to: List[str] = []
+    description: Optional[str] = None
+    has_versions: List[str] = []
+    homepage: Optional[str] = None
+    issued: Optional[str] = None
+    is_version_of: List[str] = []
+    keywords: List[str] = []
+    landing_page: Optional[str] = None
+    languages: List[str] = []
+    modified: Optional[str] = None
+    pages: List[str] = []
+    provenance: Optional[str] = None
+    related_resources: List[str] = []
+    samples: List[str] = []
+    sources: List[str] = []
+    themes: List[str] = []
+    title: Optional[str] = None
+    version: Optional[str] = None
+
+    # Converters
+
+    def to_text(self, *, format: str):
+        g = self.to_graph()
+        return g.serialize(format=format)
+
+    @classmethod
+    def from_text(cls, text: str, *, format: str):
+        g = Graph()
+        g.parse(data=text, format=format)
+        return cls.from_graph(g)
+
+    def to_graph(self):
+        g = Graph()
+        for prefix, namespace in ns.BINDINGS.items():
+            g.bind(prefix, namespace)
+
+        # Identifier
+        if not self.identifier:
+            raise Error(f"Cannot dump DCAT package without identifier: {self}")
+        id = dumpers.id(g, self.identifier, predicate=ns.TYPE, object=ns.DATASET)
+
+        # Accural periodicity
+        if self.accural_periodicity:
+            dumpers.node(
+                g, self.accural_periodicity, subject=id, predicate=ns.ACCURAL_PERIODICITY
+            )
+
+        # Alternate identifiers
+        for identifier in self.alternate_identifiers:
+            dumpers.node(g, identifier, subject=id, predicate=ns.ALTERNATE_IDENTIFIER)
+
+        # Conforms to
+        for conforms_to in self.comforms_to:
+            dumpers.node(g, conforms_to, subject=id, predicate=ns.COMFORMS_TO)
+
+        # Description
+        if self.description:
+            dumpers.node(g, self.description, subject=id, predicate=ns.DESCRIPTION)
+
+        # Has versions
+        for has_version in self.has_versions:
+            dumpers.node(g, has_version, subject=id, predicate=ns.HAS_VERSION)
+
+        # Homepage
+        if self.homepage:
+            dumpers.node(g, self.homepage, subject=id, predicate=ns.HOMEPAGE)
+
+        # Issued
+        if self.issued:
+            dumpers.node(g, self.issued, subject=id, predicate=ns.ISSUED)
+
+        # Is version of
+        for is_version_of in self.is_version_of:
+            dumpers.node(g, is_version_of, subject=id, predicate=ns.IS_VERSION_OF)
+
+        # Keywords
+        for keyword in self.keywords:
+            dumpers.node(g, keyword, subject=id, predicate=ns.KEYWORD)
+
+        # Landing page
+        if self.landing_page:
+            dumpers.node(g, self.landing_page, subject=id, predicate=ns.LANDING_PAGE)
+
+        # Languages
+        for language in self.languages:
+            dumpers.node(g, language, subject=id, predicate=ns.LANGUAGE)
+
+        # Modified
+        if self.modified:
+            dumpers.node(g, self.modified, subject=id, predicate=ns.MODIFIED)
+
+        # Pages
+        for page in self.pages:
+            dumpers.node(g, page, subject=id, predicate=ns.PAGE)
+
+        # Provenance
+        if self.provenance:
+            dumpers.node(g, self.provenance, subject=id, predicate=ns.PROVENANCE)
+
+        # Related resources
+        for related_resource in self.related_resources:
+            dumpers.node(g, related_resource, subject=id, predicate=ns.RELATED_RESOURCE)
+
+        # Samples
+        for sample in self.samples:
+            dumpers.node(g, sample, subject=id, predicate=ns.SAMPLE)
+
+        # Sources
+        for source in self.sources:
+            dumpers.node(g, source, subject=id, predicate=ns.SOURCE)
+
+        # Themes
+        for theme in self.themes:
+            dumpers.node(g, theme, subject=id, predicate=ns.THEME)
+
+        # Title
+        if self.title:
+            dumpers.node(g, self.title, subject=id, predicate=ns.TITLE)
+
+        # Version
+        if self.version:
+            dumpers.node(g, self.version, subject=id, predicate=ns.VERSION)
+
+        # Distributions
+        for distribution in self.distributions:
+            distribution_id = BNode()
+            g.add((id, ns.DISTRIBUTION, distribution_id))
+            g.add((distribution_id, ns.TYPE, ns.DISTRIBUTION))
+            distribution.to_graph(g, id=distribution_id)
+
+        return g
+
+    @classmethod
+    def from_graph(cls, g: Graph):
+        package = DcatPackage()
+
+        # Identifier
+        id = loaders.id(g, predicate=ns.TYPE, object=ns.DATASET)
+        if not id:
+            raise Error(f"Cannot load DCAT package without identifier: {g}")
+        package.identifier = str(id)
+
+        # Accural periodicity
+        periodicity = loaders.string(g, subject=id, predicate=ns.ACCURAL_PERIODICITY)
+        if periodicity:
+            package.accural_periodicity = periodicity
+
+        # Alternate identifiers
+        identifiers = loaders.strings(g, subject=id, predicate=ns.ALTERNATE_IDENTIFIER)
+        if identifiers:
+            package.alternate_identifiers = identifiers
+
+        # Conforms to
+        conforms_to = loaders.strings(g, subject=id, predicate=ns.COMFORMS_TO)
+        if conforms_to:
+            package.comforms_to = conforms_to
+
+        # Description
+        description = loaders.string(g, subject=id, predicate=ns.DESCRIPTION)
+        if description:
+            package.description = description
+
+        # Has versions
+        has_versions = loaders.strings(g, subject=id, predicate=ns.HAS_VERSION)
+        if has_versions:
+            package.has_versions = has_versions
+
+        # Homepage
+        homepage = loaders.string(g, subject=id, predicate=ns.HOMEPAGE)
+        if homepage:
+            package.homepage = homepage
+
+        # Issued
+        issued = loaders.string(g, subject=id, predicate=ns.ISSUED)
+        if issued:
+            package.issued = issued
+
+        # Is version of
+        is_version_of = loaders.strings(g, subject=id, predicate=ns.IS_VERSION_OF)
+        if is_version_of:
+            package.is_version_of = is_version_of
+
+        # Keywords
+        keywords = loaders.strings(g, subject=id, predicate=ns.KEYWORD)
+        if keywords:
+            package.keywords = keywords
+
+        # Landing page
+        landing_page = loaders.string(g, subject=id, predicate=ns.LANDING_PAGE)
+        if landing_page:
+            package.landing_page = landing_page
+
+        # Languages
+        languages = loaders.strings(g, subject=id, predicate=ns.LANGUAGE)
+        if languages:
+            package.languages = languages
+
+        # Modified
+        modified = loaders.string(g, subject=id, predicate=ns.MODIFIED)
+        if modified:
+            package.modified = modified
+
+        # Pages
+        pages = loaders.strings(g, subject=id, predicate=ns.PAGE)
+        if pages:
+            package.pages = pages
+
+        # Provenance
+        provenance = loaders.string(g, subject=id, predicate=ns.PROVENANCE)
+        if provenance:
+            package.provenance = provenance
+
+        # Related resources
+        related_resources = loaders.strings(g, subject=id, predicate=ns.RELATED_RESOURCE)
+        if related_resources:
+            package.related_resources = related_resources
+
+        # Samples
+        samples = loaders.strings(g, subject=id, predicate=ns.SAMPLE)
+        if samples:
+            package.samples = samples
+
+        # Sources
+        sources = loaders.strings(g, subject=id, predicate=ns.SOURCE)
+        if sources:
+            package.sources = sources
+
+        # Themes
+        themes = loaders.strings(g, subject=id, predicate=ns.THEME)
+        if themes:
+            package.themes = themes
+
+        # Title
+        title = loaders.string(g, subject=id, predicate=ns.TITLE)
+        if title:
+            package.title = title
+
+        # Version
+        version = loaders.string(g, subject=id, predicate=ns.VERSION)
+        if version:
+            package.version = version
+
+        # Distributions
+        distributions = g.objects(subject=id, predicate=ns.DISTRIBUTION)
+        for distribution in distributions:
+            if isinstance(distribution, (URIRef, BNode)):
+                resource = DcatResource.from_graph(g, id=distribution)
+                package.distributions.append(resource)
+
+        return package
+
+    def to_dp(self):
+        package = Package()
+
+        # Id
+        if self.identifier:
+            package.id = self.identifier
+
+        # Title
+        if self.title:
+            package.title = self.title
+
+        # Description
+        if self.description:
+            package.description = self.description
+
+        # Version
+        if self.version:
+            package.version = self.version
+
+        # Homepage
+        if self.homepage:
+            package.homepage = self.homepage
+
+        # Created
+        if self.issued:
+            if "T" in self.issued:
+                package.created = self.issued
+
+        # Keywords
+        for keyword in self.keywords:
+            package.keywords.append(keyword)
+
+        # Resources
+        for distribution in self.distributions:
+            resource = distribution.to_dp()
+            if resource:
+                package.resources.append(resource)
+
+        return package
+
+    @classmethod
+    def from_dp(cls, package: Package):
+        dcat = DcatPackage()
+
+        # Identifier
+        if package.id:
+            dcat.identifier = package.id
+
+        # Title
+        if package.title:
+            dcat.title = package.title
+
+        # Description
+        if package.description:
+            dcat.description = package.description
+
+        # Version
+        if package.version:
+            dcat.version = package.version
+
+        # Homepage
+        if package.homepage:
+            dcat.homepage = package.homepage
+
+        # Issued
+        if package.created:
+            dcat.issued = package.created
+
+        # Keywords
+        for keyword in package.keywords:
+            dcat.keywords.append(keyword)
+
+        # Resources
+        for resource in package.resources:
+            distribution = DcatResource.from_dp(resource)
+            if distribution:
+                dcat.distributions.append(distribution)
+
+        return dcat
diff --git a/dplib/plugins/dcat/models/resource.py b/dplib/plugins/dcat/models/resource.py
index e69de29..16c0937 100644
--- a/dplib/plugins/dcat/models/resource.py
+++ b/dplib/plugins/dcat/models/resource.py
@@ -0,0 +1,208 @@
+from __future__ import annotations
+
+from typing import List, Optional
+
+from rdflib import BNode, Graph
+
+from dplib.helpers.resource import slugify_name
+from dplib.model import Model
+from dplib.models import License, Resource
+
+from . import dumpers, loaders
+from . import namespaces as ns
+from .types import ISubject
+
+
+class DcatResource(Model):
+    """DCAT distribution mapped to/from an RDF graph and a Data Package resource."""
+    access_url: Optional[str] = None
+    byte_size: Optional[int] = None
+    conforms_to: List[str] = []
+    description: Optional[str] = None
+    download_url: Optional[str] = None
+    issued: Optional[str] = None
+    languages: List[str] = []
+    license: Optional[str] = None
+    media_type: Optional[str] = None
+    modified: Optional[str] = None
+    pages: List[str] = []
+    title: Optional[str] = None
+
+    # Converters
+
+    def to_graph(self, g: Graph, *, id: BNode):
+        """Dump every populated property to ``g`` under subject ``id``; return ``g``."""
+        # Access URL
+        if self.access_url:
+            dumpers.node(g, self.access_url, subject=id, predicate=ns.ACCESS_URL)
+
+        # Byte size (a value of 0 is falsy and therefore not written)
+        if self.byte_size:
+            dumpers.node(g, self.byte_size, subject=id, predicate=ns.BYTE_SIZE)
+
+        # Conforms to (list items are written without a truthiness check)
+        for conforms_to in self.conforms_to:
+            dumpers.node(g, conforms_to, subject=id, predicate=ns.COMFORMS_TO)
+
+        # Description
+        if self.description:
+            dumpers.node(g, self.description, subject=id, predicate=ns.DESCRIPTION)
+
+        # Download URL
+        if self.download_url:
+            dumpers.node(g, self.download_url, subject=id, predicate=ns.DOWNLOAD_URL)
+
+        # Issued
+        if self.issued:
+            dumpers.node(g, self.issued, subject=id, predicate=ns.ISSUED)
+
+        # Languages
+        for language in self.languages:
+            dumpers.node(g, language, subject=id, predicate=ns.LANGUAGE)
+
+        # License
+        if self.license:
+            dumpers.node(g, self.license, subject=id, predicate=ns.LICENSE)
+
+        # Media type
+        if self.media_type:
+            dumpers.node(g, self.media_type, subject=id, predicate=ns.MEDIA_TYPE)
+
+        # Modified
+        if self.modified:
+            dumpers.node(g, self.modified, subject=id, predicate=ns.MODIFIED)
+
+        # Pages
+        for page in self.pages:
+            dumpers.node(g, page, subject=id, predicate=ns.PAGE)
+
+        # Title
+        if self.title:
+            dumpers.node(g, self.title, subject=id, predicate=ns.TITLE)
+        return g
+
+    @classmethod
+    def from_graph(cls, g: Graph, *, id: ISubject) -> DcatResource:
+        """Load the properties of subject ``id`` from ``g`` into a new instance."""
+        resource = cls()  # instantiate cls so subclasses get their own type
+
+        # Access URL
+        access_url = loaders.string(g, subject=id, predicate=ns.ACCESS_URL)
+        if access_url:
+            resource.access_url = access_url
+
+        # Byte size (a loaded value of 0 is falsy and leaves the field unset)
+        byte_size = loaders.integer(g, subject=id, predicate=ns.BYTE_SIZE)
+        if byte_size:
+            resource.byte_size = byte_size
+
+        # Conforms to
+        conforms_to = loaders.strings(g, subject=id, predicate=ns.COMFORMS_TO)
+        if conforms_to:
+            resource.conforms_to = conforms_to
+
+        # Description
+        description = loaders.string(g, subject=id, predicate=ns.DESCRIPTION)
+        if description:
+            resource.description = description
+
+        # Download URL
+        download_url = loaders.string(g, subject=id, predicate=ns.DOWNLOAD_URL)
+        if download_url:
+            resource.download_url = download_url
+
+        # Issued
+        issued = loaders.string(g, subject=id, predicate=ns.ISSUED)
+        if issued:
+            resource.issued = issued
+
+        # Languages
+        languages = loaders.strings(g, subject=id, predicate=ns.LANGUAGE)
+        if languages:
+            resource.languages = languages
+
+        # License
+        license = loaders.string(g, subject=id, predicate=ns.LICENSE)
+        if license:
+            resource.license = license
+
+        # Media type
+        media_type = loaders.string(g, subject=id, predicate=ns.MEDIA_TYPE)
+        if media_type:
+            resource.media_type = media_type
+
+        # Modified
+        modified = loaders.string(g, subject=id, predicate=ns.MODIFIED)
+        if modified:
+            resource.modified = modified
+
+        # Pages
+        pages = loaders.strings(g, subject=id, predicate=ns.PAGE)
+        if pages:
+            resource.pages = pages
+
+        # Title
+        title = loaders.string(g, subject=id, predicate=ns.TITLE)
+        if title:
+            resource.title = title
+        return resource
+
+    def to_dp(self) -> Optional[Resource]:
+        """Convert to a Data Package resource; None if there is no download URL."""
+        if not self.download_url:
+            return None
+        resource = Resource(path=self.download_url, name=slugify_name(self.download_url))
+
+        # Title
+        if self.title:
+            resource.title = self.title
+
+        # Description
+        if self.description:
+            resource.description = self.description
+
+        # Media type
+        if self.media_type:
+            resource.mediatype = self.media_type
+
+        # Bytes (a byte size of 0 is falsy and therefore not copied)
+        if self.byte_size:
+            resource.bytes = self.byte_size
+
+        # Licenses (the license string becomes the path of a single License)
+        if self.license:
+            resource.licenses.append(License(path=self.license))
+        return resource
+
+    @classmethod
+    def from_dp(cls, resource: Resource) -> DcatResource:
+        """Create an instance from a Data Package resource, copying truthy fields."""
+        dcat = cls()  # instantiate cls so subclasses get their own type
+
+        # Download URL
+        # TODO: improve logic -- use basepath and allow only urls
+        if resource.path:
+            dcat.download_url = resource.path
+
+        # Title
+        if resource.title:
+            dcat.title = resource.title
+
+        # Description
+        if resource.description:
+            dcat.description = resource.description
+
+        # Media type
+        if resource.mediatype:
+            dcat.media_type = resource.mediatype
+
+        # Bytes (a value of 0 is falsy and therefore not copied)
+        if resource.bytes:
+            dcat.byte_size = resource.bytes
+
+        # Licenses (only the first license is mapped; any others are ignored)
+        if resource.licenses:
+            first_license = resource.licenses[0]
+            if first_license.path:
+                dcat.license = first_license.path
+        return dcat
diff --git a/dplib/plugins/dcat/models/types.py b/dplib/plugins/dcat/models/types.py
new file mode 100644
index 0000000..df27f04
--- /dev/null
+++ b/dplib/plugins/dcat/models/types.py
@@ -0,0 +1,6 @@
+from typing import Union
+
+from rdflib import BNode, Literal, URIRef
+
+ISubject = Union[URIRef, BNode]  # node types accepted as the subject of a triple
+IStringNode = Union[URIRef, Literal]  # NOTE(review): presumably string-like nodes; confirm
diff --git a/dplib/plugins/github/models/package.py b/dplib/plugins/github/models/package.py
index cd19b34..eeccb86 100644
--- a/dplib/plugins/github/models/package.py
+++ b/dplib/plugins/github/models/package.py
@@ -29,7 +29,7 @@ class GithubPackage(Model):
     updated_at: Optional[str] = None
     topics: List[str] = []
 
-    # Mappers
+    # Converters
 
     def to_dp(self):
         package = Package()
diff --git a/dplib/plugins/github/models/resource.py b/dplib/plugins/github/models/resource.py
index 8254043..055ccc2 100644
--- a/dplib/plugins/github/models/resource.py
+++ b/dplib/plugins/github/models/resource.py
@@ -2,7 +2,7 @@
 
 from typing import Literal, Optional
 
-from dplib.helpers.resource import path_to_name
+from dplib.helpers.resource import slugify_name
 from dplib.model import Model
 from dplib.models import Resource
 
@@ -18,10 +18,10 @@ class GithubResource(Model):
     html_url: Optional[str] = None
     download_url: Optional[str] = None
 
-    # Mappers
+    # Converters
 
     def to_dp(self):
-        resource = Resource(path=self.path, name=path_to_name(self.path))
+        resource = Resource(path=self.path, name=slugify_name(self.path))
 
         # Bytes
         if self.size:
diff --git a/dplib/plugins/pandas/models/field.py b/dplib/plugins/pandas/models/field.py
index c96e036..96d137d 100644
--- a/dplib/plugins/pandas/models/field.py
+++ b/dplib/plugins/pandas/models/field.py
@@ -17,7 +17,7 @@ class PandasField(Model, arbitrary_types_allowed=True):
     dtype: Any
     dvalue: Optional[Any] = None
 
-    # Mappers
+    # Converters
 
     def to_dp(self) -> Field:
         field = Field(name=self.name)
diff --git a/dplib/plugins/pandas/models/schema.py b/dplib/plugins/pandas/models/schema.py
index 8b87b9b..2a7c4b5 100644
--- a/dplib/plugins/pandas/models/schema.py
+++ b/dplib/plugins/pandas/models/schema.py
@@ -13,7 +13,7 @@
 class PandasSchema(Model, arbitrary_types_allowed=True):
     df: pd.DataFrame
 
-    # Mappers
+    # Converters
 
     def to_dp(self) -> Schema:
         schema = Schema()
diff --git a/dplib/plugins/polars/models/field.py b/dplib/plugins/polars/models/field.py
index 8d42513..f2c3dbc 100644
--- a/dplib/plugins/polars/models/field.py
+++ b/dplib/plugins/polars/models/field.py
@@ -13,7 +13,7 @@ class PolarsField(Model, arbitrary_types_allowed=True):
     dtype: Any
     #  dtype: pl.PolarsDataType
 
-    # Mappers
+    # Converters
 
     def to_dp(self) -> Field:
         field = Field(name=self.name)
diff --git a/dplib/plugins/polars/models/schema.py b/dplib/plugins/polars/models/schema.py
index 665c571..d10494d 100644
--- a/dplib/plugins/polars/models/schema.py
+++ b/dplib/plugins/polars/models/schema.py
@@ -13,7 +13,7 @@
 class PolarsSchema(Model, arbitrary_types_allowed=True):
     df: pl.DataFrame
 
-    # Mappers
+    # Converters
 
     def to_dp(self) -> Schema:
         schema = Schema()
diff --git a/dplib/plugins/sql/models/field.py b/dplib/plugins/sql/models/field.py
index 1f5f39d..1e18fd0 100644
--- a/dplib/plugins/sql/models/field.py
+++ b/dplib/plugins/sql/models/field.py
@@ -19,7 +19,7 @@
 class SqlField(Model, arbitrary_types_allowed=True):
     column: Column[Any]
 
-    # Mappers
+    # Converters
 
     def to_dp(self) -> Field:
         field = Field(name=self.column.name)
diff --git a/dplib/plugins/sql/models/schema.py b/dplib/plugins/sql/models/schema.py
index 86b4fc2..5cca4c7 100644
--- a/dplib/plugins/sql/models/schema.py
+++ b/dplib/plugins/sql/models/schema.py
@@ -15,7 +15,7 @@
 class SqlSchema(Model, arbitrary_types_allowed=True):
     table: Table
 
-    # Mappers
+    # Converters
 
     def to_dp(self, *, with_metadata: bool = False) -> Schema:
         schema = Schema()
diff --git a/dplib/plugins/zenodo/models/package.py b/dplib/plugins/zenodo/models/package.py
index c37dad4..76b4e02 100644
--- a/dplib/plugins/zenodo/models/package.py
+++ b/dplib/plugins/zenodo/models/package.py
@@ -28,7 +28,7 @@ class ZenodoPackage(Model):
     updated: Optional[str] = None
     links: Dict[str, str] = {}
 
-    # Mappers
+    # Converters
 
     def to_dp(self):
         package = Package()
diff --git a/dplib/plugins/zenodo/models/resource.py b/dplib/plugins/zenodo/models/resource.py
index c5161ac..816dd44 100644
--- a/dplib/plugins/zenodo/models/resource.py
+++ b/dplib/plugins/zenodo/models/resource.py
@@ -2,7 +2,7 @@
 
 from typing import Optional
 
-from dplib.helpers.resource import path_to_name
+from dplib.helpers.resource import slugify_name
 from dplib.model import Model
 from dplib.models import Resource
 
@@ -15,10 +15,10 @@ class ZenodoResource(Model):
     mimetype: Optional[str] = None
     size: Optional[int] = None
 
-    # Mappers
+    # Converters
 
     def to_dp(self) -> Resource:
-        resource = Resource(path=self.key, name=path_to_name(self.key))
+        resource = Resource(path=self.key, name=slugify_name(self.key))
 
         # Format
         if self.ext:
diff --git a/dplib/types.py b/dplib/types.py
index ffffc11..530bf73 100644
--- a/dplib/types.py
+++ b/dplib/types.py
@@ -1,3 +1,3 @@
 from typing import Any, Dict
 
-IData = Dict[str, Any]
+IDict = Dict[str, Any]
diff --git a/pyproject.toml b/pyproject.toml
index b397335..03f2a4e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,12 +33,16 @@ classifiers = [
 dependencies = [
     "pydantic>=2.0",
     "python-slugify>=6.0",
+    "fsspec[http]>=2023.1.0",
+    "typing-extensions>=4.0",
 ]
 
 [project.optional-dependencies]
-sql = ["sqlalchemy>=1.4"]
+dcat = ["rdflib>=6.0"]
 pandas = ["pandas>=1.0", "pandas-stubs>=1.0", "numpy>=1.0", "isodate>=0.6"]
 polars = ["polars-lts-cpu>=0.10"]
+sql = ["sqlalchemy>=1.4"]
+yaml = ["pyyaml>=5.0"]
 dev = [
     "moto",
     "ruff",