Skip to content

Commit

Permalink
Merge pull request #10 from mjanez/develop
Browse files Browse the repository at this point in the history
Improvements: datadictionaries & new harvesters
  • Loading branch information
mjanez authored Sep 7, 2023
2 parents b65b3e5 + d72bfc2 commit cc8bd05
Show file tree
Hide file tree
Showing 23 changed files with 3,017 additions and 1,018 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ log/*
ckan-ogc/conf/*
!ckan-ogc/conf/config.yaml.template
.pdm-python
.env.*
2 changes: 1 addition & 1 deletion Dockerfile.dev
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ ENV TIMEOUT=300
ENV SSL_UNVERIFIED_MODE=False

RUN apt-get -q -y update && \
apt-get install -y wget procps && \
apt-get install -y wget procps git && \
DEBIAN_FRONTEND=noninteractive apt-get -yq install gettext-base && \
wget -O /wait-for https://raw.githubusercontent.com/eficode/wait-for/v2.2.3/wait-for && \
chmod +x /wait-for && \
Expand Down
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ services:
build:
context: .
dockerfile: Dockerfile
image: ghcr.io/mjanez/ckan-ogc:latest
#image: ghcr.io/mjanez/ckan-ogc:latest
env_file:
- .env
logging:
Expand Down
14 changes: 12 additions & 2 deletions ogc2ckan/ckan_datasets/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@

class Distribution:
# Dataset Distribution Fields: https://github.com/project-open-data/project-open-data.github.io/blob/master/v1.1/metadata-resources.md#dataset-distribution-fields
def __init__(self, url, name, format, created=None, issued=None, modified=None, media_type=None, license=None, license_id=None, rights=None, description=None, language=None, conformance=None, reference_system=None, encoding='UTF-8'):
def __init__(self, url, name, format, id=None, created=None, issued=None, modified=None, media_type=None, license=None, license_id=None, rights=None, description=None, language=None, conformance=None, reference_system=None, encoding='UTF-8'):
self.url = url
self.id = id
self.name = name
self.format = format
self.media_type = media_type
Expand All @@ -23,10 +24,14 @@ def __init__(self, url, name, format, created=None, issued=None, modified=None,
self.modified = modified
self.conformance = conformance
self.encoding = encoding
self.reference_system = reference_system

def set_url(self, url):
self.url = url

def set_id(self, id):
self.id = id

def set_name(self, name):
self.name = name

Expand Down Expand Up @@ -63,12 +68,16 @@ def set_modified(self, modified):
def set_conformance(self, conformance):
self.conformance = conformance

def set_reference_system(self, reference_system):
self.reference_system = reference_system

def set_encoding(self, encoding):
self.encoding = encoding

def to_dict(self):
return {'url': self.url,
'name': self.name,
'id': self.id,
'format': self.format,
'mimetype': self.media_type,
'license': self.license,
Expand All @@ -80,7 +89,8 @@ def to_dict(self):
'issued': self.issued,
'modified': self.modified,
'conforms_to': self.conformance,
'encoding': self.encoding,
'encoding': self.encoding,
'reference_system': self.reference_system,
}

class Dataset:
Expand Down
9 changes: 7 additions & 2 deletions ogc2ckan/ckan_datasets/ckan_datasets.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
from ckan_datasets.base import Dataset as BaseDataset, Distribution as BaseDistribution
from ckan_datasets.geodcatap import Dataset as GeoDataset, Distribution as GeoDistribution
from ckan_datasets.resources.datadictionary import DataDictionary as BaseDataDictionary, DataDictionaryField as BaseDataDictionaryField

# CKAN Schemas available
CKAN_DATASET_SCHEMAS = {
"geodcatap": {
"dataset": GeoDataset,
"distribution": GeoDistribution
"distribution": GeoDistribution,
"datadictionary": BaseDataDictionary,
"datadictionaryfield": BaseDataDictionaryField
},
"default": {
"dataset": BaseDataset,
"distribution": BaseDistribution
"distribution": BaseDistribution,
"datadictionary": BaseDataDictionary,
"datadictionaryfield": BaseDataDictionaryField
}
}

17 changes: 16 additions & 1 deletion ogc2ckan/ckan_datasets/geodcatap.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@

class Distribution:
# Dataset Distribution Fields: https://github.com/project-open-data/project-open-data.github.io/blob/master/v1.1/metadata-resources.md#dataset-distribution-fields
def __init__(self, url, name, format, created=None, issued=None, modified=None, media_type=None, license=None, license_id=None, rights=None, description=None, language=None, conformance=None, reference_system=None, encoding='UTF-8'):
def __init__(self, url, name, format, id=None, created=None, issued=None, modified=None, media_type=None, license=None, license_id=None, rights=None, description=None, language=None, conformance=None, reference_system=None, encoding='UTF-8'):
self.url = url
self.id = id
self.name = name
self.format = format
self.media_type = media_type
Expand All @@ -27,6 +28,9 @@ def __init__(self, url, name, format, created=None, issued=None, modified=None,
def set_url(self, url):
self.url = url

def set_id(self, id):
self.id = id

def set_name(self, name):
self.name = name

Expand Down Expand Up @@ -72,6 +76,7 @@ def set_encoding(self, encoding):
def to_dict(self):
return {'url': self.url,
'name': self.name,
'id': self.id,
'format': self.format,
'mimetype': self.media_type,
'license': self.license,
Expand Down Expand Up @@ -118,6 +123,7 @@ def __init__(self, ckan_id, name, owner_org, license_id):
self.language = None
self.theme = None
self.theme_es = None
self.theme_eu = None
self.topic = "http://inspire.ec.europa.eu/metadata-codelist/TopicCategory/biota"
self.keywords = []
self.keywords_uri = []
Expand All @@ -132,6 +138,7 @@ def __init__(self, ckan_id, name, owner_org, license_id):
self.modified = None
self.valid = None
self.provenance = None
self.purpose = None
self.lineage_source = []
self.lineage_process_steps = []
self.source = None
Expand Down Expand Up @@ -227,6 +234,9 @@ def set_theme(self, theme):

def set_theme_es(self, theme_es):
self.theme_es = theme_es

def set_theme_eu(self, theme_eu):
self.theme_eu = theme_eu

def set_topic(self, topic):
self.topic = topic
Expand Down Expand Up @@ -270,6 +280,9 @@ def set_modified(self, modified):
def set_provenance(self, provenance):
self.provenance = provenance

def set_purpose(self, purpose):
self.purpose = purpose

def set_lineage_source(self, lineage_source):
self.lineage_source = lineage_source

Expand Down Expand Up @@ -392,9 +405,11 @@ def dataset_dict(self):
'spatial_resolution_in_meters': self.spatial_resolution_in_meters,
'language': self.language,
'theme_es': self.theme_es,
'theme_eu': self.theme_eu,
'theme': self.theme,
'identifier': self.identifier,
'provenance': self.provenance,
'purpose': self.purpose,
'lineage_source': self.lineage_source,
'lineage_process_steps': self.lineage_process_steps,
'source': self.source,
Expand Down
140 changes: 140 additions & 0 deletions ogc2ckan/ckan_datasets/resources/datadictionary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
# Metadata of a Dataset and Distributions. CKAN Data Dictionary Fields https://docs.ckan.org/en/2.9/maintaining/datastore.html#fields

## Import libraries
import urllib.parse
import json

class DataDictionaryField:
    """
    One column entry of a CKAN Data Dictionary.

    See https://docs.ckan.org/en/2.9/maintaining/datastore.html#fields for more information.

    Attributes:
        id (str): The ID (fieldname) of the data dictionary field.
        type (str): The type of the data dictionary field.
        label (str): A human-friendly label for this column.
        notes (str): A full description for this column in markdown format.
        type_override (str): The type to be used the next time DataPusher/xloader
            is run to load data into this column.
    """

    def __init__(self, id, type='text', label='', notes='', type_override=''):
        # Core column identity.
        self.id, self.type = id, type
        # Descriptive values; to_dict() nests these under the 'info' key.
        self.label, self.notes, self.type_override = label, notes, type_override

    def set_id(self, id):
        """Replace the field ID (column name)."""
        self.id = id

    def set_type(self, type):
        """Replace the field type."""
        self.type = type

    def set_label(self, label):
        """Replace the human-friendly label."""
        self.label = label

    def set_notes(self, notes):
        """Replace the column description."""
        self.notes = notes

    def set_type_override(self, type_override):
        """Replace the type override value."""
        self.type_override = type_override

    def to_dict(self):
        """Return this field wrapped in a one-element ``{'fields': [...]}`` dict."""
        info = dict(
            label=self.label,
            notes=self.notes,
            type_override=self.type_override,
        )
        entry = dict(id=self.id, type=self.type, info=info)
        return {'fields': [entry]}

class DataDictionary:
    """
    CKAN Data Dictionary for one resource (distribution).

    See https://docs.ckan.org/en/2.9/maintaining/datastore.html#fields for more information.

    Attributes:
        resource_id (str): The ID of the resource (distribution) to which the
            data dictionary fields belong.
        datadictionary_fields (list): Field objects exposing ``id``, ``type``,
            ``label``, ``notes`` and ``type_override`` attributes.
    """

    def __init__(self, resource_id=None):
        """
        Initialize the object with default values.

        Args:
            resource_id: ID of the target resource; may be set later.
        """
        self.resource_id = resource_id
        self.datadictionary_fields = []

    def set_datadictionary_resource_id(self, resource_id):
        """Replace the ID of the resource this data dictionary belongs to."""
        self.resource_id = resource_id

    def set_datadictionary_fields(self, datadictionary_fields):
        """
        Replace the whole list of fields.

        Args:
            datadictionary_fields: Iterable of field objects, or None to clear.
        """
        # Store a copy so add_datadictionary_field() never mutates the caller's
        # list, and map None to an empty list so dataset_dict() keeps working.
        if datadictionary_fields is None:
            self.datadictionary_fields = []
        else:
            self.datadictionary_fields = list(datadictionary_fields)

    def add_datadictionary_field(self, datadictionary_field):
        """Append one field object to the data dictionary."""
        self.datadictionary_fields.append(datadictionary_field)

    def dataset_dict(self):
        '''
        Return a dictionary representation of the data dictionary.

        The original author's note points at the CKAN API
        'resource_dictionary_create' and
        https://docs.ckan.org/en/2.9/maintaining/datastore.html#fields

        Example:
            {
                "resource_id": "7a50a2c8-7af5-46bc-b87d-272978c58a78 - REQUIRED",
                "fields": [{
                    "id": "name - REQUIRED",
                    "type": "text - REQUIRED",
                    "info": {
                        "label": "",
                        "notes": "",
                        "type_override": ""
                    }
                }, {
                    "id": "time",
                    "type": "time",
                    "info": {
                        "label": "Time Label",
                        "notes": "This is the time field",
                        "type_override": "timestamp"
                    }
                }]
            }

        Returns:
            dict: ``resource_id`` plus one ``fields`` entry per stored field,
            in insertion order.
        '''
        return {
            "resource_id": self.resource_id,
            "fields": [
                {
                    "id": field.id,
                    "type": field.type,
                    "info": {
                        "label": field.label,
                        "notes": field.notes,
                        "type_override": field.type_override,
                    },
                }
                for field in self.datadictionary_fields
            ],
        }

    def generate_data(self):
        """
        Generate data for posting to CKAN.

        Returns:
            bytes: The JSON dump of ``dataset_dict()``, URL-quoted and then
            encoded as UTF-8.
        """
        # Use the json module to dump the dictionary to a string for posting.
        quoted_data = urllib.parse.quote(json.dumps(self.dataset_dict()))
        return quoted_data.encode('utf-8')
30 changes: 12 additions & 18 deletions ogc2ckan/config/ckan_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from bs4 import BeautifulSoup

# custom functions
from model.harvest_schema import validate_config_file
from config.ogc2ckan_config import get_log_module, load_yaml
from mappings.default_ogc2ckan_config import OGC2CKAN_CKANINFO_CONFIG, OGC2CKAN_DBDSN_CONFIG, OGC2CKAN_HARVESTER_CONFIG

Expand Down Expand Up @@ -93,23 +92,18 @@ def config_getParameters(config_file):
- ckan_info: Default CKAN configuration dictionary
- harvest_servers: Harvest servers information
'''
if not validate_config_file(config_file):
raise Exception(f"{log_module}:{config_file} does not comply with the schemas of: 'ogc2ckan/model/harvest_schema.py'")

else:
logging.info(f"{log_module}:The 'config_file': {config_file} comply with the schemas of: 'ogc2ckan/model/harvest_schema.py'")
with open(config_file, encoding='utf-8') as stream:
config = yaml.safe_load(stream)

ckan_info = CKANInfo()
db_dsn = DBDsn()
harvest_servers = [ObjectFromListDicts(**d) for d in config.get('harvest_servers')]

return (
ckan_info,
harvest_servers,
db_dsn,
)
with open(config_file, encoding='utf-8') as stream:
config = yaml.safe_load(stream)

ckan_info = CKANInfo()
db_dsn = DBDsn()
harvest_servers = [ObjectFromListDicts(**d) for d in config.get('harvest_servers')]

return (
ckan_info,
harvest_servers,
db_dsn,
)

def config_getConnection(host, port, username, password, dbname):
'''
Expand Down
Loading

0 comments on commit cc8bd05

Please sign in to comment.