Skip to content

Commit

Permalink
feature(serializer): add udata JSON abilities (#291)
Browse files Browse the repository at this point in the history
  • Loading branch information
Guts authored Jun 4, 2024
2 parents 9212c8d + 6f5d76a commit 3fef6ac
Show file tree
Hide file tree
Showing 5 changed files with 178 additions and 9 deletions.
9 changes: 8 additions & 1 deletion dicogis/cli/cmd_inventory.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,13 @@ def get_serializer_from_parameters(
localized_strings=localized_strings,
opt_size_prettify=opt_prettify_size,
)
elif output_format == "udata":
output_serializer = MetadatasetSerializerJson(
flavor="udata",
output_path=output_path,
localized_strings=localized_strings,
opt_size_prettify=opt_prettify_size,
)
else:
logger.error(
NotImplementedError(
Expand Down Expand Up @@ -124,7 +131,7 @@ def determine_output_path(
final_output_path = Path(
f"DicoGIS_PostGIS_{pg_srv_names}_{date.today()}.xlsx"
)
elif output_format == "json":
elif output_format in ("json", "udata"):
if isinstance(input_folder, Path):
final_output_path = Path(f"DicoGIS_{input_folder.name}_{date.today()}")
elif isinstance(pg_services, list):
Expand Down
8 changes: 8 additions & 0 deletions dicogis/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,19 @@ class AvailableLocales(str, ExtendedEnum):
spanish = "ES"


class JsonFlavors(str, ExtendedEnum):
    """JSON flavors.

    Names the layouts a JSON output can take. Members mix in ``str`` (see the
    base classes), and each value is the same text as its member name.
    """

    dicogis = "dicogis"
    udata = "udata"


class OutputFormats(str, ExtendedEnum):
    """Supported output formats.

    Members mix in ``str`` (see the base classes), and each value is the same
    text as its member name.
    """

    excel = "excel"
    json = "json"
    udata = "udata"


class FormatsVector(ExtendedEnum):
Expand Down
49 changes: 43 additions & 6 deletions dicogis/export/to_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@
import logging
from dataclasses import asdict
from pathlib import Path
from typing import Literal

# project
from dicogis.__about__ import __version__
from dicogis.constants import JsonFlavors
from dicogis.export.base_serializer import MetadatasetSerializerBase
from dicogis.models.metadataset import MetaDataset
from dicogis.utils.slugger import sluggy
Expand All @@ -36,12 +39,16 @@ class MetadatasetSerializerJson(MetadatasetSerializerBase):

def __init__(
self,
# custom
flavor: Literal[JsonFlavors.dicogis, JsonFlavors.udata] = "dicogis",
# inherited
localized_strings: dict | None = None,
output_path: Path | None = None,
opt_size_prettify: bool = True,
) -> None:
"""Store metadata into JSON files."""
output_path.mkdir(parents=True, exist_ok=True)
self.flavor = flavor

super().__init__(
localized_strings=localized_strings,
Expand All @@ -62,6 +69,27 @@ def json_encoder_for_unsupported_types(self, obj_to_encode: object) -> str:
if isinstance(obj_to_encode, Path):
return str(obj_to_encode)

def as_udata(self, metadataset: MetaDataset) -> dict:
    """Serialize a metadataset into a dict shaped like a udata dataset.

    Args:
        metadataset: input metadataset to serialize

    Returns:
        dict with the keys ``title``, ``slug``, ``description``, ``extras``
        and ``tags``. ``tags`` only holds the format names that are set.
    """
    # The GDAL format names may be unset (the markdown description checks for
    # that as well): drop empty ones instead of emitting null tags.
    candidate_tags = (
        metadataset.format_gdal_long_name,
        metadataset.format_gdal_short_name,
    )

    return {
        "title": metadataset.name,
        "slug": sluggy(text_to_slugify=metadataset.slug),
        "description": metadataset.as_markdown_description,
        "extras": {"dicogis_version": __version__},
        "tags": [tag for tag in candidate_tags if tag],
    }

def serialize_metadaset(self, metadataset: MetaDataset) -> Path:
"""Serialize input metadataset as JSON file stored in output_path.
Expand All @@ -74,13 +102,22 @@ def serialize_metadaset(self, metadataset: MetaDataset) -> Path:
output_json_filepath = self.output_path.joinpath(
f"{sluggy(metadataset.name)}.json"
)

with output_json_filepath.open(mode="w", encoding="UTF-8") as out_json:
json.dump(
asdict(metadataset),
out_json,
default=self.json_encoder_for_unsupported_types,
sort_keys=True,
)
if self.flavor == "dicogis":
json.dump(
asdict(metadataset),
out_json,
default=self.json_encoder_for_unsupported_types,
sort_keys=True,
)
elif self.flavor == "udata":
json.dump(
self.as_udata(metadataset),
out_json,
default=self.json_encoder_for_unsupported_types,
sort_keys=True,
)

logger.debug(
f"Metadataset {metadataset.path} serialized into JSON file "
Expand Down
1 change: 1 addition & 0 deletions dicogis/export/to_xlsx.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ class MetadatasetSerializerXlsx(MetadatasetSerializerBase):

def __init__(
self,
# inherited
localized_strings: dict | None = None,
output_path: Path | None = None,
opt_raw_path: bool = False,
Expand Down
120 changes: 118 additions & 2 deletions dicogis/models/metadataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
# package
from dicogis.models.database_connection import DatabaseConnection
from dicogis.models.feature_attributes import AttributeField
from dicogis.utils.formatters import convert_octets
from dicogis.utils.slugger import sluggy

# ############################################################################
# ######### Classes #############
Expand Down Expand Up @@ -56,14 +58,62 @@ class MetaDataset:
crs_registry_code: str | None = None
crs_type: str | None = None

geometry_type: str | None = None
# properties
is_3d: bool = False
# processing
processing_succeeded: bool | None = None
processing_error_msg: str | None = ""
processing_error_type: str | None = ""

@property
def as_markdown_description(self) -> str:
    """Concatenate some information as a Markdown description.

    The description starts with a bullet list (dataset type, then format,
    geometry type, features count, spatial reference system, size and related
    files when they are set). Vector datasets with feature attributes then get
    a feature attributes table; raster datasets get an image metadata table.

    Returns:
        markdown description of the metadataset
    """
    is_vector = isinstance(self, MetaVectorDataset)
    bullets = [f"- Dataset type: {self.dataset_type}"]

    if self.format_gdal_long_name:
        bullets.append(f"- Format: {self.format_gdal_long_name}")

    if is_vector and self.geometry_type:
        bullets.append(f"- Geometry type: {self.geometry_type}")

    if is_vector and self.features_objects_count:
        bullets.append(f"- Features objects count: {self.features_objects_count}")

    # Only keep the CRS parts which are actually set, so that the line never
    # shows a literal "None" and every part is separated by a space.
    srs_parts = [str(part) for part in (self.crs_name, self.crs_type) if part]
    if self.crs_registry_code:
        # registry reference looks like "<registry>:<code>"; the registry is
        # left out when unknown
        srs_parts.append(
            ":".join(
                str(part)
                for part in (self.crs_registry, self.crs_registry_code)
                if part
            )
        )
    if srs_parts:
        bullets.append(f"- Spatial Reference System: {' '.join(srs_parts)}")

    if self.storage_size:
        bullets.append(f"- Size: {convert_octets(self.storage_size)}")

    if self.files_dependencies:
        bullets.append(f"- Related files: {', '.join(self.files_dependencies)}")

    description = "\n".join(bullets) + "\n"

    # type-specific section
    if is_vector and self.feature_attributes:
        description += f"\n\n## {self.count_feature_attributes} Feature attributes\n"
        description += self.as_markdown_feature_attributes
    elif isinstance(self, MetaRasterDataset):
        description += "\n\n## Image metadata\n"
        description += self.as_markdown_image_metadata

    return description

@property
def path_as_str(self) -> str | None:
"""Helper to have an universal path resolver independent of source type.
Expand All @@ -77,7 +127,11 @@ def path_as_str(self) -> str | None:
self.database_connection, DatabaseConnection
):
if self.database_connection.service_name is not None:
out_connection_string = self.database_connection.pg_connection_uri
out_connection_string = (
self.database_connection.pg_connection_uri.split(
"&application_name="
)[0]
)
else:
out_connection_string = (
f"postgresql://{self.database_connection.user_name}"
Expand All @@ -89,13 +143,30 @@ def path_as_str(self) -> str | None:

return None

@property
def slug(self) -> str:
    """Build a slug from the dataset name and a type-dependent suffix.

    Database tables are suffixed with their schema and database names, vector
    and raster datasets with their parent folder name, other types with
    nothing.

    Returns:
        slugified metadataset name and other attributes
    """
    base_name = f"{self.name}"

    if isinstance(self, MetaDatabaseTable):
        suffix = f" {self.schema_name}.{self.database_connection.database_name} "
    elif isinstance(self, (MetaVectorDataset, MetaRasterDataset)):
        # NOTE(review): no separator between the name and the parent folder
        # name here, unlike the database branch - confirm this is intended.
        suffix = f"{self.parent_folder_name}"
    else:
        suffix = ""

    return sluggy(base_name + suffix)


@dataclass
class MetaVectorDataset(MetaDataset):
"""Vector dataset abstraction model."""

feature_attributes: list[AttributeField] | None = None
features_objects_count: int = 0
geometry_type: str | None = None

@property
def count_feature_attributes(self) -> int | None:
Expand All @@ -110,6 +181,28 @@ def count_feature_attributes(self) -> int | None:

return None

@property
def as_markdown_feature_attributes(self) -> str:
    """Render the feature attributes as a Markdown table.

    Returns:
        empty string when feature attributes are None, otherwise a table with
        a header, an alignment row and one row per attribute (name, type,
        length, precision)
    """
    attributes = self.feature_attributes
    if attributes is None:
        return ""

    table_head = (
        "| name | type | length | precision |\n"
        "| :---- | :-: | :----: | :-------: |\n"
    )
    table_rows = [
        f"| {attribute.name} | {attribute.data_type} | "
        f"{attribute.length} | {attribute.precision} |\n"
        for attribute in attributes
    ]

    return table_head + "".join(table_rows)


@dataclass
class MetaDatabaseFlat(MetaDataset):
Expand Down Expand Up @@ -177,3 +270,26 @@ class MetaRasterDataset(MetaDataset):
orientation: float | None = None
color_space: str | None = None
compression_rate: int | None = None

@property
def as_markdown_image_metadata(self) -> str:
    """Return raster metadata as a Markdown table.

    Every row is closed with a trailing pipe, like the header row, so the
    table does not rely on a renderer accepting open-ended rows.

    Returns:
        string containing markdown table with bands, columns and rows counts
        and the compression rate
    """
    rows = (
        ("Bands count", self.bands_count),
        ("Columns count", self.columns_count),
        ("Rows count", self.rows_count),
        ("Compression rate", self.compression_rate),
    )

    out_markdown = "| Key | value |\n"
    out_markdown += "| :---- | :-: |\n"
    out_markdown += "".join(f"| {label} | {value} |\n" for label, value in rows)

    return out_markdown


# ############################################################################
# #### Stand alone program ########
# #################################
if __name__ == "__main__":
"""standalone execution"""

0 comments on commit 3fef6ac

Please sign in to comment.