Skip to content

Commit

Permalink
Merge pull request #32 from fabianazioti/lccs_08
Browse files Browse the repository at this point in the history
Preparing for lccs-db 0.8.0
  • Loading branch information
raphaelrpl authored Jan 6, 2022
2 parents 2ff1a9c + ae22a71 commit e46986a
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 68 deletions.
111 changes: 45 additions & 66 deletions sample_db_utils/core/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@
import pandas as pd
from geoalchemy2 import shape
from geopandas import GeoDataFrame, GeoSeries
from lccs_db.models import LucClass, LucClassificationSystem
from lccs_db.models import db as _db
from osgeo import ogr, osr
from shapely.geometry import Point
from shapely import wkt
from shapely.geometry import Point
from shapely.wkt import loads as geom_from_wkt
from werkzeug.datastructures import FileStorage

Expand Down Expand Up @@ -66,6 +68,27 @@ def load(self, file):
def load_classes(self, file):
    """Load sample classes in memory.

    :param file: datasource handle to read the classes from.

    Concrete drivers (e.g. CSV, Shapefile) override this to extract and
    validate the distinct classes of their datasource.
    """

def validate_classes(self, unique_classes):
    """Validate that the given classes exist in the classification system.

    :param unique_classes: iterable of class identifiers found in the
        datasource being loaded.
    :raises RuntimeError: when no classification system is configured, or
        when at least one class is not registered in the system.
    """
    # Resolve the target classification system id: an explicit system on
    # the driver takes precedence over the storager configuration.
    if self.system:
        system_id = self.system.id
    elif self.storager.classification_system_id is not None:
        system_id = self.storager.classification_system_id
    else:
        raise RuntimeError("Missing Classification System ")

    # Fetch the ids of all classes registered for that system.
    rows = _db.session.query(LucClass.id) \
        .join(LucClassificationSystem,
              LucClass.classification_system_id == LucClassificationSystem.id) \
        .filter(LucClassificationSystem.id == system_id) \
        .all()

    registered_ids = {row[0] for row in rows}

    # The original expression subtracted an intersection with the minuend,
    # which is equivalent to a plain set difference — simplified here.
    not_exist = set(unique_classes) - registered_ids

    if not_exist:
        raise RuntimeError(f"The classes: {', '.join(str(elem) for elem in not_exist)} "
                           f"does not exist in the classification system!")

@abstractmethod
def get_files(self):
    """Retrieve list of files to load.

    Concrete drivers return the datasource files to be processed.
    """
Expand Down Expand Up @@ -101,7 +124,7 @@ class CSV(Driver):
The config describes how to read the dataset in order to
create a Brazil Data Cube sample. The `mappings`
must include at least the required fields to fill
a sample, such latitude, longitude and class_name fields.
a sample, such as latitude, longitude and class_id fields.
"""

def __init__(self, entries, mappings, storager=None, **kwargs):
Expand Down Expand Up @@ -144,7 +167,6 @@ def build_data_set(self, csv):
GeoDataFrame CSV with geospatial location
"""

if 'longitude' in self.mappings and 'latitude' in self.mappings:
geom_column = [
Point(xy) for xy in zip(csv[self.mappings['longitude']], csv[self.mappings['longitude']])
Expand Down Expand Up @@ -196,7 +218,7 @@ def build_data_set(self, csv):

def get_unique_classes(self, csv):
    """Retrieve distinct sample classes from CSV datasource.

    :param csv: pandas DataFrame holding the sample records.
    :returns: array of distinct values of the mapped ``class_id`` column.
    """
    # The span contained both the old (class_name) and new (class_id)
    # return lines from the diff; only the current class_id lookup is kept.
    return csv[self.mappings['class_id']].unique()

def load(self, file):
"""Load file."""
Expand All @@ -205,36 +227,19 @@ def load(self, file):
else:
csv = pd.read_csv(file)

self.load_classes(csv)

res = self.build_data_set(csv)

self._data_sets.extend(res.T.to_dict().values())

def load_classes(self, file):
    """Load and validate the sample classes of a CSV datasource.

    :param file: pandas DataFrame holding the sample records.
    :raises RuntimeError: propagated from ``validate_classes`` when a class
        is missing from the classification system.
    """
    # Bring the storager class map up to date before validating.
    self.storager.load()

    unique_classes = self.get_unique_classes(file)

    # Fail fast when any class is not registered in the classification
    # system. (The pre-change code that auto-registered missing classes
    # was removed in this commit — classes must now pre-exist.)
    self.validate_classes(unique_classes)


class Shapefile(Driver):
Expand All @@ -251,38 +256,34 @@ def __init__(self, entries, mappings, storager=None, **kwargs):
self.mappings = copy_mappings
self.entries = entries
self.temporary_folder = TemporaryDirectory()
self.class_name = None
self.class_id = None
self.start_date = None
self.end_date = None
self.collection_date = None
self.crs = None

def get_unique_classes(self, ogr_file, layer_name):
    """Retrieve distinct sample classes from shapefile datasource.

    :param ogr_file: opened OGR data source.
    :param layer_name: name of the layer to query.
    :returns: list of distinct class values found in the mapped column,
        or the explicit ``value`` list when the mapping is a dict.
    """
    classes = self.mappings.get('class_id')

    if isinstance(classes, str):
        # The mapping is the attribute (column) name itself.
        column = classes
    else:
        # The mapping is a dict carrying an explicit list of class values.
        return classes['value']

    # Let OGR compute the distinct values of the class column.
    # NOTE(review): column/layer names are interpolated into SQL here;
    # they come from the local mappings/file, not untrusted input.
    distinct_layer = ogr_file.ExecuteSQL(f'SELECT DISTINCT {column} FROM {layer_name}')

    # The enumerate index in the original loop was unused — removed.
    return [feature.GetField(0) for feature in distinct_layer]

def get_files(self):
"""Get files."""
Expand Down Expand Up @@ -350,6 +351,8 @@ def load(self, file):
if dataSource is None:
raise Exception("Could not open {}".format(file))
else:
self.load_classes(dataSource)

for layer_id in range(dataSource.GetLayerCount()):
gdal_layer = dataSource.GetLayer(layer_id)

Expand All @@ -372,33 +375,9 @@ def load_classes(self, file):
"""Load classes of a file."""
# Retrieves Layer Name from Data set filename
layer_name = Path(file.GetName()).stem
# Load Storager classes in memory
self.storager.load()

unique_classes = self.get_unique_classes(file, layer_name)

samples_to_save = []

for feature_id in range(unique_classes.GetFeatureCount()):
feature = unique_classes.GetFeature(feature_id)
class_name = feature.GetField(0)

if class_name is None:
class_name = "None"

# When class already registered, skips
if class_name.capitalize() in self.storager.samples_map_id.keys():
continue

sample_class = {
"name": class_name.capitalize(),
"description": class_name,
"code": class_name.upper(),
"class_system_id": self.system.id
}

samples_to_save.append(sample_class)
self.validate_classes(unique_classes)

if samples_to_save:
self.storager.store_classes(samples_to_save)
self.storager.load()
return
2 changes: 1 addition & 1 deletion sample_db_utils/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@
and parsed by ``setup.py``.
"""

__version__ = '0.6.1'
__version__ = '0.9.0'
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
'GeoAlchemy2>=0.6.2',
'shapely>=1.6',
'GDAL>=2.2',
'lccs-db @ git+https://github.com/brazil-data-cube/lccs-db.git@v0.6.0',
'lccs-db @ git+https://github.com/brazil-data-cube/lccs-db.git@master',
]

packages = find_packages()
Expand Down

0 comments on commit e46986a

Please sign in to comment.