From d4dbc878a147186d718f6258242b0e69330ead9f Mon Sep 17 00:00:00 2001 From: Fabiana Zioti Date: Thu, 25 Nov 2021 16:11:47 -0300 Subject: [PATCH 1/7] adding validade classes --- sample_db_utils/core/driver.py | 70 +++++++++++----------------------- 1 file changed, 22 insertions(+), 48 deletions(-) diff --git a/sample_db_utils/core/driver.py b/sample_db_utils/core/driver.py index 85295b4..95b4e5c 100644 --- a/sample_db_utils/core/driver.py +++ b/sample_db_utils/core/driver.py @@ -28,6 +28,8 @@ from sample_db_utils.core.utils import (get_date_from_str, is_stream, reproject, unzip, validate_mappings) +from lccs_db.models import LucClass, LucClassificationSystem, db as _db + def get_date_from_str(date, date_ref=None): """Build date from str.""" @@ -66,6 +68,18 @@ def load(self, file): def load_classes(self, file): """Load sample classes in memory.""" + def validate_classes(self, unique_classes): + """Validate if classes exist in classification system.""" + classes = _db.session.query(LucClass.id).\ + join(LucClassificationSystem, LucClass.classification_system_id == LucClassificationSystem.id)\ + .filter(LucClassificationSystem.id == self.system.id).all() + + not_exist = list(set(unique_classes) - set(classes) & set(unique_classes)) + + if len(not_exist) > 0: + raise RuntimeError(f"The classes: {', '.join([str(elem) for elem in not_exist])} " + f"does not exist in the classification system!") + @abstractmethod def get_files(self): """Retrieve list of files to load.""" @@ -144,7 +158,6 @@ def build_data_set(self, csv): GeoDataFrame CSV with geospatial location """ - if 'longitude' in self.mappings and 'latitude' in self.mappings: geom_column = [ Point(xy) for xy in zip(csv[self.mappings['longitude']], csv[self.mappings['longitude']]) @@ -205,36 +218,19 @@ def load(self, file): else: csv = pd.read_csv(file) + self.load_classes(csv) + res = self.build_data_set(csv) self._data_sets.extend(res.T.to_dict().values()) def load_classes(self, file): """Load classes of a file.""" - self.storager.load() - unique_classes = self.get_unique_classes(file) - samples_to_save = [] - - stored_keys = self.storager.samples_map_id.keys() - - for class_name in unique_classes: - if class_name in stored_keys: - continue + self.validate_classes(unique_classes) - sample_class = { - "name": class_name, - "description": class_name, - "code": class_name, - "class_system_id": self.system.id, - } - - samples_to_save.append(sample_class) - - if samples_to_save: - self.storager.store_classes(samples_to_save) - self.storager.load() + return class Shapefile(Driver): @@ -350,6 +346,8 @@ def load(self, file): if dataSource is None: raise Exception("Could not open {}".format(file)) else: + self.load_classes(dataSource) + for layer_id in range(dataSource.GetLayerCount()): gdal_layer = dataSource.GetLayer(layer_id) @@ -372,33 +370,9 @@ def load_classes(self, file): """Load classes of a file.""" # Retrieves Layer Name from Data set filename layer_name = Path(file.GetName()).stem - # Load Storager classes in memory - self.storager.load() unique_classes = self.get_unique_classes(file, layer_name) - samples_to_save = [] - - for feature_id in range(unique_classes.GetFeatureCount()): - feature = unique_classes.GetFeature(feature_id) - class_name = feature.GetField(0) - - if class_name is None: - class_name = "None" - - # When class already registered, skips - if class_name.capitalize() in self.storager.samples_map_id.keys(): - continue - - sample_class = { - "name": class_name.capitalize(), - "description": class_name, - "code": class_name.upper(), - "class_system_id": self.system.id - } - - samples_to_save.append(sample_class) + self.validate_classes(unique_classes) - if samples_to_save: - self.storager.store_classes(samples_to_save) - self.storager.load() + return From 36d57fd3f41807f28734f0ae9cb9a4dbe42c8380 Mon Sep 17 00:00:00 2001 From: Fabiana Zioti Date: Thu, 25 Nov 2021 16:12:59 -0300 Subject: [PATCH 2/7] updating lccs-db version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8d21349..e0bf959 100644 --- a/setup.py +++ b/setup.py @@ -49,7 +49,7 @@ 'GeoAlchemy2>=0.6.2', 'shapely>=1.6', 'GDAL>=2.2', - 'lccs-db @ git+https://github.com/brazil-data-cube/lccs-db.git@v0.6.0', + 'lccs-db @ git+https://github.com/brazil-data-cube/lccs-db.git@master', ] packages = find_packages() From bc402ae19e28393220fe9ffd30c83f240a5074eb Mon Sep 17 00:00:00 2001 From: Fabiana Zioti Date: Thu, 25 Nov 2021 16:49:04 -0300 Subject: [PATCH 3/7] updating version --- sample_db_utils/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sample_db_utils/version.py b/sample_db_utils/version.py index 8b83893..07b7175 100644 --- a/sample_db_utils/version.py +++ b/sample_db_utils/version.py @@ -12,4 +12,4 @@ and parsed by ``setup.py``. """ -__version__ = '0.6.1' +__version__ = '0.8.0' From edae32111b5f37dbdc740c4a25d16ecc9c6c88a3 Mon Sep 17 00:00:00 2001 From: Fabiana Zioti Date: Mon, 29 Nov 2021 08:57:22 -0300 Subject: [PATCH 4/7] updating version --- sample_db_utils/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sample_db_utils/version.py b/sample_db_utils/version.py index 07b7175..a1aeb49 100644 --- a/sample_db_utils/version.py +++ b/sample_db_utils/version.py @@ -12,4 +12,4 @@ and parsed by ``setup.py``. """ -__version__ = '0.8.0' +__version__ = '0.9.0' From 057910ccaaf8da8b2f2ba375dbc4e538a880af3d Mon Sep 17 00:00:00 2001 From: Fabiana Zioti Date: Mon, 29 Nov 2021 14:22:19 -0300 Subject: [PATCH 5/7] change to class_id --- sample_db_utils/core/driver.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sample_db_utils/core/driver.py b/sample_db_utils/core/driver.py index 95b4e5c..6b440a0 100644 --- a/sample_db_utils/core/driver.py +++ b/sample_db_utils/core/driver.py @@ -115,7 +115,7 @@ class CSV(Driver): The config describes how to read the dataset in order to create a Brazil Data Cube sample. The `mappings` must include at least the required fields to fill - a sample, such latitude, longitude and class_name fields. + a sample, such latitude, longitude and class_id fields. """ def __init__(self, entries, mappings, storager=None, **kwargs): @@ -209,7 +209,7 @@ def build_data_set(self, csv): def get_unique_classes(self, csv): """Retrieve distinct sample classes from CSV datasource.""" - return csv[self.mappings['class_name']].unique() + return csv[self.mappings['class_id']].unique() def load(self, file): """Load file.""" @@ -247,7 +247,7 @@ def __init__(self, entries, mappings, storager=None, **kwargs): self.mappings = copy_mappings self.entries = entries self.temporary_folder = TemporaryDirectory() - self.class_name = None + self.class_id = None self.start_date = None self.end_date = None self.collection_date = None @@ -255,10 +255,10 @@ def __init__(self, entries, mappings, storager=None, **kwargs): def get_unique_classes(self, ogr_file, layer_name): """Retrieve distinct sample classes from shapefile datasource.""" - classes = self.mappings.get('class_name') + classes = self.mappings.get('class_id') if isinstance(classes, str): - classes = [self.mappings['class_name']] + classes = [self.mappings['class_id']] layer = ogr_file.GetLayer(layer_name) @@ -273,7 +273,7 @@ def get_unique_classes(self, ogr_file, layer_name): for possibly_class in classes: if possibly_class in fields: - self.class_name = possibly_class + self.class_id = possibly_class return ogr_file.ExecuteSQL( 'SELECT DISTINCT "{}" FROM {}'.format( From 77c56815986ed0ee8d5a73cd827102ad74c008f4 Mon Sep 17 00:00:00 2001 From: Fabiana Zioti Date: Thu, 9 Dec 2021 14:28:32 -0300 Subject: [PATCH 6/7] fixing get id for classification system --- sample_db_utils/core/driver.py | 39 +++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/sample_db_utils/core/driver.py b/sample_db_utils/core/driver.py index 6b440a0..e504ab9 100644 --- a/sample_db_utils/core/driver.py +++ b/sample_db_utils/core/driver.py @@ -70,11 +70,20 @@ def load_classes(self, file): def validate_classes(self, unique_classes): """Validate if classes exist in classification system.""" - classes = _db.session.query(LucClass.id).\ - join(LucClassificationSystem, LucClass.classification_system_id == LucClassificationSystem.id)\ - .filter(LucClassificationSystem.id == self.system.id).all() + if self.system: + system_id = self.system.id + elif self.storager.classification_system_id is not None: + system_id = self.storager.classification_system_id + else: + raise RuntimeError("Missing Classification System ") + + classes = _db.session.query(LucClass.id). \ + join(LucClassificationSystem, LucClass.classification_system_id == LucClassificationSystem.id) \ + .filter(LucClassificationSystem.id == system_id).all() - not_exist = list(set(unique_classes) - set(classes) & set(unique_classes)) + classes_lists = [x[0] for x in classes] + + not_exist = list(set(unique_classes) - set(classes_lists) & set(unique_classes)) if len(not_exist) > 0: raise RuntimeError(f"The classes: {', '.join([str(elem) for elem in not_exist])} " @@ -258,27 +267,23 @@ def get_unique_classes(self, ogr_file, layer_name): classes = self.mappings.get('class_id') if isinstance(classes, str): - classes = [self.mappings['class_id']] + classes = self.mappings['class_id'] + + else: + return classes['value'] layer = ogr_file.GetLayer(layer_name) if layer.GetFeatureCount() == 0: return [] - f = layer.GetFeature(0) - - fields = [ - f.GetFieldDefnRef(i).GetName() for i in range(f.GetFieldCount()) - ] + unique_c = ogr_file.ExecuteSQL(f'SELECT DISTINCT {classes} FROM {layer_name}') - for possibly_class in classes: - if possibly_class in fields: - self.class_id = possibly_class + result = [] + for i, feature in enumerate(unique_c): + result.append(feature.GetField(0)) - return ogr_file.ExecuteSQL( - 'SELECT DISTINCT "{}" FROM {}'.format( - possibly_class, layer_name)) - return [] + return result def get_files(self): """Get files.""" From ae22a719bd38d45192b497b74b10629dcab99a04 Mon Sep 17 00:00:00 2001 From: Fabiana Zioti Date: Thu, 9 Dec 2021 14:34:03 -0300 Subject: [PATCH 7/7] fixing import order --- sample_db_utils/core/driver.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sample_db_utils/core/driver.py b/sample_db_utils/core/driver.py index e504ab9..5848126 100644 --- a/sample_db_utils/core/driver.py +++ b/sample_db_utils/core/driver.py @@ -19,17 +19,17 @@ import pandas as pd from geoalchemy2 import shape from geopandas import GeoDataFrame, GeoSeries +from lccs_db.models import LucClass, LucClassificationSystem +from lccs_db.models import db as _db from osgeo import ogr, osr -from shapely.geometry import Point from shapely import wkt +from shapely.geometry import Point from shapely.wkt import loads as geom_from_wkt from werkzeug.datastructures import FileStorage from sample_db_utils.core.utils import (get_date_from_str, is_stream, reproject, unzip, validate_mappings) -from lccs_db.models import LucClass, LucClassificationSystem, db as _db - def get_date_from_str(date, date_ref=None): """Build date from str."""