From 38fce6e48dd423a3a18100257961fef85f5fc25a Mon Sep 17 00:00:00 2001 From: Maarten Plieger Date: Mon, 23 Sep 2024 21:07:51 +0200 Subject: [PATCH] Rewrite of updatelayer metadata from two nested maps into two sets --- adagucserverEC/CDBFileScanner.cpp | 2 +- adagucserverEC/utils/ConfigurationUtils.cpp | 26 +++++++ adagucserverEC/utils/ConfigurationUtils.h | 3 + adagucserverEC/utils/UpdateLayerMetadata.cpp | 77 ++++---------------- adagucserverEC/utils/UpdateLayerMetadata.h | 2 - python/lib/adaguc/runAdaguc.py | 2 +- python/python_fastapi_server/main.py | 2 +- 7 files changed, 48 insertions(+), 66 deletions(-) diff --git a/adagucserverEC/CDBFileScanner.cpp b/adagucserverEC/CDBFileScanner.cpp index 5d909912..7d855f67 100644 --- a/adagucserverEC/CDBFileScanner.cpp +++ b/adagucserverEC/CDBFileScanner.cpp @@ -968,7 +968,7 @@ int CDBFileScanner::updatedb(CDataSource *dataSource, CT::string *_tailPath, CT: return 1; } fileList.push_back(fileName); - // CDBDebug("Queried file from database with filename [%s]", fileName.c_str()); + CDBDebug("Queried file from database with filename [%s]", fileName.c_str()); } } else { fileList = searchFileNames(dataSource->cfgLayer->FilePath[0]->value.c_str(), filter.c_str(), tailPath.c_str()); diff --git a/adagucserverEC/utils/ConfigurationUtils.cpp b/adagucserverEC/utils/ConfigurationUtils.cpp index 90ff9c5b..8e359436 100644 --- a/adagucserverEC/utils/ConfigurationUtils.cpp +++ b/adagucserverEC/utils/ConfigurationUtils.cpp @@ -19,3 +19,29 @@ bool checkIfPathIsFile(CT::string filePath) { return (filePath.endsWith(".nc") || filePath.endsWith(".h5") || filePath.endsWith(".hdf5") || filePath.endsWith(".he5") || filePath.endsWith(".png") || filePath.endsWith(".csv") || filePath.endsWith(".geojson") || filePath.endsWith(".json") || filePath.startsWith("http://") || filePath.startsWith("https://") || filePath.startsWith("dodsc://")); } + +void serverLogFunctionNothing(const char *) {} + +/* Set config file from environment variable ADAGUC_CONFIG */ +int setCRequestConfigFromEnvironment(CRequest *request, const char *additionalDataset) { + char *configfile = getenv("ADAGUC_CONFIG"); + if (configfile != NULL) { + CT::string configWithAdditionalDataset = configfile; + if (additionalDataset != nullptr && strlen(additionalDataset) > 0) { + configWithAdditionalDataset.concat(","); + configWithAdditionalDataset.concat(additionalDataset); + } + int status = request->setConfigFile(configWithAdditionalDataset.c_str()); + + /* Check logging level */ + if (request->getServerParams()->isDebugLoggingEnabled() == false) { + setDebugFunction(serverLogFunctionNothing); + } + + return status; + } else { + CDBError("No configuration file is set. Please set ADAGUC_CONFIG environment variable accordingly."); + return 1; + } + return 0; +} diff --git a/adagucserverEC/utils/ConfigurationUtils.h b/adagucserverEC/utils/ConfigurationUtils.h index 3d865600..dd62f216 100644 --- a/adagucserverEC/utils/ConfigurationUtils.h +++ b/adagucserverEC/utils/ConfigurationUtils.h @@ -4,9 +4,12 @@ #include #include #include +#include std::vector getEnabledDatasetsConfigurations(CServerParams *srvParam); bool checkIfPathIsFile(CT::string filePath); +int setCRequestConfigFromEnvironment(CRequest *request, const char *additionalDataset = nullptr); + #endif diff --git a/adagucserverEC/utils/UpdateLayerMetadata.cpp b/adagucserverEC/utils/UpdateLayerMetadata.cpp index acf7acf6..486d35c7 100644 --- a/adagucserverEC/utils/UpdateLayerMetadata.cpp +++ b/adagucserverEC/utils/UpdateLayerMetadata.cpp @@ -7,40 +7,15 @@ #include #include "LayerUtils.h" -void serverLogFunctionNothing(const char *) {} - -/* Set config file from environment variable ADAGUC_CONFIG */ -int setCRequestConfigFromEnvironment(CRequest *request, const char *additionalDataset) { - char *configfile = getenv("ADAGUC_CONFIG"); - if (configfile != NULL) { - CT::string configWithAdditionalDataset = configfile; - if (additionalDataset != nullptr && strlen(additionalDataset) > 0) { - configWithAdditionalDataset.concat(","); - configWithAdditionalDataset.concat(additionalDataset); - } - int status = request->setConfigFile(configWithAdditionalDataset.c_str()); - - /* Check logging level */ - if (request->getServerParams()->isDebugLoggingEnabled() == false) { - setDebugFunction(serverLogFunctionNothing); - } - - return status; - } else { - CDBError("No configuration file is set. Please set ADAGUC_CONFIG environment variable accordingly."); - return 1; - } - return 0; -} +typedef std::pair DatasetAndLayerPair; int updateLayerMetadata(CRequest &request) { CServerParams *srvParam = request.getServerParams(); auto datasetList = getEnabledDatasetsConfigurations(srvParam); - // TODO: Remove datasets and layers in metadatable which don't have a matching configuration // TODO: Remove dimension tables which don't have a matching configuration - std::map> dataSetConfigsWithLayers; + std::set dataSetConfigsWithLayers; for (auto &dataset : datasetList) { CT::string datasetAsCTString = dataset.c_str(); @@ -52,8 +27,7 @@ int updateLayerMetadata(CRequest &request) { CDBDebug("\n\n *********************************** Updating metadatatable for dataset [%s] **************************************************", dataset.c_str()); } CRequest requestPerDataset; - // dataset = "/data/adaguc-datasets/test.uwcw_ha43_dini_5p5km_10x8.xml"; - // dataset = "/data/adaguc-datasets/testdata.xml"; + int status = setCRequestConfigFromEnvironment(&requestPerDataset, dataset.c_str()); if (status != 0) { CDBError("Unable to read configuration file"); @@ -67,7 +41,7 @@ int updateLayerMetadata(CRequest &request) { layerName.print("ID_%s", tmp.c_str()); } - dataSetConfigsWithLayers[datasetBaseName].insert(layerName.c_str()); + dataSetConfigsWithLayers.insert(std::make_pair(datasetBaseName, layerName.c_str())); } CT::string layerPathToScan; CT::string tailPath; @@ -76,10 +50,9 @@ int updateLayerMetadata(CRequest &request) { CDBError("Error occured in updating the database"); continue; } - // return 0; } - // Check for datasets which are not configured anymore. + // Check for datasets and or layers which are not configured anymore. json dataset; json layer; @@ -88,43 +61,25 @@ int updateLayerMetadata(CRequest &request) { return 1; } auto records = layerMetaDataStore->getRecords(); - std::map> datasetNamesFromDB; + + std::set datasetNamesFromDB; + for (auto record : records) { CT::string *datasetName = record->get("datasetname"); CT::string *layerName = record->get("layername"); if (datasetName != nullptr && layerName != nullptr) { - datasetNamesFromDB[datasetName->c_str()].insert(layerName->c_str()); + datasetNamesFromDB.insert(std::make_pair(datasetName->c_str(), layerName->c_str())); } } - std::map> layersToDeleteFromMetadataTable; - for (auto datasetFromDB : datasetNamesFromDB) { - for (auto layerNameDb : datasetFromDB.second) { - bool hasLayer = false; - for (auto datasetConfig : dataSetConfigsWithLayers) { - if (datasetConfig.first == datasetFromDB.first) { - for (auto layerNameConfig : datasetConfig.second) { - if (layerNameConfig == layerNameDb) { - hasLayer = true; - break; - } - } - } - } - if (!hasLayer) { - // CDBDebug("NO: %s/%s ", datasetFromDB.first.c_str(), layerNameDb.c_str()); - layersToDeleteFromMetadataTable[datasetFromDB.first].insert(layerNameDb); - } else { - // CDBDebug("YES: %s/%s ", datasetFromDB.first.c_str(), layerNameDb.c_str()); - } - } - } + std::set layersToDeleteFromMetadataTable; - for (auto dataset : layersToDeleteFromMetadataTable) { - for (auto layer : dataset.second) { - CDBDebug("DROP: %s/%s ", dataset.first.c_str(), layer.c_str()); - CDBFactory::getDBAdapter(srvParam->cfg)->dropLayerFromLayerMetadataStore(dataset.first.c_str(), layer.c_str()); - } + std::set_difference(datasetNamesFromDB.begin(), datasetNamesFromDB.end(), dataSetConfigsWithLayers.begin(), dataSetConfigsWithLayers.end(), + std::inserter(layersToDeleteFromMetadataTable, layersToDeleteFromMetadataTable.end())); + + for (auto p : layersToDeleteFromMetadataTable) { + CDBDebug("DROP: %s/%s ", p.first.c_str(), p.second.c_str()); + CDBFactory::getDBAdapter(srvParam->cfg)->dropLayerFromLayerMetadataStore(p.first.c_str(), p.second.c_str()); } return 0; diff --git a/adagucserverEC/utils/UpdateLayerMetadata.h b/adagucserverEC/utils/UpdateLayerMetadata.h index b07be634..a08787fa 100644 --- a/adagucserverEC/utils/UpdateLayerMetadata.h +++ b/adagucserverEC/utils/UpdateLayerMetadata.h @@ -2,8 +2,6 @@ #define UPDATELAYERMETADATA_H #include -int setCRequestConfigFromEnvironment(CRequest *request, const char *additionalDataset = nullptr); - int updateLayerMetadata(CRequest &request); #endif \ No newline at end of file diff --git a/python/lib/adaguc/runAdaguc.py b/python/lib/adaguc/runAdaguc.py index 5551b14e..d38fa00a 100644 --- a/python/lib/adaguc/runAdaguc.py +++ b/python/lib/adaguc/runAdaguc.py @@ -106,7 +106,7 @@ def updateLayerMetadata(self): adagucenv = self.getAdagucEnv() status, data, headers = asyncio.run( self.runADAGUCServer( - args=["--updatelayermetadata"], env=adagucenv, isCGI=False, showLogOnError = False + args=["--updatelayermetadata"], env=adagucenv, isCGI=False, showLogOnError = True ) ) diff --git a/python/python_fastapi_server/main.py b/python/python_fastapi_server/main.py index 509fed2c..d72d3e10 100644 --- a/python/python_fastapi_server/main.py +++ b/python/python_fastapi_server/main.py @@ -42,7 +42,7 @@ async def lifespan(_fastapiapp: FastAPI): logger.info("=== Starting AsyncIO Scheduler ===") # start scheduler to refresh collections & docs every minute scheduler = AsyncIOScheduler() - scheduler.add_job(update_layermetadatatable, "cron", [], minute="*/1", jitter=0, max_instances=1, coalesce=True) + scheduler.add_job(update_layermetadatatable, "cron", [], minute="*", jitter=0, max_instances=1, coalesce=True) scheduler.start() yield