Skip to content

Commit

Permalink
Rewrite of updatelayer metadata from two nested maps into two sets
Browse files Browse the repository at this point in the history
  • Loading branch information
maartenplieger committed Sep 23, 2024
1 parent fdad104 commit 38fce6e
Show file tree
Hide file tree
Showing 7 changed files with 48 additions and 66 deletions.
2 changes: 1 addition & 1 deletion adagucserverEC/CDBFileScanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -968,7 +968,7 @@ int CDBFileScanner::updatedb(CDataSource *dataSource, CT::string *_tailPath, CT:
return 1;
}
fileList.push_back(fileName);
// CDBDebug("Queried file from database with filename [%s]", fileName.c_str());
CDBDebug("Queried file from database with filename [%s]", fileName.c_str());
}
} else {
fileList = searchFileNames(dataSource->cfgLayer->FilePath[0]->value.c_str(), filter.c_str(), tailPath.c_str());
Expand Down
26 changes: 26 additions & 0 deletions adagucserverEC/utils/ConfigurationUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,29 @@ bool checkIfPathIsFile(CT::string filePath) {
return (filePath.endsWith(".nc") || filePath.endsWith(".h5") || filePath.endsWith(".hdf5") || filePath.endsWith(".he5") || filePath.endsWith(".png") || filePath.endsWith(".csv") ||
filePath.endsWith(".geojson") || filePath.endsWith(".json") || filePath.startsWith("http://") || filePath.startsWith("https://") || filePath.startsWith("dodsc://"));
}

/* Log callback that ignores the message it is given and does nothing. */
void serverLogFunctionNothing(const char * /* message */) {
  /* Intentionally empty. */
}

/**
 * Set the config file of a request from the environment variable ADAGUC_CONFIG.
 *
 * @param request           Request on which setConfigFile is called.
 * @param additionalDataset Optional extra dataset configuration. When it is non-null and
 *                          non-empty it is appended to the ADAGUC_CONFIG value, separated
 *                          by a comma.
 * @return The status returned by request->setConfigFile, or 1 when ADAGUC_CONFIG is not set.
 */
int setCRequestConfigFromEnvironment(CRequest *request, const char *additionalDataset) {
  /* The string returned by getenv must not be modified, so hold it as const. */
  const char *configfile = getenv("ADAGUC_CONFIG");
  if (configfile == nullptr) {
    CDBError("No configuration file is set. Please set ADAGUC_CONFIG environment variable accordingly.");
    return 1;
  }

  CT::string configWithAdditionalDataset = configfile;
  if (additionalDataset != nullptr && additionalDataset[0] != '\0') {
    configWithAdditionalDataset.concat(",");
    configWithAdditionalDataset.concat(additionalDataset);
  }
  const int status = request->setConfigFile(configWithAdditionalDataset.c_str());

  /* Check logging level: install the no-op log function when debug logging is not enabled. */
  if (!request->getServerParams()->isDebugLoggingEnabled()) {
    setDebugFunction(serverLogFunctionNothing);
  }

  return status;
}
3 changes: 3 additions & 0 deletions adagucserverEC/utils/ConfigurationUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@
#include <vector>
#include <string>
#include <CServerParams.h>
#include <CRequest.h>

std::vector<std::string> getEnabledDatasetsConfigurations(CServerParams *srvParam);

bool checkIfPathIsFile(CT::string filePath);

int setCRequestConfigFromEnvironment(CRequest *request, const char *additionalDataset = nullptr);

#endif
77 changes: 16 additions & 61 deletions adagucserverEC/utils/UpdateLayerMetadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,40 +7,15 @@
#include <json_adaguc.h>
#include "LayerUtils.h"

/* Log callback that ignores the message it is given and does nothing. */
void serverLogFunctionNothing(const char * /* message */) {
  /* Intentionally empty. */
}

/**
 * Set the config file of a request from the environment variable ADAGUC_CONFIG.
 *
 * @param request           Request on which setConfigFile is called.
 * @param additionalDataset Optional extra dataset configuration. When it is non-null and
 *                          non-empty it is appended to the ADAGUC_CONFIG value, separated
 *                          by a comma.
 * @return The status returned by request->setConfigFile, or 1 when ADAGUC_CONFIG is not set.
 */
int setCRequestConfigFromEnvironment(CRequest *request, const char *additionalDataset) {
  /* The string returned by getenv must not be modified, so hold it as const. */
  const char *configfile = getenv("ADAGUC_CONFIG");
  if (configfile == nullptr) {
    CDBError("No configuration file is set. Please set ADAGUC_CONFIG environment variable accordingly.");
    return 1;
  }

  CT::string configWithAdditionalDataset = configfile;
  if (additionalDataset != nullptr && additionalDataset[0] != '\0') {
    configWithAdditionalDataset.concat(",");
    configWithAdditionalDataset.concat(additionalDataset);
  }
  const int status = request->setConfigFile(configWithAdditionalDataset.c_str());

  /* Check logging level: install the no-op log function when debug logging is not enabled. */
  if (!request->getServerParams()->isDebugLoggingEnabled()) {
    setDebugFunction(serverLogFunctionNothing);
  }

  return status;
}
/* A (dataset name, layer name) combination: first is the dataset, second is the layer. */
using DatasetAndLayerPair = std::pair<std::string, std::string>;

int updateLayerMetadata(CRequest &request) {
CServerParams *srvParam = request.getServerParams();

auto datasetList = getEnabledDatasetsConfigurations(srvParam);
// TODO: Remove datasets and layers in the metadata table which don't have a matching configuration
// TODO: Remove dimension tables which don't have a matching configuration

std::map<std::string, std::set<std::string>> dataSetConfigsWithLayers;
std::set<DatasetAndLayerPair> dataSetConfigsWithLayers;

for (auto &dataset : datasetList) {
CT::string datasetAsCTString = dataset.c_str();
Expand All @@ -52,8 +27,7 @@ int updateLayerMetadata(CRequest &request) {
CDBDebug("\n\n *********************************** Updating metadatatable for dataset [%s] **************************************************", dataset.c_str());
}
CRequest requestPerDataset;
// dataset = "/data/adaguc-datasets/test.uwcw_ha43_dini_5p5km_10x8.xml";
// dataset = "/data/adaguc-datasets/testdata.xml";

int status = setCRequestConfigFromEnvironment(&requestPerDataset, dataset.c_str());
if (status != 0) {
CDBError("Unable to read configuration file");
Expand All @@ -67,7 +41,7 @@ int updateLayerMetadata(CRequest &request) {
layerName.print("ID_%s", tmp.c_str());
}

dataSetConfigsWithLayers[datasetBaseName].insert(layerName.c_str());
dataSetConfigsWithLayers.insert(std::make_pair(datasetBaseName, layerName.c_str()));
}
CT::string layerPathToScan;
CT::string tailPath;
Expand All @@ -76,10 +50,9 @@ int updateLayerMetadata(CRequest &request) {
CDBError("Error occured in updating the database");
continue;
}
// return 0;
}

// Check for datasets which are not configured anymore.
// Check for datasets and/or layers which are not configured anymore.
json dataset;
json layer;

Expand All @@ -88,43 +61,25 @@ int updateLayerMetadata(CRequest &request) {
return 1;
}
auto records = layerMetaDataStore->getRecords();
std::map<std::string, std::set<std::string>> datasetNamesFromDB;

std::set<DatasetAndLayerPair> datasetNamesFromDB;

for (auto record : records) {
CT::string *datasetName = record->get("datasetname");
CT::string *layerName = record->get("layername");
if (datasetName != nullptr && layerName != nullptr) {
datasetNamesFromDB[datasetName->c_str()].insert(layerName->c_str());
datasetNamesFromDB.insert(std::make_pair(datasetName->c_str(), layerName->c_str()));
}
}

std::map<std::string, std::set<std::string>> layersToDeleteFromMetadataTable;
for (auto datasetFromDB : datasetNamesFromDB) {
for (auto layerNameDb : datasetFromDB.second) {
bool hasLayer = false;
for (auto datasetConfig : dataSetConfigsWithLayers) {
if (datasetConfig.first == datasetFromDB.first) {
for (auto layerNameConfig : datasetConfig.second) {
if (layerNameConfig == layerNameDb) {
hasLayer = true;
break;
}
}
}
}
if (!hasLayer) {
// CDBDebug("NO: %s/%s ", datasetFromDB.first.c_str(), layerNameDb.c_str());
layersToDeleteFromMetadataTable[datasetFromDB.first].insert(layerNameDb);
} else {
// CDBDebug("YES: %s/%s ", datasetFromDB.first.c_str(), layerNameDb.c_str());
}
}
}
std::set<DatasetAndLayerPair> layersToDeleteFromMetadataTable;

for (auto dataset : layersToDeleteFromMetadataTable) {
for (auto layer : dataset.second) {
CDBDebug("DROP: %s/%s ", dataset.first.c_str(), layer.c_str());
CDBFactory::getDBAdapter(srvParam->cfg)->dropLayerFromLayerMetadataStore(dataset.first.c_str(), layer.c_str());
}
std::set_difference(datasetNamesFromDB.begin(), datasetNamesFromDB.end(), dataSetConfigsWithLayers.begin(), dataSetConfigsWithLayers.end(),
std::inserter(layersToDeleteFromMetadataTable, layersToDeleteFromMetadataTable.end()));

for (auto p : layersToDeleteFromMetadataTable) {
CDBDebug("DROP: %s/%s ", p.first.c_str(), p.second.c_str());
CDBFactory::getDBAdapter(srvParam->cfg)->dropLayerFromLayerMetadataStore(p.first.c_str(), p.second.c_str());
}

return 0;
Expand Down
2 changes: 0 additions & 2 deletions adagucserverEC/utils/UpdateLayerMetadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
#define UPDATELAYERMETADATA_H
#include <CRequest.h>

int setCRequestConfigFromEnvironment(CRequest *request, const char *additionalDataset = nullptr);

int updateLayerMetadata(CRequest &request);

#endif
2 changes: 1 addition & 1 deletion python/lib/adaguc/runAdaguc.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def updateLayerMetadata(self):
adagucenv = self.getAdagucEnv()
status, data, headers = asyncio.run(
self.runADAGUCServer(
args=["--updatelayermetadata"], env=adagucenv, isCGI=False, showLogOnError = False
args=["--updatelayermetadata"], env=adagucenv, isCGI=False, showLogOnError = True
)
)

Expand Down
2 changes: 1 addition & 1 deletion python/python_fastapi_server/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ async def lifespan(_fastapiapp: FastAPI):
logger.info("=== Starting AsyncIO Scheduler ===")
# start scheduler to refresh collections & docs every minute
scheduler = AsyncIOScheduler()
scheduler.add_job(update_layermetadatatable, "cron", [], minute="*/1", jitter=0, max_instances=1, coalesce=True)
scheduler.add_job(update_layermetadatatable, "cron", [], minute="*", jitter=0, max_instances=1, coalesce=True)
scheduler.start()

yield
Expand Down

0 comments on commit 38fce6e

Please sign in to comment.