From 92147bac5f035efc986e9fc374f1c2247ab41439 Mon Sep 17 00:00:00 2001 From: Michael Mattig Date: Wed, 15 Jun 2022 13:54:54 +0200 Subject: [PATCH 01/21] add new trait for layer collection listing and return workflows instead of IDs --- services/src/contexts/in_memory.rs | 13 +- services/src/datasets/add_from_directory.rs | 21 +-- services/src/handlers/layers.rs | 5 +- services/src/layers/add_from_directory.rs | 22 +-- services/src/layers/layer.rs | 10 +- services/src/layers/listing.rs | 22 +++ services/src/layers/mod.rs | 1 + services/src/layers/storage.rs | 60 ++++--- services/src/pro/contexts/in_memory.rs | 13 +- services/src/pro/contexts/postgres.rs | 53 +++--- .../src/pro/datasets/add_from_directory.rs | 18 +- services/src/pro/layers/postgres_layer_db.rs | 164 +++++++++++------- services/src/workflows/workflow.rs | 9 + 13 files changed, 215 insertions(+), 196 deletions(-) create mode 100644 services/src/layers/listing.rs diff --git a/services/src/contexts/in_memory.rs b/services/src/contexts/in_memory.rs index 1dcc5815e..8c8a7eae3 100644 --- a/services/src/contexts/in_memory.rs +++ b/services/src/contexts/in_memory.rs @@ -60,24 +60,17 @@ impl InMemoryContext { exe_ctx_tiling_spec: TilingSpecification, query_ctx_chunk_size: ChunkByteSize, ) -> Self { - let mut workflow_registry = HashMapRegistry::default(); let mut layer_db = HashMapLayerDb::default(); - add_layers_from_directory(&mut layer_db, &mut workflow_registry, layer_defs_path).await; + add_layers_from_directory(&mut layer_db, layer_defs_path).await; add_layer_collections_from_directory(&mut layer_db, layer_collection_defs_path).await; let mut dataset_db = HashMapDatasetDb::default(); - add_datasets_from_directory( - &mut dataset_db, - &mut layer_db, - &mut workflow_registry, - dataset_defs_path, - ) - .await; + add_datasets_from_directory(&mut dataset_db, &mut layer_db, dataset_defs_path).await; add_providers_from_directory(&mut dataset_db, provider_defs_path).await; Self { project_db: Default::default(), - workflow_registry: Arc::new(workflow_registry), + workflow_registry: Default::default(), layer_db: Arc::new(layer_db), session: Default::default(), thread_pool: create_rayon_thread_pool(0), diff --git a/services/src/datasets/add_from_directory.rs b/services/src/datasets/add_from_directory.rs index 2cb94df6f..05c97b505 100644 --- a/services/src/datasets/add_from_directory.rs +++ b/services/src/datasets/add_from_directory.rs @@ -9,7 +9,6 @@ use crate::datasets::storage::MetaDataDefinition; use crate::layers::layer::{AddLayer, AddLayerCollection, LayerCollectionId}; use crate::layers::storage::LayerDb; use crate::util::user_input::UserInput; -use crate::workflows::registry::WorkflowRegistry; use crate::workflows::workflow::Workflow; use crate::{contexts::MockableSession, datasets::storage::DatasetDb}; use crate::{datasets::storage::ExternalDatasetProviderDefinition, error::Result}; @@ -41,11 +40,10 @@ pub async fn add_dataset_layer_collection(layer_db: &mut L) -> Resul Ok(()) } -pub async fn add_dataset_as_layer( +pub async fn add_dataset_as_layer( def: DatasetDefinition, dataset: DatasetId, layer_db: &mut L, - workflow_db: &mut W, ) -> Result<()> { let workflow = match def.meta_data { MetaDataDefinition::MockMetaData(_) => Workflow { @@ -81,8 +79,6 @@ pub async fn add_dataset_as_layer( }, }; - let workflow = workflow_db.register(workflow).await?; - let layer = AddLayer { name: def.properties.name, description: def.properties.description, @@ -99,26 +95,18 @@ pub async fn add_dataset_as_layer( Ok(()) } -pub async fn 
add_datasets_from_directory< - S: MockableSession, - D: DatasetDb, - L: LayerDb, - W: WorkflowRegistry, ->( +pub async fn add_datasets_from_directory, L: LayerDb>( dataset_db: &mut D, layer_db: &mut L, - workflow_db: &mut W, file_path: PathBuf, ) { async fn add_dataset_definition_from_dir_entry< S: MockableSession, D: DatasetDb, L: LayerDb, - W: WorkflowRegistry, >( db: &mut D, layer_db: &mut L, - workflow_db: &mut W, entry: &DirEntry, ) -> Result<()> { let def: DatasetDefinition = @@ -132,7 +120,7 @@ pub async fn add_datasets_from_directory< ) .await?; // TODO: add as system user - add_dataset_as_layer(def, id, layer_db, workflow_db).await?; + add_dataset_as_layer(def, id, layer_db).await?; Ok(()) } @@ -152,8 +140,7 @@ pub async fn add_datasets_from_directory< match entry { Ok(entry) if entry.path().extension() == Some(OsStr::new("json")) => { if let Err(e) = - add_dataset_definition_from_dir_entry(dataset_db, layer_db, workflow_db, &entry) - .await + add_dataset_definition_from_dir_entry(dataset_db, layer_db, &entry).await { warn!( "Skipped adding dataset from directory entry: {:?} error: {}", diff --git a/services/src/handlers/layers.rs b/services/src/handlers/layers.rs index 3f445c430..4d76e354d 100644 --- a/services/src/handlers/layers.rs +++ b/services/src/handlers/layers.rs @@ -2,6 +2,7 @@ use actix_web::{web, FromRequest, Responder}; use crate::error::Result; use crate::layers::layer::{LayerCollectionId, LayerId}; +use crate::layers::listing::LayerCollectionProvider; use crate::layers::storage::LayerDb; use crate::util::user_input::UserInput; use crate::{contexts::Context, layers::layer::LayerCollectionListOptions}; @@ -22,7 +23,7 @@ async fn list_root_collections_handler( ) -> Result { let db = ctx.layer_db_ref(); let collection = db - .get_root_collection_items(options.into_inner().validated()?) + .root_collection_items(options.into_inner().validated()?) .await?; Ok(web::Json(collection)) @@ -35,7 +36,7 @@ async fn list_collection_handler( ) -> Result { let collection = ctx .layer_db_ref() - .get_collection_items(id.into_inner(), options.into_inner().validated()?) + .collection_items(id.into_inner(), options.into_inner().validated()?) 
         .await?;
 
     Ok(web::Json(collection))
diff --git a/services/src/layers/add_from_directory.rs b/services/src/layers/add_from_directory.rs
index df88de4c9..33abc00f7 100644
--- a/services/src/layers/add_from_directory.rs
+++ b/services/src/layers/add_from_directory.rs
@@ -7,38 +7,28 @@ use std::{
 };
 
 use crate::error::Result;
-use crate::{
-    layers::layer::{
-        AddLayer, AddLayerCollection, LayerCollectionDefinition, LayerCollectionId, LayerDefinition,
-    },
-    workflows::registry::WorkflowRegistry,
+use crate::layers::layer::{
+    AddLayer, AddLayerCollection, LayerCollectionDefinition, LayerCollectionId, LayerDefinition,
 };
 use crate::{layers::storage::LayerDb, util::user_input::UserInput};
 use log::{info, warn};
 
-pub async fn add_layers_from_directory<L: LayerDb, W: WorkflowRegistry>(
-    layer_db: &mut L,
-    workflow_db: &mut W,
-    file_path: PathBuf,
-) {
-    async fn add_layer_from_dir_entry<L: LayerDb, W: WorkflowRegistry>(
+pub async fn add_layers_from_directory<L: LayerDb>(layer_db: &mut L, file_path: PathBuf) {
+    async fn add_layer_from_dir_entry<L: LayerDb>(
         layer_db: &mut L,
-        workflow_db: &mut W,
         entry: &DirEntry,
     ) -> Result<()> {
         let def: LayerDefinition =
             serde_json::from_reader(BufReader::new(File::open(entry.path())?))?;
 
-        let workflow_id = workflow_db.register(def.workflow).await?;
-
         layer_db
             .add_layer_with_id(
                 def.id,
                 AddLayer {
                     name: def.name,
                     description: def.description,
-                    workflow: workflow_id,
+                    workflow: def.workflow,
                     symbology: def.symbology,
                 }
                 .validated()?,
@@ -58,7 +48,7 @@ pub async fn add_layers_from_directory<L: LayerDb, W: WorkflowRegistry>(
     for entry in dir {
         match entry {
             Ok(entry) if entry.path().extension() == Some(OsStr::new("json")) => {
-                match add_layer_from_dir_entry(layer_db, workflow_db, &entry).await {
+                match add_layer_from_dir_entry(layer_db, &entry).await {
                     Ok(_) => info!("Added layer from directory entry: {:?}", entry),
                     Err(e) => warn!(
                         "Skipped adding layer from directory entry: {:?} error: {}",
diff --git a/services/src/layers/layer.rs b/services/src/layers/layer.rs
index 434f42be0..8c00a7333 100644
--- a/services/src/layers/layer.rs
+++ b/services/src/layers/layer.rs
@@ -3,10 +3,7 @@ use serde::{Deserialize, Serialize};
 use geoengine_datatypes::identifier;
 
 use crate::{
-    error::Result,
-    projects::Symbology,
-    util::user_input::UserInput,
-    workflows::workflow::{Workflow, WorkflowId},
+    error::Result, projects::Symbology, util::user_input::UserInput, workflows::workflow::Workflow,
 };
 
 identifier!(LayerId);
@@ -17,7 +14,7 @@ pub struct Layer {
     pub id: LayerId,
     pub name: String,
     pub description: String,
-    pub workflow: WorkflowId,
+    pub workflow: Workflow,
     pub symbology: Option<Symbology>,
 }
 
@@ -26,14 +23,13 @@ pub struct LayerListing {
     pub id: LayerId,
     pub name: String,
     pub description: String,
-    pub workflow: WorkflowId,
 }
 
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct AddLayer {
     pub name: String,
     pub description: String,
-    pub workflow: WorkflowId,
+    pub workflow: Workflow,
     pub symbology: Option<Symbology>,
 }
 
diff --git a/services/src/layers/listing.rs b/services/src/layers/listing.rs
new file mode 100644
index 000000000..ea62c6595
--- /dev/null
+++ b/services/src/layers/listing.rs
@@ -0,0 +1,22 @@
+use async_trait::async_trait;
+
+use crate::util::user_input::Validated;
+use crate::{error::Result, workflows::workflow::Workflow};
+
+use super::layer::{CollectionItem, LayerCollectionId, LayerCollectionListOptions, LayerId};
+
+#[async_trait]
+pub trait LayerCollectionProvider {
+    async fn collection_items(
+        &self,
+        collection: LayerCollectionId,
+        options: Validated<LayerCollectionListOptions>,
+    ) -> Result<Vec<CollectionItem>>;
+
+    async fn root_collection_items(
+        &self,
+        options: Validated<LayerCollectionListOptions>,
+    ) -> Result<Vec<CollectionItem>>;
+
+    async fn 
workflow(&self, layer: LayerId) -> Result; +} diff --git a/services/src/layers/mod.rs b/services/src/layers/mod.rs index 898f90d5f..394345023 100644 --- a/services/src/layers/mod.rs +++ b/services/src/layers/mod.rs @@ -1,3 +1,4 @@ pub mod add_from_directory; pub mod layer; +pub mod listing; pub mod storage; diff --git a/services/src/layers/storage.rs b/services/src/layers/storage.rs index 6456c957e..46e893062 100644 --- a/services/src/layers/storage.rs +++ b/services/src/layers/storage.rs @@ -4,7 +4,9 @@ use super::layer::{ AddLayer, AddLayerCollection, CollectionItem, Layer, LayerCollectionId, LayerCollectionListOptions, LayerCollectionListing, LayerId, LayerListing, }; +use super::listing::LayerCollectionProvider; use crate::error::Result; +use crate::workflows::workflow::Workflow; use crate::{contexts::Db, util::user_input::Validated}; use async_trait::async_trait; use geoengine_datatypes::util::Identifier; @@ -19,7 +21,7 @@ pub enum LayerDbError { } #[async_trait] -pub trait LayerDb: Send + Sync { +pub trait LayerDb: LayerCollectionProvider + Send + Sync { async fn add_layer(&self, layer: Validated) -> Result; async fn add_layer_with_id(&self, id: LayerId, layer: Validated) -> Result<()>; @@ -47,18 +49,6 @@ pub trait LayerDb: Send + Sync { collection: LayerCollectionId, parent: LayerCollectionId, ) -> Result<()>; - - async fn get_collection_items( - &self, - collection: LayerCollectionId, - options: Validated, - ) -> Result>; - - // all collection items without a parent - async fn get_root_collection_items( - &self, - options: Validated, - ) -> Result>; } #[derive(Default, Debug)] @@ -107,7 +97,7 @@ impl LayerDb for HashMapLayerDb { id, name: layer.name.clone(), description: layer.description.clone(), - workflow: layer.workflow, + workflow: layer.workflow.clone(), symbology: layer.symbology.clone(), }) } @@ -169,8 +159,11 @@ impl LayerDb for HashMapLayerDb { Ok(()) } +} - async fn get_collection_items( +#[async_trait] +impl LayerCollectionProvider for HashMapLayerDb { + async fn collection_items( &self, collection: LayerCollectionId, options: Validated, @@ -215,7 +208,6 @@ impl LayerDb for HashMapLayerDb { id: *l, name: layer.name.clone(), description: layer.description.clone(), - workflow: layer.workflow, }) }); @@ -226,7 +218,7 @@ impl LayerDb for HashMapLayerDb { .collect()) } - async fn get_root_collection_items( + async fn root_collection_items( &self, options: Validated, ) -> Result> { @@ -263,7 +255,6 @@ impl LayerDb for HashMapLayerDb { id: *id, name: l.name.clone(), description: l.description.clone(), - workflow: l.workflow, })) }); @@ -273,10 +264,27 @@ impl LayerDb for HashMapLayerDb { .take(options.limit as usize) .collect()) } + + async fn workflow(&self, layer: LayerId) -> Result { + let backend = self.backend.read().await; + + let layer = backend + .layers + .get(&layer) + .ok_or(LayerDbError::NoLayerForGivenId { id: layer })?; + + Ok(layer.workflow.clone()) + } } #[cfg(test)] mod tests { + use geoengine_datatypes::primitives::Coordinate2D; + use geoengine_operators::{ + engine::{TypedOperator, VectorOperator}, + mock::{MockPointSource, MockPointSourceParams}, + }; + use crate::{util::user_input::UserInput, workflows::workflow::WorkflowId}; use super::*; @@ -285,12 +293,21 @@ mod tests { async fn it_stores_layers() -> Result<()> { let db = HashMapLayerDb::default(); - let workflow_id = WorkflowId::new(); + let _workflow_id = WorkflowId::new(); let layer = AddLayer { name: "layer".to_string(), description: "description".to_string(), - workflow: workflow_id, + workflow: 
Workflow { + operator: TypedOperator::Vector( + MockPointSource { + params: MockPointSourceParams { + points: vec![Coordinate2D::new(1., 2.); 3], + }, + } + .boxed(), + ), + }, symbology: None, } .validated()?; @@ -317,7 +334,7 @@ mod tests { db.add_collection_to_parent(empty_c_id, top_c_id).await?; let items = db - .get_collection_items( + .collection_items( top_c_id, LayerCollectionListOptions { offset: 0, @@ -339,7 +356,6 @@ mod tests { id: l_id, name: "layer".to_string(), description: "description".to_string(), - workflow: workflow_id }) ] ); diff --git a/services/src/pro/contexts/in_memory.rs b/services/src/pro/contexts/in_memory.rs index 329768023..f5b0d39da 100644 --- a/services/src/pro/contexts/in_memory.rs +++ b/services/src/pro/contexts/in_memory.rs @@ -57,27 +57,20 @@ impl ProInMemoryContext { exe_ctx_tiling_spec: TilingSpecification, query_ctx_chunk_size: ChunkByteSize, ) -> Self { - let mut workflow_db = HashMapRegistry::default(); let mut layer_db = HashMapLayerDb::default(); - add_layers_from_directory(&mut layer_db, &mut workflow_db, layer_defs_path).await; + add_layers_from_directory(&mut layer_db, layer_defs_path).await; add_layer_collections_from_directory(&mut layer_db, layer_collection_defs_path).await; let mut dataset_db = ProHashMapDatasetDb::default(); - add_datasets_from_directory( - &mut dataset_db, - &mut layer_db, - &mut workflow_db, - dataset_defs_path, - ) - .await; + add_datasets_from_directory(&mut dataset_db, &mut layer_db, dataset_defs_path).await; add_providers_from_directory(&mut dataset_db, provider_defs_path.clone()).await; add_providers_from_directory(&mut dataset_db, provider_defs_path.join("pro")).await; Self { user_db: Default::default(), project_db: Default::default(), - workflow_registry: Arc::new(workflow_db), + workflow_registry: Default::default(), dataset_db: Arc::new(dataset_db), layer_db: Arc::new(layer_db), thread_pool: create_rayon_thread_pool(0), diff --git a/services/src/pro/contexts/postgres.rs b/services/src/pro/contexts/postgres.rs index b4886f1ed..07f8a5de3 100644 --- a/services/src/pro/contexts/postgres.rs +++ b/services/src/pro/contexts/postgres.rs @@ -102,20 +102,14 @@ where Self::update_schema(pool.get().await?).await?; - let mut workflow_db = PostgresWorkflowRegistry::new(pool.clone()); + let workflow_db = PostgresWorkflowRegistry::new(pool.clone()); let mut layer_db = PostgresLayerDb::new(pool.clone()); - add_layers_from_directory(&mut layer_db, &mut workflow_db, layer_defs_path).await; + add_layers_from_directory(&mut layer_db, layer_defs_path).await; add_layer_collections_from_directory(&mut layer_db, layer_collection_defs_path).await; let mut dataset_db = PostgresDatasetDb::new(pool.clone()); - add_datasets_from_directory( - &mut dataset_db, - &mut layer_db, - &mut workflow_db, - dataset_defs_path, - ) - .await; + add_datasets_from_directory(&mut dataset_db, &mut layer_db, dataset_defs_path).await; add_providers_from_directory(&mut dataset_db, provider_defs_path.clone()).await; add_providers_from_directory(&mut dataset_db, provider_defs_path.join("pro")).await; @@ -400,7 +394,7 @@ where id UUID PRIMARY KEY, name text NOT NULL, description text NOT NULL, - workflow UUID REFERENCES workflows NOT NULL, + workflow json NOT NULL, symbology json ); @@ -577,6 +571,7 @@ mod tests { AddLayer, AddLayerCollection, CollectionItem, LayerCollectionListOptions, LayerCollectionListing, LayerListing, }; + use crate::layers::listing::LayerCollectionProvider; use crate::layers::storage::LayerDb; use crate::pro::datasets::{DatasetPermission, 
Permission, UpdateDatasetPermissions}; use crate::pro::projects::{LoadVersion, ProProjectDb, UserProjectPermission}; @@ -1736,21 +1731,17 @@ mod tests { async fn it_collects_layers() { with_temp_context(|ctx, _| async move { let layer_db = ctx.layer_db_ref(); - let workflow_db = ctx.workflow_registry_ref(); - - let workflow = workflow_db - .register(Workflow { - operator: TypedOperator::Vector( - MockPointSource { - params: MockPointSourceParams { - points: vec![Coordinate2D::new(1., 2.); 3], - }, - } - .boxed(), - ), - }) - .await - .unwrap(); + + let workflow = Workflow { + operator: TypedOperator::Vector( + MockPointSource { + params: MockPointSourceParams { + points: vec![Coordinate2D::new(1., 2.); 3], + }, + } + .boxed(), + ), + }; let layer1 = layer_db .add_layer( @@ -1758,7 +1749,7 @@ mod tests { name: "Layer1".to_string(), description: "Layer 1".to_string(), symbology: None, - workflow, + workflow: workflow.clone(), } .validated() .unwrap(), @@ -1773,7 +1764,7 @@ mod tests { name: "Layer1".to_string(), description: "Layer 1".to_string(), symbology: None, - workflow + workflow: workflow.clone() } ); @@ -1783,7 +1774,7 @@ mod tests { name: "Layer2".to_string(), description: "Layer 2".to_string(), symbology: None, - workflow, + workflow: workflow.clone(), } .validated() .unwrap(), @@ -1826,7 +1817,7 @@ mod tests { .unwrap(); let root_list = layer_db - .get_root_collection_items( + .root_collection_items( LayerCollectionListOptions { offset: 0, limit: 20, @@ -1849,13 +1840,12 @@ mod tests { id: layer2, name: "Layer2".to_string(), description: "Layer 2".to_string(), - workflow }) ] ); let collection1_list = layer_db - .get_collection_items( + .collection_items( collection1, LayerCollectionListOptions { offset: 0, @@ -1879,7 +1869,6 @@ mod tests { id: layer1, name: "Layer1".to_string(), description: "Layer 1".to_string(), - workflow }) ] ); diff --git a/services/src/pro/datasets/add_from_directory.rs b/services/src/pro/datasets/add_from_directory.rs index 81ffee2ce..4ba9318f0 100644 --- a/services/src/pro/datasets/add_from_directory.rs +++ b/services/src/pro/datasets/add_from_directory.rs @@ -8,7 +8,6 @@ use crate::{ datasets::add_from_directory::{add_dataset_as_layer, add_dataset_layer_collection}, error::Result, layers::storage::LayerDb, - workflows::registry::WorkflowRegistry, }; use crate::{ datasets::storage::DatasetDb, @@ -25,21 +24,17 @@ use super::storage::UpdateDatasetPermissions; pub async fn add_datasets_from_directory< D: DatasetDb + UpdateDatasetPermissions, L: LayerDb, - W: WorkflowRegistry, >( dataset_db: &mut D, layer_db: &mut L, - workflow_db: &mut W, file_path: PathBuf, ) { async fn add_dataset_definition_from_dir_entry< D: DatasetDb + UpdateDatasetPermissions, L: LayerDb, - W: WorkflowRegistry, >( dataset_db: &mut D, layer_db: &mut L, - workflow_db: &mut W, entry: &DirEntry, system_session: &UserSession, ) -> Result<()> { @@ -76,7 +71,7 @@ pub async fn add_datasets_from_directory< ) .await?; - add_dataset_as_layer(def, dataset_id, layer_db, workflow_db).await?; + add_dataset_as_layer(def, dataset_id, layer_db).await?; Ok(()) } @@ -96,14 +91,9 @@ pub async fn add_datasets_from_directory< for entry in dir { if let Ok(entry) = entry { - if let Err(e) = add_dataset_definition_from_dir_entry( - dataset_db, - layer_db, - workflow_db, - &entry, - &system_session, - ) - .await + if let Err(e) = + add_dataset_definition_from_dir_entry(dataset_db, layer_db, &entry, &system_session) + .await { warn!( "Skipped adding dataset from directory entry: {:?} error: {}", diff --git 
a/services/src/pro/layers/postgres_layer_db.rs b/services/src/pro/layers/postgres_layer_db.rs index 46368102c..2a6074316 100644 --- a/services/src/pro/layers/postgres_layer_db.rs +++ b/services/src/pro/layers/postgres_layer_db.rs @@ -1,3 +1,4 @@ +use async_trait::async_trait; use bb8_postgres::{ bb8::Pool, tokio_postgres::{ @@ -16,10 +17,11 @@ use crate::{ AddLayer, AddLayerCollection, CollectionItem, Layer, LayerCollectionId, LayerCollectionListOptions, LayerCollectionListing, LayerId, LayerListing, }, + listing::LayerCollectionProvider, storage::{LayerDb, LayerDbError}, }, util::user_input::Validated, - workflows::workflow::WorkflowId, + workflows::workflow::Workflow, }; pub struct PostgresLayerDb @@ -29,7 +31,7 @@ where >::TlsConnect: Send, <>::TlsConnect as TlsConnect>::Future: Send, { - conn_pool: Pool>, + pub(crate) conn_pool: Pool>, } impl PostgresLayerDb @@ -44,7 +46,7 @@ where } } -#[async_trait::async_trait] +#[async_trait] impl LayerDb for PostgresLayerDb where Tls: MakeTlsConnect + Clone + Send + Sync + 'static, @@ -74,7 +76,7 @@ where &id, &layer.name, &layer.description, - &layer.workflow, + &serde_json::to_value(&layer.workflow).context(error::SerdeJson)?, &symbology, ], ) @@ -103,7 +105,7 @@ where &id, &layer.name, &layer.description, - &layer.workflow, + &serde_json::to_value(&layer.workflow).context(error::SerdeJson)?, &symbology, ], ) @@ -137,7 +139,7 @@ where id, name: row.get(0), description: row.get(1), - workflow: row.get(2), + workflow: serde_json::from_value(row.get(2)).context(error::SerdeJson)?, symbology: serde_json::from_value(row.get(3)).context(error::SerdeJson)?, }) } @@ -228,8 +230,17 @@ where Ok(()) } +} - async fn get_collection_items( +#[async_trait] +impl LayerCollectionProvider for PostgresLayerDb +where + Tls: MakeTlsConnect + Clone + Send + Sync + 'static, + >::Stream: Send + Sync, + >::TlsConnect: Send, + <>::TlsConnect as TlsConnect>::Future: Send, +{ + async fn collection_items( &self, collection: LayerCollectionId, options: Validated, @@ -241,28 +252,28 @@ where let stmt = conn .prepare( " - SELECT id, name, description, workflow - FROM ( - SELECT - id, - name, - description, - CAST(NULL as UUID) as workflow - FROM layer_collections c JOIN collection_children cc ON (c.id = cc.child) - WHERE cc.parent = $1 - ) u UNION ( - SELECT - id, - name, - description, - workflow - FROM layers l JOIN collection_layers cl ON (l.id = cl.layer) - WHERE cl.collection = $1 - ) - ORDER BY workflow DESC, name ASC - LIMIT $2 - OFFSET $3; - ", + SELECT id, name, description, is_layer + FROM ( + SELECT + id, + name, + description, + FALSE AS is_layer + FROM layer_collections c JOIN collection_children cc ON (c.id = cc.child) + WHERE cc.parent = $1 + ) u UNION ( + SELECT + id, + name, + description, + TRUE As is_layer + FROM layers l JOIN collection_layers cl ON (l.id = cl.layer) + WHERE cl.collection = $1 + ) + ORDER BY is_layer ASC, name ASC + LIMIT $2 + OFFSET $3; + ", ) .await?; @@ -280,26 +291,26 @@ where Ok(rows .into_iter() .map(|row| { - let workflow = row.get::>(3); + let is_layer: bool = row.get(3); - match workflow { - Some(workflow) => CollectionItem::Layer(LayerListing { + if is_layer { + CollectionItem::Layer(LayerListing { id: row.get(0), name: row.get(1), description: row.get(2), - workflow, - }), - None => CollectionItem::Collection(LayerCollectionListing { + }) + } else { + CollectionItem::Collection(LayerCollectionListing { id: row.get(0), name: row.get(1), description: row.get(2), - }), + }) } }) .collect()) } - async fn get_root_collection_items( + 
async fn root_collection_items( &self, options: Validated, ) -> Result> { @@ -310,28 +321,28 @@ where let stmt = conn .prepare( " - SELECT id, name, description, workflow - FROM ( - SELECT - id, - name, - description, - CAST(NULL as UUID) as workflow - FROM layer_collections c LEFT JOIN collection_children cc ON (c.id = cc.child) - WHERE cc.parent IS NULL - ) a UNION ( - SELECT - id, - name, - description, - workflow - FROM layers l LEFT JOIN collection_layers cl ON (l.id = cl.layer) - WHERE cl.collection IS NULL - ) - ORDER BY workflow DESC, name ASC - LIMIT $1 - OFFSET $2; - ", + SELECT id, name, description, is_layer + FROM ( + SELECT + id, + name, + description, + FALSE AS is_layer + FROM layer_collections c LEFT JOIN collection_children cc ON (c.id = cc.child) + WHERE cc.parent IS NULL + ) a UNION ( + SELECT + id, + name, + description, + TRUE AS is_layer + FROM layers l LEFT JOIN collection_layers cl ON (l.id = cl.layer) + WHERE cl.collection IS NULL + ) + ORDER BY is_layer ASC, name ASC + LIMIT $1 + OFFSET $2; + ", ) .await?; @@ -345,22 +356,43 @@ where Ok(rows .into_iter() .map(|row| { - let workflow = row.get::>(3); + let is_layer: bool = row.get(3); - match workflow { - Some(workflow) => CollectionItem::Layer(LayerListing { + if is_layer { + CollectionItem::Layer(LayerListing { id: row.get(0), name: row.get(1), description: row.get(2), - workflow, - }), - None => CollectionItem::Collection(LayerCollectionListing { + }) + } else { + CollectionItem::Collection(LayerCollectionListing { id: row.get(0), name: row.get(1), description: row.get(2), - }), + }) } }) .collect()) } + + async fn workflow(&self, layer: LayerId) -> Result { + let conn = self.conn_pool.get().await?; + + let stmt = conn + .prepare( + " + SELECT + workflow, + FROM layers l + WHERE l.id = $1;", + ) + .await?; + + let row = conn + .query_one(&stmt, &[&layer]) + .await + .map_err(|_error| LayerDbError::NoLayerForGivenId { id: layer })?; + + Ok(serde_json::from_value(row.get(0)).context(error::SerdeJson)?) 
+ } } diff --git a/services/src/workflows/workflow.rs b/services/src/workflows/workflow.rs index 06c529fe5..cce9e9713 100644 --- a/services/src/workflows/workflow.rs +++ b/services/src/workflows/workflow.rs @@ -23,6 +23,15 @@ pub struct Workflow { pub operator: TypedOperator, } +impl PartialEq for Workflow { + fn eq(&self, other: &Self) -> bool { + match (serde_json::to_string(self), serde_json::to_string(other)) { + (Ok(a), Ok(b)) => a == b, + _ => false, + } + } +} + #[cfg(test)] mod tests { use super::*; From 06e53559cc4528f31ee5924dd196b41f9b71a302 Mon Sep 17 00:00:00 2001 From: Michael Mattig Date: Mon, 20 Jun 2022 17:10:18 +0200 Subject: [PATCH 02/21] turning external dataset provider into external layer provider (wip) --- datatypes/src/dataset.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datatypes/src/dataset.rs b/datatypes/src/dataset.rs index ba7c2ba77..9b2caf3c7 100644 --- a/datatypes/src/dataset.rs +++ b/datatypes/src/dataset.rs @@ -1,9 +1,9 @@ use crate::identifier; use serde::{Deserialize, Serialize}; -identifier!(DatasetProviderId); +identifier!(LayerProviderId); -identifier!(InternalDatasetId); +identifier!(InternalDatasetId); // TODO: rename to DatasetId as there are no external datasets anymore identifier!(StagingDatasetId); @@ -20,7 +20,7 @@ pub enum DatasetId { #[derive(Debug, Clone, Hash, Eq, PartialEq, Deserialize, Serialize)] #[serde(rename_all = "camelCase")] pub struct ExternalDatasetId { - pub provider_id: DatasetProviderId, + pub provider_id: LayerProviderId, pub dataset_id: String, } From 1775fa68d3acf093e1b6db980981482379b5cf3d Mon Sep 17 00:00:00 2001 From: Michael Mattig Date: Mon, 20 Jun 2022 17:10:18 +0200 Subject: [PATCH 03/21] turning external dataset provider into external layer provider (wip) --- datatypes/src/dataset.rs | 6 +- services/src/contexts/in_memory.rs | 40 +++- services/src/contexts/mod.rs | 45 ++-- services/src/datasets/add_from_directory.rs | 32 +-- services/src/datasets/external/gfbio.rs | 78 ++++--- services/src/datasets/external/mock.rs | 109 ++++++--- services/src/datasets/external/mod.rs | 18 +- services/src/datasets/in_memory.rs | 76 +------ services/src/datasets/listing.rs | 23 -- services/src/datasets/storage.rs | 108 +-------- services/src/error.rs | 27 ++- services/src/handlers/datasets.rs | 74 +++--- services/src/handlers/ebv.rs | 5 +- services/src/handlers/gfbio.rs | 1 - services/src/handlers/layers.rs | 10 +- services/src/handlers/mod.rs | 8 +- services/src/handlers/workflows.rs | 12 +- services/src/layers/add_from_directory.rs | 12 +- services/src/layers/external.rs | 74 ++++++ services/src/layers/layer.rs | 21 +- services/src/layers/listing.rs | 29 ++- services/src/layers/mod.rs | 1 + services/src/layers/storage.rs | 237 +++++++++++++++----- services/src/server.rs | 20 +- test_data/provider_defs/mock.json | 2 +- 25 files changed, 604 insertions(+), 464 deletions(-) create mode 100644 services/src/layers/external.rs diff --git a/datatypes/src/dataset.rs b/datatypes/src/dataset.rs index ba7c2ba77..9b2caf3c7 100644 --- a/datatypes/src/dataset.rs +++ b/datatypes/src/dataset.rs @@ -1,9 +1,9 @@ use crate::identifier; use serde::{Deserialize, Serialize}; -identifier!(DatasetProviderId); +identifier!(LayerProviderId); -identifier!(InternalDatasetId); +identifier!(InternalDatasetId); // TODO: rename to DatasetId as there are no external datasets anymore identifier!(StagingDatasetId); @@ -20,7 +20,7 @@ pub enum DatasetId { #[derive(Debug, Clone, Hash, Eq, PartialEq, Deserialize, Serialize)] 
#[serde(rename_all = "camelCase")] pub struct ExternalDatasetId { - pub provider_id: DatasetProviderId, + pub provider_id: LayerProviderId, pub dataset_id: String, } diff --git a/services/src/contexts/in_memory.rs b/services/src/contexts/in_memory.rs index 8c8a7eae3..dacc9b67a 100644 --- a/services/src/contexts/in_memory.rs +++ b/services/src/contexts/in_memory.rs @@ -9,7 +9,7 @@ use crate::error::Error; use crate::layers::add_from_directory::{ add_layer_collections_from_directory, add_layers_from_directory, }; -use crate::layers::storage::HashMapLayerDb; +use crate::layers::storage::{HashMapLayerDb, HashMapLayerProviderDb}; use crate::{ datasets::add_from_directory::{add_datasets_from_directory, add_providers_from_directory}, error::Result, @@ -30,6 +30,7 @@ pub struct InMemoryContext { workflow_registry: Arc, dataset_db: Arc, layer_db: Arc, + layer_provider_db: Arc, session: Db, thread_pool: Arc, exe_ctx_tiling_spec: TilingSpecification, @@ -43,6 +44,7 @@ impl TestDefault for InMemoryContext { workflow_registry: Default::default(), dataset_db: Default::default(), layer_db: Default::default(), + layer_provider_db: Default::default(), session: Default::default(), thread_pool: create_rayon_thread_pool(0), exe_ctx_tiling_spec: TestDefault::test_default(), @@ -66,12 +68,15 @@ impl InMemoryContext { let mut dataset_db = HashMapDatasetDb::default(); add_datasets_from_directory(&mut dataset_db, &mut layer_db, dataset_defs_path).await; - add_providers_from_directory(&mut dataset_db, provider_defs_path).await; + + // TODO: load providers from directory + // add_providers_from_directory(&mut dataset_db, provider_defs_path).await; Self { project_db: Default::default(), workflow_registry: Default::default(), layer_db: Arc::new(layer_db), + layer_provider_db: Arc::new(HashMapLayerProviderDb::default()), session: Default::default(), thread_pool: create_rayon_thread_pool(0), exe_ctx_tiling_spec, @@ -89,6 +94,7 @@ impl InMemoryContext { workflow_registry: Default::default(), dataset_db: Default::default(), layer_db: Default::default(), + layer_provider_db: Default::default(), session: Default::default(), thread_pool: create_rayon_thread_pool(0), exe_ctx_tiling_spec, @@ -104,8 +110,10 @@ impl Context for InMemoryContext { type WorkflowRegistry = HashMapRegistry; type DatasetDB = HashMapDatasetDb; type LayerDB = HashMapLayerDb; + type LayerProviderDB = HashMapLayerProviderDb; type QueryContext = QueryContextImpl; - type ExecutionContext = ExecutionContextImpl; + type ExecutionContext = + ExecutionContextImpl; fn project_db(&self) -> Arc { self.project_db.clone() @@ -135,6 +143,13 @@ impl Context for InMemoryContext { &self.layer_db } + fn layer_provider_db(&self) -> Arc { + self.layer_provider_db.clone() + } + fn layer_provider_db_ref(&self) -> &Self::LayerProviderDB { + &self.layer_provider_db + } + fn query_context(&self) -> Result { Ok(QueryContextImpl { chunk_byte_size: self.query_ctx_chunk_size, @@ -143,14 +158,17 @@ impl Context for InMemoryContext { } fn execution_context(&self, session: SimpleSession) -> Result { - Ok( - ExecutionContextImpl::::new( - self.dataset_db.clone(), - self.thread_pool.clone(), - session, - self.exe_ctx_tiling_spec, - ), - ) + Ok(ExecutionContextImpl::< + SimpleSession, + HashMapDatasetDb, + HashMapLayerProviderDb, + >::new( + self.dataset_db.clone(), + self.layer_provider_db.clone(), + self.thread_pool.clone(), + session, + self.exe_ctx_tiling_spec, + )) } async fn session_by_id(&self, session_id: SessionId) -> Result { diff --git a/services/src/contexts/mod.rs 
b/services/src/contexts/mod.rs index fde17f64a..3577756d8 100644 --- a/services/src/contexts/mod.rs +++ b/services/src/contexts/mod.rs @@ -1,5 +1,5 @@ use crate::error::Result; -use crate::layers::storage::LayerDb; +use crate::layers::storage::{LayerDb, LayerProviderDb}; use crate::{projects::ProjectDb, workflows::registry::WorkflowRegistry}; use async_trait::async_trait; use geoengine_datatypes::primitives::{RasterQueryRectangle, VectorQueryRectangle}; @@ -39,6 +39,7 @@ pub trait Context: 'static + Send + Sync + Clone { type WorkflowRegistry: WorkflowRegistry; type DatasetDB: DatasetDb; type LayerDB: LayerDb; + type LayerProviderDB: LayerProviderDb; type QueryContext: QueryContext; type ExecutionContext: ExecutionContext; @@ -54,6 +55,9 @@ pub trait Context: 'static + Send + Sync + Clone { fn layer_db(&self) -> Arc; fn layer_db_ref(&self) -> &Self::LayerDB; + fn layer_provider_db(&self) -> Arc; + fn layer_provider_db_ref(&self) -> &Self::LayerProviderDB; + fn query_context(&self) -> Result; fn execution_context(&self, session: Self::Session) -> Result; @@ -85,30 +89,35 @@ impl QueryContext for QueryContextImpl { } } -pub struct ExecutionContextImpl +pub struct ExecutionContextImpl where D: DatasetDb, + L: LayerProviderDb, S: Session, { dataset_db: Arc, + layer_provider_db: Arc, thread_pool: Arc, session: S, tiling_specification: TilingSpecification, } -impl ExecutionContextImpl +impl ExecutionContextImpl where D: DatasetDb, + L: LayerProviderDb, S: Session, { pub fn new( dataset_db: Arc, + layer_provider_db: Arc, thread_pool: Arc, session: S, tiling_specification: TilingSpecification, ) -> Self { Self { dataset_db, + layer_provider_db, thread_pool, session, tiling_specification, @@ -116,7 +125,7 @@ where } } -impl ExecutionContext for ExecutionContextImpl +impl ExecutionContext for ExecutionContextImpl where D: DatasetDb + SessionMetaDataProvider< @@ -126,6 +135,7 @@ where VectorQueryRectangle, > + SessionMetaDataProvider + SessionMetaDataProvider, + L: LayerProviderDb, S: Session, { fn thread_pool(&self) -> &Arc { @@ -139,9 +149,9 @@ where // TODO: use macro(?) for delegating meta_data function to DatasetDB to avoid redundant code #[async_trait] -impl +impl MetaDataProvider - for ExecutionContextImpl + for ExecutionContextImpl where D: DatasetDb + SessionMetaDataProvider< @@ -150,6 +160,7 @@ where VectorResultDescriptor, VectorQueryRectangle, >, + L: LayerProviderDb, S: Session, { async fn meta_data( @@ -174,8 +185,8 @@ where source: Box::new(e), }), DatasetId::External(external) => { - self.dataset_db - .dataset_provider(&self.session, external.provider_id) + self.layer_provider_db + .layer_provider(external.provider_id) .await .map_err(|e| geoengine_operators::error::Error::DatasetMetaData { source: Box::new(e), @@ -189,11 +200,12 @@ where // TODO: use macro(?) for delegating meta_data function to DatasetDB to avoid redundant code #[async_trait] -impl MetaDataProvider - for ExecutionContextImpl +impl MetaDataProvider + for ExecutionContextImpl where D: DatasetDb + SessionMetaDataProvider, + L: LayerProviderDb, S: Session, { async fn meta_data( @@ -212,8 +224,8 @@ where source: Box::new(e), }), DatasetId::External(external) => { - self.dataset_db - .dataset_provider(&self.session, external.provider_id) + self.layer_provider_db + .layer_provider(external.provider_id) .await .map_err(|e| geoengine_operators::error::Error::DatasetMetaData { source: Box::new(e), @@ -227,11 +239,12 @@ where // TODO: use macro(?) 
for delegating meta_data function to DatasetDB to avoid redundant code #[async_trait] -impl MetaDataProvider - for ExecutionContextImpl +impl MetaDataProvider + for ExecutionContextImpl where D: DatasetDb + SessionMetaDataProvider, + L: LayerProviderDb, S: Session, { async fn meta_data( @@ -250,8 +263,8 @@ where source: Box::new(e), }), DatasetId::External(external) => { - self.dataset_db - .dataset_provider(&self.session, external.provider_id) + self.layer_provider_db + .layer_provider(external.provider_id) .await .map_err(|e| geoengine_operators::error::Error::DatasetMetaData { source: Box::new(e), diff --git a/services/src/datasets/add_from_directory.rs b/services/src/datasets/add_from_directory.rs index 05c97b505..0b7aa262a 100644 --- a/services/src/datasets/add_from_directory.rs +++ b/services/src/datasets/add_from_directory.rs @@ -6,12 +6,14 @@ use std::{ }; use crate::datasets::storage::MetaDataDefinition; -use crate::layers::layer::{AddLayer, AddLayerCollection, LayerCollectionId}; -use crate::layers::storage::LayerDb; +use crate::error::Result; +use crate::layers::external::ExternalLayerProviderDefinition; +use crate::layers::layer::{AddLayer, AddLayerCollection}; +use crate::layers::listing::LayerCollectionId; +use crate::layers::storage::{LayerDb, LayerProviderDb}; use crate::util::user_input::UserInput; use crate::workflows::workflow::Workflow; use crate::{contexts::MockableSession, datasets::storage::DatasetDb}; -use crate::{datasets::storage::ExternalDatasetProviderDefinition, error::Result}; use super::storage::DatasetDefinition; @@ -23,8 +25,7 @@ use geoengine_operators::source::{ }; use log::warn; -const DATASET_LAYER_COLLECTION_ID: LayerCollectionId = - LayerCollectionId::from_u128(0xa762_fc70_a23f_4957_bdb5_a12f_7405_9058); +const DATASET_LAYER_COLLECTION_ID: &str = "82825554-6b41-41e8-91c7-e562162c2a08"; pub async fn add_dataset_layer_collection(layer_db: &mut L) -> Result<()> { let collection = AddLayerCollection { @@ -34,7 +35,10 @@ pub async fn add_dataset_layer_collection(layer_db: &mut L) -> Resul .validated()?; layer_db - .add_collection_with_id(DATASET_LAYER_COLLECTION_ID, collection) + .add_collection_with_id( + &LayerCollectionId(DATASET_LAYER_COLLECTION_ID.to_string()), + collection, + ) .await?; Ok(()) @@ -89,7 +93,10 @@ pub async fn add_dataset_as_layer( let layer = layer_db.add_layer(layer).await?; layer_db - .add_layer_to_collection(layer, DATASET_LAYER_COLLECTION_ID) + .add_layer_to_collection( + &layer, + &LayerCollectionId(DATASET_LAYER_COLLECTION_ID.to_string()), + ) .await?; Ok(()) @@ -156,18 +163,15 @@ pub async fn add_datasets_from_directory, L: } } -pub async fn add_providers_from_directory, S: MockableSession>( - db: &mut D, - file_path: PathBuf, -) { - async fn add_provider_definition_from_dir_entry, S: MockableSession>( +pub async fn add_providers_from_directory(db: &mut D, file_path: PathBuf) { + async fn add_provider_definition_from_dir_entry( db: &mut D, entry: &DirEntry, ) -> Result<()> { - let def: Box = + let def: Box = serde_json::from_reader(BufReader::new(File::open(entry.path())?))?; - db.add_dataset_provider(&S::mock(), def).await?; // TODO: add as system user + db.add_layer_provider(def).await?; // TODO: add as system user Ok(()) } diff --git a/services/src/datasets/external/gfbio.rs b/services/src/datasets/external/gfbio.rs index dc04fa3db..5a23ddd75 100644 --- a/services/src/datasets/external/gfbio.rs +++ b/services/src/datasets/external/gfbio.rs @@ -3,14 +3,12 @@ use std::marker::PhantomData; use 
crate::datasets::listing::{Provenance, ProvenanceOutput}; use crate::error::Error; +use crate::layers::external::{ExternalLayerProvider, ExternalLayerProviderDefinition}; +use crate::layers::layer::{CollectionItem, Layer, LayerCollectionListOptions, LayerListing}; +use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider, LayerId}; +use crate::layers::storage::LayerProviderId; use crate::{datasets::listing::DatasetListOptions, error::Result}; -use crate::{ - datasets::{ - listing::{DatasetListing, ExternalDatasetProvider}, - storage::ExternalDatasetProviderDefinition, - }, - util::user_input::Validated, -}; +use crate::{datasets::listing::DatasetListing, util::user_input::Validated}; use async_trait::async_trait; use bb8_postgres::bb8::{Pool, PooledConnection}; use bb8_postgres::tokio_postgres::{Config, NoTls}; @@ -32,8 +30,8 @@ use geoengine_operators::{ }; use serde::{Deserialize, Serialize}; -pub const GFBIO_PROVIDER_ID: DatasetProviderId = - DatasetProviderId::from_u128(0x907f_9f5b_0304_4a0e_a5ef_28de_62d1_c0f9); +pub const GFBIO_PROVIDER_ID: LayerProviderId = + LayerProviderId::from_u128(0x907f_9f5b_0304_4a0e_a5ef_28de_62d1_c0f9); #[derive(Clone, Debug, Serialize, Deserialize)] struct DatabaseConnectionConfig { @@ -73,8 +71,8 @@ pub struct GfbioDataProviderDefinition { #[typetag::serde] #[async_trait] -impl ExternalDatasetProviderDefinition for GfbioDataProviderDefinition { - async fn initialize(self: Box) -> Result> { +impl ExternalLayerProviderDefinition for GfbioDataProviderDefinition { + async fn initialize(self: Box) -> Result> { Ok(Box::new(GfbioDataProvider::new(self.db_config).await?)) } @@ -169,15 +167,26 @@ impl GfbioDataProvider { } #[async_trait] -impl ExternalDatasetProvider for GfbioDataProvider { - async fn list(&self, _options: Validated) -> Result> { +impl LayerCollectionProvider for GfbioDataProvider { + async fn collection_items( + &self, + collection: LayerCollectionId, + options: Validated, + ) -> Result> { + todo!() + } + + async fn root_collection_items( + &self, + options: Validated, + ) -> Result> { let conn = self.pool.get().await?; let stmt = conn .prepare(&format!( r#" - SELECT surrogate_key, "{title}", "{details}" - FROM {schema}.abcd_datasets;"#, + SELECT surrogate_key, "{title}", "{details}" + FROM {schema}.abcd_datasets;"#, title = self .column_name_to_hash .get("/DataSets/DataSet/Metadata/Description/Representation/Title") @@ -194,32 +203,31 @@ impl ExternalDatasetProvider for GfbioDataProvider { let listings: Vec<_> = rows .into_iter() - .map(|row| DatasetListing { - id: DatasetId::External(ExternalDatasetId { - provider_id: GFBIO_PROVIDER_ID, - dataset_id: row.get::(0).to_string(), - }), - name: row.get(1), - description: row.try_get(2).unwrap_or_else(|_| "".to_owned()), - tags: vec![], - source_operator: "OgrSource".to_owned(), - result_descriptor: TypedResultDescriptor::Vector(VectorResultDescriptor { - data_type: VectorDataType::MultiPoint, - spatial_reference: SpatialReference::epsg_4326().into(), - columns: self - .column_hash_to_name - .iter() - .filter(|(_, name)| name.starts_with("/DataSets/DataSet/Units/Unit/")) - .map(|(_, name)| (name.clone(), FeatureDataType::Text)) - .collect(), - }), - symbology: None, + .map(|row| { + CollectionItem::Layer(LayerListing { + provider: GFBIO_PROVIDER_ID, + layer: row.get::(0).to_string(), + name: row.get(1), + description: row.try_get(2).unwrap_or_else(|_| "".to_owned()), + }) }) .collect(); Ok(listings) } + async fn get_layer(&self, id: LayerId) -> Result { + Ok(Layer { + id, + name: 
todo!(), + description: todo!(), + workflow: todo!(), + symbology: todo!(), + }) +} + +#[async_trait] +impl ExternalLayerProvider for GfbioDataProvider { async fn provenance(&self, dataset: &DatasetId) -> Result { let surrogate_key: i32 = dataset .external() diff --git a/services/src/datasets/external/mock.rs b/services/src/datasets/external/mock.rs index c443a0970..0d905f1a6 100644 --- a/services/src/datasets/external/mock.rs +++ b/services/src/datasets/external/mock.rs @@ -1,15 +1,20 @@ -use crate::datasets::listing::{ExternalDatasetProvider, ProvenanceOutput}; +use std::collections::HashMap; + +use crate::datasets::listing::ProvenanceOutput; +use crate::layers::external::{ExternalLayerProvider, ExternalLayerProviderDefinition}; +use crate::layers::layer::{CollectionItem, Layer, LayerCollectionListOptions, LayerListing}; +use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider, LayerId}; use crate::{datasets::listing::DatasetListOptions, error::Result}; use crate::{ datasets::{ listing::DatasetListing, - storage::{DatasetDefinition, ExternalDatasetProviderDefinition, MetaDataDefinition}, + storage::{DatasetDefinition, MetaDataDefinition}, }, error, util::user_input::Validated, }; use async_trait::async_trait; -use geoengine_datatypes::dataset::{DatasetId, DatasetProviderId}; +use geoengine_datatypes::dataset::{DatasetId, LayerProviderId}; use geoengine_datatypes::primitives::{RasterQueryRectangle, VectorQueryRectangle}; use geoengine_operators::{ engine::{MetaData, MetaDataProvider, RasterResultDescriptor, VectorResultDescriptor}, @@ -19,16 +24,17 @@ use geoengine_operators::{ use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, Serialize, Deserialize)] -pub struct MockExternalDataProviderDefinition { - pub id: DatasetProviderId, +pub struct MockExternalLayerProviderDefinition { + pub id: LayerProviderId, pub datasets: Vec, } #[typetag::serde] #[async_trait] -impl ExternalDatasetProviderDefinition for MockExternalDataProviderDefinition { - async fn initialize(self: Box) -> crate::error::Result> { +impl ExternalLayerProviderDefinition for MockExternalLayerProviderDefinition { + async fn initialize(self: Box) -> crate::error::Result> { Ok(Box::new(MockExternalDataProvider { + id: self.id, datasets: self.datasets, })) } @@ -41,53 +47,94 @@ impl ExternalDatasetProviderDefinition for MockExternalDataProviderDefinition { "MockName".to_owned() } - fn id(&self) -> DatasetProviderId { + fn id(&self) -> LayerProviderId { self.id } } #[derive(Debug)] pub struct MockExternalDataProvider { + id: LayerProviderId, datasets: Vec, } +// this provider uses dataset and layer ids interchangably +// TODO: remove this when external dataset ids are reworked +fn layer_id_from_dataset_id(id: &DatasetId) -> LayerId { + match id { + DatasetId::Internal { dataset_id } => LayerId(dataset_id.to_string()), + DatasetId::External(s) => LayerId(s.dataset_id.clone()), + } +} + +#[async_trait] +impl ExternalLayerProvider for MockExternalDataProvider { + async fn provenance(&self, dataset: &DatasetId) -> Result { + Ok(ProvenanceOutput { + dataset: dataset.clone(), + provenance: None, + }) + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } +} + #[async_trait] -impl ExternalDatasetProvider for MockExternalDataProvider { - async fn list(&self, _options: Validated) -> Result> { - // TODO: user right management - // TODO: options +impl LayerCollectionProvider for MockExternalDataProvider { + async fn collection_items( + &self, + _collection: &LayerCollectionId, + _options: Validated, + ) -> 
Result> { + Ok(vec![]) // TODO: throw error instead? + } + + async fn root_collection_items( + &self, + _options: Validated, + ) -> Result> { let mut listing = vec![]; for dataset in &self.datasets { - listing.push(Ok(DatasetListing { - id: dataset + listing.push(Ok(CollectionItem::Layer(LayerListing { + provider: self.id, + layer: dataset .properties .id - .clone() - .ok_or(error::Error::MissingDatasetId)?, + .as_ref() + .ok_or(error::Error::MissingDatasetId) + .map(layer_id_from_dataset_id)?, name: dataset.properties.name.clone(), description: dataset.properties.description.clone(), - tags: vec![], - source_operator: dataset.properties.source_operator.clone(), - result_descriptor: dataset.meta_data.result_descriptor().await?, - symbology: dataset.properties.symbology.clone(), - })); + }))); } Ok(listing .into_iter() - .filter_map(|d: Result| if let Ok(d) = d { Some(d) } else { None }) + .filter_map(|d: Result<_>| if let Ok(d) = d { Some(d) } else { None }) .collect()) } - async fn provenance(&self, dataset: &DatasetId) -> Result { - Ok(ProvenanceOutput { - dataset: dataset.clone(), - provenance: None, - }) - } - - fn as_any(&self) -> &dyn std::any::Any { - self + async fn get_layer(&self, id: &LayerId) -> Result { + self.datasets + .iter() + .find(|d| { + d.properties + .id + .as_ref() + .map(layer_id_from_dataset_id) + .as_ref() + == Some(id) + }) + .ok_or(error::Error::UnknownDatasetId) + .map(|d| Layer { + id: id.clone(), + name: d.properties.name.clone(), + description: d.properties.description.clone(), + workflow: todo!(), + symbology: d.properties.symbology.clone(), + }) } } diff --git a/services/src/datasets/external/mod.rs b/services/src/datasets/external/mod.rs index 42ef79b81..989bf090d 100644 --- a/services/src/datasets/external/mod.rs +++ b/services/src/datasets/external/mod.rs @@ -1,10 +1,10 @@ -#[cfg(feature = "nfdi")] -pub mod gfbio; +// #[cfg(feature = "nfdi")] +// pub mod gfbio; pub mod mock; -#[cfg(feature = "nature40")] -pub mod nature40; -pub mod netcdfcf; -#[cfg(feature = "nfdi")] -pub mod nfdi; -#[cfg(feature = "nfdi")] -pub mod pangaea; +// #[cfg(feature = "nature40")] +// pub mod nature40; +// pub mod netcdfcf; +// #[cfg(feature = "nfdi")] +// pub mod nfdi; +// #[cfg(feature = "nfdi")] +// pub mod pangaea; diff --git a/services/src/datasets/in_memory.rs b/services/src/datasets/in_memory.rs index 050d689df..383ef8002 100644 --- a/services/src/datasets/in_memory.rs +++ b/services/src/datasets/in_memory.rs @@ -1,20 +1,13 @@ use crate::contexts::{Db, SimpleSession}; -use crate::datasets::listing::{ - DatasetListOptions, DatasetListing, DatasetProvider, ExternalDatasetProvider, OrderBy, -}; -use crate::datasets::storage::{ - AddDataset, Dataset, DatasetDb, DatasetProviderDb, DatasetProviderListOptions, - DatasetProviderListing, DatasetStore, DatasetStorer, -}; +use crate::datasets::listing::{DatasetListOptions, DatasetListing, DatasetProvider, OrderBy}; +use crate::datasets::storage::{AddDataset, Dataset, DatasetDb, DatasetStore, DatasetStorer}; use crate::error; use crate::error::Result; use crate::util::user_input::Validated; use async_trait::async_trait; +use geoengine_datatypes::dataset::{DatasetId, InternalDatasetId}; use geoengine_datatypes::primitives::{RasterQueryRectangle, VectorQueryRectangle}; -use geoengine_datatypes::{ - dataset::{DatasetId, DatasetProviderId, InternalDatasetId}, - util::Identifier, -}; +use geoengine_datatypes::util::Identifier; use geoengine_operators::engine::{ MetaData, RasterResultDescriptor, StaticMetaData, TypedResultDescriptor, 
VectorResultDescriptor, }; @@ -27,7 +20,7 @@ use std::collections::HashMap; use super::listing::ProvenanceOutput; use super::{ listing::SessionMetaDataProvider, - storage::{ExternalDatasetProviderDefinition, MetaDataDefinition}, + storage::MetaDataDefinition, upload::{Upload, UploadDb, UploadId}, }; @@ -51,7 +44,6 @@ struct HashMapDatasetDbBackend { Box>, >, uploads: HashMap, - external_providers: HashMap>, } #[derive(Default)] @@ -61,59 +53,6 @@ pub struct HashMapDatasetDb { impl DatasetDb for HashMapDatasetDb {} -#[async_trait] -impl DatasetProviderDb for HashMapDatasetDb { - async fn add_dataset_provider( - &self, - _session: &SimpleSession, - provider: Box, - ) -> Result { - let id = provider.id(); - self.backend - .write() - .await - .external_providers - .insert(id, provider); - Ok(id) - } - - async fn list_dataset_providers( - &self, - _session: &SimpleSession, - _options: Validated, - ) -> Result> { - // TODO: use options - Ok(self - .backend - .read() - .await - .external_providers - .iter() - .map(|(id, d)| DatasetProviderListing { - id: *id, - type_name: d.type_name(), - name: d.name(), - }) - .collect()) - } - - async fn dataset_provider( - &self, - _session: &SimpleSession, - provider: DatasetProviderId, - ) -> Result> { - self.backend - .read() - .await - .external_providers - .get(&provider) - .cloned() - .ok_or(error::Error::UnknownProviderId)? - .initialize() - .await - } -} - #[async_trait] pub trait HashMapStorable: Send + Sync { async fn store(&self, id: InternalDatasetId, db: &HashMapDatasetDb) -> TypedResultDescriptor; @@ -323,10 +262,7 @@ impl DatasetProvider for HashMapDatasetDb { }) .ok_or(error::Error::UnknownDatasetId), DatasetId::External(id) => { - self.dataset_provider(session, id.provider_id) - .await? - .provenance(dataset) - .await + todo!() // TODO: throw error } } } diff --git a/services/src/datasets/listing.rs b/services/src/datasets/listing.rs index ea24fc5c8..48de84fd7 100644 --- a/services/src/datasets/listing.rs +++ b/services/src/datasets/listing.rs @@ -109,29 +109,6 @@ pub trait DatasetProvider: async fn provenance(&self, session: &S, dataset: &DatasetId) -> Result; } -/// A provider of datasets that are not hosted by Geo Engine itself but some external party -// TODO: Authorization: the provider needs to accept credentials for the external data source. -// The credentials should be generic s.t. they are independent of the Session type and -// extensible to new provider types. E.g. a key-value map of strings where the provider -// checks that the necessary information is present and how they are incorporated in -// the requests. 
-#[async_trait] -pub trait ExternalDatasetProvider: Send - + Sync - + std::fmt::Debug - + MetaDataProvider - + MetaDataProvider - + MetaDataProvider -{ - // TODO: authorization, filter, paging - async fn list(&self, options: Validated) -> Result>; - - async fn provenance(&self, dataset: &DatasetId) -> Result; - - /// Propagates `Any`-casting to the underlying provider - fn as_any(&self) -> &dyn std::any::Any; -} - #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Hash)] pub struct ProvenanceOutput { pub dataset: DatasetId, diff --git a/services/src/datasets/storage.rs b/services/src/datasets/storage.rs index d5f1daea2..787a98ab8 100644 --- a/services/src/datasets/storage.rs +++ b/services/src/datasets/storage.rs @@ -1,5 +1,5 @@ use crate::contexts::Session; -use crate::datasets::listing::{DatasetListing, DatasetProvider, ExternalDatasetProvider}; +use crate::datasets::listing::{DatasetListing, DatasetProvider}; use crate::datasets::upload::UploadDb; use crate::datasets::upload::UploadId; use crate::error; @@ -7,7 +7,7 @@ use crate::error::Result; use crate::projects::Symbology; use crate::util::user_input::{UserInput, Validated}; use async_trait::async_trait; -use geoengine_datatypes::dataset::{DatasetId, DatasetProviderId}; +use geoengine_datatypes::dataset::DatasetId; use geoengine_datatypes::primitives::VectorQueryRectangle; use geoengine_operators::engine::MetaData; use geoengine_operators::source::{GdalMetaDataList, GdalMetadataNetCdfCf}; @@ -67,45 +67,6 @@ impl UserInput for AddDataset { } } -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -#[serde(rename_all = "camelCase")] -pub struct DatasetProviderListing { - pub id: DatasetProviderId, - pub type_name: String, - pub name: String, - // more meta data (number of datasets, ...) -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub enum AddDatasetProvider { - AddMockDatasetProvider(AddMockDatasetProvider), - // TODO: geo catalog, wcs, ... 
-} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct AddMockDatasetProvider { - pub datasets: Vec, -} - -impl UserInput for AddDatasetProvider { - fn validate(&self) -> Result<()> { - todo!() - } -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct DatasetProviderListOptions { - // TODO: filter - pub offset: u32, - pub limit: u32, -} - -impl UserInput for DatasetProviderListOptions { - fn validate(&self) -> Result<()> { - // TODO - Ok(()) - } -} #[derive(Deserialize, Serialize, Debug, Clone)] #[serde(rename_all = "camelCase")] pub struct DatasetDefinition { @@ -226,36 +187,10 @@ impl MetaDataDefinition { /// Handling of datasets provided by geo engine internally, staged and by external providers #[async_trait] pub trait DatasetDb: - DatasetStore + DatasetProvider + DatasetProviderDb + UploadDb + Send + Sync + DatasetStore + DatasetProvider + UploadDb + Send + Sync { } -/// Storage and access of external dataset providers -#[async_trait] -pub trait DatasetProviderDb { - /// Add an external dataset `provider` by `user` - // TODO: require special privilege to be able to add external dataset provider and to access external data in general - async fn add_dataset_provider( - &self, - session: &S, - provider: Box, - ) -> Result; - - /// List available providers for `user` filtered by `options` - async fn list_dataset_providers( - &self, - session: &S, - options: Validated, - ) -> Result>; - - /// Get dataset `provider` for `user` - async fn dataset_provider( - &self, - session: &S, - provider: DatasetProviderId, - ) -> Result>; -} - /// Defines the type of meta data a `DatasetDB` is able to store pub trait DatasetStorer: Send + Sync { type StorageType: Send + Sync; @@ -276,40 +211,3 @@ pub trait DatasetStore: DatasetStorer { /// for use in the `add_dataset` method fn wrap_meta_data(&self, meta: MetaDataDefinition) -> Self::StorageType; } - -#[typetag::serde(tag = "type")] -#[async_trait] -pub trait ExternalDatasetProviderDefinition: - CloneableDatasetProviderDefinition + Send + Sync + std::fmt::Debug -{ - /// create the actual provider for data listing and access - async fn initialize(self: Box) -> Result>; - - /// the type of the provider - fn type_name(&self) -> String; - - /// name of the external data source - fn name(&self) -> String; - - /// id of the provider - fn id(&self) -> DatasetProviderId; -} - -pub trait CloneableDatasetProviderDefinition { - fn clone_boxed_provider(&self) -> Box; -} - -impl CloneableDatasetProviderDefinition for T -where - T: 'static + ExternalDatasetProviderDefinition + Clone, -{ - fn clone_boxed_provider(&self) -> Box { - Box::new(self.clone()) - } -} - -impl Clone for Box { - fn clone(&self) -> Box { - self.clone_boxed_provider() - } -} diff --git a/services/src/error.rs b/services/src/error.rs index 75057c212..6d41eea7a 100644 --- a/services/src/error.rs +++ b/services/src/error.rs @@ -1,9 +1,9 @@ -use crate::workflows::workflow::WorkflowId; -use crate::{datasets::external::netcdfcf::NetCdfCf4DProviderError, handlers::ErrorResponse}; +use crate::{handlers::ErrorResponse, workflows::workflow::WorkflowId}; +// use crate::{datasets::external::netcdfcf::NetCdfCf4DProviderError, handlers::ErrorResponse}; use actix_web::http::StatusCode; use actix_web::HttpResponse; use geoengine_datatypes::{ - dataset::{DatasetId, DatasetProviderId}, + dataset::{DatasetId, LayerProviderId}, spatial_reference::SpatialReferenceOption, }; use snafu::prelude::*; @@ -227,7 +227,7 @@ pub enum Error { GfbioMissingAbcdField, ExpectedExternalDatasetId, 
InvalidExternalDatasetId { - provider: DatasetProviderId, + provider: LayerProviderId, }, #[cfg(feature = "nature40")] @@ -314,17 +314,16 @@ pub enum Error { }, MissingNFDIMetaData, - #[snafu(context(false))] - NetCdfCf4DProvider { - source: NetCdfCf4DProviderError, - }, - - #[cfg(feature = "ebv")] - #[snafu(context(false))] - EbvHandler { - source: crate::handlers::ebv::EbvError, - }, + // #[snafu(context(false))] + // NetCdfCf4DProvider { + // source: NetCdfCf4DProviderError, + // }, + // #[cfg(feature = "ebv")] + // #[snafu(context(false))] + // EbvHandler { + // source: crate::handlers::ebv::EbvError, + // }, #[cfg(feature = "nfdi")] #[snafu(display("Could not parse GFBio basket: {}", message,))] GFBioBasketParse { diff --git a/services/src/handlers/datasets.rs b/services/src/handlers/datasets.rs index 51d7c37d5..2b0c9e9b7 100644 --- a/services/src/handlers/datasets.rs +++ b/services/src/handlers/datasets.rs @@ -4,10 +4,11 @@ use std::{ path::Path, }; -use crate::datasets::listing::DatasetProvider; -use crate::datasets::storage::{AddDataset, DatasetStore, MetaDataSuggestion, SuggestMetaData}; -use crate::datasets::storage::{DatasetProviderDb, DatasetProviderListOptions}; use crate::datasets::upload::UploadRootPath; +use crate::datasets::{ + listing::DatasetProvider, + storage::{AddDataset, DatasetStore, MetaDataSuggestion, SuggestMetaData}, +}; use crate::datasets::{ storage::{CreateDataset, MetaDataDefinition}, upload::Upload, @@ -25,7 +26,7 @@ use gdal::{vector::Layer, Dataset}; use gdal::{vector::OGRFieldType, DatasetOptions}; use geoengine_datatypes::{ collections::VectorDataType, - dataset::{DatasetProviderId, InternalDatasetId}, + dataset::InternalDatasetId, primitives::{FeatureDataType, VectorQueryRectangle}, spatial_reference::{SpatialReference, SpatialReferenceOption}, }; @@ -55,41 +56,42 @@ where web::resource("/suggest").route(web::get().to(suggest_meta_data_handler::)), ), ) - .service(web::resource("/providers").route(web::get().to(list_providers_handler::))) + // .service(web::resource("/providers").route(web::get().to(list_providers_handler::))) .service(web::resource("/datasets").route(web::get().to(list_datasets_handler::))) - .service( - web::resource("/datasets/external/{provider}") - .route(web::get().to(list_external_datasets_handler::)), - ); -} - -async fn list_providers_handler( - session: C::Session, - ctx: web::Data, - options: web::Query, -) -> Result { - let list = ctx - .dataset_db_ref() - .list_dataset_providers(&session, options.into_inner().validated()?) - .await?; - Ok(web::Json(list)) + // .service( + // web::resource("/datasets/external/{provider}") + // .route(web::get().to(list_external_datasets_handler::)), + // ) + ; } -async fn list_external_datasets_handler( - provider: web::Path, - session: C::Session, - ctx: web::Data, - options: web::Query, -) -> Result { - let options = options.into_inner().validated()?; - let list = ctx - .dataset_db_ref() - .dataset_provider(&session, provider.into_inner()) - .await? - .list(options) // TODO: authorization - .await?; - Ok(web::Json(list)) -} +// async fn list_providers_handler( +// session: C::Session, +// ctx: web::Data, +// options: web::Query, +// ) -> Result { +// let list = ctx +// .dataset_db_ref() +// .list_dataset_providers(&session, options.into_inner().validated()?) 
+// .await?; +// Ok(web::Json(list)) +// } + +// async fn list_external_datasets_handler( +// provider: web::Path, +// session: C::Session, +// ctx: web::Data, +// options: web::Query, +// ) -> Result { +// let options = options.into_inner().validated()?; +// let list = ctx +// .dataset_db_ref() +// .dataset_provider(&session, provider.into_inner()) +// .await? +// .list(options) // TODO: authorization +// .await?; +// Ok(web::Json(list)) +// } /// Lists available [Datasets](crate::datasets::listing::DatasetListing). /// diff --git a/services/src/handlers/ebv.rs b/services/src/handlers/ebv.rs index 6e68d1e79..9f5898498 100644 --- a/services/src/handlers/ebv.rs +++ b/services/src/handlers/ebv.rs @@ -6,9 +6,8 @@ use crate::contexts::AdminSession; use crate::datasets::external::netcdfcf::{ NetCdfOverview, OverviewGeneration, NETCDF_CF_PROVIDER_ID, }; -use crate::datasets::listing::ExternalDatasetProvider; -use crate::datasets::storage::DatasetProviderDb; use crate::error::Result; +use crate::layers::external::ExternalLayerProvider; use crate::{contexts::Context, datasets::external::netcdfcf::NetCdfCfDataProvider}; use actix_web::{ web::{self, ServiceConfig}, @@ -329,7 +328,7 @@ where T: Send + 'static, F: FnOnce(&NetCdfCfDataProvider) -> Result + Send + 'static, { - let provider: Box = ctx + let provider: Box = ctx .dataset_db_ref() .dataset_provider(session, NETCDF_CF_PROVIDER_ID) .await diff --git a/services/src/handlers/gfbio.rs b/services/src/handlers/gfbio.rs index 413443214..75ca5367e 100644 --- a/services/src/handlers/gfbio.rs +++ b/services/src/handlers/gfbio.rs @@ -17,7 +17,6 @@ use std::collections::HashMap; use crate::datasets::external::gfbio::{GfbioDataProvider, GFBIO_PROVIDER_ID}; use crate::datasets::external::pangaea::PANGAEA_PROVIDER_ID; -use crate::datasets::storage::DatasetProviderDb; use geoengine_datatypes::identifier; use geoengine_operators::util::input::StringOrNumberRange; diff --git a/services/src/handlers/layers.rs b/services/src/handlers/layers.rs index 4d76e354d..a0f0f2a52 100644 --- a/services/src/handlers/layers.rs +++ b/services/src/handlers/layers.rs @@ -1,8 +1,8 @@ use actix_web::{web, FromRequest, Responder}; use crate::error::Result; -use crate::layers::layer::{LayerCollectionId, LayerId}; -use crate::layers::listing::LayerCollectionProvider; + +use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider, LayerId}; use crate::layers::storage::LayerDb; use crate::util::user_input::UserInput; use crate::{contexts::Context, layers::layer::LayerCollectionListOptions}; @@ -13,7 +13,9 @@ where C::Session: FromRequest, { cfg.service(web::resource("/layers").route(web::get().to(list_root_collections_handler::))) + // TODO: add provider as param .service(web::resource("/layers/{id}").route(web::get().to(list_collection_handler::))) + // TODO: add provider as param .service(web::resource("/layer/{id}").route(web::get().to(layer_handler::))); } @@ -36,7 +38,7 @@ async fn list_collection_handler( ) -> Result { let collection = ctx .layer_db_ref() - .collection_items(id.into_inner(), options.into_inner().validated()?) + .collection_items(&id.into_inner(), options.into_inner().validated()?) 
.await?; Ok(web::Json(collection)) @@ -46,7 +48,7 @@ async fn layer_handler( ctx: web::Data, id: web::Path, ) -> Result { - let collection = ctx.layer_db_ref().get_layer(id.into_inner()).await?; + let collection = ctx.layer_db_ref().get_layer(&id.into_inner()).await?; Ok(web::Json(collection)) } diff --git a/services/src/handlers/mod.rs b/services/src/handlers/mod.rs index c67c88a9f..9cde7c8f0 100644 --- a/services/src/handlers/mod.rs +++ b/services/src/handlers/mod.rs @@ -10,10 +10,10 @@ use std::fmt; use std::str::FromStr; pub mod datasets; -#[cfg(feature = "ebv")] -pub mod ebv; -#[cfg(feature = "nfdi")] -pub mod gfbio; +// #[cfg(feature = "ebv")] +// pub mod ebv; +// #[cfg(feature = "nfdi")] +// pub mod gfbio; pub mod layers; pub mod plots; pub mod projects; diff --git a/services/src/handlers/workflows.rs b/services/src/handlers/workflows.rs index 33f66b1a8..e7379c087 100755 --- a/services/src/handlers/workflows.rs +++ b/services/src/handlers/workflows.rs @@ -1,6 +1,6 @@ use std::collections::HashSet; -use crate::datasets::listing::DatasetProvider; +use crate::datasets::listing::{DatasetProvider, ProvenanceOutput}; use crate::datasets::storage::{AddDataset, DatasetDefinition, DatasetStore, MetaDataDefinition}; use crate::datasets::upload::{UploadId, UploadRootPath}; use crate::error; @@ -228,10 +228,11 @@ async fn get_workflow_provenance_handler( let datasets = workflow.operator.datasets(); let db = ctx.dataset_db_ref(); + let providers = ctx.layer_provider_db_ref(); let provenance: Vec<_> = datasets .iter() - .map(|id| db.provenance(&session, id)) + .map(|id| resolve_provenance::(&session, &db, &providers, &id)) .collect(); let provenance: Result> = join_all(provenance).await.into_iter().collect(); @@ -242,6 +243,13 @@ async fn get_workflow_provenance_handler( Ok(web::Json(provenance)) } +async fn resolve_provenance(session: &C::Session, datasets: &C::DatasetDB, providers: &C::LayerProviderDB, id: &DatasetId) -> Result { + match id { + DatasetId::Internal { dataset_id } => datasets.provenance(session, id).await, + DatasetId::External(_) => todo!(), + } +} + /// parameter for the dataset from workflow handler (body) #[derive(Clone, Debug, Deserialize, Serialize)] struct RasterDatasetFromWorkflow { diff --git a/services/src/layers/add_from_directory.rs b/services/src/layers/add_from_directory.rs index 33abc00f7..275cade7b 100644 --- a/services/src/layers/add_from_directory.rs +++ b/services/src/layers/add_from_directory.rs @@ -6,10 +6,10 @@ use std::{ path::PathBuf, }; -use crate::error::Result; use crate::layers::layer::{ - AddLayer, AddLayerCollection, LayerCollectionDefinition, LayerCollectionId, LayerDefinition, + AddLayer, AddLayerCollection, LayerCollectionDefinition, LayerDefinition, }; +use crate::{error::Result, layers::listing::LayerCollectionId}; use crate::{layers::storage::LayerDb, util::user_input::UserInput}; use log::{info, warn}; @@ -24,7 +24,7 @@ pub async fn add_layers_from_directory(layer_db: &mut L, file_path: layer_db .add_layer_with_id( - def.id, + &def.id, AddLayer { name: def.name, description: def.description, @@ -81,10 +81,10 @@ pub async fn add_layer_collections_from_directory(db: &mut L, file_p } .validated()?; - db.add_collection_with_id(def.id, collection).await?; + db.add_collection_with_id(&def.id, collection).await?; for layer in &def.layers { - db.add_layer_to_collection(*layer, def.id).await?; + db.add_layer_to_collection(layer, &def.id).await?; } Ok(()) @@ -140,7 +140,7 @@ pub async fn add_layer_collections_from_directory(db: &mut L, file_p for 
(parent, children) in collection_children { for child in children { - let op = db.add_collection_to_parent(child, parent).await; + let op = db.add_collection_to_parent(&child, &parent).await; if let Err(e) = op { warn!("Skipped adding child collection to db: {}", e); diff --git a/services/src/layers/external.rs b/services/src/layers/external.rs new file mode 100644 index 000000000..b9a2557aa --- /dev/null +++ b/services/src/layers/external.rs @@ -0,0 +1,74 @@ +use async_trait::async_trait; +use geoengine_datatypes::dataset::{DatasetId, LayerProviderId}; +use geoengine_datatypes::identifier; +use geoengine_datatypes::primitives::{RasterQueryRectangle, VectorQueryRectangle}; +use geoengine_operators::engine::{ + MetaDataProvider, RasterResultDescriptor, VectorResultDescriptor, +}; +use geoengine_operators::mock::MockDatasetDataSourceLoadingInfo; +use geoengine_operators::source::{GdalLoadingInfo, OgrSourceDataset}; + +use crate::datasets::listing::ProvenanceOutput; +use crate::error::Result; + +use super::listing::LayerCollectionProvider; + +#[typetag::serde(tag = "type")] +#[async_trait] +pub trait ExternalLayerProviderDefinition: + CloneableLayerProviderDefinition + Send + Sync + std::fmt::Debug +{ + /// create the actual provider for data listing and access + async fn initialize(self: Box) -> Result>; + + /// the type of the provider + fn type_name(&self) -> String; + + /// name of the external data source + fn name(&self) -> String; + + /// id of the provider + fn id(&self) -> LayerProviderId; +} + +pub trait CloneableLayerProviderDefinition { + fn clone_boxed_provider(&self) -> Box; +} + +impl CloneableLayerProviderDefinition for T +where + T: 'static + ExternalLayerProviderDefinition + Clone, +{ + fn clone_boxed_provider(&self) -> Box { + Box::new(self.clone()) + } +} + +impl Clone for Box { + fn clone(&self) -> Box { + self.clone_boxed_provider() + } +} + +/// A provider of datasets that are not hosted by Geo Engine itself but some external party +// TODO: Authorization: the provider needs to accept credentials for the external data source. +// The credentials should be generic s.t. they are independent of the Session type and +// extensible to new provider types. E.g. a key-value map of strings where the provider +// checks that the necessary information is present and how they are incorporated in +// the requests. +#[async_trait] +pub trait ExternalLayerProvider: LayerCollectionProvider + + MetaDataProvider + + MetaDataProvider + + MetaDataProvider + + Send + + Sync + + std::fmt::Debug +{ + // TODO: datasetId should be named something else because there are not external datasets anymore, only external layers + // TODO: rename trait ProvenanceProvider or smth? 
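// Illustrative sketch (editorial addition, not part of this commit): an implementor
// of this trait mainly supplies provenance lookup and `Any`-casting on top of the
// `LayerCollectionProvider` and `MetaDataProvider` super-traits. Roughly, for a
// hypothetical `MyCatalogProvider` (not defined anywhere in this patch):
//
//     #[async_trait]
//     impl ExternalLayerProvider for MyCatalogProvider {
//         async fn provenance(&self, dataset: &DatasetId) -> Result<ProvenanceOutput> {
//             // assumes the `provenance` field of `ProvenanceOutput` is optional and may be empty
//             Ok(ProvenanceOutput { dataset: dataset.clone(), provenance: None })
//         }
//
//         fn as_any(&self) -> &dyn std::any::Any {
//             self
//         }
//     }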
+ async fn provenance(&self, dataset: &DatasetId) -> Result; + + /// Propagates `Any`-casting to the underlying provider + fn as_any(&self) -> &dyn std::any::Any; +} diff --git a/services/src/layers/layer.rs b/services/src/layers/layer.rs index 8c00a7333..09aa269ee 100644 --- a/services/src/layers/layer.rs +++ b/services/src/layers/layer.rs @@ -1,16 +1,28 @@ use serde::{Deserialize, Serialize}; -use geoengine_datatypes::identifier; +use geoengine_datatypes::{dataset::LayerProviderId, identifier}; use crate::{ error::Result, projects::Symbology, util::user_input::UserInput, workflows::workflow::Workflow, }; -identifier!(LayerId); -identifier!(LayerCollectionId); +use super::listing::{LayerCollectionId, LayerId}; + +#[derive(Serialize, Deserialize, Clone)] +struct ProviderLayerId { + provider: LayerProviderId, + id: LayerId, +} + +#[derive(Serialize, Deserialize, Clone)] +struct ProviderLayerCollectionId { + provider: LayerProviderId, + id: LayerCollectionId, +} #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct Layer { + // TODO: add provider, also need a separate struct for import and API output pub id: LayerId, pub name: String, pub description: String, @@ -20,7 +32,8 @@ pub struct Layer { #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct LayerListing { - pub id: LayerId, + pub provider: LayerProviderId, + pub layer: LayerId, pub name: String, pub description: String, } diff --git a/services/src/layers/listing.rs b/services/src/layers/listing.rs index ea62c6595..d3fca2834 100644 --- a/services/src/layers/listing.rs +++ b/services/src/layers/listing.rs @@ -1,15 +1,38 @@ +use std::fmt; + use async_trait::async_trait; +use geoengine_datatypes::dataset::DatasetId; use crate::util::user_input::Validated; use crate::{error::Result, workflows::workflow::Workflow}; -use super::layer::{CollectionItem, LayerCollectionId, LayerCollectionListOptions, LayerId}; +use super::layer::{CollectionItem, Layer, LayerCollectionListOptions}; + +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Hash)] +pub struct LayerId(pub String); + +impl fmt::Display for LayerId { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Hash)] +pub struct LayerCollectionId(pub String); + +impl fmt::Display for LayerCollectionId { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.0) + } +} #[async_trait] pub trait LayerCollectionProvider { async fn collection_items( &self, - collection: LayerCollectionId, + collection: &LayerCollectionId, options: Validated, ) -> Result>; @@ -18,5 +41,5 @@ pub trait LayerCollectionProvider { options: Validated, ) -> Result>; - async fn workflow(&self, layer: LayerId) -> Result; + async fn get_layer(&self, id: &LayerId) -> Result; } diff --git a/services/src/layers/mod.rs b/services/src/layers/mod.rs index 394345023..a8921a6ad 100644 --- a/services/src/layers/mod.rs +++ b/services/src/layers/mod.rs @@ -1,4 +1,5 @@ pub mod add_from_directory; +pub mod external; pub mod layer; pub mod listing; pub mod storage; diff --git a/services/src/layers/storage.rs b/services/src/layers/storage.rs index 46e893062..95f0d2a24 100644 --- a/services/src/layers/storage.rs +++ b/services/src/layers/storage.rs @@ -1,14 +1,18 @@ use std::collections::HashMap; +use super::external::{ExternalLayerProvider, ExternalLayerProviderDefinition}; use super::layer::{ - AddLayer, AddLayerCollection, 
CollectionItem, Layer, LayerCollectionId, - LayerCollectionListOptions, LayerCollectionListing, LayerId, LayerListing, + AddLayer, AddLayerCollection, CollectionItem, Layer, LayerCollectionListOptions, + LayerCollectionListing, LayerListing, }; -use super::listing::LayerCollectionProvider; +use super::listing::{LayerCollectionId, LayerCollectionProvider, LayerId}; use crate::error::Result; +use crate::util::user_input::UserInput; use crate::workflows::workflow::Workflow; use crate::{contexts::Db, util::user_input::Validated}; use async_trait::async_trait; +use geoengine_datatypes::dataset::LayerProviderId; +use geoengine_datatypes::identifier; use geoengine_datatypes::util::Identifier; use snafu::Snafu; @@ -20,17 +24,18 @@ pub enum LayerDbError { NoLayerForGivenId { id: LayerId }, } +pub const INTERNAL_PROVIDER_ID: LayerProviderId = + LayerProviderId::from_u128(0xce5e_84db_cbf9_48a2_9a32_d4b7_cc56_ea74); + #[async_trait] pub trait LayerDb: LayerCollectionProvider + Send + Sync { async fn add_layer(&self, layer: Validated) -> Result; - async fn add_layer_with_id(&self, id: LayerId, layer: Validated) -> Result<()>; - - async fn get_layer(&self, id: LayerId) -> Result; + async fn add_layer_with_id(&self, id: &LayerId, layer: Validated) -> Result<()>; async fn add_layer_to_collection( &self, - layer: LayerId, - collection: LayerCollectionId, + layer: &LayerId, + collection: &LayerCollectionId, ) -> Result<()>; async fn add_collection( @@ -38,17 +43,49 @@ pub trait LayerDb: LayerCollectionProvider + Send + Sync { collection: Validated, ) -> Result; + // TODO: remove once stable names are available async fn add_collection_with_id( &self, - id: LayerCollectionId, + id: &LayerCollectionId, collection: Validated, ) -> Result<()>; async fn add_collection_to_parent( &self, - collection: LayerCollectionId, - parent: LayerCollectionId, + collection: &LayerCollectionId, + parent: &LayerCollectionId, ) -> Result<()>; + + // TODO: share/remove/update +} + +pub struct LayerProviderListing {} + +#[derive(Debug, Clone)] +pub struct LayerProviderListingOptions {} + +impl UserInput for LayerProviderListingOptions { + fn validate(&self) -> Result<()> { + // TODO + Ok(()) + } +} + +#[async_trait] +pub trait LayerProviderDb: Send + Sync + 'static { + async fn add_layer_provider( + &self, + provider: Box, + ) -> Result; + + async fn list_layer_providers( + &self, + options: Validated, + ) -> Result>; + + async fn layer_provider(&self, id: LayerProviderId) -> Result>; + + // TODO: share/remove/update layer providers } #[derive(Default, Debug)] @@ -57,6 +94,7 @@ pub struct HashMapLayerDbBackend { collections: HashMap, collection_children: HashMap>, collection_layers: HashMap>, + external_providers: Db>>, } #[derive(Default, Debug)] @@ -67,51 +105,37 @@ pub struct HashMapLayerDb { #[async_trait] impl LayerDb for HashMapLayerDb { async fn add_layer(&self, layer: Validated) -> Result { - let id = LayerId::new(); + let id = LayerId(uuid::Uuid::new_v4().to_string()); self.backend .write() .await .layers - .insert(id, layer.user_input); + .insert(id.clone(), layer.user_input); Ok(id) } - async fn add_layer_with_id(&self, id: LayerId, layer: Validated) -> Result<()> { + async fn add_layer_with_id(&self, id: &LayerId, layer: Validated) -> Result<()> { self.backend .write() .await .layers - .insert(id, layer.user_input); + .insert(id.clone(), layer.user_input); Ok(()) } - async fn get_layer(&self, id: LayerId) -> Result { - let backend = self.backend.read().await; - - let layer = backend - .layers - .get(&id) - 
.ok_or(LayerDbError::NoLayerForGivenId { id })?; - - Ok(Layer { - id, - name: layer.name.clone(), - description: layer.description.clone(), - workflow: layer.workflow.clone(), - symbology: layer.symbology.clone(), - }) - } - async fn add_layer_to_collection( &self, - layer: LayerId, - collection: LayerCollectionId, + layer: &LayerId, + collection: &LayerCollectionId, ) -> Result<()> { let mut backend = self.backend.write().await; - let layers = backend.collection_layers.entry(collection).or_default(); + let layers = backend + .collection_layers + .entry(collection.clone()) + .or_default(); - if !layers.contains(&layer) { - layers.push(layer); + if !layers.contains(layer) { + layers.push(layer.clone()); } Ok(()) @@ -121,40 +145,43 @@ impl LayerDb for HashMapLayerDb { &self, collection: Validated, ) -> Result { - let id = LayerCollectionId::new(); + let id = LayerCollectionId(uuid::Uuid::new_v4().to_string()); self.backend .write() .await .collections - .insert(id, collection.user_input); + .insert(id.clone(), collection.user_input); Ok(id) } async fn add_collection_with_id( &self, - id: LayerCollectionId, + id: &LayerCollectionId, collection: Validated, ) -> Result<()> { self.backend .write() .await .collections - .insert(id, collection.user_input); + .insert(id.clone(), collection.user_input); Ok(()) } async fn add_collection_to_parent( &self, - collection: LayerCollectionId, - parent: LayerCollectionId, + collection: &LayerCollectionId, + parent: &LayerCollectionId, ) -> Result<()> { let mut backend = self.backend.write().await; - let children = backend.collection_children.entry(parent).or_default(); + let children = backend + .collection_children + .entry(parent.clone()) + .or_default(); - if !children.contains(&collection) { - children.push(collection); + if !children.contains(collection) { + children.push(collection.clone()); } Ok(()) @@ -165,7 +192,7 @@ impl LayerDb for HashMapLayerDb { impl LayerCollectionProvider for HashMapLayerDb { async fn collection_items( &self, - collection: LayerCollectionId, + collection: &LayerCollectionId, options: Validated, ) -> Result> { let options = options.user_input; @@ -185,7 +212,7 @@ impl LayerCollectionProvider for HashMapLayerDb { .get(c) .expect("collections reference existing collections as children"); CollectionItem::Collection(LayerCollectionListing { - id: *c, + id: c.clone(), name: collection.name.clone(), description: collection.description.clone(), }) @@ -205,7 +232,8 @@ impl LayerCollectionProvider for HashMapLayerDb { .expect("collections reference existing layers as items"); CollectionItem::Layer(LayerListing { - id: *l, + provider: INTERNAL_PROVIDER_ID, + layer: l.clone(), name: layer.name.clone(), description: layer.description.clone(), }) @@ -236,7 +264,7 @@ impl LayerCollectionProvider for HashMapLayerDb { } Some(CollectionItem::Collection(LayerCollectionListing { - id: *id, + id: id.clone(), name: c.name.clone(), description: c.description.clone(), })) @@ -252,7 +280,8 @@ impl LayerCollectionProvider for HashMapLayerDb { } Some(CollectionItem::Layer(LayerListing { - id: *id, + provider: INTERNAL_PROVIDER_ID, + layer: id.clone(), name: l.name.clone(), description: l.description.clone(), })) @@ -265,18 +294,107 @@ impl LayerCollectionProvider for HashMapLayerDb { .collect()) } - async fn workflow(&self, layer: LayerId) -> Result { + async fn get_layer(&self, id: &LayerId) -> Result { let backend = self.backend.read().await; let layer = backend .layers - .get(&layer) - .ok_or(LayerDbError::NoLayerForGivenId { id: layer })?; + 
.get(&id) + .ok_or(LayerDbError::NoLayerForGivenId { id: id.clone() })?; + + Ok(Layer { + id: id.clone(), + name: layer.name.clone(), + description: layer.description.clone(), + workflow: layer.workflow.clone(), + symbology: layer.symbology.clone(), + }) + } + + // async fn workflow(&self, layer: LayerId) -> Result { + // let backend = self.backend.read().await; + + // let layer = backend + // .layers + // .get(&layer) + // .ok_or(LayerDbError::NoLayerForGivenId { id: layer })?; - Ok(layer.workflow.clone()) + // Ok(layer.workflow.clone()) + // } +} + +#[derive(Default)] +pub struct HashMapLayerProviderDb { + external_providers: Db>>, +} + +#[async_trait] +impl LayerProviderDb for HashMapLayerProviderDb { + async fn add_layer_provider( + &self, + provider: Box, + ) -> Result { + let id = LayerProviderId::new(); + + self.external_providers.write().await.insert(id, provider); + + Ok(id) + } + + async fn list_layer_providers( + &self, + options: Validated, + ) -> Result> { + todo!() + } + + async fn layer_provider(&self, id: LayerProviderId) -> Result> { + todo!() } } +// #[async_trait] +// impl LayerCollectionProvider for HashMapLayerDb { +// async fn collection_items( +// &self, +// collection: LayerCollectionId, +// options: Validated, +// ) -> Result> { +// todo!() +// } + +// async fn root_collection_items( +// &self, +// _options: Validated, +// ) -> Result> { +// // TODO: use options + +// // on root level return one collection of every provider +// let backend = self.backend.read().await; + +// let result = [CollectionItem::Collection(LayerCollectionListing { +// id: INTERNAL_PROVIDER_ID.clone(), +// name: "Internal".to_string(), +// description: "Datasets managed by Geo Engine", +// })] +// .into_iter() +// .chain(backend.iter().map(|(id, provider)| { +// CollectionItem::Collection(LayerCollectionListing { +// id: id.clone(), +// name: provider.name(), +// description: provider.type_name(), +// }) +// })) +// .collect(); + +// Ok(result) +// } + +// async fn get_layer(&self, id: LayerId) -> Result { +// todo!() +// } +// } + #[cfg(test)] mod tests { use geoengine_datatypes::primitives::Coordinate2D; @@ -321,7 +439,7 @@ mod tests { .validated()?; let top_c_id = db.add_collection(collection).await?; - db.add_layer_to_collection(l_id, top_c_id).await?; + db.add_layer_to_collection(&l_id, &top_c_id).await?; let collection = AddLayerCollection { name: "empty collection".to_string(), @@ -331,11 +449,11 @@ mod tests { let empty_c_id = db.add_collection(collection).await?; - db.add_collection_to_parent(empty_c_id, top_c_id).await?; + db.add_collection_to_parent(&empty_c_id, &top_c_id).await?; let items = db .collection_items( - top_c_id, + &top_c_id, LayerCollectionListOptions { offset: 0, limit: 20, @@ -353,7 +471,8 @@ mod tests { description: "description".to_string() }), CollectionItem::Layer(LayerListing { - id: l_id, + provider: INTERNAL_PROVIDER_ID, + layer: l_id, name: "layer".to_string(), description: "description".to_string(), }) diff --git a/services/src/server.rs b/services/src/server.rs index 0056f65e9..3d8a2c482 100644 --- a/services/src/server.rs +++ b/services/src/server.rs @@ -113,16 +113,16 @@ where .configure(handlers::wms::init_wms_routes::) .configure(handlers::workflows::init_workflow_routes::); - #[cfg(feature = "ebv")] - { - app = app - .service(web::scope("/ebv").configure(handlers::ebv::init_ebv_routes::(None))); - } - - #[cfg(feature = "nfdi")] - { - app = app.configure(handlers::gfbio::init_gfbio_routes::); - } + // #[cfg(feature = "ebv")] + // { + // app = app + // 
.service(web::scope("/ebv").configure(handlers::ebv::init_ebv_routes::(None))); + // } + + // #[cfg(feature = "nfdi")] + // { + // app = app.configure(handlers::gfbio::init_gfbio_routes::); + // } if version_api { app = app.route("/version", web::get().to(show_version_handler)); } diff --git a/test_data/provider_defs/mock.json b/test_data/provider_defs/mock.json index 01ba231f2..5e4243b27 100644 --- a/test_data/provider_defs/mock.json +++ b/test_data/provider_defs/mock.json @@ -1,5 +1,5 @@ { - "type": "MockExternalDataProviderDefinition", + "type": "MockExternalLayerProviderDefinition", "id": "d0535f1d-27b6-4982-b2f8-b1070f1bf6ee", "datasets": [ { From ab7f92fd5df7e8dab82aac1f26d19bb05ee09402 Mon Sep 17 00:00:00 2001 From: Michael Mattig Date: Wed, 22 Jun 2022 17:14:44 +0200 Subject: [PATCH 04/21] make layer listing and mock provider work again --- services/src/contexts/in_memory.rs | 10 +- services/src/datasets/add_from_directory.rs | 123 +------- services/src/datasets/external/mock.rs | 77 +++-- services/src/datasets/in_memory.rs | 4 +- services/src/datasets/listing.rs | 2 +- services/src/handlers/layers.rs | 100 ++++++- services/src/handlers/workflows.rs | 6 +- services/src/layers/add_from_directory.rs | 6 +- services/src/layers/external.rs | 1 - services/src/layers/layer.rs | 25 +- services/src/layers/listing.rs | 12 +- services/src/layers/storage.rs | 283 +++++++++++------- .../datasets/external/sentinel_s2_l2a_cogs.rs | 2 +- services/src/pro/layers/postgres_layer_db.rs | 1 - 14 files changed, 354 insertions(+), 298 deletions(-) diff --git a/services/src/contexts/in_memory.rs b/services/src/contexts/in_memory.rs index dacc9b67a..f003d5f64 100644 --- a/services/src/contexts/in_memory.rs +++ b/services/src/contexts/in_memory.rs @@ -62,21 +62,21 @@ impl InMemoryContext { exe_ctx_tiling_spec: TilingSpecification, query_ctx_chunk_size: ChunkByteSize, ) -> Self { - let mut layer_db = HashMapLayerDb::default(); + let mut layer_db = HashMapLayerDb::new(); add_layers_from_directory(&mut layer_db, layer_defs_path).await; add_layer_collections_from_directory(&mut layer_db, layer_collection_defs_path).await; let mut dataset_db = HashMapDatasetDb::default(); - add_datasets_from_directory(&mut dataset_db, &mut layer_db, dataset_defs_path).await; + add_datasets_from_directory(&mut dataset_db, dataset_defs_path).await; - // TODO: load providers from directory - // add_providers_from_directory(&mut dataset_db, provider_defs_path).await; + let mut layer_proivder_db = HashMapLayerProviderDb::default(); + add_providers_from_directory(&mut layer_proivder_db, provider_defs_path).await; Self { project_db: Default::default(), workflow_registry: Default::default(), layer_db: Arc::new(layer_db), - layer_provider_db: Arc::new(HashMapLayerProviderDb::default()), + layer_provider_db: Arc::new(layer_proivder_db), session: Default::default(), thread_pool: create_rayon_thread_pool(0), exe_ctx_tiling_spec, diff --git a/services/src/datasets/add_from_directory.rs b/services/src/datasets/add_from_directory.rs index 0b7aa262a..5b81eaa27 100644 --- a/services/src/datasets/add_from_directory.rs +++ b/services/src/datasets/add_from_directory.rs @@ -5,129 +5,33 @@ use std::{ path::PathBuf, }; -use crate::datasets::storage::MetaDataDefinition; use crate::error::Result; use crate::layers::external::ExternalLayerProviderDefinition; -use crate::layers::layer::{AddLayer, AddLayerCollection}; -use crate::layers::listing::LayerCollectionId; -use crate::layers::storage::{LayerDb, LayerProviderDb}; +use 
crate::layers::storage::LayerProviderDb; use crate::util::user_input::UserInput; -use crate::workflows::workflow::Workflow; use crate::{contexts::MockableSession, datasets::storage::DatasetDb}; use super::storage::DatasetDefinition; -use geoengine_datatypes::dataset::DatasetId; -use geoengine_operators::engine::{RasterOperator, TypedOperator, VectorOperator}; -use geoengine_operators::mock::{MockDatasetDataSource, MockDatasetDataSourceParams}; -use geoengine_operators::source::{ - GdalSource, GdalSourceParameters, OgrSource, OgrSourceParameters, -}; use log::warn; -const DATASET_LAYER_COLLECTION_ID: &str = "82825554-6b41-41e8-91c7-e562162c2a08"; - -pub async fn add_dataset_layer_collection(layer_db: &mut L) -> Result<()> { - let collection = AddLayerCollection { - name: "Datasets".to_string(), - description: "Available datasets".to_string(), - } - .validated()?; - - layer_db - .add_collection_with_id( - &LayerCollectionId(DATASET_LAYER_COLLECTION_ID.to_string()), - collection, - ) - .await?; - - Ok(()) -} - -pub async fn add_dataset_as_layer( - def: DatasetDefinition, - dataset: DatasetId, - layer_db: &mut L, -) -> Result<()> { - let workflow = match def.meta_data { - MetaDataDefinition::MockMetaData(_) => Workflow { - operator: TypedOperator::Vector( - MockDatasetDataSource { - params: MockDatasetDataSourceParams { dataset }, - } - .boxed(), - ), - }, - MetaDataDefinition::OgrMetaData(_) => Workflow { - operator: TypedOperator::Vector( - OgrSource { - params: OgrSourceParameters { - dataset, - attribute_projection: None, - attribute_filters: None, - }, - } - .boxed(), - ), - }, - MetaDataDefinition::GdalMetaDataRegular(_) - | MetaDataDefinition::GdalStatic(_) - | MetaDataDefinition::GdalMetadataNetCdfCf(_) - | MetaDataDefinition::GdalMetaDataList(_) => Workflow { - operator: TypedOperator::Raster( - GdalSource { - params: GdalSourceParameters { dataset }, - } - .boxed(), - ), - }, - }; - - let layer = AddLayer { - name: def.properties.name, - description: def.properties.description, - workflow, - symbology: def.properties.symbology, - } - .validated()?; - - let layer = layer_db.add_layer(layer).await?; - layer_db - .add_layer_to_collection( - &layer, - &LayerCollectionId(DATASET_LAYER_COLLECTION_ID.to_string()), - ) - .await?; - - Ok(()) -} - -pub async fn add_datasets_from_directory, L: LayerDb>( +pub async fn add_datasets_from_directory>( dataset_db: &mut D, - layer_db: &mut L, file_path: PathBuf, ) { - async fn add_dataset_definition_from_dir_entry< - S: MockableSession, - D: DatasetDb, - L: LayerDb, - >( + async fn add_dataset_definition_from_dir_entry>( db: &mut D, - layer_db: &mut L, entry: &DirEntry, ) -> Result<()> { let def: DatasetDefinition = serde_json::from_reader(BufReader::new(File::open(entry.path())?))?; - let id = db - .add_dataset( - &S::mock(), // TODO: find suitable way to add public dataset - def.properties.clone().validated()?, - db.wrap_meta_data(def.meta_data.clone()), - ) - .await?; // TODO: add as system user - - add_dataset_as_layer(def, id, layer_db).await?; + db.add_dataset( + &S::mock(), // TODO: find suitable way to add public dataset + def.properties.clone().validated()?, + db.wrap_meta_data(def.meta_data.clone()), + ) + .await?; // TODO: add as system user Ok(()) } @@ -139,16 +43,10 @@ pub async fn add_datasets_from_directory, L: } let dir = dir.expect("checked"); - add_dataset_layer_collection(layer_db) - .await - .expect("Adding dataset layer collection must work"); - for entry in dir { match entry { Ok(entry) if entry.path().extension() == 
Some(OsStr::new("json")) => { - if let Err(e) = - add_dataset_definition_from_dir_entry(dataset_db, layer_db, &entry).await - { + if let Err(e) = add_dataset_definition_from_dir_entry(dataset_db, &entry).await { warn!( "Skipped adding dataset from directory entry: {:?} error: {}", entry, @@ -163,6 +61,7 @@ pub async fn add_datasets_from_directory, L: } } +// TODO: move to layers source dir pub async fn add_providers_from_directory(db: &mut D, file_path: PathBuf) { async fn add_provider_definition_from_dir_entry( db: &mut D, diff --git a/services/src/datasets/external/mock.rs b/services/src/datasets/external/mock.rs index 0d905f1a6..5fcc5e2ad 100644 --- a/services/src/datasets/external/mock.rs +++ b/services/src/datasets/external/mock.rs @@ -1,27 +1,30 @@ -use std::collections::HashMap; - use crate::datasets::listing::ProvenanceOutput; +use crate::error::Result; use crate::layers::external::{ExternalLayerProvider, ExternalLayerProviderDefinition}; -use crate::layers::layer::{CollectionItem, Layer, LayerCollectionListOptions, LayerListing}; +use crate::layers::layer::{ + CollectionItem, Layer, LayerCollectionListOptions, LayerListing, ProviderLayerId, +}; use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider, LayerId}; -use crate::{datasets::listing::DatasetListOptions, error::Result}; +use crate::workflows::workflow::Workflow; use crate::{ - datasets::{ - listing::DatasetListing, - storage::{DatasetDefinition, MetaDataDefinition}, - }, + datasets::storage::{DatasetDefinition, MetaDataDefinition}, error, util::user_input::Validated, }; use async_trait::async_trait; use geoengine_datatypes::dataset::{DatasetId, LayerProviderId}; use geoengine_datatypes::primitives::{RasterQueryRectangle, VectorQueryRectangle}; +use geoengine_operators::engine::{TypedOperator, VectorOperator}; +use geoengine_operators::mock::{MockDatasetDataSource, MockDatasetDataSourceParams}; use geoengine_operators::{ engine::{MetaData, MetaDataProvider, RasterResultDescriptor, VectorResultDescriptor}, mock::MockDatasetDataSourceLoadingInfo, source::{GdalLoadingInfo, OgrSourceDataset}, }; use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +pub const ROOT_COLLECTION_ID: Uuid = Uuid::from_u128(0xd630_e723_63d4_440c_9e15_644c_400f_c7c1); #[derive(Clone, Debug, Serialize, Deserialize)] pub struct MockExternalLayerProviderDefinition { @@ -88,23 +91,21 @@ impl LayerCollectionProvider for MockExternalDataProvider { _collection: &LayerCollectionId, _options: Validated, ) -> Result> { - Ok(vec![]) // TODO: throw error instead? 
- } + // TODO: use collection id + // TODO: use options - async fn root_collection_items( - &self, - _options: Validated, - ) -> Result> { let mut listing = vec![]; for dataset in &self.datasets { listing.push(Ok(CollectionItem::Layer(LayerListing { - provider: self.id, - layer: dataset - .properties - .id - .as_ref() - .ok_or(error::Error::MissingDatasetId) - .map(layer_id_from_dataset_id)?, + id: ProviderLayerId { + provider: self.id, + item: dataset + .properties + .id + .as_ref() + .ok_or(error::Error::MissingDatasetId) + .map(layer_id_from_dataset_id)?, + }, name: dataset.properties.name.clone(), description: dataset.properties.description.clone(), }))); @@ -116,6 +117,10 @@ impl LayerCollectionProvider for MockExternalDataProvider { .collect()) } + async fn root_collection_id(&self) -> Result { + Ok(LayerCollectionId(ROOT_COLLECTION_ID.to_string())) + } + async fn get_layer(&self, id: &LayerId) -> Result { self.datasets .iter() @@ -128,12 +133,30 @@ impl LayerCollectionProvider for MockExternalDataProvider { == Some(id) }) .ok_or(error::Error::UnknownDatasetId) - .map(|d| Layer { - id: id.clone(), - name: d.properties.name.clone(), - description: d.properties.description.clone(), - workflow: todo!(), - symbology: d.properties.symbology.clone(), + .and_then(|d| { + Ok(Layer { + id: ProviderLayerId { + provider: self.id, + item: id.clone(), + }, + name: d.properties.name.clone(), + description: d.properties.description.clone(), + workflow: Workflow { + operator: TypedOperator::Vector( + MockDatasetDataSource { + params: MockDatasetDataSourceParams { + dataset: d + .properties + .id + .clone() + .ok_or(error::Error::MissingDatasetId)?, + }, + } + .boxed(), + ), + }, + symbology: d.properties.symbology.clone(), + }) }) } } diff --git a/services/src/datasets/in_memory.rs b/services/src/datasets/in_memory.rs index 383ef8002..f1e25418e 100644 --- a/services/src/datasets/in_memory.rs +++ b/services/src/datasets/in_memory.rs @@ -245,7 +245,7 @@ impl DatasetProvider for HashMapDatasetDb { async fn provenance( &self, - session: &SimpleSession, + _session: &SimpleSession, dataset: &DatasetId, ) -> Result { match dataset { @@ -261,7 +261,7 @@ impl DatasetProvider for HashMapDatasetDb { provenance: d.provenance.clone(), }) .ok_or(error::Error::UnknownDatasetId), - DatasetId::External(id) => { + DatasetId::External(_id) => { todo!() // TODO: throw error } } diff --git a/services/src/datasets/listing.rs b/services/src/datasets/listing.rs index 48de84fd7..7171d10d3 100644 --- a/services/src/datasets/listing.rs +++ b/services/src/datasets/listing.rs @@ -9,7 +9,7 @@ use async_trait::async_trait; use geoengine_datatypes::dataset::DatasetId; use geoengine_datatypes::primitives::{RasterQueryRectangle, VectorQueryRectangle}; use geoengine_operators::engine::{ - MetaData, MetaDataProvider, RasterResultDescriptor, ResultDescriptor, TypedResultDescriptor, + MetaData, RasterResultDescriptor, ResultDescriptor, TypedResultDescriptor, VectorResultDescriptor, }; use geoengine_operators::mock::MockDatasetDataSourceLoadingInfo; diff --git a/services/src/handlers/layers.rs b/services/src/handlers/layers.rs index a0f0f2a52..c3db81a70 100644 --- a/services/src/handlers/layers.rs +++ b/services/src/handlers/layers.rs @@ -1,9 +1,11 @@ use actix_web::{web, FromRequest, Responder}; +use geoengine_datatypes::dataset::LayerProviderId; use crate::error::Result; +use crate::layers::layer::{CollectionItem, LayerCollectionListing, ProviderLayerCollectionId}; use crate::layers::listing::{LayerCollectionId, 
LayerCollectionProvider, LayerId}; -use crate::layers::storage::LayerDb; +use crate::layers::storage::{LayerProviderDb, LayerProviderListingOptions}; use crate::util::user_input::UserInput; use crate::{contexts::Context, layers::layer::LayerCollectionListOptions}; @@ -12,33 +14,86 @@ where C: Context, C::Session: FromRequest, { - cfg.service(web::resource("/layers").route(web::get().to(list_root_collections_handler::))) - // TODO: add provider as param - .service(web::resource("/layers/{id}").route(web::get().to(list_collection_handler::))) - // TODO: add provider as param - .service(web::resource("/layer/{id}").route(web::get().to(layer_handler::))); + cfg.service( + web::resource("/layers/collections") + .route(web::get().to(list_root_collections_handler::)), + ) + .service( + web::resource("/layers/collections/{provider}/{item}") + .route(web::get().to(list_collection_handler::)), + ) + .service(web::resource("/layers/{provider}/{item}").route(web::get().to(layer_handler::))); } async fn list_root_collections_handler( ctx: web::Data, options: web::Query, ) -> Result { - let db = ctx.layer_db_ref(); - let collection = db - .root_collection_items(options.into_inner().validated()?) - .await?; + let mut providers = vec![]; - Ok(web::Json(collection)) + // TODO: add dataset db as provider + + if options.offset == 0 && options.limit > 0 { + providers.push(CollectionItem::Collection(LayerCollectionListing { + id: ProviderLayerCollectionId { + provider: crate::layers::storage::INTERNAL_PROVIDER_ID, + item: LayerCollectionId( + crate::layers::storage::INTERNAL_LAYER_DB_ROOT_COLLECTION_ID.to_string(), + ), + }, + name: "Layers".to_string(), + description: "All available Geo Engine layers".to_string(), + })); + } + + let external = ctx.layer_provider_db_ref(); + + for provider_listing in external + .list_layer_providers( + LayerProviderListingOptions { + offset: options.offset, + limit: options.limit, + } + .validated()?, + ) + .await? + { + // TODO: resolve providers in parallel + let provider = external.layer_provider(provider_listing.id).await?; + providers.push(CollectionItem::Collection(LayerCollectionListing { + id: ProviderLayerCollectionId { + provider: provider_listing.id, + item: provider.root_collection_id().await?, + }, + name: provider_listing.name, + description: provider_listing.description, + })); + } + + Ok(web::Json(providers)) } async fn list_collection_handler( ctx: web::Data, - id: web::Path, + path: web::Path<(LayerProviderId, LayerCollectionId)>, options: web::Query, ) -> Result { + let (provider, item) = path.into_inner(); + + if provider == crate::layers::storage::INTERNAL_PROVIDER_ID { + let collection = ctx + .layer_db_ref() + .collection_items(&item, options.into_inner().validated()?) + .await?; + + return Ok(web::Json(collection)); + } + let collection = ctx - .layer_db_ref() - .collection_items(&id.into_inner(), options.into_inner().validated()?) + .layer_provider_db_ref() + .layer_provider(provider) + .await? + .collection_items(&item, options.into_inner().validated()?) 
.await?; Ok(web::Json(collection)) @@ -46,9 +101,22 @@ async fn list_collection_handler( async fn layer_handler( ctx: web::Data, - id: web::Path, + path: web::Path<(LayerProviderId, LayerId)>, ) -> Result { - let collection = ctx.layer_db_ref().get_layer(&id.into_inner()).await?; + let (provider, item) = path.into_inner(); + + if provider == crate::layers::storage::INTERNAL_PROVIDER_ID { + let collection = ctx.layer_db_ref().get_layer(&item).await?; + + return Ok(web::Json(collection)); + } + + let collection = ctx + .layer_provider_db_ref() + .layer_provider(provider) + .await? + .get_layer(&item) + .await?; Ok(web::Json(collection)) } diff --git a/services/src/handlers/workflows.rs b/services/src/handlers/workflows.rs index e7379c087..4370d7778 100755 --- a/services/src/handlers/workflows.rs +++ b/services/src/handlers/workflows.rs @@ -232,7 +232,7 @@ async fn get_workflow_provenance_handler( let provenance: Vec<_> = datasets .iter() - .map(|id| resolve_provenance::(&session, &db, &providers, &id)) + .map(|id| resolve_provenance::(&session, db, providers, id)) .collect(); let provenance: Result> = join_all(provenance).await.into_iter().collect(); @@ -243,9 +243,9 @@ async fn get_workflow_provenance_handler( Ok(web::Json(provenance)) } -async fn resolve_provenance(session: &C::Session, datasets: &C::DatasetDB, providers: &C::LayerProviderDB, id: &DatasetId) -> Result { +async fn resolve_provenance(session: &C::Session, datasets: &C::DatasetDB, _providers: &C::LayerProviderDB, id: &DatasetId) -> Result { match id { - DatasetId::Internal { dataset_id } => datasets.provenance(session, id).await, + DatasetId::Internal { dataset_id: _ } => datasets.provenance(session, id).await, DatasetId::External(_) => todo!(), } } diff --git a/services/src/layers/add_from_directory.rs b/services/src/layers/add_from_directory.rs index 275cade7b..44c8ae361 100644 --- a/services/src/layers/add_from_directory.rs +++ b/services/src/layers/add_from_directory.rs @@ -22,6 +22,7 @@ pub async fn add_layers_from_directory(layer_db: &mut L, file_path: let def: LayerDefinition = serde_json::from_reader(BufReader::new(File::open(entry.path())?))?; + // TODO: only add layer to root collection that are not contained in any other collection layer_db .add_layer_with_id( &def.id, @@ -32,6 +33,7 @@ pub async fn add_layers_from_directory(layer_db: &mut L, file_path: symbology: def.symbology, } .validated()?, + &layer_db.root_collection_id().await?, ) .await?; @@ -81,7 +83,9 @@ pub async fn add_layer_collections_from_directory(db: &mut L, file_p } .validated()?; - db.add_collection_with_id(&def.id, collection).await?; + // TODO: add only collections that aren't contained in any other collection to the root collection? + db.add_collection_with_id(&def.id, collection, &db.root_collection_id().await?) 
+ .await?; for layer in &def.layers { db.add_layer_to_collection(layer, &def.id).await?; diff --git a/services/src/layers/external.rs b/services/src/layers/external.rs index b9a2557aa..4913fef5d 100644 --- a/services/src/layers/external.rs +++ b/services/src/layers/external.rs @@ -1,6 +1,5 @@ use async_trait::async_trait; use geoengine_datatypes::dataset::{DatasetId, LayerProviderId}; -use geoengine_datatypes::identifier; use geoengine_datatypes::primitives::{RasterQueryRectangle, VectorQueryRectangle}; use geoengine_operators::engine::{ MetaDataProvider, RasterResultDescriptor, VectorResultDescriptor, diff --git a/services/src/layers/layer.rs b/services/src/layers/layer.rs index 09aa269ee..e9d287b59 100644 --- a/services/src/layers/layer.rs +++ b/services/src/layers/layer.rs @@ -1,6 +1,6 @@ use serde::{Deserialize, Serialize}; -use geoengine_datatypes::{dataset::LayerProviderId, identifier}; +use geoengine_datatypes::dataset::LayerProviderId; use crate::{ error::Result, projects::Symbology, util::user_input::UserInput, workflows::workflow::Workflow, @@ -8,22 +8,22 @@ use crate::{ use super::listing::{LayerCollectionId, LayerId}; -#[derive(Serialize, Deserialize, Clone)] -struct ProviderLayerId { - provider: LayerProviderId, - id: LayerId, +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ProviderLayerId { + pub provider: LayerProviderId, + pub item: LayerId, } -#[derive(Serialize, Deserialize, Clone)] -struct ProviderLayerCollectionId { - provider: LayerProviderId, - id: LayerCollectionId, +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct ProviderLayerCollectionId { + pub provider: LayerProviderId, + pub item: LayerCollectionId, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct Layer { // TODO: add provider, also need a separate struct for import and API output - pub id: LayerId, + pub id: ProviderLayerId, pub name: String, pub description: String, pub workflow: Workflow, @@ -32,8 +32,7 @@ pub struct Layer { #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct LayerListing { - pub provider: LayerProviderId, - pub layer: LayerId, + pub id: ProviderLayerId, pub name: String, pub description: String, } @@ -72,7 +71,7 @@ pub struct LayerCollection { #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct LayerCollectionListing { - pub id: LayerCollectionId, + pub id: ProviderLayerCollectionId, pub name: String, pub description: String, } diff --git a/services/src/layers/listing.rs b/services/src/layers/listing.rs index d3fca2834..8a4394f9c 100644 --- a/services/src/layers/listing.rs +++ b/services/src/layers/listing.rs @@ -1,10 +1,9 @@ use std::fmt; use async_trait::async_trait; -use geoengine_datatypes::dataset::DatasetId; +use crate::error::Result; use crate::util::user_input::Validated; -use crate::{error::Result, workflows::workflow::Workflow}; use super::layer::{CollectionItem, Layer, LayerCollectionListOptions}; @@ -29,17 +28,18 @@ impl fmt::Display for LayerCollectionId { } #[async_trait] +/// Listing of layers and layer collections pub trait LayerCollectionProvider { + /// list all the items in the given `collection` async fn collection_items( &self, collection: &LayerCollectionId, options: Validated, ) -> Result>; - async fn root_collection_items( - &self, - options: Validated, - ) -> Result>; + /// get the id of the root collection + async fn root_collection_id(&self) -> Result; + /// get the full contents of the layer with the given `id` async fn get_layer(&self, id: &LayerId) -> 
Result; } diff --git a/services/src/layers/storage.rs b/services/src/layers/storage.rs index 95f0d2a24..f6919a86b 100644 --- a/services/src/layers/storage.rs +++ b/services/src/layers/storage.rs @@ -1,20 +1,21 @@ use std::collections::HashMap; +use std::sync::Arc; use super::external::{ExternalLayerProvider, ExternalLayerProviderDefinition}; use super::layer::{ AddLayer, AddLayerCollection, CollectionItem, Layer, LayerCollectionListOptions, - LayerCollectionListing, LayerListing, + LayerCollectionListing, LayerListing, ProviderLayerCollectionId, ProviderLayerId, }; use super::listing::{LayerCollectionId, LayerCollectionProvider, LayerId}; -use crate::error::Result; +use crate::error::{Error, Result}; use crate::util::user_input::UserInput; -use crate::workflows::workflow::Workflow; use crate::{contexts::Db, util::user_input::Validated}; use async_trait::async_trait; use geoengine_datatypes::dataset::LayerProviderId; -use geoengine_datatypes::identifier; -use geoengine_datatypes::util::Identifier; +use serde::{Deserialize, Serialize}; use snafu::Snafu; +use tokio::sync::RwLock; +use uuid::Uuid; #[derive(Debug, Snafu)] #[snafu(visibility(pub(crate)))] @@ -27,29 +28,53 @@ pub enum LayerDbError { pub const INTERNAL_PROVIDER_ID: LayerProviderId = LayerProviderId::from_u128(0xce5e_84db_cbf9_48a2_9a32_d4b7_cc56_ea74); +pub const INTERNAL_LAYER_DB_ROOT_COLLECTION_ID: Uuid = + Uuid::from_u128(0x0510_2bb3_a855_4a37_8a8a_3002_6a91_fef1); + #[async_trait] +/// Storage for layers and layer collections pub trait LayerDb: LayerCollectionProvider + Send + Sync { - async fn add_layer(&self, layer: Validated) -> Result; - async fn add_layer_with_id(&self, id: &LayerId, layer: Validated) -> Result<()>; + /// add new `layer` to the given `collection` + async fn add_layer( + &self, + layer: Validated, + collection: &LayerCollectionId, + ) -> Result; + /// add new `layer` with fixed `id` to the given `collection` + /// TODO: remove this method and allow stable names instead + async fn add_layer_with_id( + &self, + id: &LayerId, + layer: Validated, + collection: &LayerCollectionId, + ) -> Result<()>; + + /// add existing `layer` to the given `collection` async fn add_layer_to_collection( &self, layer: &LayerId, collection: &LayerCollectionId, ) -> Result<()>; + /// add new `collection` to the given `parent` + // TODO: remove once stable names are available async fn add_collection( &self, collection: Validated, + parent: &LayerCollectionId, ) -> Result; + /// add new `collection` with fixex `id` to the given `parent` // TODO: remove once stable names are available async fn add_collection_with_id( &self, id: &LayerCollectionId, collection: Validated, + parent: &LayerCollectionId, ) -> Result<()>; + /// add existing `collection` to given `parent` async fn add_collection_to_parent( &self, collection: &LayerCollectionId, @@ -59,10 +84,16 @@ pub trait LayerDb: LayerCollectionProvider + Send + Sync { // TODO: share/remove/update } -pub struct LayerProviderListing {} - -#[derive(Debug, Clone)] -pub struct LayerProviderListingOptions {} +pub struct LayerProviderListing { + pub id: LayerProviderId, + pub name: String, + pub description: String, +} +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LayerProviderListingOptions { + pub offset: u32, + pub limit: u32, +} impl UserInput for LayerProviderListingOptions { fn validate(&self) -> Result<()> { @@ -94,32 +125,69 @@ pub struct HashMapLayerDbBackend { collections: HashMap, collection_children: HashMap>, collection_layers: HashMap>, - external_providers: Db>>, 
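// Illustrative sketch (editorial addition, not part of this commit): with this field
// gone, external providers are kept only in the separate `HashMapLayerProviderDb` and
// are reached through the `LayerProviderDb` trait. Assuming `definition` is some boxed
// `ExternalLayerProviderDefinition` (e.g. the `MockExternalLayerProviderDefinition`
// from this patch), usage is roughly:
//
//     let provider_db = HashMapLayerProviderDb::default();
//     let id = provider_db.add_layer_provider(definition).await?;
//     let provider = provider_db.layer_provider(id).await?; // initialized provider
//     let root = provider.root_collection_id().await?;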
} -#[derive(Default, Debug)] +#[derive(Debug)] pub struct HashMapLayerDb { backend: Db, } +impl HashMapLayerDb { + pub fn new() -> Self { + let mut backend = HashMapLayerDbBackend::default(); + + backend.collections.insert( + LayerCollectionId(INTERNAL_LAYER_DB_ROOT_COLLECTION_ID.to_string()), + AddLayerCollection { + name: "LayerDB".to_string(), + description: "Root collection for LayerDB".to_string(), + }, + ); + + Self { + backend: Arc::new(RwLock::new(backend)), + } + } +} + +impl Default for HashMapLayerDb { + fn default() -> Self { + Self::new() + } +} + #[async_trait] impl LayerDb for HashMapLayerDb { - async fn add_layer(&self, layer: Validated) -> Result { + async fn add_layer( + &self, + layer: Validated, + collection: &LayerCollectionId, + ) -> Result { let id = LayerId(uuid::Uuid::new_v4().to_string()); - self.backend - .write() - .await - .layers - .insert(id.clone(), layer.user_input); + + let mut backend = self.backend.write().await; + backend.layers.insert(id.clone(), layer.user_input); + backend + .collection_layers + .entry(collection.clone()) + .or_default() + .push(id.clone()); Ok(id) } - async fn add_layer_with_id(&self, id: &LayerId, layer: Validated) -> Result<()> { - self.backend - .write() - .await - .layers - .insert(id.clone(), layer.user_input); + async fn add_layer_with_id( + &self, + id: &LayerId, + layer: Validated, + collection: &LayerCollectionId, + ) -> Result<()> { + let mut backend = self.backend.write().await; + backend.layers.insert(id.clone(), layer.user_input); + backend + .collection_layers + .entry(collection.clone()) + .or_default() + .push(id.clone()); Ok(()) } @@ -144,14 +212,19 @@ impl LayerDb for HashMapLayerDb { async fn add_collection( &self, collection: Validated, + parent: &LayerCollectionId, ) -> Result { let id = LayerCollectionId(uuid::Uuid::new_v4().to_string()); - self.backend - .write() - .await + let mut backend = self.backend.write().await; + backend .collections .insert(id.clone(), collection.user_input); + backend + .collection_children + .entry(parent.clone()) + .or_default() + .push(id.clone()); Ok(id) } @@ -160,12 +233,18 @@ impl LayerDb for HashMapLayerDb { &self, id: &LayerCollectionId, collection: Validated, + parent: &LayerCollectionId, ) -> Result<()> { - self.backend - .write() - .await + let mut backend = self.backend.write().await; + backend .collections .insert(id.clone(), collection.user_input); + backend + .collection_children + .entry(parent.clone()) + .or_default() + .push(id.clone()); + Ok(()) } @@ -203,7 +282,7 @@ impl LayerCollectionProvider for HashMapLayerDb { let collections = backend .collection_children - .get(&collection) + .get(collection) .unwrap_or(&empty) .iter() .map(|c| { @@ -212,7 +291,10 @@ impl LayerCollectionProvider for HashMapLayerDb { .get(c) .expect("collections reference existing collections as children"); CollectionItem::Collection(LayerCollectionListing { - id: c.clone(), + id: ProviderLayerCollectionId { + provider: INTERNAL_PROVIDER_ID, + item: c.clone(), + }, name: collection.name.clone(), description: collection.description.clone(), }) @@ -222,7 +304,7 @@ impl LayerCollectionProvider for HashMapLayerDb { let layers = backend .collection_layers - .get(&collection) + .get(collection) .unwrap_or(&empty) .iter() .map(|l| { @@ -232,8 +314,10 @@ impl LayerCollectionProvider for HashMapLayerDb { .expect("collections reference existing layers as items"); CollectionItem::Layer(LayerListing { - provider: INTERNAL_PROVIDER_ID, - layer: l.clone(), + id: ProviderLayerId { + provider: 
INTERNAL_PROVIDER_ID, + item: l.clone(), + }, name: layer.name.clone(), description: layer.description.clone(), }) @@ -246,52 +330,10 @@ impl LayerCollectionProvider for HashMapLayerDb { .collect()) } - async fn root_collection_items( - &self, - options: Validated, - ) -> Result> { - let options = options.user_input; - - let backend = self.backend.read().await; - - let collections = backend.collections.iter().filter_map(|(id, c)| { - if backend - .collection_children - .values() - .any(|collections| collections.contains(id)) - { - return None; - } - - Some(CollectionItem::Collection(LayerCollectionListing { - id: id.clone(), - name: c.name.clone(), - description: c.description.clone(), - })) - }); - - let layers = backend.layers.iter().filter_map(|(id, l)| { - if backend - .collection_layers - .values() - .any(|layers| layers.contains(id)) - { - return None; - } - - Some(CollectionItem::Layer(LayerListing { - provider: INTERNAL_PROVIDER_ID, - layer: id.clone(), - name: l.name.clone(), - description: l.description.clone(), - })) - }); - - Ok(collections - .chain(layers) - .skip(options.offset as usize) - .take(options.limit as usize) - .collect()) + async fn root_collection_id(&self) -> Result { + Ok(LayerCollectionId( + INTERNAL_LAYER_DB_ROOT_COLLECTION_ID.to_string(), + )) } async fn get_layer(&self, id: &LayerId) -> Result { @@ -299,28 +341,20 @@ impl LayerCollectionProvider for HashMapLayerDb { let layer = backend .layers - .get(&id) + .get(id) .ok_or(LayerDbError::NoLayerForGivenId { id: id.clone() })?; Ok(Layer { - id: id.clone(), + id: ProviderLayerId { + provider: INTERNAL_PROVIDER_ID, + item: id.clone(), + }, name: layer.name.clone(), description: layer.description.clone(), workflow: layer.workflow.clone(), symbology: layer.symbology.clone(), }) } - - // async fn workflow(&self, layer: LayerId) -> Result { - // let backend = self.backend.read().await; - - // let layer = backend - // .layers - // .get(&layer) - // .ok_or(LayerDbError::NoLayerForGivenId { id: layer })?; - - // Ok(layer.workflow.clone()) - // } } #[derive(Default)] @@ -334,7 +368,7 @@ impl LayerProviderDb for HashMapLayerProviderDb { &self, provider: Box, ) -> Result { - let id = LayerProviderId::new(); + let id = provider.id(); self.external_providers.write().await.insert(id, provider); @@ -345,11 +379,39 @@ impl LayerProviderDb for HashMapLayerProviderDb { &self, options: Validated, ) -> Result> { - todo!() + let options = options.user_input; + + let mut listing = self + .external_providers + .read() + .await + .iter() + .map(|(id, provider)| LayerProviderListing { + id: *id, + name: provider.name(), + description: provider.type_name(), + }) + .collect::>(); + + // TODO: sort option + listing.sort_by(|a, b| a.name.cmp(&b.name)); + + Ok(listing + .into_iter() + .skip(options.offset as usize) + .take(options.limit as usize) + .collect()) } async fn layer_provider(&self, id: LayerProviderId) -> Result> { - todo!() + self.external_providers + .read() + .await + .get(&id) + .cloned() + .ok_or(Error::UnknownProviderId)? 
+ .initialize() + .await } } @@ -403,7 +465,7 @@ mod tests { mock::{MockPointSource, MockPointSourceParams}, }; - use crate::{util::user_input::UserInput, workflows::workflow::WorkflowId}; + use crate::util::user_input::UserInput; use super::*; @@ -411,8 +473,6 @@ mod tests { async fn it_stores_layers() -> Result<()> { let db = HashMapLayerDb::default(); - let _workflow_id = WorkflowId::new(); - let layer = AddLayer { name: "layer".to_string(), description: "description".to_string(), @@ -430,7 +490,9 @@ mod tests { } .validated()?; - let l_id = db.add_layer(layer).await?; + let root_collection = &db.root_collection_id().await?; + + let l_id = db.add_layer(layer, root_collection).await?; let collection = AddLayerCollection { name: "top collection".to_string(), @@ -438,7 +500,7 @@ mod tests { } .validated()?; - let top_c_id = db.add_collection(collection).await?; + let top_c_id = db.add_collection(collection, root_collection).await?; db.add_layer_to_collection(&l_id, &top_c_id).await?; let collection = AddLayerCollection { @@ -447,9 +509,7 @@ mod tests { } .validated()?; - let empty_c_id = db.add_collection(collection).await?; - - db.add_collection_to_parent(&empty_c_id, &top_c_id).await?; + let empty_c_id = db.add_collection(collection, &top_c_id).await?; let items = db .collection_items( @@ -466,13 +526,18 @@ mod tests { items, vec![ CollectionItem::Collection(LayerCollectionListing { - id: empty_c_id, + id: ProviderLayerCollectionId { + provider: INTERNAL_PROVIDER_ID, + item: empty_c_id, + }, name: "empty collection".to_string(), description: "description".to_string() }), CollectionItem::Layer(LayerListing { - provider: INTERNAL_PROVIDER_ID, - layer: l_id, + id: ProviderLayerId { + provider: INTERNAL_PROVIDER_ID, + item: l_id, + }, name: "layer".to_string(), description: "description".to_string(), }) diff --git a/services/src/pro/datasets/external/sentinel_s2_l2a_cogs.rs b/services/src/pro/datasets/external/sentinel_s2_l2a_cogs.rs index 00921310f..f3b0cb1ba 100644 --- a/services/src/pro/datasets/external/sentinel_s2_l2a_cogs.rs +++ b/services/src/pro/datasets/external/sentinel_s2_l2a_cogs.rs @@ -582,7 +582,7 @@ impl MetaDataProvider Date: Thu, 23 Jun 2022 18:32:31 +0200 Subject: [PATCH 05/21] implement layer provider for dataset db --- services/src/datasets/in_memory.rs | 116 ++++++++++++++++++++++++++++- services/src/datasets/storage.rs | 13 +++- services/src/error.rs | 4 + services/src/handlers/layers.rs | 34 ++++++++- services/src/layers/storage.rs | 2 +- 5 files changed, 163 insertions(+), 6 deletions(-) diff --git a/services/src/datasets/in_memory.rs b/services/src/datasets/in_memory.rs index f1e25418e..73ece01ff 100644 --- a/services/src/datasets/in_memory.rs +++ b/services/src/datasets/in_memory.rs @@ -3,21 +3,31 @@ use crate::datasets::listing::{DatasetListOptions, DatasetListing, DatasetProvid use crate::datasets::storage::{AddDataset, Dataset, DatasetDb, DatasetStore, DatasetStorer}; use crate::error; use crate::error::Result; +use crate::layers::layer::{ + CollectionItem, Layer, LayerCollectionListOptions, LayerListing, ProviderLayerId, +}; +use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider, LayerId}; use crate::util::user_input::Validated; +use crate::workflows::workflow::Workflow; use async_trait::async_trait; use geoengine_datatypes::dataset::{DatasetId, InternalDatasetId}; use geoengine_datatypes::primitives::{RasterQueryRectangle, VectorQueryRectangle}; use geoengine_datatypes::util::Identifier; use geoengine_operators::engine::{ - MetaData, 
RasterResultDescriptor, StaticMetaData, TypedResultDescriptor, VectorResultDescriptor, + MetaData, RasterOperator, RasterResultDescriptor, StaticMetaData, TypedOperator, + TypedResultDescriptor, VectorOperator, VectorResultDescriptor, }; +use geoengine_operators::mock::{MockDatasetDataSource, MockDatasetDataSourceParams}; use geoengine_operators::source::{ - GdalLoadingInfo, GdalMetaDataList, GdalMetaDataRegular, GdalMetadataNetCdfCf, OgrSourceDataset, + GdalLoadingInfo, GdalMetaDataList, GdalMetaDataRegular, GdalMetadataNetCdfCf, GdalSource, + GdalSourceParameters, OgrSource, OgrSourceDataset, OgrSourceParameters, }; use geoengine_operators::{mock::MockDatasetDataSourceLoadingInfo, source::GdalMetaDataStatic}; use std::collections::HashMap; +use std::str::FromStr; use super::listing::ProvenanceOutput; +use super::storage::{DATASET_DB_LAYER_PROVIDER_ID, DATASET_DB_ROOT_COLLECTION_ID}; use super::{ listing::SessionMetaDataProvider, storage::MetaDataDefinition, @@ -387,6 +397,108 @@ impl UploadDb for HashMapDatasetDb { } } +#[async_trait] +impl LayerCollectionProvider for HashMapDatasetDb { + async fn collection_items( + &self, + _collection: &LayerCollectionId, + options: Validated, + ) -> Result> { + // TODO: check collection id + + let options = options.user_input; + + let backend = self.backend.read().await; + + let listing = backend + .datasets + .iter() + .skip(options.offset as usize) + .take(options.limit as usize) + .map(|d| { + CollectionItem::Layer(LayerListing { + id: ProviderLayerId { + provider: DATASET_DB_LAYER_PROVIDER_ID, + // use the dataset id also as layer id, TODO: maybe prefix it? + item: LayerId( + d.id.internal() + .expect("Dataset DB contains only internal datasets") + .to_string(), + ), + }, + name: d.name.clone(), + description: d.description.clone(), + }) + }) + .collect(); + + Ok(listing) + } + + async fn root_collection_id(&self) -> Result { + Ok(LayerCollectionId(DATASET_DB_ROOT_COLLECTION_ID.to_string())) + } + + async fn get_layer(&self, id: &LayerId) -> Result { + let dataset_id = DatasetId::Internal { + dataset_id: InternalDatasetId::from_str(&id.0)?, + }; + + let backend = self.backend.read().await; + + let dataset = backend + .datasets + .iter() + .find(|d| d.id == dataset_id) + .ok_or(error::Error::UnknownDatasetId)?; + + let operator = match dataset.source_operator.as_str() { + "OgrSource" => TypedOperator::Vector( + OgrSource { + params: OgrSourceParameters { + dataset: dataset.id.clone(), + attribute_projection: None, + attribute_filters: None, + }, + } + .boxed(), + ), + "GdalSource" => TypedOperator::Raster( + GdalSource { + params: GdalSourceParameters { + dataset: dataset.id.clone(), + }, + } + .boxed(), + ), + "MockDatasetDataSource" => TypedOperator::Vector( + MockDatasetDataSource { + params: MockDatasetDataSourceParams { + dataset: dataset.id.clone(), + }, + } + .boxed(), + ), + s => { + return Err(crate::error::Error::UnknownOperator { + operator: s.to_owned(), + }) + } + }; + + Ok(Layer { + id: ProviderLayerId { + provider: DATASET_DB_LAYER_PROVIDER_ID, + item: id.clone(), + }, + name: dataset.name.clone(), + description: dataset.description.clone(), + workflow: Workflow { operator }, + symbology: dataset.symbology.clone(), + }) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/services/src/datasets/storage.rs b/services/src/datasets/storage.rs index 787a98ab8..dfb602c25 100644 --- a/services/src/datasets/storage.rs +++ b/services/src/datasets/storage.rs @@ -4,10 +4,11 @@ use crate::datasets::upload::UploadDb; use 
crate::datasets::upload::UploadId; use crate::error; use crate::error::Result; +use crate::layers::listing::LayerCollectionProvider; use crate::projects::Symbology; use crate::util::user_input::{UserInput, Validated}; use async_trait::async_trait; -use geoengine_datatypes::dataset::DatasetId; +use geoengine_datatypes::dataset::{DatasetId, LayerProviderId}; use geoengine_datatypes::primitives::VectorQueryRectangle; use geoengine_operators::engine::MetaData; use geoengine_operators::source::{GdalMetaDataList, GdalMetadataNetCdfCf}; @@ -20,9 +21,17 @@ use geoengine_operators::{engine::VectorResultDescriptor, source::GdalMetaDataRe use serde::{Deserialize, Serialize}; use snafu::{ensure, ResultExt}; use std::fmt::Debug; +use uuid::Uuid; use super::listing::Provenance; +// TODO: where to put these constants? +pub const DATASET_DB_LAYER_PROVIDER_ID: LayerProviderId = + LayerProviderId::from_u128(0xac50_ed0d_c9a0_41f8_9ce8_35fc_9e38_299b); + +pub const DATASET_DB_ROOT_COLLECTION_ID: Uuid = + Uuid::from_u128(0x5460_73b6_d535_4205_b601_9967_5c9f_6dd7); + #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(rename_all = "camelCase")] pub struct Dataset { @@ -187,7 +196,7 @@ impl MetaDataDefinition { /// Handling of datasets provided by geo engine internally, staged and by external providers #[async_trait] pub trait DatasetDb: - DatasetStore + DatasetProvider + UploadDb + Send + Sync + DatasetStore + DatasetProvider + UploadDb + LayerCollectionProvider + Send + Sync { } diff --git a/services/src/error.rs b/services/src/error.rs index 6d41eea7a..90a9c714d 100644 --- a/services/src/error.rs +++ b/services/src/error.rs @@ -336,6 +336,10 @@ pub enum Error { LayerDb { source: crate::layers::storage::LayerDbError, }, + + UnknownOperator { + operator: String, + }, } impl actix_web::error::ResponseError for Error { diff --git a/services/src/handlers/layers.rs b/services/src/handlers/layers.rs index c3db81a70..7bd85729b 100644 --- a/services/src/handlers/layers.rs +++ b/services/src/handlers/layers.rs @@ -27,13 +27,28 @@ where async fn list_root_collections_handler( ctx: web::Data, - options: web::Query, + mut options: web::Query, ) -> Result { let mut providers = vec![]; // TODO: add dataset db as provider if options.offset == 0 && options.limit > 0 { + providers.push(CollectionItem::Collection(LayerCollectionListing { + id: ProviderLayerCollectionId { + provider: crate::datasets::storage::DATASET_DB_LAYER_PROVIDER_ID, + item: LayerCollectionId( + crate::datasets::storage::DATASET_DB_ROOT_COLLECTION_ID.to_string(), + ), + }, + name: "Datasets".to_string(), + description: "Basic Layers for all Datasets".to_string(), + })); + + options.limit -= 1; + } + + if options.offset <= 1 && options.limit > 1 { providers.push(CollectionItem::Collection(LayerCollectionListing { id: ProviderLayerCollectionId { provider: crate::layers::storage::INTERNAL_PROVIDER_ID, @@ -44,6 +59,8 @@ async fn list_root_collections_handler( name: "Layers".to_string(), description: "All available Geo Engine layers".to_string(), })); + + options.limit -= 1; } let external = ctx.layer_provider_db_ref(); @@ -80,6 +97,15 @@ async fn list_collection_handler( ) -> Result { let (provider, item) = path.into_inner(); + if provider == crate::datasets::storage::DATASET_DB_LAYER_PROVIDER_ID { + let collection = ctx + .dataset_db_ref() + .collection_items(&item, options.into_inner().validated()?) 
+ .await?; + + return Ok(web::Json(collection)); + } + if provider == crate::layers::storage::INTERNAL_PROVIDER_ID { let collection = ctx .layer_db_ref() @@ -105,6 +131,12 @@ async fn layer_handler( ) -> Result { let (provider, item) = path.into_inner(); + if provider == crate::datasets::storage::DATASET_DB_LAYER_PROVIDER_ID { + let collection = ctx.dataset_db_ref().get_layer(&item).await?; + + return Ok(web::Json(collection)); + } + if provider == crate::layers::storage::INTERNAL_PROVIDER_ID { let collection = ctx.layer_db_ref().get_layer(&item).await?; diff --git a/services/src/layers/storage.rs b/services/src/layers/storage.rs index f6919a86b..9c2ce2cda 100644 --- a/services/src/layers/storage.rs +++ b/services/src/layers/storage.rs @@ -465,7 +465,7 @@ mod tests { mock::{MockPointSource, MockPointSourceParams}, }; - use crate::util::user_input::UserInput; + use crate::{util::user_input::UserInput, workflows::workflow::Workflow}; use super::*; From 062a5d6e714de76d586d2c6de814553e2838f6cd Mon Sep 17 00:00:00 2001 From: Michael Mattig Date: Fri, 24 Jun 2022 15:50:10 +0200 Subject: [PATCH 06/21] layer collections for the pro version --- services/src/error.rs | 4 + services/src/layers/storage.rs | 43 +- services/src/pro/contexts/in_memory.rs | 44 +- services/src/pro/contexts/postgres.rs | 202 +++++--- .../src/pro/datasets/add_from_directory.rs | 23 +- services/src/pro/datasets/external/mod.rs | 2 +- services/src/pro/datasets/in_memory.rs | 191 +++++--- services/src/pro/datasets/mod.rs | 5 +- services/src/pro/datasets/postgres.rs | 288 ++++++----- services/src/pro/datasets/storage.rs | 12 +- services/src/pro/layers/postgres_layer_db.rs | 457 +++++++++++++----- services/src/pro/server.rs | 20 +- 12 files changed, 800 insertions(+), 491 deletions(-) diff --git a/services/src/error.rs b/services/src/error.rs index 90a9c714d..fa5a67bed 100644 --- a/services/src/error.rs +++ b/services/src/error.rs @@ -340,6 +340,10 @@ pub enum Error { UnknownOperator { operator: String, }, + + IdStringMustBeUuid { + found: String, + }, } impl actix_web::error::ResponseError for Error { diff --git a/services/src/layers/storage.rs b/services/src/layers/storage.rs index 9c2ce2cda..9cfceb573 100644 --- a/services/src/layers/storage.rs +++ b/services/src/layers/storage.rs @@ -84,6 +84,7 @@ pub trait LayerDb: LayerCollectionProvider + Send + Sync { // TODO: share/remove/update } +#[derive(Debug, Clone, PartialEq)] pub struct LayerProviderListing { pub id: LayerProviderId, pub name: String, @@ -415,48 +416,6 @@ impl LayerProviderDb for HashMapLayerProviderDb { } } -// #[async_trait] -// impl LayerCollectionProvider for HashMapLayerDb { -// async fn collection_items( -// &self, -// collection: LayerCollectionId, -// options: Validated, -// ) -> Result> { -// todo!() -// } - -// async fn root_collection_items( -// &self, -// _options: Validated, -// ) -> Result> { -// // TODO: use options - -// // on root level return one collection of every provider -// let backend = self.backend.read().await; - -// let result = [CollectionItem::Collection(LayerCollectionListing { -// id: INTERNAL_PROVIDER_ID.clone(), -// name: "Internal".to_string(), -// description: "Datasets managed by Geo Engine", -// })] -// .into_iter() -// .chain(backend.iter().map(|(id, provider)| { -// CollectionItem::Collection(LayerCollectionListing { -// id: id.clone(), -// name: provider.name(), -// description: provider.type_name(), -// }) -// })) -// .collect(); - -// Ok(result) -// } - -// async fn get_layer(&self, id: LayerId) -> Result { -// todo!() 
-// } -// } - #[cfg(test)] mod tests { use geoengine_datatypes::primitives::Coordinate2D; diff --git a/services/src/pro/contexts/in_memory.rs b/services/src/pro/contexts/in_memory.rs index f5b0d39da..8882af625 100644 --- a/services/src/pro/contexts/in_memory.rs +++ b/services/src/pro/contexts/in_memory.rs @@ -3,7 +3,7 @@ use crate::error; use crate::layers::add_from_directory::{ add_layer_collections_from_directory, add_layers_from_directory, }; -use crate::layers::storage::HashMapLayerDb; +use crate::layers::storage::{HashMapLayerDb, HashMapLayerProviderDb}; use crate::pro::contexts::{Context, ProContext}; use crate::pro::datasets::{add_datasets_from_directory, ProHashMapDatasetDb}; use crate::pro::projects::ProHashMapProjectDb; @@ -28,6 +28,7 @@ pub struct ProInMemoryContext { workflow_registry: Arc, dataset_db: Arc, layer_db: Arc, + layer_provider_db: Arc, thread_pool: Arc, exe_ctx_tiling_spec: TilingSpecification, query_ctx_chunk_size: ChunkByteSize, @@ -41,6 +42,7 @@ impl TestDefault for ProInMemoryContext { workflow_registry: Default::default(), dataset_db: Default::default(), layer_db: Default::default(), + layer_provider_db: Default::default(), thread_pool: create_rayon_thread_pool(0), exe_ctx_tiling_spec: TestDefault::test_default(), query_ctx_chunk_size: TestDefault::test_default(), @@ -63,9 +65,11 @@ impl ProInMemoryContext { add_layer_collections_from_directory(&mut layer_db, layer_collection_defs_path).await; let mut dataset_db = ProHashMapDatasetDb::default(); - add_datasets_from_directory(&mut dataset_db, &mut layer_db, dataset_defs_path).await; - add_providers_from_directory(&mut dataset_db, provider_defs_path.clone()).await; - add_providers_from_directory(&mut dataset_db, provider_defs_path.join("pro")).await; + add_datasets_from_directory(&mut dataset_db, dataset_defs_path).await; + + let mut layer_provider_db = HashMapLayerProviderDb::default(); + add_providers_from_directory(&mut layer_provider_db, provider_defs_path.clone()).await; + add_providers_from_directory(&mut layer_provider_db, provider_defs_path.join("pro")).await; Self { user_db: Default::default(), @@ -73,6 +77,7 @@ impl ProInMemoryContext { workflow_registry: Default::default(), dataset_db: Arc::new(dataset_db), layer_db: Arc::new(layer_db), + layer_provider_db: Arc::new(layer_provider_db), thread_pool: create_rayon_thread_pool(0), exe_ctx_tiling_spec, query_ctx_chunk_size, @@ -89,6 +94,7 @@ impl ProInMemoryContext { workflow_registry: Default::default(), dataset_db: Default::default(), layer_db: Default::default(), + layer_provider_db: Default::default(), thread_pool: create_rayon_thread_pool(0), exe_ctx_tiling_spec, query_ctx_chunk_size, @@ -115,8 +121,10 @@ impl Context for ProInMemoryContext { type WorkflowRegistry = HashMapRegistry; type DatasetDB = ProHashMapDatasetDb; type LayerDB = HashMapLayerDb; + type LayerProviderDB = HashMapLayerProviderDb; type QueryContext = QueryContextImpl; - type ExecutionContext = ExecutionContextImpl; + type ExecutionContext = + ExecutionContextImpl; fn project_db(&self) -> Arc { self.project_db.clone() @@ -146,6 +154,13 @@ impl Context for ProInMemoryContext { &self.layer_db } + fn layer_provider_db(&self) -> Arc { + self.layer_provider_db.clone() + } + fn layer_provider_db_ref(&self) -> &Self::LayerProviderDB { + &self.layer_provider_db + } + fn query_context(&self) -> Result { Ok(QueryContextImpl::new( self.query_ctx_chunk_size, @@ -154,14 +169,17 @@ impl Context for ProInMemoryContext { } fn execution_context(&self, session: UserSession) -> Result { - Ok( - 
ExecutionContextImpl::::new( - self.dataset_db.clone(), - self.thread_pool.clone(), - session, - self.exe_ctx_tiling_spec, - ), - ) + Ok(ExecutionContextImpl::< + UserSession, + ProHashMapDatasetDb, + HashMapLayerProviderDb, + >::new( + self.dataset_db.clone(), + self.layer_provider_db.clone(), + self.thread_pool.clone(), + session, + self.exe_ctx_tiling_spec, + )) } async fn session_by_id(&self, session_id: crate::contexts::SessionId) -> Result { diff --git a/services/src/pro/contexts/postgres.rs b/services/src/pro/contexts/postgres.rs index 07f8a5de3..98eef829c 100644 --- a/services/src/pro/contexts/postgres.rs +++ b/services/src/pro/contexts/postgres.rs @@ -3,8 +3,9 @@ use crate::error::{self, Result}; use crate::layers::add_from_directory::{ add_layer_collections_from_directory, add_layers_from_directory, }; +use crate::layers::storage::INTERNAL_LAYER_DB_ROOT_COLLECTION_ID; use crate::pro::datasets::{add_datasets_from_directory, PostgresDatasetDb, Role}; -use crate::pro::layers::postgres_layer_db::PostgresLayerDb; +use crate::pro::layers::postgres_layer_db::{PostgresLayerDb, PostgresLayerProviderDb}; use crate::pro::projects::ProjectPermission; use crate::pro::users::{UserDb, UserId, UserSession}; use crate::pro::workflows::postgres_workflow_registry::PostgresWorkflowRegistry; @@ -48,6 +49,7 @@ where workflow_registry: Arc>, dataset_db: Arc>, layer_db: Arc>, + layer_provider_db: Arc>, thread_pool: Arc, exe_ctx_tiling_spec: TilingSpecification, query_ctx_chunk_size: ChunkByteSize, @@ -78,6 +80,7 @@ where workflow_registry: Arc::new(PostgresWorkflowRegistry::new(pool.clone())), dataset_db: Arc::new(PostgresDatasetDb::new(pool.clone())), layer_db: Arc::new(PostgresLayerDb::new(pool.clone())), + layer_provider_db: Arc::new(PostgresLayerProviderDb::new(pool.clone())), thread_pool: create_rayon_thread_pool(0), exe_ctx_tiling_spec, query_ctx_chunk_size, @@ -109,9 +112,13 @@ where add_layer_collections_from_directory(&mut layer_db, layer_collection_defs_path).await; let mut dataset_db = PostgresDatasetDb::new(pool.clone()); - add_datasets_from_directory(&mut dataset_db, &mut layer_db, dataset_defs_path).await; - add_providers_from_directory(&mut dataset_db, provider_defs_path.clone()).await; - add_providers_from_directory(&mut dataset_db, provider_defs_path.join("pro")).await; + + add_datasets_from_directory(&mut dataset_db, dataset_defs_path).await; + + let mut layer_provider_db = PostgresLayerProviderDb::new(pool.clone()); + + add_providers_from_directory(&mut layer_provider_db, provider_defs_path.clone()).await; + add_providers_from_directory(&mut layer_provider_db, provider_defs_path.join("pro")).await; Ok(Self { user_db: Arc::new(PostgresUserDb::new(pool.clone())), @@ -119,6 +126,7 @@ where workflow_registry: Arc::new(workflow_db), dataset_db: Arc::new(dataset_db), layer_db: Arc::new(layer_db), + layer_provider_db: Arc::new(PostgresLayerProviderDb::new(pool.clone())), thread_pool: create_rayon_thread_pool(0), exe_ctx_tiling_spec, query_ctx_chunk_size, @@ -337,15 +345,6 @@ where provenance json ); - -- TODO: should name be unique (per user)? 
- CREATE TABLE dataset_providers ( - id UUID PRIMARY KEY, - type_name text NOT NULL, - name text NOT NULL, - - definition json NOT NULL - ); - -- TODO: add constraint not null -- TODO: add constaint byte_size >= 0 CREATE TYPE "FileUpload" AS ( @@ -390,6 +389,18 @@ where description text NOT NULL ); + -- insert the root layer collection + INSERT INTO layer_collections ( + id, + name, + description + ) VALUES ( + '{root_layer_collection_id}', + 'Layers', + 'All available Geo Engine layers' + ); + + CREATE TABLE layers ( id UUID PRIMARY KEY, name text NOT NULL, @@ -410,6 +421,15 @@ where PRIMARY KEY (parent, child) ); + -- TODO: should name be unique (per user)? + CREATE TABLE layer_providers ( + id UUID PRIMARY KEY, + type_name text NOT NULL, + name text NOT NULL, + + definition json NOT NULL + ); + -- TODO: uploads, providers permissions -- TODO: relationship between uploads and datasets? @@ -417,7 +437,8 @@ where , system_role_id = Role::system_role_id(), user_role_id = Role::user_role_id(), - anonymous_role_id = Role::anonymous_role_id())) + anonymous_role_id = Role::anonymous_role_id(), + root_layer_collection_id = INTERNAL_LAYER_DB_ROOT_COLLECTION_ID)) .await?; debug!("Updated user database to schema version {}", version + 1); } @@ -493,8 +514,10 @@ where type WorkflowRegistry = PostgresWorkflowRegistry; type DatasetDB = PostgresDatasetDb; type LayerDB = PostgresLayerDb; + type LayerProviderDB = PostgresLayerProviderDb; type QueryContext = QueryContextImpl; - type ExecutionContext = ExecutionContextImpl>; + type ExecutionContext = + ExecutionContextImpl, PostgresLayerProviderDb>; fn project_db(&self) -> Arc { self.project_db.clone() @@ -524,6 +547,13 @@ where &self.layer_db } + fn layer_provider_db(&self) -> Arc { + self.layer_provider_db.clone() + } + fn layer_provider_db_ref(&self) -> &Self::LayerProviderDB { + &self.layer_provider_db + } + fn query_context(&self) -> Result { // TODO: load config only once Ok(QueryContextImpl::new( @@ -533,14 +563,17 @@ where } fn execution_context(&self, session: UserSession) -> Result { - Ok( - ExecutionContextImpl::>::new( - self.dataset_db.clone(), - self.thread_pool.clone(), - session, - self.exe_ctx_tiling_spec, - ), - ) + Ok(ExecutionContextImpl::< + UserSession, + PostgresDatasetDb, + PostgresLayerProviderDb, + >::new( + self.dataset_db.clone(), + self.layer_provider_db.clone(), + self.thread_pool.clone(), + session, + self.exe_ctx_tiling_spec, + )) } async fn session_by_id(&self, session_id: crate::contexts::SessionId) -> Result { @@ -557,22 +590,24 @@ mod tests { use std::str::FromStr; use super::*; - use crate::datasets::external::mock::MockExternalDataProviderDefinition; + use crate::datasets::external::mock::MockExternalLayerProviderDefinition; use crate::datasets::listing::SessionMetaDataProvider; use crate::datasets::listing::{DatasetListOptions, DatasetListing, ProvenanceOutput}; use crate::datasets::listing::{DatasetProvider, Provenance}; use crate::datasets::storage::{ - AddDataset, DatasetDefinition, DatasetProviderDb, DatasetProviderListOptions, - DatasetProviderListing, DatasetStore, MetaDataDefinition, + AddDataset, DatasetDefinition, DatasetStore, MetaDataDefinition, }; use crate::datasets::upload::{FileId, UploadId}; use crate::datasets::upload::{FileUpload, Upload, UploadDb}; use crate::layers::layer::{ AddLayer, AddLayerCollection, CollectionItem, LayerCollectionListOptions, - LayerCollectionListing, LayerListing, + LayerCollectionListing, LayerListing, ProviderLayerCollectionId, ProviderLayerId, }; use 
crate::layers::listing::LayerCollectionProvider; - use crate::layers::storage::LayerDb; + use crate::layers::storage::{ + LayerDb, LayerProviderDb, LayerProviderListing, LayerProviderListingOptions, + INTERNAL_PROVIDER_ID, + }; use crate::pro::datasets::{DatasetPermission, Permission, UpdateDatasetPermissions}; use crate::pro::projects::{LoadVersion, ProProjectDb, UserProjectPermission}; use crate::pro::users::{UserCredentials, UserDb, UserRegistration}; @@ -589,7 +624,7 @@ mod tests { use futures::Future; use geoengine_datatypes::collections::VectorDataType; use geoengine_datatypes::dataset::{ - DatasetId, DatasetProviderId, ExternalDatasetId, InternalDatasetId, + DatasetId, ExternalDatasetId, InternalDatasetId, LayerProviderId, }; use geoengine_datatypes::primitives::{ BoundingBox2D, Coordinate2D, FeatureDataType, SpatialResolution, TimeInterval, @@ -1193,14 +1228,12 @@ mod tests { #[allow(clippy::too_many_lines)] #[tokio::test(flavor = "multi_thread", worker_threads = 1)] - async fn it_persists_dataset_providers() { + async fn it_persists_layer_providers() { with_temp_context(|ctx, _| async move { - let db = ctx.dataset_db_ref(); - - let session = ctx.user_db_ref().anonymous().await.unwrap(); + let db = ctx.layer_provider_db_ref(); let provider_id = - DatasetProviderId::from_str("7b20c8d7-d754-4f8f-ad44-dddd25df22d2").unwrap(); + LayerProviderId::from_str("7b20c8d7-d754-4f8f-ad44-dddd25df22d2").unwrap(); let loading_info = OgrSourceDataset { file_name: PathBuf::from("test.csv"), @@ -1248,7 +1281,7 @@ mod tests { phantom: Default::default(), }); - let provider = MockExternalDataProviderDefinition { + let provider = MockExternalLayerProviderDefinition { id: provider_id, datasets: vec![DatasetDefinition { properties: AddDataset { @@ -1266,14 +1299,11 @@ mod tests { }], }; - db.add_dataset_provider(&session, Box::new(provider)) - .await - .unwrap(); + db.add_layer_provider(Box::new(provider)).await.unwrap(); let providers = db - .list_dataset_providers( - &session, - DatasetProviderListOptions { + .list_layer_providers( + LayerProviderListingOptions { offset: 0, limit: 10, } @@ -1287,20 +1317,19 @@ mod tests { assert_eq!( providers[0], - DatasetProviderListing { + LayerProviderListing { id: provider_id, - type_name: "MockType".to_owned(), name: "MockName".to_owned(), + description: "MockType".to_owned(), } ); - let provider = db.dataset_provider(&session, provider_id).await.unwrap(); + let provider = db.layer_provider(provider_id).await.unwrap(); let datasets = provider - .list( - DatasetListOptions { - filter: None, - order: crate::datasets::listing::OrderBy::NameAsc, + .collection_items( + &provider.root_collection_id().await.unwrap(), + LayerCollectionListOptions { offset: 0, limit: 10, } @@ -1743,6 +1772,8 @@ mod tests { ), }; + let root_collection_id = layer_db.root_collection_id().await.unwrap(); + let layer1 = layer_db .add_layer( AddLayer { @@ -1753,14 +1784,18 @@ mod tests { } .validated() .unwrap(), + &root_collection_id, ) .await .unwrap(); assert_eq!( - layer_db.get_layer(layer1).await.unwrap(), + layer_db.get_layer(&layer1).await.unwrap(), crate::layers::layer::Layer { - id: layer1, + id: ProviderLayerId { + provider: INTERNAL_PROVIDER_ID, + item: layer1.clone(), + }, name: "Layer1".to_string(), description: "Layer 1".to_string(), symbology: None, @@ -1768,28 +1803,30 @@ mod tests { } ); - let layer2 = layer_db - .add_layer( - AddLayer { - name: "Layer2".to_string(), - description: "Layer 2".to_string(), - symbology: None, - workflow: workflow.clone(), + let collection1 = 
layer_db + .add_collection( + AddLayerCollection { + name: "Collection1".to_string(), + description: "Collection 1".to_string(), } .validated() .unwrap(), + &root_collection_id, ) .await .unwrap(); - let collection1 = layer_db - .add_collection( - AddLayerCollection { - name: "Collection1".to_string(), - description: "Collection 1".to_string(), + let layer2 = layer_db + .add_layer( + AddLayer { + name: "Layer2".to_string(), + description: "Layer 2".to_string(), + symbology: None, + workflow: workflow.clone(), } .validated() .unwrap(), + &collection1, ) .await .unwrap(); @@ -1802,22 +1839,19 @@ mod tests { } .validated() .unwrap(), + &collection1, ) .await .unwrap(); layer_db - .add_layer_to_collection(layer1, collection1) - .await - .unwrap(); - - layer_db - .add_collection_to_parent(collection2, collection1) + .add_collection_to_parent(&collection2, &collection1) .await .unwrap(); let root_list = layer_db - .root_collection_items( + .collection_items( + &root_collection_id, LayerCollectionListOptions { offset: 0, limit: 20, @@ -1832,21 +1866,27 @@ mod tests { root_list, vec![ CollectionItem::Collection(LayerCollectionListing { - id: collection1, + id: ProviderLayerCollectionId { + provider: INTERNAL_PROVIDER_ID, + item: collection1.clone(), + }, name: "Collection1".to_string(), description: "Collection 1".to_string(), }), CollectionItem::Layer(LayerListing { - id: layer2, - name: "Layer2".to_string(), - description: "Layer 2".to_string(), + id: ProviderLayerId { + provider: INTERNAL_PROVIDER_ID, + item: layer1, + }, + name: "Layer1".to_string(), + description: "Layer 1".to_string(), }) ] ); let collection1_list = layer_db .collection_items( - collection1, + &collection1, LayerCollectionListOptions { offset: 0, limit: 20, @@ -1861,14 +1901,20 @@ mod tests { collection1_list, vec![ CollectionItem::Collection(LayerCollectionListing { - id: collection2, + id: ProviderLayerCollectionId { + provider: INTERNAL_PROVIDER_ID, + item: collection2, + }, name: "Collection2".to_string(), description: "Collection 2".to_string(), }), CollectionItem::Layer(LayerListing { - id: layer1, - name: "Layer1".to_string(), - description: "Layer 1".to_string(), + id: ProviderLayerId { + provider: INTERNAL_PROVIDER_ID, + item: layer2, + }, + name: "Layer2".to_string(), + description: "Layer 2".to_string(), }) ] ); diff --git a/services/src/pro/datasets/add_from_directory.rs b/services/src/pro/datasets/add_from_directory.rs index 4ba9318f0..06cbcd239 100644 --- a/services/src/pro/datasets/add_from_directory.rs +++ b/services/src/pro/datasets/add_from_directory.rs @@ -4,11 +4,7 @@ use std::{ path::PathBuf, }; -use crate::{ - datasets::add_from_directory::{add_dataset_as_layer, add_dataset_layer_collection}, - error::Result, - layers::storage::LayerDb, -}; +use crate::error::Result; use crate::{ datasets::storage::DatasetDb, pro::datasets::{DatasetPermission, Permission, Role}, @@ -21,20 +17,14 @@ use log::warn; use super::storage::UpdateDatasetPermissions; -pub async fn add_datasets_from_directory< - D: DatasetDb + UpdateDatasetPermissions, - L: LayerDb, ->( +pub async fn add_datasets_from_directory + UpdateDatasetPermissions>( dataset_db: &mut D, - layer_db: &mut L, file_path: PathBuf, ) { async fn add_dataset_definition_from_dir_entry< D: DatasetDb + UpdateDatasetPermissions, - L: LayerDb, >( dataset_db: &mut D, - layer_db: &mut L, entry: &DirEntry, system_session: &UserSession, ) -> Result<()> { @@ -71,8 +61,6 @@ pub async fn add_datasets_from_directory< ) .await?; - add_dataset_as_layer(def, dataset_id, 
layer_db).await?; - Ok(()) } @@ -85,15 +73,10 @@ pub async fn add_datasets_from_directory< } let dir = dir.expect("checked"); - add_dataset_layer_collection(layer_db) - .await - .expect("Adding dataset layer collection must work"); - for entry in dir { if let Ok(entry) = entry { if let Err(e) = - add_dataset_definition_from_dir_entry(dataset_db, layer_db, &entry, &system_session) - .await + add_dataset_definition_from_dir_entry(dataset_db, &entry, &system_session).await { warn!( "Skipped adding dataset from directory entry: {:?} error: {}", diff --git a/services/src/pro/datasets/external/mod.rs b/services/src/pro/datasets/external/mod.rs index 6d6c793a9..84543502c 100644 --- a/services/src/pro/datasets/external/mod.rs +++ b/services/src/pro/datasets/external/mod.rs @@ -1 +1 @@ -pub mod sentinel_s2_l2a_cogs; +// pub mod sentinel_s2_l2a_cogs; diff --git a/services/src/pro/datasets/in_memory.rs b/services/src/pro/datasets/in_memory.rs index 677109b47..58a1e72b7 100644 --- a/services/src/pro/datasets/in_memory.rs +++ b/services/src/pro/datasets/in_memory.rs @@ -1,36 +1,43 @@ -use crate::contexts::{Db, MockableSession}; +use crate::contexts::Db; use crate::datasets::listing::SessionMetaDataProvider; use crate::datasets::listing::{ - DatasetListOptions, DatasetListing, DatasetProvider, ExternalDatasetProvider, OrderBy, - ProvenanceOutput, + DatasetListOptions, DatasetListing, DatasetProvider, OrderBy, ProvenanceOutput, }; use crate::datasets::storage::{ - AddDataset, Dataset, DatasetDb, DatasetProviderDb, DatasetProviderListOptions, - DatasetProviderListing, DatasetStore, DatasetStorer, ExternalDatasetProviderDefinition, - MetaDataDefinition, + AddDataset, Dataset, DatasetDb, DatasetStore, DatasetStorer, MetaDataDefinition, + DATASET_DB_LAYER_PROVIDER_ID, DATASET_DB_ROOT_COLLECTION_ID, }; use crate::datasets::upload::{Upload, UploadDb, UploadId}; use crate::error; use crate::error::Result; +use crate::layers::layer::{ + CollectionItem, Layer, LayerCollectionListOptions, LayerListing, ProviderLayerId, +}; +use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider, LayerId}; use crate::pro::datasets::Permission; use crate::pro::users::{UserId, UserSession}; use crate::util::user_input::Validated; +use crate::workflows::workflow::Workflow; use async_trait::async_trait; use geoengine_datatypes::primitives::{RasterQueryRectangle, VectorQueryRectangle}; use geoengine_datatypes::{ - dataset::{DatasetId, DatasetProviderId, InternalDatasetId}, + dataset::{DatasetId, InternalDatasetId}, util::Identifier, }; use geoengine_operators::engine::{ - MetaData, RasterResultDescriptor, StaticMetaData, TypedResultDescriptor, VectorResultDescriptor, + MetaData, RasterOperator, RasterResultDescriptor, StaticMetaData, TypedOperator, + TypedResultDescriptor, VectorOperator, VectorResultDescriptor, }; +use geoengine_operators::mock::{MockDatasetDataSource, MockDatasetDataSourceParams}; use geoengine_operators::source::{ - GdalLoadingInfo, GdalMetaDataList, GdalMetaDataRegular, GdalMetadataNetCdfCf, OgrSourceDataset, + GdalLoadingInfo, GdalMetaDataList, GdalMetaDataRegular, GdalMetadataNetCdfCf, GdalSource, + GdalSourceParameters, OgrSource, OgrSourceDataset, OgrSourceParameters, }; use geoengine_operators::{mock::MockDatasetDataSourceLoadingInfo, source::GdalMetaDataStatic}; use log::{info, warn}; use snafu::ensure; use std::collections::HashMap; +use std::str::FromStr; use super::storage::UpdateDatasetPermissions; use super::DatasetPermission; @@ -56,7 +63,6 @@ pub struct ProHashMapDatasetDbBackend { 
Box>, >, uploads: HashMap>, - external_providers: HashMap>, } #[derive(Default)] @@ -66,62 +72,6 @@ pub struct ProHashMapDatasetDb { impl DatasetDb for ProHashMapDatasetDb {} -#[async_trait] -impl DatasetProviderDb for ProHashMapDatasetDb { - async fn add_dataset_provider( - &self, - _session: &UserSession, - provider: Box, - ) -> Result { - // TODO: authorization - let id = provider.id(); - self.backend - .write() - .await - .external_providers - .insert(id, provider); - Ok(id) - } - - async fn list_dataset_providers( - &self, - _session: &UserSession, - _options: Validated, - ) -> Result> { - // TODO: authorization - // TODO: use options - Ok(self - .backend - .read() - .await - .external_providers - .iter() - .map(|(id, d)| DatasetProviderListing { - id: *id, - type_name: d.type_name(), - name: d.name(), - }) - .collect()) - } - - async fn dataset_provider( - &self, - _session: &UserSession, - provider: DatasetProviderId, - ) -> Result> { - // TODO: authorization - self.backend - .read() - .await - .external_providers - .get(&provider) - .cloned() - .ok_or(error::Error::UnknownProviderId)? - .initialize() - .await - } -} - #[async_trait] pub trait ProHashMapStorable: Send + Sync { async fn store(&self, id: InternalDatasetId, db: &ProHashMapDatasetDb) @@ -394,11 +344,8 @@ impl DatasetProvider for ProHashMapDatasetDb { }) .ok_or(error::Error::UnknownDatasetId) } - DatasetId::External(id) => { - self.dataset_provider(&UserSession::mock(), id.provider_id) - .await? - .provenance(dataset) - .await + DatasetId::External(_id) => { + todo!() // throw error } } } @@ -591,6 +538,108 @@ impl UploadDb for ProHashMapDatasetDb { } } +#[async_trait] +impl LayerCollectionProvider for ProHashMapDatasetDb { + async fn collection_items( + &self, + _collection: &LayerCollectionId, + options: Validated, + ) -> Result> { + // TODO: check collection id + + let options = options.user_input; + + let backend = self.backend.read().await; + + let listing = backend + .datasets + .iter() + .skip(options.offset as usize) + .take(options.limit as usize) + .map(|(_id, d)| { + CollectionItem::Layer(LayerListing { + id: ProviderLayerId { + provider: DATASET_DB_LAYER_PROVIDER_ID, + // use the dataset id also as layer id, TODO: maybe prefix it? 
+ item: LayerId( + d.id.internal() + .expect("Dataset DB contains only internal datasets") + .to_string(), + ), + }, + name: d.name.clone(), + description: d.description.clone(), + }) + }) + .collect(); + + Ok(listing) + } + + async fn root_collection_id(&self) -> Result { + Ok(LayerCollectionId(DATASET_DB_ROOT_COLLECTION_ID.to_string())) + } + + async fn get_layer(&self, id: &LayerId) -> Result { + let dataset_id = DatasetId::Internal { + dataset_id: InternalDatasetId::from_str(&id.0)?, + }; + + let backend = self.backend.read().await; + + let (_id, dataset) = backend + .datasets + .iter() + .find(|(_id, d)| d.id == dataset_id) + .ok_or(error::Error::UnknownDatasetId)?; + + let operator = match dataset.source_operator.as_str() { + "OgrSource" => TypedOperator::Vector( + OgrSource { + params: OgrSourceParameters { + dataset: dataset.id.clone(), + attribute_projection: None, + attribute_filters: None, + }, + } + .boxed(), + ), + "GdalSource" => TypedOperator::Raster( + GdalSource { + params: GdalSourceParameters { + dataset: dataset.id.clone(), + }, + } + .boxed(), + ), + "MockDatasetDataSource" => TypedOperator::Vector( + MockDatasetDataSource { + params: MockDatasetDataSourceParams { + dataset: dataset.id.clone(), + }, + } + .boxed(), + ), + s => { + return Err(crate::error::Error::UnknownOperator { + operator: s.to_owned(), + }) + } + }; + + Ok(Layer { + id: ProviderLayerId { + provider: DATASET_DB_LAYER_PROVIDER_ID, + item: id.clone(), + }, + name: dataset.name.clone(), + description: dataset.description.clone(), + workflow: Workflow { operator }, + symbology: dataset.symbology.clone(), + }) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/services/src/pro/datasets/mod.rs b/services/src/pro/datasets/mod.rs index 4fb464db3..5e1da91aa 100644 --- a/services/src/pro/datasets/mod.rs +++ b/services/src/pro/datasets/mod.rs @@ -8,7 +8,4 @@ mod storage; pub use add_from_directory::add_datasets_from_directory; pub use in_memory::{ProHashMapDatasetDb, ProHashMapStorable}; pub use postgres::PostgresDatasetDb; -pub use storage::{ - DatasetPermission, DatasetProviderPermission, Permission, Role, RoleId, - UpdateDatasetPermissions, -}; +pub use storage::{DatasetPermission, Permission, Role, RoleId, UpdateDatasetPermissions}; diff --git a/services/src/pro/datasets/postgres.rs b/services/src/pro/datasets/postgres.rs index 27b3078c7..14bb85df0 100644 --- a/services/src/pro/datasets/postgres.rs +++ b/services/src/pro/datasets/postgres.rs @@ -1,20 +1,30 @@ +use std::str::FromStr; + use crate::datasets::listing::ProvenanceOutput; use crate::datasets::listing::SessionMetaDataProvider; +use crate::datasets::storage::DATASET_DB_LAYER_PROVIDER_ID; +use crate::datasets::storage::DATASET_DB_ROOT_COLLECTION_ID; use crate::datasets::storage::{ - AddDataset, Dataset, DatasetDb, DatasetProviderDb, DatasetProviderListOptions, - DatasetProviderListing, DatasetStore, DatasetStorer, ExternalDatasetProviderDefinition, - MetaDataDefinition, + AddDataset, Dataset, DatasetDb, DatasetStore, DatasetStorer, MetaDataDefinition, }; use crate::datasets::upload::FileId; use crate::datasets::upload::{Upload, UploadDb, UploadId}; use crate::error::{self, Error, Result}; +use crate::layers::layer::CollectionItem; +use crate::layers::layer::Layer; +use crate::layers::layer::LayerCollectionListOptions; +use crate::layers::layer::LayerListing; +use crate::layers::layer::ProviderLayerId; +use crate::layers::listing::LayerCollectionId; +use crate::layers::listing::LayerCollectionProvider; +use crate::layers::listing::LayerId; 
use crate::pro::datasets::storage::UpdateDatasetPermissions; use crate::pro::datasets::RoleId; +use crate::projects::Symbology; use crate::util::user_input::Validated; +use crate::workflows::workflow::Workflow; use crate::{ - datasets::listing::{ - DatasetListOptions, DatasetListing, DatasetProvider, ExternalDatasetProvider, - }, + datasets::listing::{DatasetListOptions, DatasetListing, DatasetProvider}, pro::users::UserSession, }; use async_trait::async_trait; @@ -22,18 +32,28 @@ use bb8_postgres::bb8::Pool; use bb8_postgres::tokio_postgres::tls::{MakeTlsConnect, TlsConnect}; use bb8_postgres::tokio_postgres::Socket; use bb8_postgres::PostgresConnectionManager; -use geoengine_datatypes::dataset::{DatasetId, DatasetProviderId, InternalDatasetId}; +use geoengine_datatypes::dataset::{DatasetId, InternalDatasetId}; use geoengine_datatypes::primitives::RasterQueryRectangle; use geoengine_datatypes::primitives::VectorQueryRectangle; use geoengine_datatypes::util::Identifier; +use geoengine_operators::engine::RasterOperator; +use geoengine_operators::engine::TypedOperator; +use geoengine_operators::engine::VectorOperator; use geoengine_operators::engine::{ MetaData, RasterResultDescriptor, StaticMetaData, TypedResultDescriptor, VectorResultDescriptor, }; +use geoengine_operators::mock::MockDatasetDataSource; use geoengine_operators::mock::MockDatasetDataSourceLoadingInfo; +use geoengine_operators::mock::MockDatasetDataSourceParams; +use geoengine_operators::source::GdalSource; +use geoengine_operators::source::GdalSourceParameters; +use geoengine_operators::source::OgrSource; +use geoengine_operators::source::OgrSourceParameters; use geoengine_operators::source::{GdalLoadingInfo, OgrSourceDataset}; use log::info; use postgres_types::{FromSql, ToSql}; use snafu::{ensure, ResultExt}; +use uuid::Uuid; use super::{DatasetPermission, Permission}; @@ -68,111 +88,6 @@ where { } -#[async_trait] -impl DatasetProviderDb for PostgresDatasetDb -where - Tls: MakeTlsConnect + Clone + Send + Sync + 'static, - >::Stream: Send + Sync, - >::TlsConnect: Send, - <>::TlsConnect as TlsConnect>::Future: Send, -{ - async fn add_dataset_provider( - &self, - _session: &UserSession, - provider: Box, - ) -> Result { - // TODO: permissions - let conn = self.conn_pool.get().await?; - - let stmt = conn - .prepare( - " - INSERT INTO dataset_providers ( - id, - type_name, - name, - definition - ) - VALUES ($1, $2, $3, $4)", - ) - .await?; - - let id = provider.id(); - conn.execute( - &stmt, - &[ - &id, - &provider.type_name(), - &provider.name(), - &serde_json::to_value(provider)?, - ], - ) - .await?; - Ok(id) - } - - async fn list_dataset_providers( - &self, - _session: &UserSession, - _options: Validated, - ) -> Result> { - // TODO: options - // TODO: permission - let conn = self.conn_pool.get().await?; - - let stmt = conn - .prepare( - " - SELECT - id, - type_name, - name - FROM - dataset_providers", - ) - .await?; - - let rows = conn.query(&stmt, &[]).await?; - - Ok(rows - .iter() - .map(|row| DatasetProviderListing { - id: row.get(0), - type_name: row.get(1), - name: row.get(2), - }) - .collect()) - } - - async fn dataset_provider( - &self, - _session: &UserSession, - provider: DatasetProviderId, - ) -> Result> { - // TODO: permissions - let conn = self.conn_pool.get().await?; - - let stmt = conn - .prepare( - " - SELECT - definition - FROM - dataset_providers - WHERE - id = $1", - ) - .await?; - - let row = conn.query_one(&stmt, &[&provider]).await?; - - let definition = - serde_json::from_value::>(row.get(0))?; - - 
definition.initialize().await - } -} - #[async_trait] impl DatasetProvider for PostgresDatasetDb where @@ -756,6 +671,155 @@ where } } +#[async_trait] +impl LayerCollectionProvider for PostgresDatasetDb +where + Tls: MakeTlsConnect + Clone + Send + Sync + 'static, + >::Stream: Send + Sync, + >::TlsConnect: Send, + <>::TlsConnect as TlsConnect>::Future: Send, +{ + async fn collection_items( + &self, + _collection: &LayerCollectionId, + options: Validated, + ) -> Result> { + // TODO: check collection id + + let conn = self.conn_pool.get().await?; + + let options = options.user_input; + + // TODO: only list datasets that are accessible to the user as layer + // for now they are listed, but cannot be accessed + let stmt = conn + .prepare( + " + SELECT + concat(d.id, ''), + d.name, + d.description + FROM + datasets d + ORDER BY d.name ASC + LIMIT $1 + OFFSET $2;", + ) + .await?; + + let rows = conn + .query( + &stmt, + &[&i64::from(options.limit), &i64::from(options.offset)], + ) + .await?; + + Ok(rows + .iter() + .map(|row| { + Result::::Ok(CollectionItem::Layer(LayerListing { + id: ProviderLayerId { + provider: DATASET_DB_LAYER_PROVIDER_ID, + item: LayerId(row.get(0)), + }, + name: row.get(1), + description: row.get(2), + })) + }) + .filter_map(Result::ok) + .collect()) + } + + async fn root_collection_id(&self) -> Result { + Ok(LayerCollectionId(DATASET_DB_ROOT_COLLECTION_ID.to_string())) + } + + async fn get_layer(&self, id: &LayerId) -> Result { + let dataset_id = DatasetId::Internal { + dataset_id: InternalDatasetId::from_str(&id.0)?, + }; + + let conn = self.conn_pool.get().await?; + + // TODO: check permission to dataset + // for now they dataset is returned, but cannot be accessed + let stmt = conn + .prepare( + " + SELECT + d.name, + d.description, + d.source_operator, + d.symbology + FROM + datasets d + WHERE id = $1;", + ) + .await?; + + let row = conn + .query_one( + &stmt, + &[ + &Uuid::from_str(&id.0).map_err(|_| error::Error::IdStringMustBeUuid { + found: id.0.clone(), + })?, + ], + ) + .await?; + + let name: String = row.get(0); + let description: String = row.get(1); + let source_operator: String = row.get(2); + let symbology: Option = serde_json::from_value(row.get(3))?; + + let operator = match source_operator.as_str() { + "OgrSource" => TypedOperator::Vector( + OgrSource { + params: OgrSourceParameters { + dataset: dataset_id.clone(), + attribute_projection: None, + attribute_filters: None, + }, + } + .boxed(), + ), + "GdalSource" => TypedOperator::Raster( + GdalSource { + params: GdalSourceParameters { + dataset: dataset_id.clone(), + }, + } + .boxed(), + ), + "MockDatasetDataSource" => TypedOperator::Vector( + MockDatasetDataSource { + params: MockDatasetDataSourceParams { + dataset: dataset_id.clone(), + }, + } + .boxed(), + ), + s => { + return Err(crate::error::Error::UnknownOperator { + operator: s.to_owned(), + }) + } + }; + + Ok(Layer { + id: ProviderLayerId { + provider: DATASET_DB_LAYER_PROVIDER_ID, + item: id.clone(), + }, + name, + description, + workflow: Workflow { operator }, + symbology, + }) + } +} + #[derive(Debug, Clone, ToSql, FromSql)] pub struct FileUpload { pub id: FileId, diff --git a/services/src/pro/datasets/storage.rs b/services/src/pro/datasets/storage.rs index 3521e24e1..8e384f88a 100644 --- a/services/src/pro/datasets/storage.rs +++ b/services/src/pro/datasets/storage.rs @@ -3,10 +3,7 @@ use std::str::FromStr; use crate::error::Result; use crate::pro::users::{UserId, UserSession}; use async_trait::async_trait; -use geoengine_datatypes::{ - 
dataset::{DatasetId, DatasetProviderId}, - identifier, -}; +use geoengine_datatypes::{dataset::DatasetId, identifier}; #[cfg(feature = "postgres")] use postgres_types::{FromSql, ToSql}; use serde::{Deserialize, Serialize}; @@ -54,13 +51,6 @@ pub struct DatasetPermission { pub permission: Permission, } -#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone, Hash)] -pub struct DatasetProviderPermission { - pub role: RoleId, - pub external_provider: DatasetProviderId, - pub permission: Permission, -} - #[async_trait] pub trait UpdateDatasetPermissions { async fn add_dataset_permission( diff --git a/services/src/pro/layers/postgres_layer_db.rs b/services/src/pro/layers/postgres_layer_db.rs index ac49190b3..ea1a44e5d 100644 --- a/services/src/pro/layers/postgres_layer_db.rs +++ b/services/src/pro/layers/postgres_layer_db.rs @@ -1,3 +1,5 @@ +use std::str::FromStr; + use async_trait::async_trait; use bb8_postgres::{ bb8::Pool, @@ -7,20 +9,26 @@ use bb8_postgres::{ }, PostgresConnectionManager, }; +use geoengine_datatypes::dataset::LayerProviderId; use snafu::ResultExt; +use uuid::Uuid; use crate::{ error::{self, Result}, layers::{ + external::{ExternalLayerProvider, ExternalLayerProviderDefinition}, layer::{ - AddLayer, AddLayerCollection, CollectionItem, Layer, LayerCollectionId, - LayerCollectionListOptions, LayerCollectionListing, LayerId, LayerListing, + AddLayer, AddLayerCollection, CollectionItem, Layer, LayerCollectionListOptions, + LayerCollectionListing, LayerListing, ProviderLayerCollectionId, ProviderLayerId, + }, + listing::{LayerCollectionId, LayerCollectionProvider, LayerId}, + storage::{ + LayerDb, LayerDbError, LayerProviderDb, LayerProviderListing, + LayerProviderListingOptions, INTERNAL_LAYER_DB_ROOT_COLLECTION_ID, + INTERNAL_PROVIDER_ID, }, - listing::LayerCollectionProvider, - storage::{LayerDb, LayerDbError}, }, util::user_input::Validated, - workflows::workflow::Workflow, }; pub struct PostgresLayerDb @@ -53,15 +61,26 @@ where >::TlsConnect: Send, <>::TlsConnect as TlsConnect>::Future: Send, { - async fn add_layer(&self, layer: Validated) -> Result { - let conn = self.conn_pool.get().await?; + async fn add_layer( + &self, + layer: Validated, + collection: &LayerCollectionId, + ) -> Result { + let collection_id = + Uuid::from_str(&collection.0).map_err(|_| error::Error::IdStringMustBeUuid { + found: collection.0.clone(), + })?; + + let mut conn = self.conn_pool.get().await?; let layer = layer.user_input; - let id = LayerId::new(); + let layer_id = Uuid::new_v4(); let symbology = serde_json::to_value(&layer.symbology).context(error::SerdeJson)?; - let stmt = conn + let trans = conn.build_transaction().start().await?; + + let stmt = trans .prepare( " INSERT INTO layers (id, name, description, workflow, symbology) @@ -69,28 +88,58 @@ where ) .await?; - conn.execute( - &stmt, - &[ - &id, - &layer.name, - &layer.description, - &serde_json::to_value(&layer.workflow).context(error::SerdeJson)?, - &symbology, - ], - ) - .await?; + trans + .execute( + &stmt, + &[ + &layer_id, + &layer.name, + &layer.description, + &serde_json::to_value(&layer.workflow).context(error::SerdeJson)?, + &symbology, + ], + ) + .await?; - Ok(id) + let stmt = trans + .prepare( + " + INSERT INTO collection_layers (collection, layer) + VALUES ($1, $2) ON CONFLICT DO NOTHING;", + ) + .await?; + + trans.execute(&stmt, &[&collection_id, &layer_id]).await?; + + trans.commit().await?; + + Ok(LayerId(layer_id.to_string())) } - async fn add_layer_with_id(&self, id: LayerId, layer: Validated) -> Result<()> { - 
let conn = self.conn_pool.get().await?; + + async fn add_layer_with_id( + &self, + id: &LayerId, + layer: Validated, + collection: &LayerCollectionId, + ) -> Result<()> { + let layer_id = Uuid::from_str(&id.0).map_err(|_| error::Error::IdStringMustBeUuid { + found: collection.0.clone(), + })?; + + let collection_id = + Uuid::from_str(&collection.0).map_err(|_| error::Error::IdStringMustBeUuid { + found: collection.0.clone(), + })?; + + let mut conn = self.conn_pool.get().await?; let layer = layer.user_input; let symbology = serde_json::to_value(&layer.symbology).context(error::SerdeJson)?; - let stmt = conn + let trans = conn.build_transaction().start().await?; + + let stmt = trans .prepare( " INSERT INTO layers (id, name, description, workflow, symbology) @@ -98,56 +147,48 @@ where ) .await?; - conn.execute( - &stmt, - &[ - &id, - &layer.name, - &layer.description, - &serde_json::to_value(&layer.workflow).context(error::SerdeJson)?, - &symbology, - ], - ) - .await?; - - Ok(()) - } - - async fn get_layer(&self, id: LayerId) -> Result { - let conn = self.conn_pool.get().await?; + trans + .execute( + &stmt, + &[ + &layer_id, + &layer.name, + &layer.description, + &serde_json::to_value(&layer.workflow).context(error::SerdeJson)?, + &symbology, + ], + ) + .await?; - let stmt = conn + let stmt = trans .prepare( " - SELECT - name, - description, - workflow, - symbology - FROM layers l - WHERE l.id = $1;", + INSERT INTO collection_layers (collection, layer) + VALUES ($1, $2) ON CONFLICT DO NOTHING;", ) .await?; - let row = conn - .query_one(&stmt, &[&id]) - .await - .map_err(|_error| LayerDbError::NoLayerForGivenId { id })?; + trans.execute(&stmt, &[&collection_id, &layer_id]).await?; - Ok(Layer { - id, - name: row.get(0), - description: row.get(1), - workflow: serde_json::from_value(row.get(2)).context(error::SerdeJson)?, - symbology: serde_json::from_value(row.get(3)).context(error::SerdeJson)?, - }) + trans.commit().await?; + + Ok(()) } async fn add_layer_to_collection( &self, - layer: LayerId, - collection: LayerCollectionId, + layer: &LayerId, + collection: &LayerCollectionId, ) -> Result<()> { + let layer_id = Uuid::from_str(&layer.0).map_err(|_| error::Error::IdStringMustBeUuid { + found: collection.0.clone(), + })?; + + let collection_id = + Uuid::from_str(&collection.0).map_err(|_| error::Error::IdStringMustBeUuid { + found: collection.0.clone(), + })?; + let conn = self.conn_pool.get().await?; let stmt = conn @@ -158,7 +199,7 @@ where ) .await?; - conn.execute(&stmt, &[&collection, &layer]).await?; + conn.execute(&stmt, &[&collection_id, &layer_id]).await?; Ok(()) } @@ -166,14 +207,21 @@ where async fn add_collection( &self, collection: Validated, + parent: &LayerCollectionId, ) -> Result { - let conn = self.conn_pool.get().await?; + let parent = Uuid::from_str(&parent.0).map_err(|_| error::Error::IdStringMustBeUuid { + found: parent.0.clone(), + })?; + + let mut conn = self.conn_pool.get().await?; let collection = collection.user_input; - let id = LayerCollectionId::new(); + let collection_id = Uuid::new_v4(); - let stmt = conn + let trans = conn.build_transaction().start().await?; + + let stmt = trans .prepare( " INSERT INTO layer_collections (id, name, description) @@ -181,22 +229,50 @@ where ) .await?; - conn.execute(&stmt, &[&id, &collection.name, &collection.description]) + trans + .execute( + &stmt, + &[&collection_id, &collection.name, &collection.description], + ) .await?; - Ok(id) + let stmt = trans + .prepare( + " + INSERT INTO collection_children (parent, child) + VALUES 
($1, $2) ON CONFLICT DO NOTHING;", + ) + .await?; + + trans.execute(&stmt, &[&parent, &collection_id]).await?; + + trans.commit().await?; + + Ok(LayerCollectionId(collection_id.to_string())) } async fn add_collection_with_id( &self, - id: LayerCollectionId, + id: &LayerCollectionId, collection: Validated, + parent: &LayerCollectionId, ) -> Result<()> { - let conn = self.conn_pool.get().await?; + let collection_id = + Uuid::from_str(&id.0).map_err(|_| error::Error::IdStringMustBeUuid { + found: id.0.clone(), + })?; + + let parent = Uuid::from_str(&parent.0).map_err(|_| error::Error::IdStringMustBeUuid { + found: parent.0.clone(), + })?; + + let mut conn = self.conn_pool.get().await?; let collection = collection.user_input; - let stmt = conn + let trans = conn.build_transaction().start().await?; + + let stmt = trans .prepare( " INSERT INTO layer_collections (id, name, description) @@ -204,17 +280,42 @@ where ) .await?; - conn.execute(&stmt, &[&id, &collection.name, &collection.description]) + trans + .execute( + &stmt, + &[&collection_id, &collection.name, &collection.description], + ) + .await?; + + let stmt = trans + .prepare( + " + INSERT INTO collection_children (parent, child) + VALUES ($1, $2) ON CONFLICT DO NOTHING;", + ) .await?; + trans.execute(&stmt, &[&parent, &collection_id]).await?; + + trans.commit().await?; + Ok(()) } async fn add_collection_to_parent( &self, - collection: LayerCollectionId, - parent: LayerCollectionId, + collection: &LayerCollectionId, + parent: &LayerCollectionId, ) -> Result<()> { + let collection = + Uuid::from_str(&collection.0).map_err(|_| error::Error::IdStringMustBeUuid { + found: collection.0.clone(), + })?; + + let parent = Uuid::from_str(&parent.0).map_err(|_| error::Error::IdStringMustBeUuid { + found: parent.0.clone(), + })?; + let conn = self.conn_pool.get().await?; let stmt = conn @@ -241,9 +342,14 @@ where { async fn collection_items( &self, - collection: LayerCollectionId, + collection: &LayerCollectionId, options: Validated, ) -> Result> { + let collection = + Uuid::from_str(&collection.0).map_err(|_| error::Error::IdStringMustBeUuid { + found: collection.0.clone(), + })?; + let conn = self.conn_pool.get().await?; let options = options.user_input; @@ -254,7 +360,7 @@ where SELECT id, name, description, is_layer FROM ( SELECT - id, + concat(id, '') AS id, name, description, FALSE AS is_layer @@ -262,7 +368,7 @@ where WHERE cc.parent = $1 ) u UNION ( SELECT - id, + concat(id, '') AS id, name, description, TRUE As is_layer @@ -294,13 +400,19 @@ where if is_layer { CollectionItem::Layer(LayerListing { - id: row.get(0), + id: ProviderLayerId { + provider: INTERNAL_PROVIDER_ID, + item: LayerId(row.get(0)), + }, name: row.get(1), description: row.get(2), }) } else { CollectionItem::Collection(LayerCollectionListing { - id: row.get(0), + id: ProviderLayerCollectionId { + provider: INTERNAL_PROVIDER_ID, + item: LayerCollectionId(row.get(0)), + }, name: row.get(1), description: row.get(2), }) @@ -309,10 +421,119 @@ where .collect()) } - async fn root_collection_items( + async fn root_collection_id(&self) -> Result { + Ok(LayerCollectionId( + INTERNAL_LAYER_DB_ROOT_COLLECTION_ID.to_string(), + )) + } + + async fn get_layer(&self, id: &LayerId) -> Result { + let layer_id = Uuid::from_str(&id.0).map_err(|_| error::Error::IdStringMustBeUuid { + found: id.0.clone(), + })?; + + let conn = self.conn_pool.get().await?; + + let stmt = conn + .prepare( + " + SELECT + name, + description, + workflow, + symbology + FROM layers l + WHERE l.id = $1;", + ) + .await?; 
+ + let row = conn + .query_one(&stmt, &[&layer_id]) + .await + .map_err(|_error| LayerDbError::NoLayerForGivenId { id: id.clone() })?; + + Ok(Layer { + id: ProviderLayerId { + provider: INTERNAL_PROVIDER_ID, + item: id.clone(), + }, + name: row.get(0), + description: row.get(1), + workflow: serde_json::from_value(row.get(2)).context(error::SerdeJson)?, + symbology: serde_json::from_value(row.get(3)).context(error::SerdeJson)?, + }) + } +} + +pub struct PostgresLayerProviderDb +where + Tls: MakeTlsConnect + Clone + Send + Sync + 'static, + >::Stream: Send + Sync, + >::TlsConnect: Send, + <>::TlsConnect as TlsConnect>::Future: Send, +{ + pub(crate) conn_pool: Pool>, +} + +impl PostgresLayerProviderDb +where + Tls: MakeTlsConnect + Clone + Send + Sync + 'static, + >::Stream: Send + Sync, + >::TlsConnect: Send, + <>::TlsConnect as TlsConnect>::Future: Send, +{ + pub fn new(conn_pool: Pool>) -> Self { + Self { conn_pool } + } +} + +#[async_trait] +impl LayerProviderDb for PostgresLayerProviderDb +where + Tls: MakeTlsConnect + Clone + Send + Sync + 'static, + >::Stream: Send + Sync, + >::TlsConnect: Send, + <>::TlsConnect as TlsConnect>::Future: Send, +{ + async fn add_layer_provider( &self, - options: Validated, - ) -> Result> { + provider: Box, + ) -> Result { + // TODO: permissions + let conn = self.conn_pool.get().await?; + + let stmt = conn + .prepare( + " + INSERT INTO layer_providers ( + id, + type_name, + name, + definition + ) + VALUES ($1, $2, $3, $4)", + ) + .await?; + + let id = provider.id(); + conn.execute( + &stmt, + &[ + &id, + &provider.type_name(), + &provider.name(), + &serde_json::to_value(provider)?, + ], + ) + .await?; + Ok(id) + } + + async fn list_layer_providers( + &self, + options: Validated, + ) -> Result> { + // TODO: permission let conn = self.conn_pool.get().await?; let options = options.user_input; @@ -320,28 +541,15 @@ where let stmt = conn .prepare( " - SELECT id, name, description, is_layer - FROM ( - SELECT - id, - name, - description, - FALSE AS is_layer - FROM layer_collections c LEFT JOIN collection_children cc ON (c.id = cc.child) - WHERE cc.parent IS NULL - ) a UNION ( SELECT id, - name, - description, - TRUE AS is_layer - FROM layers l LEFT JOIN collection_layers cl ON (l.id = cl.layer) - WHERE cl.collection IS NULL - ) - ORDER BY is_layer ASC, name ASC - LIMIT $1 - OFFSET $2; - ", + name, + type_name + FROM + layer_providers + ORDER BY name ASC + LIMIT $1 + OFFSET $2;", ) .await?; @@ -353,45 +561,36 @@ where .await?; Ok(rows - .into_iter() - .map(|row| { - let is_layer: bool = row.get(3); - - if is_layer { - CollectionItem::Layer(LayerListing { - id: row.get(0), - name: row.get(1), - description: row.get(2), - }) - } else { - CollectionItem::Collection(LayerCollectionListing { - id: row.get(0), - name: row.get(1), - description: row.get(2), - }) - } + .iter() + .map(|row| LayerProviderListing { + id: row.get(0), + name: row.get(1), + description: row.get(2), }) .collect()) } - async fn workflow(&self, layer: LayerId) -> Result { + async fn layer_provider(&self, id: LayerProviderId) -> Result> { + // TODO: permissions let conn = self.conn_pool.get().await?; let stmt = conn .prepare( " - SELECT - workflow, - FROM layers l - WHERE l.id = $1;", + SELECT + definition + FROM + layer_providers + WHERE + id = $1", ) .await?; - let row = conn - .query_one(&stmt, &[&layer]) - .await - .map_err(|_error| LayerDbError::NoLayerForGivenId { id: layer })?; + let row = conn.query_one(&stmt, &[&id]).await?; + + let definition = + serde_json::from_value::>(row.get(0))?; - 
Ok(serde_json::from_value(row.get(0)).context(error::SerdeJson)?) + definition.initialize().await } } diff --git a/services/src/pro/server.rs b/services/src/pro/server.rs index 9f4ac8b54..51a966dfe 100644 --- a/services/src/pro/server.rs +++ b/services/src/pro/server.rs @@ -59,16 +59,16 @@ where app = app.configure(pro::handlers::drone_mapping::init_drone_mapping_routes::); } - #[cfg(feature = "ebv")] - { - app = app - .service(web::scope("/ebv").configure(handlers::ebv::init_ebv_routes::(None))); - } - - #[cfg(feature = "nfdi")] - { - app = app.configure(handlers::gfbio::init_gfbio_routes::); - } + // #[cfg(feature = "ebv")] + // { + // app = app + // .service(web::scope("/ebv").configure(handlers::ebv::init_ebv_routes::(None))); + // } + + // #[cfg(feature = "nfdi")] + // { + // app = app.configure(handlers::gfbio::init_gfbio_routes::); + // } if version_api { app = app.route( From e605ecbd0ccd004994dedd5f8401ee22fa98270c Mon Sep 17 00:00:00 2001 From: Michael Mattig Date: Mon, 27 Jun 2022 11:26:17 +0200 Subject: [PATCH 07/21] migrate gfbio to layer provider --- services/src/datasets/external/gfbio.rs | 165 +++++++++++++----------- services/src/datasets/external/mod.rs | 4 +- services/src/datasets/mod.rs | 2 +- 3 files changed, 95 insertions(+), 76 deletions(-) diff --git a/services/src/datasets/external/gfbio.rs b/services/src/datasets/external/gfbio.rs index 5a23ddd75..7ad942a10 100644 --- a/services/src/datasets/external/gfbio.rs +++ b/services/src/datasets/external/gfbio.rs @@ -3,25 +3,28 @@ use std::marker::PhantomData; use crate::datasets::listing::{Provenance, ProvenanceOutput}; use crate::error::Error; +use crate::error::Result; use crate::layers::external::{ExternalLayerProvider, ExternalLayerProviderDefinition}; -use crate::layers::layer::{CollectionItem, Layer, LayerCollectionListOptions, LayerListing}; +use crate::layers::layer::{ + CollectionItem, Layer, LayerCollectionListOptions, LayerListing, ProviderLayerId, +}; use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider, LayerId}; -use crate::layers::storage::LayerProviderId; -use crate::{datasets::listing::DatasetListOptions, error::Result}; -use crate::{datasets::listing::DatasetListing, util::user_input::Validated}; +use crate::util::user_input::Validated; +use crate::workflows::workflow::Workflow; use async_trait::async_trait; use bb8_postgres::bb8::{Pool, PooledConnection}; use bb8_postgres::tokio_postgres::{Config, NoTls}; use bb8_postgres::PostgresConnectionManager; use geoengine_datatypes::collections::VectorDataType; -use geoengine_datatypes::dataset::{DatasetId, DatasetProviderId, ExternalDatasetId}; +use geoengine_datatypes::dataset::{DatasetId, ExternalDatasetId, LayerProviderId}; use geoengine_datatypes::primitives::{ FeatureDataType, RasterQueryRectangle, VectorQueryRectangle, }; use geoengine_datatypes::spatial_reference::SpatialReference; -use geoengine_operators::engine::{StaticMetaData, TypedResultDescriptor}; +use geoengine_operators::engine::{StaticMetaData, TypedOperator, VectorOperator}; use geoengine_operators::source::{ - OgrSourceColumnSpec, OgrSourceDatasetTimeType, OgrSourceErrorSpec, + OgrSource, OgrSourceColumnSpec, OgrSourceDatasetTimeType, OgrSourceErrorSpec, + OgrSourceParameters, }; use geoengine_operators::{ engine::{MetaData, MetaDataProvider, RasterResultDescriptor, VectorResultDescriptor}, @@ -84,7 +87,7 @@ impl ExternalLayerProviderDefinition for GfbioDataProviderDefinition { self.name.clone() } - fn id(&self) -> DatasetProviderId { + fn id(&self) -> LayerProviderId { 
+    fn id(&self) -> LayerProviderId {
GFBIO_PROVIDER_ID } } @@ -170,23 +173,22 @@ impl GfbioDataProvider { impl LayerCollectionProvider for GfbioDataProvider { async fn collection_items( &self, - collection: LayerCollectionId, - options: Validated, - ) -> Result> { - todo!() - } - - async fn root_collection_items( - &self, + _collection: &LayerCollectionId, options: Validated, ) -> Result> { + // TODO: check collection id let conn = self.pool.get().await?; + let options = options.user_input; + let stmt = conn .prepare(&format!( r#" SELECT surrogate_key, "{title}", "{details}" - FROM {schema}.abcd_datasets;"#, + FROM {schema}.abcd_datasets + ORDER BY surrogate_key + LIMIT $1 + OFFSET $2;"#, title = self .column_name_to_hash .get("/DataSets/DataSet/Metadata/Description/Representation/Title") @@ -199,14 +201,21 @@ impl LayerCollectionProvider for GfbioDataProvider { )) .await?; - let rows = conn.query(&stmt, &[]).await?; + let rows = conn + .query( + &stmt, + &[&i64::from(options.limit), &i64::from(options.offset)], + ) + .await?; let listings: Vec<_> = rows .into_iter() .map(|row| { CollectionItem::Layer(LayerListing { - provider: GFBIO_PROVIDER_ID, - layer: row.get::(0).to_string(), + id: ProviderLayerId { + provider: GFBIO_PROVIDER_ID, + item: LayerId(row.get::(0).to_string()), + }, name: row.get(1), description: row.try_get(2).unwrap_or_else(|_| "".to_owned()), }) @@ -216,14 +225,60 @@ impl LayerCollectionProvider for GfbioDataProvider { Ok(listings) } - async fn get_layer(&self, id: LayerId) -> Result { + async fn root_collection_id(&self) -> Result { + Ok(LayerCollectionId("abcd".to_owned())) + } + + async fn get_layer(&self, id: &LayerId) -> Result { + let surrogate_key: i32 = id.0.parse().map_err(|_| Error::InvalidDatasetId)?; + + let conn = self.pool.get().await?; + + let stmt = conn + .prepare(&format!( + r#" + SELECT "{title}", "{details}" + FROM {schema}.abcd_datasets + WHERE surrogate_key = $1;"#, + title = self + .column_name_to_hash + .get("/DataSets/DataSet/Metadata/Description/Representation/Title") + .ok_or(Error::GfbioMissingAbcdField)?, + details = self + .column_name_to_hash + .get("/DataSets/DataSet/Metadata/Description/Representation/Details") + .ok_or(Error::GfbioMissingAbcdField)?, + schema = self.db_config.schema + )) + .await?; + + let row = conn.query_one(&stmt, &[&surrogate_key]).await?; + Ok(Layer { - id, - name: todo!(), - description: todo!(), - workflow: todo!(), - symbology: todo!(), + id: ProviderLayerId { + provider: GFBIO_PROVIDER_ID, + item: id.clone(), + }, + name: row.get(0), + description: row.try_get(1).unwrap_or_else(|_| "".to_owned()), + workflow: Workflow { + operator: TypedOperator::Vector( + OgrSource { + params: OgrSourceParameters { + dataset: DatasetId::External(ExternalDatasetId { + provider_id: GFBIO_PROVIDER_ID, + dataset_id: id.0.clone(), + }), + attribute_projection: None, + attribute_filters: None, + }, + } + .boxed(), + ), + }, + symbology: None, // TODO }) + } } #[async_trait] @@ -398,6 +453,7 @@ mod tests { use bb8_postgres::bb8::ManageConnection; use futures::StreamExt; use geoengine_datatypes::collections::MultiPointCollection; + use geoengine_datatypes::dataset::ExternalDatasetId; use geoengine_datatypes::primitives::{ BoundingBox2D, FeatureData, MultiPoint, SpatialResolution, TimeInterval, }; @@ -407,10 +463,7 @@ mod tests { use rand::RngCore; use crate::test_data; - use crate::{ - datasets::listing::OrderBy, - util::{config, user_input::UserInput}, - }; + use crate::util::{config, user_input::UserInput}; use std::{fs::File, io::Read, path::PathBuf}; use super::*; @@ 
-485,10 +538,9 @@ mod tests { .unwrap(); let listing = provider - .list( - DatasetListOptions { - filter: None, - order: OrderBy::NameAsc, + .collection_items( + &provider.root_collection_id().await.unwrap(), + LayerCollectionListOptions { offset: 0, limit: 10, } @@ -503,47 +555,14 @@ mod tests { assert_eq!( listing, - vec![DatasetListing { - id: DatasetId::External(ExternalDatasetId { - provider_id: GFBIO_PROVIDER_ID, - dataset_id: "1".to_string(), - }), + vec![CollectionItem::Layer(LayerListing { + id: ProviderLayerId { + provider: GFBIO_PROVIDER_ID, + item: LayerId("1".to_string()), + }, name: "Example Title".to_string(), description: "".to_string(), - tags: vec![], - source_operator: "OgrSource".to_string(), - result_descriptor: TypedResultDescriptor::Vector(VectorResultDescriptor { - data_type: VectorDataType::MultiPoint, - spatial_reference: SpatialReference::epsg_4326().into(), - columns: [ - ("/DataSets/DataSet/Units/Unit/DateLastEdited".to_owned(), FeatureDataType::Text), - ("/DataSets/DataSet/Units/Unit/Gathering/Agents/GatheringAgent/AgentText".to_owned(), FeatureDataType::Text), - ("/DataSets/DataSet/Units/Unit/Gathering/Country/ISO3166Code".to_owned(), FeatureDataType::Text), - ("/DataSets/DataSet/Units/Unit/Gathering/Country/Name".to_owned(), FeatureDataType::Text), - ("/DataSets/DataSet/Units/Unit/Gathering/DateTime/ISODateTimeBegin".to_owned(), FeatureDataType::Text), - ("/DataSets/DataSet/Units/Unit/Gathering/LocalityText".to_owned(), FeatureDataType::Text), - ("/DataSets/DataSet/Units/Unit/Gathering/SiteCoordinateSets/SiteCoordinates/CoordinatesLatLong/SpatialDatum".to_owned(), FeatureDataType::Text), - ("/DataSets/DataSet/Units/Unit/Identifications/Identification/Result/TaxonIdentified/HigherTaxa/HigherTaxon/HigherTaxonName".to_owned(), FeatureDataType::Text), - ("/DataSets/DataSet/Units/Unit/Identifications/Identification/Result/TaxonIdentified/HigherTaxa/HigherTaxon/HigherTaxonRank".to_owned(), FeatureDataType::Text), - ("/DataSets/DataSet/Units/Unit/Identifications/Identification/Result/TaxonIdentified/ScientificName/FullScientificNameString".to_owned(), FeatureDataType::Text), - ("/DataSets/DataSet/Units/Unit/MultiMediaObjects/MultiMediaObject/Creator".to_owned(), FeatureDataType::Text), - ("/DataSets/DataSet/Units/Unit/MultiMediaObjects/MultiMediaObject/FileURI".to_owned(), FeatureDataType::Text), - ("/DataSets/DataSet/Units/Unit/MultiMediaObjects/MultiMediaObject/Format".to_owned(), FeatureDataType::Text), - ("/DataSets/DataSet/Units/Unit/MultiMediaObjects/MultiMediaObject/IPR/Licenses/License/Details".to_owned(), FeatureDataType::Text), - ("/DataSets/DataSet/Units/Unit/MultiMediaObjects/MultiMediaObject/IPR/Licenses/License/Text".to_owned(), FeatureDataType::Text), - ("/DataSets/DataSet/Units/Unit/MultiMediaObjects/MultiMediaObject/IPR/Licenses/License/URI".to_owned(), FeatureDataType::Text), - ("/DataSets/DataSet/Units/Unit/RecordBasis".to_owned(), FeatureDataType::Text), - ("/DataSets/DataSet/Units/Unit/RecordURI".to_owned(), FeatureDataType::Text), - ("/DataSets/DataSet/Units/Unit/SourceID".to_owned(), FeatureDataType::Text), - ("/DataSets/DataSet/Units/Unit/SourceInstitutionID".to_owned(), FeatureDataType::Text), - ("/DataSets/DataSet/Units/Unit/UnitID".to_owned(), FeatureDataType::Text), - ] - .iter() - .cloned() - .collect(), - }), - symbology: None, - }] + })] ); } diff --git a/services/src/datasets/external/mod.rs b/services/src/datasets/external/mod.rs index 989bf090d..a900089e8 100644 --- a/services/src/datasets/external/mod.rs +++ 
b/services/src/datasets/external/mod.rs @@ -1,5 +1,5 @@ -// #[cfg(feature = "nfdi")] -// pub mod gfbio; +#[cfg(feature = "nfdi")] +pub mod gfbio; pub mod mock; // #[cfg(feature = "nature40")] // pub mod nature40; diff --git a/services/src/datasets/mod.rs b/services/src/datasets/mod.rs index 7f1734f67..00c93daeb 100644 --- a/services/src/datasets/mod.rs +++ b/services/src/datasets/mod.rs @@ -1,5 +1,5 @@ pub mod add_from_directory; -pub mod external; +pub mod external; // TODO: move to layers/external pub mod in_memory; pub mod listing; pub mod storage; From 435b153c8a873c0bd77a480cce18c0d81d33b6dd Mon Sep 17 00:00:00 2001 From: Michael Mattig Date: Mon, 27 Jun 2022 18:34:23 +0200 Subject: [PATCH 08/21] migrate external providers --- output.gpkg | 0 services/src/datasets/external/mod.rs | 14 +- services/src/datasets/external/nature40.rs | 308 +++++++++--------- .../src/datasets/external/netcdfcf/error.rs | 5 +- .../src/datasets/external/netcdfcf/mod.rs | 112 +++---- services/src/datasets/external/nfdi/mod.rs | 158 +++++++-- services/src/datasets/external/pangaea/mod.rs | 50 ++- services/src/error.rs | 23 +- services/src/handlers/ebv.rs | 30 +- services/src/handlers/gfbio.rs | 11 +- services/src/handlers/mod.rs | 8 +- services/src/layers/layer.rs | 9 + services/src/pro/datasets/external/mod.rs | 2 +- .../datasets/external/sentinel_s2_l2a_cogs.rs | 132 ++++++-- services/src/pro/server.rs | 20 +- services/src/server.rs | 20 +- 16 files changed, 526 insertions(+), 376 deletions(-) create mode 100644 output.gpkg diff --git a/output.gpkg b/output.gpkg new file mode 100644 index 000000000..e69de29bb diff --git a/services/src/datasets/external/mod.rs b/services/src/datasets/external/mod.rs index a900089e8..42ef79b81 100644 --- a/services/src/datasets/external/mod.rs +++ b/services/src/datasets/external/mod.rs @@ -1,10 +1,10 @@ #[cfg(feature = "nfdi")] pub mod gfbio; pub mod mock; -// #[cfg(feature = "nature40")] -// pub mod nature40; -// pub mod netcdfcf; -// #[cfg(feature = "nfdi")] -// pub mod nfdi; -// #[cfg(feature = "nfdi")] -// pub mod pangaea; +#[cfg(feature = "nature40")] +pub mod nature40; +pub mod netcdfcf; +#[cfg(feature = "nfdi")] +pub mod nfdi; +#[cfg(feature = "nfdi")] +pub mod pangaea; diff --git a/services/src/datasets/external/nature40.rs b/services/src/datasets/external/nature40.rs index 4ddcea4ed..f6f928142 100644 --- a/services/src/datasets/external/nature40.rs +++ b/services/src/datasets/external/nature40.rs @@ -2,25 +2,28 @@ use std::path::Path; use crate::datasets::listing::ProvenanceOutput; use crate::error::Error; +use crate::error::Result; +use crate::layers::external::{ExternalLayerProvider, ExternalLayerProviderDefinition}; +use crate::layers::layer::{ + CollectionItem, Layer, LayerCollectionListOptions, LayerListing, ProviderLayerId, +}; +use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider, LayerId}; use crate::util::parsing::{deserialize_base_url, string_or_string_array}; use crate::util::retry::retry; -use crate::{datasets::listing::DatasetListOptions, error::Result}; -use crate::{ - datasets::{ - listing::{DatasetListing, ExternalDatasetProvider}, - storage::ExternalDatasetProviderDefinition, - }, - error, - util::user_input::Validated, -}; +use crate::workflows::workflow::Workflow; +use crate::{error, util::user_input::Validated}; use async_trait::async_trait; use futures::future::join_all; use gdal::DatasetOptions; use gdal::Metadata; -use geoengine_datatypes::dataset::{DatasetId, DatasetProviderId, ExternalDatasetId}; +use 
geoengine_datatypes::dataset::ExternalDatasetId; +use geoengine_datatypes::dataset::{DatasetId, LayerProviderId}; use geoengine_datatypes::primitives::{RasterQueryRectangle, VectorQueryRectangle}; -use geoengine_operators::engine::TypedResultDescriptor; +use geoengine_operators::engine::RasterOperator; +use geoengine_operators::engine::TypedOperator; use geoengine_operators::source::GdalMetaDataStatic; +use geoengine_operators::source::GdalSource; +use geoengine_operators::source::GdalSourceParameters; use geoengine_operators::util::gdal::{ gdal_open_dataset_ex, gdal_parameters_from_dataset, raster_descriptor_from_dataset, }; @@ -40,7 +43,7 @@ use url::Url; #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct Nature40DataProviderDefinition { - id: DatasetProviderId, + id: LayerProviderId, name: String, #[serde(deserialize_with = "deserialize_base_url")] base_url: Url, @@ -71,8 +74,8 @@ impl Default for RequestRetries { #[typetag::serde] #[async_trait] -impl ExternalDatasetProviderDefinition for Nature40DataProviderDefinition { - async fn initialize(self: Box) -> crate::error::Result> { +impl ExternalLayerProviderDefinition for Nature40DataProviderDefinition { + async fn initialize(self: Box) -> crate::error::Result> { Ok(Box::new(Nature40DataProvider { id: self.id, base_url: self.base_url, @@ -90,14 +93,14 @@ impl ExternalDatasetProviderDefinition for Nature40DataProviderDefinition { self.name.clone() } - fn id(&self) -> DatasetProviderId { + fn id(&self) -> LayerProviderId { self.id } } #[derive(Debug)] pub struct Nature40DataProvider { - id: DatasetProviderId, + id: LayerProviderId, base_url: Url, user: String, password: String, @@ -109,6 +112,7 @@ struct RasterDb { name: String, title: String, #[serde(deserialize_with = "string_or_string_array", default)] + #[allow(dead_code)] tags: Vec, } @@ -135,8 +139,28 @@ struct RasterDbs { } #[async_trait] -impl ExternalDatasetProvider for Nature40DataProvider { - async fn list(&self, _options: Validated) -> Result> { +impl ExternalLayerProvider for Nature40DataProvider { + async fn provenance(&self, dataset: &DatasetId) -> Result { + Ok(ProvenanceOutput { + dataset: dataset.clone(), + provenance: None, + }) + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } +} + +#[async_trait] +impl LayerCollectionProvider for Nature40DataProvider { + async fn collection_items( + &self, + _collection: &LayerCollectionId, + _options: Validated, + ) -> Result> { + // TODO: check collection id + // TODO: query the other dbs as well let raster_dbs = self.load_raster_dbs().await?; @@ -154,33 +178,20 @@ impl ExternalDatasetProvider for Nature40DataProvider { let (dataset, band_labels) = self.get_band_labels(dataset).await?; for band_index in 1..=dataset.raster_count() { - if let Ok(result_descriptor) = - raster_descriptor_from_dataset(&dataset, band_index, None) - { - listing.push(Ok(DatasetListing { - id: DatasetId::External(ExternalDatasetId { - provider_id: self.id, - dataset_id: format!("{}:{}", db.name.clone(), band_index), - }), - name: db.title.clone(), - description: format!( - "Band {}: {}", - band_index, - band_labels - .get((band_index - 1) as usize) - .unwrap_or(&"".to_owned()) - ), - tags: db.tags.clone(), - source_operator: "GdalSource".to_owned(), - result_descriptor: TypedResultDescriptor::Raster(result_descriptor), - symbology: None, // TODO: build symbology - })); - } else { - info!( - "Could not create restult descriptor for band {} of {}", - band_index, db.name - ); - } + 
listing.push(Ok(CollectionItem::Layer(LayerListing { + id: ProviderLayerId { + provider: self.id, + item: LayerId(format!("{}:{}", db.name.clone(), band_index)), + }, + name: db.title.clone(), + description: format!( + "Band {}: {}", + band_index, + band_labels + .get((band_index - 1) as usize) + .unwrap_or(&"".to_owned()) + ), + }))); } } else { info!("Could not open dataset {}", db.name); @@ -189,21 +200,74 @@ impl ExternalDatasetProvider for Nature40DataProvider { let mut listing: Vec<_> = listing .into_iter() - .filter_map(|d: Result| if let Ok(d) = d { Some(d) } else { None }) + .filter_map(|d: Result| if let Ok(d) = d { Some(d) } else { None }) .collect(); - listing.sort_by(|a, b| a.name.cmp(&b.name)); + listing.sort_by(|a, b| a.name().cmp(b.name())); Ok(listing) } - async fn provenance(&self, dataset: &DatasetId) -> Result { - Ok(ProvenanceOutput { - dataset: dataset.clone(), - provenance: None, - }) + async fn root_collection_id(&self) -> Result { + Ok(LayerCollectionId("root".to_owned())) } - fn as_any(&self) -> &dyn std::any::Any { - self + async fn get_layer(&self, id: &LayerId) -> Result { + let split: Vec<_> = id.0.split(':').collect(); + + let (db_name, band_index) = match *split.as_slice() { + [db, band_index] => { + if let Ok(band_index) = band_index.parse::() { + (db, band_index) + } else { + return Err(Error::InvalidExternalDatasetId { provider: self.id }); + } + } + _ => { + return Err(Error::InvalidExternalDatasetId { provider: self.id }); + } + }; + + let dbs = self.load_raster_dbs().await?; + + let db = dbs + .rasterdbs + .iter() + .find(|db| db.name == db_name) + .ok_or(Error::Nature40UnknownRasterDbname)?; + + let dataset_url = db.url(&self.base_url)?; + + let dataset = self.load_dataset(dataset_url).await?; + + let (_dataset, band_labels) = self.get_band_labels(dataset).await?; + + Ok(Layer { + id: ProviderLayerId { + provider: self.id, + item: id.clone(), + }, + name: db.title.clone(), + description: format!( + "Band {}: {}", + band_index, + band_labels + .get((band_index - 1) as usize) + .unwrap_or(&"".to_owned()) + ), + workflow: Workflow { + operator: TypedOperator::Raster( + GdalSource { + params: GdalSourceParameters { + dataset: DatasetId::External(ExternalDatasetId { + provider_id: self.id, + dataset_id: id.0.clone(), + }), + }, + } + .boxed(), + ), + }, + symbology: None, + }) } } @@ -415,6 +479,7 @@ mod tests { use std::{fs::File, io::Read, path::PathBuf, str::FromStr}; use geoengine_datatypes::{ + dataset::ExternalDatasetId, primitives::{ Measurement, QueryRectangle, SpatialPartition2D, SpatialResolution, TimeInterval, }, @@ -433,7 +498,7 @@ mod tests { }; use serde_json::json; - use crate::{datasets::listing::OrderBy, test_data, util::user_input::UserInput}; + use crate::{test_data, util::user_input::UserInput}; use super::*; @@ -686,7 +751,7 @@ mod tests { expect_lidar_requests(&mut server); let provider = Box::new(Nature40DataProviderDefinition { - id: DatasetProviderId::from_str("2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd").unwrap(), + id: LayerProviderId::from_str("2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd").unwrap(), name: "Nature40".to_owned(), base_url: Url::parse(&server.url_str("")).unwrap(), user: "geoengine".to_owned(), @@ -698,10 +763,9 @@ mod tests { .unwrap(); let listing = provider - .list( - DatasetListOptions { - filter: None, - order: OrderBy::NameAsc, + .collection_items( + &provider.root_collection_id().await.unwrap(), + LayerCollectionListOptions { offset: 0, limit: 10, } @@ -714,102 +778,42 @@ mod tests { assert_eq!( listing, vec![ - 
DatasetListing { - id: DatasetId::External(ExternalDatasetId { - provider_id: DatasetProviderId::from_str( - "2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd" - ) - .unwrap(), - dataset_id: "geonode_ortho_muf_1m:1".to_owned() - }), + CollectionItem::Layer(LayerListing { + id: ProviderLayerId { + provider: LayerProviderId::from_str("2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd") + .unwrap(), + item: LayerId("geonode_ortho_muf_1m:1".to_owned()) + }, name: "MOF Luftbild".to_owned(), description: "Band 1: band1".to_owned(), - tags: vec!["natur40".to_owned()], - source_operator: "GdalSource".to_owned(), - result_descriptor: TypedResultDescriptor::Raster(RasterResultDescriptor { - data_type: RasterDataType::F32, - spatial_reference: SpatialReference::new( - SpatialReferenceAuthority::Epsg, - 3044 - ) - .into(), - measurement: Measurement::Unitless, - no_data_value: None - }), - symbology: None - }, - DatasetListing { - id: DatasetId::External(ExternalDatasetId { - provider_id: DatasetProviderId::from_str( - "2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd" - ) - .unwrap(), - dataset_id: "geonode_ortho_muf_1m:2".to_owned() - }), + }), + CollectionItem::Layer(LayerListing { + id: ProviderLayerId { + provider: LayerProviderId::from_str("2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd") + .unwrap(), + item: LayerId("geonode_ortho_muf_1m:2".to_owned()) + }, name: "MOF Luftbild".to_owned(), description: "Band 2: band2".to_owned(), - tags: vec!["natur40".to_owned()], - source_operator: "GdalSource".to_owned(), - result_descriptor: TypedResultDescriptor::Raster(RasterResultDescriptor { - data_type: RasterDataType::F32, - spatial_reference: SpatialReference::new( - SpatialReferenceAuthority::Epsg, - 3044 - ) - .into(), - measurement: Measurement::Unitless, - no_data_value: None - }), - symbology: None - }, - DatasetListing { - id: DatasetId::External(ExternalDatasetId { - provider_id: DatasetProviderId::from_str( - "2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd" - ) - .unwrap(), - dataset_id: "geonode_ortho_muf_1m:3".to_owned() - }), + }), + CollectionItem::Layer(LayerListing { + id: ProviderLayerId { + provider: LayerProviderId::from_str("2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd") + .unwrap(), + item: LayerId("geonode_ortho_muf_1m:3".to_owned()) + }, name: "MOF Luftbild".to_owned(), description: "Band 3: band3".to_owned(), - tags: vec!["natur40".to_owned()], - source_operator: "GdalSource".to_owned(), - result_descriptor: TypedResultDescriptor::Raster(RasterResultDescriptor { - data_type: RasterDataType::F32, - spatial_reference: SpatialReference::new( - SpatialReferenceAuthority::Epsg, - 3044 - ) - .into(), - measurement: Measurement::Unitless, - no_data_value: None - }), - symbology: None - }, - DatasetListing { - id: DatasetId::External(ExternalDatasetId { - provider_id: DatasetProviderId::from_str( - "2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd" - ) - .unwrap(), - dataset_id: "lidar_2018_wetness_1m:1".to_owned() - }), + }), + CollectionItem::Layer(LayerListing { + id: ProviderLayerId { + provider: LayerProviderId::from_str("2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd") + .unwrap(), + item: LayerId("geonode_ortho_muf_1m:4".to_owned()) + }, name: "Topografic Wetness index".to_owned(), description: "Band 1: wetness".to_owned(), - tags: vec!["natur40".to_owned()], - source_operator: "GdalSource".to_owned(), - result_descriptor: TypedResultDescriptor::Raster(RasterResultDescriptor { - data_type: RasterDataType::F32, - spatial_reference: SpatialReference::new( - SpatialReferenceAuthority::Epsg, - 25832 - ) - .into(), - measurement: Measurement::Unitless, - no_data_value: 
None - }), - symbology: None - } + }) ] ); } @@ -822,7 +826,7 @@ mod tests { expect_lidar_requests(&mut server); let provider = Box::new(Nature40DataProviderDefinition { - id: DatasetProviderId::from_str("2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd").unwrap(), + id: LayerProviderId::from_str("2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd").unwrap(), name: "Nature40".to_owned(), base_url: Url::parse(&server.url_str("")).unwrap(), user: "geoengine".to_owned(), @@ -836,10 +840,8 @@ mod tests { let meta: Box> = provider .meta_data(&DatasetId::External(ExternalDatasetId { - provider_id: DatasetProviderId::from_str( - "2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd", - ) - .unwrap(), + provider_id: LayerProviderId::from_str("2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd") + .unwrap(), dataset_id: "lidar_2018_wetness_1m:1".to_owned(), })) .await diff --git a/services/src/datasets/external/netcdfcf/error.rs b/services/src/datasets/external/netcdfcf/error.rs index a04a19001..0b2e9c9af 100644 --- a/services/src/datasets/external/netcdfcf/error.rs +++ b/services/src/datasets/external/netcdfcf/error.rs @@ -1,10 +1,9 @@ use std::path::PathBuf; use gdal::errors::GdalError; -use geoengine_datatypes::dataset::DatasetProviderId; use snafu::Snafu; -use geoengine_datatypes::error::ErrorSource; +use geoengine_datatypes::{dataset::LayerProviderId, error::ErrorSource}; #[derive(Debug, Snafu)] #[snafu(visibility(pub(crate)))] @@ -78,7 +77,7 @@ pub enum NetCdfCf4DProviderError { source: geoengine_datatypes::error::Error, }, InvalidExternalDatasetId { - provider: DatasetProviderId, + provider: LayerProviderId, }, InvalidDatasetIdLength { length: usize, diff --git a/services/src/datasets/external/netcdfcf/mod.rs b/services/src/datasets/external/netcdfcf/mod.rs index ccfc72124..5f5a76c4b 100644 --- a/services/src/datasets/external/netcdfcf/mod.rs +++ b/services/src/datasets/external/netcdfcf/mod.rs @@ -1,13 +1,19 @@ -use crate::datasets::listing::DatasetListOptions; -use crate::datasets::listing::{ExternalDatasetProvider, ProvenanceOutput}; +use crate::datasets::listing::ProvenanceOutput; +use crate::error::Error; +use crate::layers::external::ExternalLayerProvider; +use crate::layers::external::ExternalLayerProviderDefinition; +use crate::layers::layer::CollectionItem; +use crate::layers::layer::Layer; +use crate::layers::layer::LayerCollectionListOptions; +use crate::layers::listing::LayerCollectionId; +use crate::layers::listing::LayerCollectionProvider; +use crate::layers::listing::LayerId; use crate::projects::{RasterSymbology, Symbology}; -use crate::{ - datasets::{listing::DatasetListing, storage::ExternalDatasetProviderDefinition}, - util::user_input::Validated, -}; +use crate::{datasets::listing::DatasetListing, util::user_input::Validated}; use async_trait::async_trait; use gdal::{DatasetOptions, GdalOpenFlags}; -use geoengine_datatypes::dataset::{DatasetId, DatasetProviderId, ExternalDatasetId}; +use geoengine_datatypes::dataset::LayerProviderId; +use geoengine_datatypes::dataset::{DatasetId, ExternalDatasetId}; use geoengine_datatypes::operations::image::{Colorizer, RgbaColor}; use geoengine_datatypes::primitives::{ DateTime, DateTimeParseFormat, Measurement, RasterQueryRectangle, TimeGranularity, @@ -46,8 +52,8 @@ mod overviews; type Result = std::result::Result; /// Singleton Provider with id `1690c483-b17f-4d98-95c8-00a64849cd0b` -pub const NETCDF_CF_PROVIDER_ID: DatasetProviderId = - DatasetProviderId::from_u128(0x1690_c483_b17f_4d98_95c8_00a6_4849_cd0b); +pub const NETCDF_CF_PROVIDER_ID: LayerProviderId = + 
LayerProviderId::from_u128(0x1690_c483_b17f_4d98_95c8_00a6_4849_cd0b); #[derive(Clone, Debug, Serialize, Deserialize)] pub struct NetCdfCfDataProviderDefinition { @@ -64,8 +70,8 @@ pub struct NetCdfCfDataProvider { #[typetag::serde] #[async_trait] -impl ExternalDatasetProviderDefinition for NetCdfCfDataProviderDefinition { - async fn initialize(self: Box) -> crate::error::Result> { +impl ExternalLayerProviderDefinition for NetCdfCfDataProviderDefinition { + async fn initialize(self: Box) -> crate::error::Result> { Ok(Box::new(NetCdfCfDataProvider { path: self.path, overviews: self.overviews, @@ -80,7 +86,7 @@ impl ExternalDatasetProviderDefinition for NetCdfCfDataProviderDefinition { self.name.clone() } - fn id(&self) -> DatasetProviderId { + fn id(&self) -> LayerProviderId { NETCDF_CF_PROVIDER_ID } } @@ -272,8 +278,9 @@ impl NetCdfCfDataProvider { }) } + #[allow(dead_code)] pub(crate) fn listing_from_netcdf( - id: DatasetProviderId, + id: LayerProviderId, provider_path: &Path, overview_path: Option<&Path>, dataset_path: &Path, @@ -664,58 +671,7 @@ fn parse_time_coverage( } #[async_trait] -impl ExternalDatasetProvider for NetCdfCfDataProvider { - async fn list( - &self, - options: Validated, - ) -> crate::error::Result> { - // TODO: user right management - // TODO: options - - let mut dir = tokio::fs::read_dir(&self.path).await?; - - let mut datasets = vec![]; - while let Some(entry) = dir.next_entry().await? { - if !entry.path().is_file() { - continue; - } - - let provider_path = self.path.clone(); - let overviews_path = self.overviews.clone(); - let relative_path = if let Ok(p) = entry.path().strip_prefix(&provider_path) { - p.to_path_buf() - } else { - // cannot actually happen since `entry` is listed from `provider_path` - continue; - }; - - let listing = tokio::task::spawn_blocking(move || { - Self::listing_from_netcdf( - NETCDF_CF_PROVIDER_ID, - &provider_path, - Some(&overviews_path), - &relative_path, - ) - }) - .await?; - - match listing { - Ok(listing) => datasets.extend(listing), - Err(e) => debug!("Failed to list dataset: {}", e), - } - } - - // TODO: react to filter and sort options - // TODO: don't compute everything and filter then - let datasets = datasets - .into_iter() - .skip(options.user_input.offset as usize) - .take(options.user_input.limit as usize) - .collect(); - - Ok(datasets) - } - +impl ExternalLayerProvider for NetCdfCfDataProvider { async fn provenance(&self, dataset: &DatasetId) -> crate::error::Result { Ok(ProvenanceOutput { dataset: dataset.clone(), @@ -728,6 +684,26 @@ impl ExternalDatasetProvider for NetCdfCfDataProvider { } } +#[async_trait] +// TODO: replace the custom dataset API with this one +impl LayerCollectionProvider for NetCdfCfDataProvider { + async fn collection_items( + &self, + _collection: &LayerCollectionId, + _options: Validated, + ) -> crate::error::Result> { + Err(Error::NotYetImplemented) + } + + async fn root_collection_id(&self) -> crate::error::Result { + Err(Error::NotYetImplemented) + } + + async fn get_layer(&self, _id: &LayerId) -> crate::error::Result { + Err(Error::NotYetImplemented) + } +} + #[async_trait] impl MetaDataProvider for NetCdfCfDataProvider @@ -875,7 +851,7 @@ mod tests { #[allow(clippy::too_many_lines)] async fn test_listing_from_netcdf_m() { let provider_id = - DatasetProviderId::from_str("bf6bb6ea-5d5d-467d-bad1-267bf3a54470").unwrap(); + LayerProviderId::from_str("bf6bb6ea-5d5d-467d-bad1-267bf3a54470").unwrap(); let listing = NetCdfCfDataProvider::listing_from_netcdf( provider_id, @@ -1036,7 +1012,7 @@ mod 
tests { #[tokio::test] async fn test_listing_from_netcdf_sm() { let provider_id = - DatasetProviderId::from_str("bf6bb6ea-5d5d-467d-bad1-267bf3a54470").unwrap(); + LayerProviderId::from_str("bf6bb6ea-5d5d-467d-bad1-267bf3a54470").unwrap(); let listing = NetCdfCfDataProvider::listing_from_netcdf( provider_id, @@ -1320,7 +1296,7 @@ mod tests { .unwrap(); let provider_id = - DatasetProviderId::from_str("bf6bb6ea-5d5d-467d-bad1-267bf3a54470").unwrap(); + LayerProviderId::from_str("bf6bb6ea-5d5d-467d-bad1-267bf3a54470").unwrap(); let listing = NetCdfCfDataProvider::listing_from_netcdf( provider_id, diff --git a/services/src/datasets/external/nfdi/mod.rs b/services/src/datasets/external/nfdi/mod.rs index 39412cd92..4ea3ddc69 100644 --- a/services/src/datasets/external/nfdi/mod.rs +++ b/services/src/datasets/external/nfdi/mod.rs @@ -1,25 +1,29 @@ use crate::datasets::external::nfdi::metadata::{DataType, GEMetadata, RasterInfo, VectorInfo}; use crate::datasets::listing::{ - DatasetListOptions, DatasetListing, ExternalDatasetProvider, ProvenanceOutput, + ProvenanceOutput, }; -use crate::datasets::storage::{Dataset, ExternalDatasetProviderDefinition}; +use crate::datasets::storage::{Dataset}; use crate::error::{Error, Result}; +use crate::layers::external::{ExternalLayerProviderDefinition, ExternalLayerProvider}; +use crate::layers::layer::{LayerCollectionListOptions, CollectionItem, Layer, LayerListing, ProviderLayerId}; +use crate::layers::listing::{LayerCollectionProvider, LayerCollectionId, LayerId}; use crate::util::user_input::Validated; +use crate::workflows::workflow::Workflow; use geoengine_datatypes::collections::VectorDataType; -use geoengine_datatypes::dataset::{DatasetId, DatasetProviderId, ExternalDatasetId}; +use geoengine_datatypes::dataset::{DatasetId, ExternalDatasetId, LayerProviderId}; use geoengine_datatypes::primitives::{ FeatureDataType, Measurement, RasterQueryRectangle, VectorQueryRectangle, }; use geoengine_datatypes::spatial_reference::SpatialReferenceOption; use geoengine_operators::engine::{ MetaData, MetaDataProvider, RasterResultDescriptor, ResultDescriptor, TypedResultDescriptor, - VectorResultDescriptor, + VectorResultDescriptor, VectorOperator, TypedOperator, RasterOperator, }; use geoengine_operators::mock::MockDatasetDataSourceLoadingInfo; use geoengine_operators::source::{ FileNotFoundHandling, GdalDatasetParameters, GdalLoadingInfo, GdalLoadingInfoTemporalSlice, GdalLoadingInfoTemporalSliceIterator, OgrSourceColumnSpec, OgrSourceDataset, - OgrSourceDatasetTimeType, OgrSourceDurationSpec, OgrSourceErrorSpec, OgrSourceTimeFormat, + OgrSourceDatasetTimeType, OgrSourceDurationSpec, OgrSourceErrorSpec, OgrSourceTimeFormat, OgrSourceParameters, OgrSource, GdalSource, GdalSourceParameters, }; use scienceobjectsdb_rust_api::sciobjectsdb::sciobjsdb::api::storage::models::v1::Object; use scienceobjectsdb_rust_api::sciobjectsdb::sciobjsdb::api::storage::services::v1::dataset_service_client::DatasetServiceClient; @@ -48,7 +52,7 @@ const URL_REPLACEMENT: &str = "%URL%"; #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct NFDIDataProviderDefinition { - id: DatasetProviderId, + id: LayerProviderId, name: String, api_url: String, project_id: String, @@ -57,8 +61,8 @@ pub struct NFDIDataProviderDefinition { #[typetag::serde] #[async_trait::async_trait] -impl ExternalDatasetProviderDefinition for NFDIDataProviderDefinition { - async fn initialize(self: Box) -> Result> { +impl ExternalLayerProviderDefinition for NFDIDataProviderDefinition 
{ + async fn initialize(self: Box) -> Result> { Ok(Box::new(NFDIDataProvider::new(self).await?)) } @@ -70,7 +74,7 @@ impl ExternalDatasetProviderDefinition for NFDIDataProviderDefinition { self.name.clone() } - fn id(&self) -> DatasetProviderId { + fn id(&self) -> LayerProviderId { self.id } } @@ -108,7 +112,7 @@ impl Interceptor for APITokenInterceptor { /// is cheap. #[derive(Debug)] pub struct NFDIDataProvider { - id: DatasetProviderId, + id: LayerProviderId, project_id: String, project_stub: ProjectServiceClient>, dataset_stub: DatasetServiceClient>, @@ -505,8 +509,30 @@ impl MetaDataProvider) -> Result> { +impl ExternalLayerProvider for NFDIDataProvider { + async fn provenance(&self, dataset: &DatasetId) -> Result { + let (ds, _) = self.dataset_info(dataset).await?; + + Ok(ProvenanceOutput { + dataset: dataset.clone(), + provenance: ds.provenance, + }) + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } +} + +#[async_trait::async_trait] +impl LayerCollectionProvider for NFDIDataProvider { + async fn collection_items( + &self, + _collection: &LayerCollectionId, + _options: Validated, + ) -> Result> { + // TODO: check collection id + let mut project_stub = self.project_stub.clone(); let resp = project_stub @@ -519,21 +545,79 @@ impl ExternalDatasetProvider for NFDIDataProvider { Ok(resp .datasets .into_iter() - .map(|ds| Self::extract_metadata(&ds).map(|md| self.map_dataset(&ds, &md).listing())) - .collect::>>()?) + .map(|ds| { + CollectionItem::Layer(LayerListing { + id: ProviderLayerId { + provider: self.id, + item: LayerId(ds.id), + }, + name: ds.name, + description: ds.description, + }) + }) + .collect()) } - async fn provenance(&self, dataset: &DatasetId) -> Result { - let (ds, _) = self.dataset_info(dataset).await?; - - Ok(ProvenanceOutput { - dataset: dataset.clone(), - provenance: ds.provenance, - }) + async fn root_collection_id(&self) -> Result { + Ok(LayerCollectionId("root".to_string())) } - fn as_any(&self) -> &dyn std::any::Any { - self + async fn get_layer(&self, id: &LayerId) -> Result { + let mut project_stub = self.project_stub.clone(); + + // TODO: avoid loading ALL project datasets + let resp = project_stub + .get_project_datasets(GetProjectDatasetsRequest { + id: self.project_id.clone(), + }) + .await? 
+ .into_inner(); + + let dataset = resp + .datasets + .into_iter() + .find(|ds| ds.id == id.0) + .ok_or(Error::UnknownDatasetId)?; + + let meta_data = Self::extract_metadata(&dataset)?; + + let operator = match meta_data.data_type { + DataType::SingleVectorFile(_) => TypedOperator::Vector( + OgrSource { + params: OgrSourceParameters { + dataset: DatasetId::External(ExternalDatasetId { + provider_id: self.id, + dataset_id: id.0.clone(), + }), + attribute_projection: None, + attribute_filters: None, + }, + } + .boxed(), + ), + DataType::SingleRasterFile(_) => TypedOperator::Raster( + GdalSource { + params: GdalSourceParameters { + dataset: DatasetId::External(ExternalDatasetId { + provider_id: self.id, + dataset_id: id.0.clone(), + }), + }, + } + .boxed(), + ), + }; + + Ok(Layer { + id: ProviderLayerId { + provider: self.id, + item: id.clone(), + }, + name: dataset.name, + description: dataset.description, + workflow: Workflow { operator }, + symbology: None, + }) } } @@ -680,8 +764,11 @@ mod tests { use crate::datasets::external::nfdi::{ ExpiringDownloadLink, NFDIDataProvider, NFDIDataProviderDefinition, }; + use crate::layers::external::ExternalLayerProvider; + use crate::layers::layer::LayerCollectionListOptions; + use crate::layers::listing::LayerCollectionProvider; use futures::StreamExt; - use geoengine_datatypes::dataset::{DatasetId, DatasetProviderId, ExternalDatasetId}; + use geoengine_datatypes::dataset::{DatasetId, ExternalDatasetId, LayerProviderId}; use httptest::responders::status_code; use httptest::{Expectation, Server}; use scienceobjectsdb_rust_api::sciobjectsdb::sciobjsdb::api::storage::models::v1::{ @@ -696,8 +783,7 @@ mod tests { use tokio::fs::File; use tokio::io::AsyncReadExt; - use crate::datasets::listing::{DatasetListOptions, ExternalDatasetProvider, OrderBy}; - use crate::util::user_input::Validated; + use crate::util::user_input::UserInput; use geoengine_datatypes::collections::{FeatureCollectionInfos, MultiPointCollection}; use geoengine_datatypes::primitives::{ @@ -723,7 +809,7 @@ mod tests { async fn new_provider_with_url(url: String) -> NFDIDataProvider { let def = NFDIDataProviderDefinition { - id: DatasetProviderId::from_str(PROVIDER_ID).unwrap(), + id: LayerProviderId::from_str(PROVIDER_ID).unwrap(), api_token: TOKEN.to_string(), api_url: url, project_id: PROJECT_ID.to_string(), @@ -1035,14 +1121,16 @@ mod tests { let addr = format!("http://{}", server.address()); let provider = new_provider_with_url(addr).await; - let opts = DatasetListOptions { - filter: None, + let root = provider.root_collection_id().await.unwrap(); + + let opts = LayerCollectionListOptions { limit: 100, offset: 0, - order: OrderBy::NameAsc, - }; + } + .validated() + .unwrap(); - let res = provider.list(Validated { user_input: opts }).await; + let res = provider.collection_items(&root, opts).await; assert!(res.is_ok()); let res = res.unwrap(); assert_eq!(1, res.len()); @@ -1079,7 +1167,7 @@ mod tests { ); let id = DatasetId::External(ExternalDatasetId { - provider_id: DatasetProviderId::from_str(PROVIDER_ID).unwrap(), + provider_id: LayerProviderId::from_str(PROVIDER_ID).unwrap(), dataset_id: DATASET_ID.to_string(), }); @@ -1163,7 +1251,7 @@ mod tests { ); let id = DatasetId::External(ExternalDatasetId { - provider_id: DatasetProviderId::from_str(PROVIDER_ID).unwrap(), + provider_id: LayerProviderId::from_str(PROVIDER_ID).unwrap(), dataset_id: DATASET_ID.to_string(), }); @@ -1278,7 +1366,7 @@ mod tests { ); let id = DatasetId::External(ExternalDatasetId { - provider_id: 
DatasetProviderId::from_str(PROVIDER_ID).unwrap(), + provider_id: LayerProviderId::from_str(PROVIDER_ID).unwrap(), dataset_id: DATASET_ID.to_string(), }); diff --git a/services/src/datasets/external/pangaea/mod.rs b/services/src/datasets/external/pangaea/mod.rs index 23494ffa6..afe7d3f85 100644 --- a/services/src/datasets/external/pangaea/mod.rs +++ b/services/src/datasets/external/pangaea/mod.rs @@ -1,10 +1,10 @@ use crate::datasets::external::pangaea::meta::PangeaMetaData; -use crate::datasets::listing::{ - DatasetListOptions, DatasetListing, ExternalDatasetProvider, Provenance, ProvenanceOutput, -}; -use crate::datasets::storage::ExternalDatasetProviderDefinition; +use crate::datasets::listing::{Provenance, ProvenanceOutput}; +use crate::layers::external::{ExternalLayerProvider, ExternalLayerProviderDefinition}; +use crate::layers::layer::{CollectionItem, Layer, LayerCollectionListOptions}; +use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider, LayerId}; use async_trait::async_trait; -use geoengine_datatypes::dataset::{DatasetId, DatasetProviderId}; +use geoengine_datatypes::dataset::{DatasetId, LayerProviderId}; use geoengine_datatypes::primitives::{RasterQueryRectangle, VectorQueryRectangle}; use geoengine_operators::engine::{ MetaData, MetaDataProvider, RasterResultDescriptor, VectorResultDescriptor, @@ -19,8 +19,8 @@ use serde::{Deserialize, Serialize}; mod meta; -pub const PANGAEA_PROVIDER_ID: DatasetProviderId = - DatasetProviderId::from_u128(0xe3b9_3bf3_1bc1_48db_80e8_97cf_b068_5e8d); +pub const PANGAEA_PROVIDER_ID: LayerProviderId = + LayerProviderId::from_u128(0xe3b9_3bf3_1bc1_48db_80e8_97cf_b068_5e8d); /// The pangaea provider allows to include datasets from /// @@ -33,8 +33,8 @@ pub struct PangaeaDataProviderDefinition { #[typetag::serde] #[async_trait] -impl ExternalDatasetProviderDefinition for PangaeaDataProviderDefinition { - async fn initialize(self: Box) -> Result> { +impl ExternalLayerProviderDefinition for PangaeaDataProviderDefinition { + async fn initialize(self: Box) -> Result> { Ok(Box::new(PangaeaDataProvider::new(self.base_url))) } @@ -46,7 +46,7 @@ impl ExternalDatasetProviderDefinition for PangaeaDataProviderDefinition { self.name.clone() } - fn id(&self) -> DatasetProviderId { + fn id(&self) -> LayerProviderId { PANGAEA_PROVIDER_ID } } @@ -67,11 +67,7 @@ impl PangaeaDataProvider { } #[async_trait] -impl ExternalDatasetProvider for PangaeaDataProvider { - async fn list(&self, _options: Validated) -> Result> { - Ok(vec![]) - } - +impl ExternalLayerProvider for PangaeaDataProvider { async fn provenance(&self, dataset: &DatasetId) -> Result { let doi = dataset .external() @@ -115,6 +111,25 @@ impl ExternalDatasetProvider for PangaeaDataProvider { } } +#[async_trait] +impl LayerCollectionProvider for PangaeaDataProvider { + async fn collection_items( + &self, + _collection: &LayerCollectionId, + _options: Validated, + ) -> Result> { + Err(Error::NotYetImplemented) + } + + async fn root_collection_id(&self) -> Result { + Err(Error::NotYetImplemented) + } + + async fn get_layer(&self, _id: &LayerId) -> Result { + Err(Error::NotYetImplemented) + } +} + #[async_trait] impl MetaDataProvider for PangaeaDataProvider @@ -198,9 +213,8 @@ impl #[cfg(test)] mod tests { use crate::datasets::external::pangaea::{PangaeaDataProviderDefinition, PANGAEA_PROVIDER_ID}; - use crate::datasets::listing::ExternalDatasetProvider; - use crate::datasets::storage::ExternalDatasetProviderDefinition; use crate::error::Error; + use 
crate::layers::external::{ExternalLayerProvider, ExternalLayerProviderDefinition}; use futures::StreamExt; use geoengine_datatypes::collections::{ DataCollection, FeatureCollectionInfos, IntoGeometryIterator, MultiPointCollection, @@ -232,7 +246,7 @@ mod tests { crate::test_data!(String::from("pangaea/") + file_name).into() } - async fn create_provider(server: &Server) -> Result, Error> { + async fn create_provider(server: &Server) -> Result, Error> { Box::new(PangaeaDataProviderDefinition { name: "Pangaea".to_string(), base_url: server.url_str("").strip_suffix('/').unwrap().to_owned(), diff --git a/services/src/error.rs b/services/src/error.rs index fa5a67bed..2a27a1b5e 100644 --- a/services/src/error.rs +++ b/services/src/error.rs @@ -1,4 +1,7 @@ -use crate::{handlers::ErrorResponse, workflows::workflow::WorkflowId}; +use crate::{ + datasets::external::netcdfcf::NetCdfCf4DProviderError, handlers::ErrorResponse, + workflows::workflow::WorkflowId, +}; // use crate::{datasets::external::netcdfcf::NetCdfCf4DProviderError, handlers::ErrorResponse}; use actix_web::http::StatusCode; use actix_web::HttpResponse; @@ -314,16 +317,16 @@ pub enum Error { }, MissingNFDIMetaData, - // #[snafu(context(false))] - // NetCdfCf4DProvider { - // source: NetCdfCf4DProviderError, - // }, + #[snafu(context(false))] + NetCdfCf4DProvider { + source: NetCdfCf4DProviderError, + }, - // #[cfg(feature = "ebv")] - // #[snafu(context(false))] - // EbvHandler { - // source: crate::handlers::ebv::EbvError, - // }, + #[cfg(feature = "ebv")] + #[snafu(context(false))] + EbvHandler { + source: crate::handlers::ebv::EbvError, + }, #[cfg(feature = "nfdi")] #[snafu(display("Could not parse GFBio basket: {}", message,))] GFBioBasketParse { diff --git a/services/src/handlers/ebv.rs b/services/src/handlers/ebv.rs index 9f5898498..3596f8dd0 100644 --- a/services/src/handlers/ebv.rs +++ b/services/src/handlers/ebv.rs @@ -8,12 +8,13 @@ use crate::datasets::external::netcdfcf::{ }; use crate::error::Result; use crate::layers::external::ExternalLayerProvider; +use crate::layers::storage::LayerProviderDb; use crate::{contexts::Context, datasets::external::netcdfcf::NetCdfCfDataProvider}; use actix_web::{ web::{self, ServiceConfig}, FromRequest, Responder, }; -use geoengine_datatypes::dataset::DatasetProviderId; +use geoengine_datatypes::dataset::LayerProviderId; use geoengine_datatypes::error::{BoxedResultExt, ErrorSource}; use log::{debug, warn}; use serde::{Deserialize, Serialize}; @@ -114,9 +115,9 @@ pub enum EbvError { #[snafu(display("Cannot lookup dataset with id {id}"))] CannotLookupDataset { id: usize }, #[snafu(display("Cannot find NetCdfCf provider with id {id}"))] - NoNetCdfCfProviderForId { id: DatasetProviderId }, + NoNetCdfCfProviderForId { id: LayerProviderId }, #[snafu(display("NetCdfCf provider with id {id} cannot list files"))] - CdfCfProviderCannotListFiles { id: DatasetProviderId }, + CdfCfProviderCannotListFiles { id: LayerProviderId }, #[snafu(display("Internal server error"))] Internal { source: Box }, } @@ -263,7 +264,7 @@ async fn get_dataset_metadata(base_url: &BaseUrl, id: usize) -> Result( async fn with_netcdfcf_provider( ctx: &C, - session: &C::Session, + _session: &C::Session, f: F, ) -> Result where @@ -329,8 +330,8 @@ where F: FnOnce(&NetCdfCfDataProvider) -> Result + Send + 'static, { let provider: Box = ctx - .dataset_db_ref() - .dataset_provider(session, NETCDF_CF_PROVIDER_ID) + .layer_provider_db_ref() + .layer_provider(NETCDF_CF_PROVIDER_ID) .await .map_err(|_| EbvError::NoNetCdfCfProviderForId { 
id: NETCDF_CF_PROVIDER_ID, @@ -482,15 +483,12 @@ mod tests { let ctx = InMemoryContext::test_default(); let session_id = ctx.default_session_ref().await.id(); - ctx.dataset_db_ref() - .add_dataset_provider( - &*ctx.default_session_ref().await, - Box::new(NetCdfCfDataProviderDefinition { - name: "test".to_string(), - path: test_data!("netcdf4d").to_path_buf(), - overviews: test_data!("netcdf4d/overviews").to_path_buf(), - }), - ) + ctx.layer_provider_db_ref() + .add_layer_provider(Box::new(NetCdfCfDataProviderDefinition { + name: "test".to_string(), + path: test_data!("netcdf4d").to_path_buf(), + overviews: test_data!("netcdf4d/overviews").to_path_buf(), + })) .await .unwrap(); diff --git a/services/src/handlers/gfbio.rs b/services/src/handlers/gfbio.rs index 75ca5367e..5a3bd821a 100644 --- a/services/src/handlers/gfbio.rs +++ b/services/src/handlers/gfbio.rs @@ -1,5 +1,6 @@ use crate::contexts::Context; use crate::error::Result; +use crate::layers::storage::LayerProviderDb; use crate::util::config::{get_config_element, GFBio}; use actix_web::{web, FromRequest, Responder}; use futures::stream::FuturesUnordered; @@ -38,8 +39,8 @@ async fn get_basket_handler( // Get basket content let config = get_config_element::()?; let abcd_provider = ctx - .dataset_db_ref() - .dataset_provider(&session, GFBIO_PROVIDER_ID) + .layer_provider_db() + .layer_provider(GFBIO_PROVIDER_ID) .await .ok(); @@ -433,7 +434,7 @@ mod tests { BasketInternal, TypedBasketEntry, }; use geoengine_datatypes::collections::VectorDataType; - use geoengine_datatypes::dataset::{DatasetId, DatasetProviderId, ExternalDatasetId}; + use geoengine_datatypes::dataset::{DatasetId, ExternalDatasetId, LayerProviderId}; use geoengine_datatypes::spatial_reference::{SpatialReference, SpatialReferenceOption}; use geoengine_operators::engine::{TypedResultDescriptor, VectorResultDescriptor}; use geoengine_operators::source::AttributeFilter; @@ -476,7 +477,7 @@ mod tests { #[test] fn basket_entry_serialization_ok() { let id = DatasetId::External(ExternalDatasetId { - provider_id: DatasetProviderId(Uuid::default()), + provider_id: LayerProviderId(Uuid::default()), dataset_id: "1".to_string(), }); @@ -515,7 +516,7 @@ mod tests { #[test] fn basket_entry_serialization_ok_with_filter() { let id = DatasetId::External(ExternalDatasetId { - provider_id: DatasetProviderId(Uuid::default()), + provider_id: LayerProviderId(Uuid::default()), dataset_id: "1".to_string(), }); diff --git a/services/src/handlers/mod.rs b/services/src/handlers/mod.rs index 9cde7c8f0..c67c88a9f 100644 --- a/services/src/handlers/mod.rs +++ b/services/src/handlers/mod.rs @@ -10,10 +10,10 @@ use std::fmt; use std::str::FromStr; pub mod datasets; -// #[cfg(feature = "ebv")] -// pub mod ebv; -// #[cfg(feature = "nfdi")] -// pub mod gfbio; +#[cfg(feature = "ebv")] +pub mod ebv; +#[cfg(feature = "nfdi")] +pub mod gfbio; pub mod layers; pub mod plots; pub mod projects; diff --git a/services/src/layers/layer.rs b/services/src/layers/layer.rs index e9d287b59..eeac812ae 100644 --- a/services/src/layers/layer.rs +++ b/services/src/layers/layer.rs @@ -83,6 +83,15 @@ pub enum CollectionItem { Layer(LayerListing), } +impl CollectionItem { + pub fn name(&self) -> &str { + match self { + CollectionItem::Collection(c) => &c.name, + CollectionItem::Layer(l) => &l.name, + } + } +} + #[derive(Debug, Serialize, Deserialize, Clone)] pub struct AddLayerCollection { pub name: String, diff --git a/services/src/pro/datasets/external/mod.rs b/services/src/pro/datasets/external/mod.rs index 
84543502c..6d6c793a9 100644 --- a/services/src/pro/datasets/external/mod.rs +++ b/services/src/pro/datasets/external/mod.rs @@ -1 +1 @@ -// pub mod sentinel_s2_l2a_cogs; +pub mod sentinel_s2_l2a_cogs; diff --git a/services/src/pro/datasets/external/sentinel_s2_l2a_cogs.rs b/services/src/pro/datasets/external/sentinel_s2_l2a_cogs.rs index f3b0cb1ba..f0e91a01b 100644 --- a/services/src/pro/datasets/external/sentinel_s2_l2a_cogs.rs +++ b/services/src/pro/datasets/external/sentinel_s2_l2a_cogs.rs @@ -1,14 +1,17 @@ -use crate::datasets::listing::{ - DatasetListOptions, DatasetListing, ExternalDatasetProvider, ProvenanceOutput, +use crate::datasets::listing::{DatasetListing, ProvenanceOutput}; +use crate::error::{self, Error, Result}; +use crate::layers::external::{ExternalLayerProvider, ExternalLayerProviderDefinition}; +use crate::layers::layer::{ + CollectionItem, Layer, LayerCollectionListOptions, LayerListing, ProviderLayerId, }; -use crate::datasets::storage::ExternalDatasetProviderDefinition; -use crate::error::{self, Result}; +use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider, LayerId}; use crate::projects::{RasterSymbology, Symbology}; use crate::stac::{Feature as StacFeature, FeatureCollection as StacCollection, StacAsset}; use crate::util::retry::retry; use crate::util::user_input::Validated; +use crate::workflows::workflow::Workflow; use async_trait::async_trait; -use geoengine_datatypes::dataset::{DatasetId, DatasetProviderId, ExternalDatasetId}; +use geoengine_datatypes::dataset::{DatasetId, ExternalDatasetId, LayerProviderId}; use geoengine_datatypes::operations::image::{Colorizer, RgbaColor}; use geoengine_datatypes::operations::reproject::{ CoordinateProjection, CoordinateProjector, ReprojectClipped, @@ -20,12 +23,13 @@ use geoengine_datatypes::primitives::{ use geoengine_datatypes::raster::RasterDataType; use geoengine_datatypes::spatial_reference::{SpatialReference, SpatialReferenceAuthority}; use geoengine_operators::engine::{ - MetaData, MetaDataProvider, RasterResultDescriptor, VectorResultDescriptor, + MetaData, MetaDataProvider, RasterOperator, RasterResultDescriptor, TypedOperator, + VectorResultDescriptor, }; use geoengine_operators::mock::MockDatasetDataSourceLoadingInfo; use geoengine_operators::source::{ GdalDatasetGeoTransform, GdalDatasetParameters, GdalLoadingInfo, GdalLoadingInfoTemporalSlice, - GdalLoadingInfoTemporalSliceIterator, OgrSourceDataset, + GdalLoadingInfoTemporalSliceIterator, GdalSource, GdalSourceParameters, OgrSourceDataset, }; use log::debug; use reqwest::Client; @@ -40,7 +44,7 @@ use std::path::PathBuf; #[serde(rename_all = "camelCase")] pub struct SentinelS2L2ACogsProviderDefinition { name: String, - id: DatasetProviderId, + id: LayerProviderId, api_url: String, bands: Vec, zones: Vec, @@ -69,10 +73,8 @@ impl Default for StacApiRetries { #[typetag::serde] #[async_trait] -impl ExternalDatasetProviderDefinition for SentinelS2L2ACogsProviderDefinition { - async fn initialize( - self: Box, - ) -> crate::error::Result> { +impl ExternalLayerProviderDefinition for SentinelS2L2ACogsProviderDefinition { + async fn initialize(self: Box) -> crate::error::Result> { Ok(Box::new(SentinelS2L2aCogsDataProvider::new( self.id, self.api_url, @@ -90,7 +92,7 @@ impl ExternalDatasetProviderDefinition for SentinelS2L2ACogsProviderDefinition { self.name.clone() } - fn id(&self) -> DatasetProviderId { + fn id(&self) -> LayerProviderId { self.id } } @@ -118,6 +120,8 @@ pub struct SentinelDataset { #[derive(Debug)] pub struct 
SentinelS2L2aCogsDataProvider { + id: LayerProviderId, + api_url: String, datasets: HashMap, @@ -127,13 +131,14 @@ pub struct SentinelS2L2aCogsDataProvider { impl SentinelS2L2aCogsDataProvider { pub fn new( - id: DatasetProviderId, + id: LayerProviderId, api_url: String, bands: &[Band], zones: &[Zone], stac_api_retries: StacApiRetries, ) -> Self { Self { + id, api_url, datasets: Self::create_datasets(&id, bands, zones), stac_api_retries, @@ -141,7 +146,7 @@ impl SentinelS2L2aCogsDataProvider { } fn create_datasets( - id: &DatasetProviderId, + id: &LayerProviderId, bands: &[Band], zones: &[Zone], ) -> HashMap { @@ -203,15 +208,7 @@ impl SentinelS2L2aCogsDataProvider { } #[async_trait] -impl ExternalDatasetProvider for SentinelS2L2aCogsDataProvider { - async fn list(&self, _options: Validated) -> Result> { - // TODO: options - let mut x: Vec = - self.datasets.values().map(|d| d.listing.clone()).collect(); - x.sort_by_key(|e| e.name.clone()); - Ok(x) - } - +impl ExternalLayerProvider for SentinelS2L2aCogsDataProvider { async fn provenance(&self, dataset: &DatasetId) -> Result { Ok(ProvenanceOutput { dataset: dataset.clone(), @@ -224,6 +221,72 @@ impl ExternalDatasetProvider for SentinelS2L2aCogsDataProvider { } } +#[async_trait] +impl LayerCollectionProvider for SentinelS2L2aCogsDataProvider { + async fn collection_items( + &self, + _collection: &LayerCollectionId, + _options: Validated, + ) -> Result> { + // TODO: check collection id + + // TODO: options + let mut x = self + .datasets + .values() + .map(|d| { + let id = d.listing.id.external().ok_or(Error::InvalidDatasetId)?; + Ok(CollectionItem::Layer(LayerListing { + id: ProviderLayerId { + provider: id.provider_id, + item: LayerId(id.dataset_id), + }, + name: d.listing.name.clone(), + description: d.listing.description.clone(), + })) + }) + .collect::>>()?; + x.sort_by_key(|e| e.name().to_string()); + Ok(x) + } + + async fn root_collection_id(&self) -> Result { + Ok(LayerCollectionId("SentinelS2L2ACogs".to_owned())) + } + + async fn get_layer(&self, id: &LayerId) -> Result { + let dataset_id = DatasetId::External(ExternalDatasetId { + provider_id: self.id, + dataset_id: id.0.clone(), + }); + + let dataset = self + .datasets + .get(&dataset_id) + .ok_or(Error::UnknownDatasetId)?; + + Ok(Layer { + id: ProviderLayerId { + provider: self.id, + item: id.clone(), + }, + name: dataset.listing.name.clone(), + description: dataset.listing.description.clone(), + workflow: Workflow { + operator: TypedOperator::Raster( + GdalSource { + params: GdalSourceParameters { + dataset: dataset_id, + }, + } + .boxed(), + ), + }, + symbology: dataset.listing.symbology.clone(), + }) + } +} + #[derive(Debug, Clone)] pub struct SentinelS2L2aCogsMetaData { api_url: String, @@ -582,7 +645,7 @@ impl MetaDataProvider Result<()> { // TODO: mock STAC endpoint - let def: Box = + let def: Box = serde_json::from_reader(BufReader::new(File::open(test_data!( "provider_defs/pro/sentinel_s2_l2a_cogs.json" ))?))?; @@ -618,7 +681,7 @@ mod tests { provider .meta_data( &ExternalDatasetId { - provider_id: DatasetProviderId::from_str( + provider_id: LayerProviderId::from_str( "5779494c-f3a2-48b3-8a2d-5fbba8c5b6c5", )?, dataset_id: "UTM32N:B01".to_owned(), @@ -679,7 +742,7 @@ mod tests { let mut exe = MockExecutionContext::test_default(); - let def: Box = + let def: Box = serde_json::from_reader(BufReader::new(File::open(test_data!( "provider_defs/pro/sentinel_s2_l2a_cogs.json" ))?))?; @@ -690,7 +753,7 @@ mod tests { provider .meta_data( &ExternalDatasetId { - provider_id: 
DatasetProviderId::from_str( + provider_id: LayerProviderId::from_str( "5779494c-f3a2-48b3-8a2d-5fbba8c5b6c5", )?, dataset_id: "UTM32N:B01".to_owned(), @@ -701,7 +764,7 @@ mod tests { exe.add_meta_data( ExternalDatasetId { - provider_id: DatasetProviderId::from_str("5779494c-f3a2-48b3-8a2d-5fbba8c5b6c5")?, + provider_id: LayerProviderId::from_str("5779494c-f3a2-48b3-8a2d-5fbba8c5b6c5")?, dataset_id: "UTM32N:B01".to_owned(), } .into(), @@ -711,9 +774,7 @@ mod tests { let op = GdalSource { params: GdalSourceParameters { dataset: ExternalDatasetId { - provider_id: DatasetProviderId::from_str( - "5779494c-f3a2-48b3-8a2d-5fbba8c5b6c5", - )?, + provider_id: LayerProviderId::from_str("5779494c-f3a2-48b3-8a2d-5fbba8c5b6c5")?, dataset_id: "UTM32N:B01".to_owned(), } .into(), @@ -791,10 +852,9 @@ mod tests { ]), ); - let provider_id: DatasetProviderId = - "5779494c-f3a2-48b3-8a2d-5fbba8c5b6c5".parse().unwrap(); + let provider_id: LayerProviderId = "5779494c-f3a2-48b3-8a2d-5fbba8c5b6c5".parse().unwrap(); - let provider_def: Box = + let provider_def: Box = Box::new(SentinelS2L2ACogsProviderDefinition { name: "Element 84 AWS STAC".into(), id: provider_id, diff --git a/services/src/pro/server.rs b/services/src/pro/server.rs index 51a966dfe..9f4ac8b54 100644 --- a/services/src/pro/server.rs +++ b/services/src/pro/server.rs @@ -59,16 +59,16 @@ where app = app.configure(pro::handlers::drone_mapping::init_drone_mapping_routes::); } - // #[cfg(feature = "ebv")] - // { - // app = app - // .service(web::scope("/ebv").configure(handlers::ebv::init_ebv_routes::(None))); - // } - - // #[cfg(feature = "nfdi")] - // { - // app = app.configure(handlers::gfbio::init_gfbio_routes::); - // } + #[cfg(feature = "ebv")] + { + app = app + .service(web::scope("/ebv").configure(handlers::ebv::init_ebv_routes::(None))); + } + + #[cfg(feature = "nfdi")] + { + app = app.configure(handlers::gfbio::init_gfbio_routes::); + } if version_api { app = app.route( diff --git a/services/src/server.rs b/services/src/server.rs index 3d8a2c482..0056f65e9 100644 --- a/services/src/server.rs +++ b/services/src/server.rs @@ -113,16 +113,16 @@ where .configure(handlers::wms::init_wms_routes::) .configure(handlers::workflows::init_workflow_routes::); - // #[cfg(feature = "ebv")] - // { - // app = app - // .service(web::scope("/ebv").configure(handlers::ebv::init_ebv_routes::(None))); - // } - - // #[cfg(feature = "nfdi")] - // { - // app = app.configure(handlers::gfbio::init_gfbio_routes::); - // } + #[cfg(feature = "ebv")] + { + app = app + .service(web::scope("/ebv").configure(handlers::ebv::init_ebv_routes::(None))); + } + + #[cfg(feature = "nfdi")] + { + app = app.configure(handlers::gfbio::init_gfbio_routes::); + } if version_api { app = app.route("/version", web::get().to(show_version_handler)); } From 95a93f68ca914ca508feea1e742236860bdb0ea2 Mon Sep 17 00:00:00 2001 From: Michael Mattig Date: Tue, 28 Jun 2022 18:07:08 +0200 Subject: [PATCH 09/21] fix test --- services/src/datasets/external/nature40.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/src/datasets/external/nature40.rs b/services/src/datasets/external/nature40.rs index f6f928142..ce277bd3d 100644 --- a/services/src/datasets/external/nature40.rs +++ b/services/src/datasets/external/nature40.rs @@ -809,7 +809,7 @@ mod tests { id: ProviderLayerId { provider: LayerProviderId::from_str("2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd") .unwrap(), - item: LayerId("geonode_ortho_muf_1m:4".to_owned()) + item: LayerId("lidar_2018_wetness_1m:1".to_owned()) }, name: 
"Topografic Wetness index".to_owned(), description: "Band 1: wetness".to_owned(), From b857b38c2c55b6be8b1fac5929488ea6442f75e2 Mon Sep 17 00:00:00 2001 From: Michael Mattig Date: Wed, 29 Jun 2022 10:59:06 +0200 Subject: [PATCH 10/21] layer listing for netcdfcf --- .../src/datasets/external/netcdfcf/mod.rs | 153 +++++++++++------- 1 file changed, 95 insertions(+), 58 deletions(-) diff --git a/services/src/datasets/external/netcdfcf/mod.rs b/services/src/datasets/external/netcdfcf/mod.rs index e1a2df18e..9a39ddf12 100644 --- a/services/src/datasets/external/netcdfcf/mod.rs +++ b/services/src/datasets/external/netcdfcf/mod.rs @@ -1,14 +1,16 @@ use crate::datasets::listing::ProvenanceOutput; -use crate::error::Error; use crate::layers::external::ExternalLayerProvider; use crate::layers::external::ExternalLayerProviderDefinition; use crate::layers::layer::CollectionItem; use crate::layers::layer::Layer; use crate::layers::layer::LayerCollectionListOptions; +use crate::layers::layer::LayerListing; +use crate::layers::layer::ProviderLayerId; use crate::layers::listing::LayerCollectionId; use crate::layers::listing::LayerCollectionProvider; use crate::layers::listing::LayerId; use crate::projects::{RasterSymbology, Symbology}; +use crate::workflows::workflow::Workflow; use crate::{datasets::listing::DatasetListing, util::user_input::Validated}; use async_trait::async_trait; use gdal::{DatasetOptions, GdalOpenFlags}; @@ -21,7 +23,11 @@ use geoengine_datatypes::primitives::{ }; use geoengine_datatypes::raster::{GdalGeoTransform, RasterDataType}; use geoengine_datatypes::spatial_reference::SpatialReference; +use geoengine_operators::engine::RasterOperator; +use geoengine_operators::engine::TypedOperator; use geoengine_operators::engine::TypedResultDescriptor; +use geoengine_operators::source::GdalSource; +use geoengine_operators::source::GdalSourceParameters; use geoengine_operators::source::{ FileNotFoundHandling, GdalDatasetGeoTransform, GdalDatasetParameters, GdalLoadingInfoTemporalSlice, GdalMetaDataList, GdalMetadataNetCdfCf, @@ -824,58 +830,6 @@ enum Metadata { #[async_trait] impl ExternalLayerProvider for NetCdfCfDataProvider { - // async fn list( - // &self, - // options: Validated, - // ) -> crate::error::Result> { - // // TODO: user right management - // // TODO: options - - // let mut dir = tokio::fs::read_dir(&self.path).await?; - - // let mut datasets = vec![]; - // while let Some(entry) = dir.next_entry().await? 
{ - // if !entry.path().is_file() { - // continue; - // } - - // let provider_path = self.path.clone(); - // let overviews_path = self.overviews.clone(); - // let relative_path = if let Ok(p) = entry.path().strip_prefix(&provider_path) { - // p.to_path_buf() - // } else { - // // cannot actually happen since `entry` is listed from `provider_path` - // continue; - // }; - - // let listing = tokio::task::spawn_blocking(move || { - // Self::listing_from_netcdf( - // NETCDF_CF_PROVIDER_ID, - // &provider_path, - // Some(&overviews_path), - // &relative_path, - // false, - // ) - // }) - // .await?; - - // match listing { - // Ok(listing) => datasets.extend(listing), - // Err(e) => debug!("Failed to list dataset: {}", e), - // } - // } - - // // TODO: react to filter and sort options - // // TODO: don't compute everything and filter then - // let datasets = datasets - // .into_iter() - // .skip(options.user_input.offset as usize) - // .take(options.user_input.limit as usize) - // .collect(); - - // Ok(datasets) - // } - async fn provenance(&self, dataset: &DatasetId) -> crate::error::Result { Ok(ProvenanceOutput { dataset: dataset.clone(), @@ -894,17 +848,100 @@ impl LayerCollectionProvider for NetCdfCfDataProvider { async fn collection_items( &self, _collection: &LayerCollectionId, - _options: Validated, + options: Validated, ) -> crate::error::Result> { - Err(Error::NotYetImplemented) + // TODO: check collection id + + let mut dir = tokio::fs::read_dir(&self.path).await?; + + let mut datasets = vec![]; + while let Some(entry) = dir.next_entry().await? { + if !entry.path().is_file() { + continue; + } + + let provider_path = self.path.clone(); + let overviews_path = self.overviews.clone(); + let relative_path = if let Ok(p) = entry.path().strip_prefix(&provider_path) { + p.to_path_buf() + } else { + // cannot actually happen since `entry` is listed from `provider_path` + continue; + }; + + let listing = tokio::task::spawn_blocking(move || { + Self::listing_from_netcdf( + NETCDF_CF_PROVIDER_ID, + &provider_path, + Some(&overviews_path), + &relative_path, + false, + ) + .map(|l| { + l.into_iter() + .map(|l| { + CollectionItem::Layer(LayerListing { + id: crate::layers::layer::ProviderLayerId { + provider: NETCDF_CF_PROVIDER_ID, + item: LayerId( + l.id.external() + .expect("listing produces only external datasets") + .dataset_id, + ), + }, + name: l.name, + description: l.description, + }) + }) + .collect::>() + }) + }) + .await?; + + match listing { + Ok(listing) => datasets.extend(listing), + Err(e) => debug!("Failed to list dataset: {}", e), + } + } + + // TODO: react to filter and sort options + // TODO: don't compute everything and filter then + let datasets = datasets + .into_iter() + .skip(options.user_input.offset as usize) + .take(options.user_input.limit as usize) + .collect(); + + Ok(datasets) } async fn root_collection_id(&self) -> crate::error::Result { - Err(Error::NotYetImplemented) + Ok(LayerCollectionId("root".to_string())) } - async fn get_layer(&self, _id: &LayerId) -> crate::error::Result { - Err(Error::NotYetImplemented) + async fn get_layer(&self, id: &LayerId) -> crate::error::Result { + Ok(Layer { + id: ProviderLayerId { + provider: NETCDF_CF_PROVIDER_ID, + item: id.clone(), + }, + name: "".to_string(), // TODO: get from file or overview + description: "".to_string(), // TODO: get from file or overview + workflow: Workflow { + operator: TypedOperator::Raster( + GdalSource { + params: GdalSourceParameters { + dataset: DatasetId::External(ExternalDatasetId { + provider_id: 
NETCDF_CF_PROVIDER_ID, + dataset_id: id.0.clone(), + }), + }, + } + .boxed(), + ), + }, + symbology: None, + }) } } From 99987d78246085dd4f7acc8f83835c5f8af7885a Mon Sep 17 00:00:00 2001 From: Michael Mattig Date: Wed, 29 Jun 2022 11:07:16 +0200 Subject: [PATCH 11/21] check root collection ids --- services/src/datasets/external/gfbio.rs | 13 ++++++++++--- services/src/datasets/external/mock.rs | 11 +++++++++-- services/src/datasets/external/nature40.rs | 10 ++++++++-- services/src/datasets/external/netcdfcf/mod.rs | 10 ++++++++-- services/src/datasets/external/nfdi/mod.rs | 12 +++++++++--- services/src/datasets/in_memory.rs | 10 ++++++++-- services/src/error.rs | 11 ++++++----- .../pro/datasets/external/sentinel_s2_l2a_cogs.rs | 11 ++++++++--- services/src/pro/datasets/in_memory.rs | 9 +++++++-- services/src/pro/datasets/postgres.rs | 9 +++++++-- 10 files changed, 80 insertions(+), 26 deletions(-) diff --git a/services/src/datasets/external/gfbio.rs b/services/src/datasets/external/gfbio.rs index a720e1f4c..5417a24d5 100644 --- a/services/src/datasets/external/gfbio.rs +++ b/services/src/datasets/external/gfbio.rs @@ -2,8 +2,8 @@ use std::collections::HashMap; use std::marker::PhantomData; use crate::datasets::listing::{Provenance, ProvenanceOutput}; -use crate::error::Error; use crate::error::Result; +use crate::error::{self, Error}; use crate::layers::external::{ExternalLayerProvider, ExternalLayerProviderDefinition}; use crate::layers::layer::{ CollectionItem, Layer, LayerCollectionListOptions, LayerListing, ProviderLayerId, @@ -34,6 +34,7 @@ use geoengine_operators::{ source::{GdalLoadingInfo, OgrSourceDataset}, }; use serde::{Deserialize, Serialize}; +use snafu::ensure; pub const GFBIO_PROVIDER_ID: LayerProviderId = LayerProviderId::from_u128(0x907f_9f5b_0304_4a0e_a5ef_28de_62d1_c0f9); @@ -175,10 +176,16 @@ impl GfbioDataProvider { impl LayerCollectionProvider for GfbioDataProvider { async fn collection_items( &self, - _collection: &LayerCollectionId, + collection: &LayerCollectionId, options: Validated, ) -> Result> { - // TODO: check collection id + ensure!( + *collection == self.root_collection_id().await?, + error::UnknownLayerCollectionId { + id: collection.clone() + } + ); + let conn = self.pool.get().await?; let options = options.user_input; diff --git a/services/src/datasets/external/mock.rs b/services/src/datasets/external/mock.rs index 5fcc5e2ad..7167fc996 100644 --- a/services/src/datasets/external/mock.rs +++ b/services/src/datasets/external/mock.rs @@ -22,6 +22,7 @@ use geoengine_operators::{ source::{GdalLoadingInfo, OgrSourceDataset}, }; use serde::{Deserialize, Serialize}; +use snafu::ensure; use uuid::Uuid; pub const ROOT_COLLECTION_ID: Uuid = Uuid::from_u128(0xd630_e723_63d4_440c_9e15_644c_400f_c7c1); @@ -88,10 +89,16 @@ impl ExternalLayerProvider for MockExternalDataProvider { impl LayerCollectionProvider for MockExternalDataProvider { async fn collection_items( &self, - _collection: &LayerCollectionId, + collection: &LayerCollectionId, _options: Validated, ) -> Result> { - // TODO: use collection id + ensure!( + *collection == self.root_collection_id().await?, + error::UnknownLayerCollectionId { + id: collection.clone() + } + ); + // TODO: use options let mut listing = vec![]; diff --git a/services/src/datasets/external/nature40.rs b/services/src/datasets/external/nature40.rs index c389c1199..a6182efa2 100644 --- a/services/src/datasets/external/nature40.rs +++ b/services/src/datasets/external/nature40.rs @@ -37,6 +37,7 @@ use quick_xml::events::Event; use 
quick_xml::Reader; use reqwest::Client; use serde::{Deserialize, Serialize}; +use snafu::ensure; use snafu::ResultExt; use url::Url; @@ -156,10 +157,15 @@ impl ExternalLayerProvider for Nature40DataProvider { impl LayerCollectionProvider for Nature40DataProvider { async fn collection_items( &self, - _collection: &LayerCollectionId, + collection: &LayerCollectionId, _options: Validated, ) -> Result> { - // TODO: check collection id + ensure!( + *collection == self.root_collection_id().await?, + error::UnknownLayerCollectionId { + id: collection.clone() + } + ); // TODO: query the other dbs as well let raster_dbs = self.load_raster_dbs().await?; diff --git a/services/src/datasets/external/netcdfcf/mod.rs b/services/src/datasets/external/netcdfcf/mod.rs index 9a39ddf12..052506f68 100644 --- a/services/src/datasets/external/netcdfcf/mod.rs +++ b/services/src/datasets/external/netcdfcf/mod.rs @@ -40,6 +40,7 @@ use geoengine_operators::{ }; use log::debug; use serde::{Deserialize, Serialize}; +use snafu::ensure; use snafu::{OptionExt, ResultExt}; use std::collections::VecDeque; use std::io::BufReader; @@ -847,10 +848,15 @@ impl ExternalLayerProvider for NetCdfCfDataProvider { impl LayerCollectionProvider for NetCdfCfDataProvider { async fn collection_items( &self, - _collection: &LayerCollectionId, + collection: &LayerCollectionId, options: Validated, ) -> crate::error::Result> { - // TODO: check collection id + ensure!( + *collection == self.root_collection_id().await?, + crate::error::UnknownLayerCollectionId { + id: collection.clone() + } + ); let mut dir = tokio::fs::read_dir(&self.path).await?; diff --git a/services/src/datasets/external/nfdi/mod.rs b/services/src/datasets/external/nfdi/mod.rs index ce60b7988..7555c0b36 100644 --- a/services/src/datasets/external/nfdi/mod.rs +++ b/services/src/datasets/external/nfdi/mod.rs @@ -3,7 +3,7 @@ use crate::datasets::listing::{ ProvenanceOutput, }; use crate::datasets::storage::{Dataset}; -use crate::error::{Error, Result}; +use crate::error::{Error, Result, self}; use crate::layers::external::{ExternalLayerProviderDefinition, ExternalLayerProvider}; use crate::layers::layer::{LayerCollectionListOptions, CollectionItem, Layer, LayerListing, ProviderLayerId}; use crate::layers::listing::{LayerCollectionProvider, LayerCollectionId, LayerId}; @@ -35,6 +35,7 @@ use scienceobjectsdb_rust_api::sciobjectsdb::sciobjsdb::api::storage::services:: GetProjectDatasetsRequest, }; use serde::{Deserialize, Serialize}; +use snafu::ensure; use std::collections::HashMap; use std::fmt::Debug; use std::marker::PhantomData; @@ -541,10 +542,15 @@ impl ExternalLayerProvider for NFDIDataProvider { impl LayerCollectionProvider for NFDIDataProvider { async fn collection_items( &self, - _collection: &LayerCollectionId, + collection: &LayerCollectionId, _options: Validated, ) -> Result> { - // TODO: check collection id + ensure!( + *collection == self.root_collection_id().await?, + error::UnknownLayerCollectionId { + id: collection.clone() + } + ); let mut project_stub = self.project_stub.clone(); diff --git a/services/src/datasets/in_memory.rs b/services/src/datasets/in_memory.rs index babba909d..9fa25d009 100644 --- a/services/src/datasets/in_memory.rs +++ b/services/src/datasets/in_memory.rs @@ -23,6 +23,7 @@ use geoengine_operators::source::{ GdalSourceParameters, OgrSource, OgrSourceDataset, OgrSourceParameters, }; use geoengine_operators::{mock::MockDatasetDataSourceLoadingInfo, source::GdalMetaDataStatic}; +use snafu::ensure; use std::collections::HashMap; use 
std::str::FromStr; @@ -401,10 +402,15 @@ impl UploadDb for HashMapDatasetDb { impl LayerCollectionProvider for HashMapDatasetDb { async fn collection_items( &self, - _collection: &LayerCollectionId, + collection: &LayerCollectionId, options: Validated, ) -> Result> { - // TODO: check collection id + ensure!( + *collection == self.root_collection_id().await?, + error::UnknownLayerCollectionId { + id: collection.clone() + } + ); let options = options.user_input; diff --git a/services/src/error.rs b/services/src/error.rs index b2bf5b01b..0377090ff 100644 --- a/services/src/error.rs +++ b/services/src/error.rs @@ -1,8 +1,5 @@ -use crate::{ - datasets::external::netcdfcf::NetCdfCf4DProviderError, handlers::ErrorResponse, - workflows::workflow::WorkflowId, -}; -// use crate::{datasets::external::netcdfcf::NetCdfCf4DProviderError, handlers::ErrorResponse}; +use crate::{datasets::external::netcdfcf::NetCdfCf4DProviderError, handlers::ErrorResponse}; +use crate::{layers::listing::LayerCollectionId, workflows::workflow::WorkflowId}; use actix_web::http::StatusCode; use actix_web::HttpResponse; use geoengine_datatypes::{ @@ -352,6 +349,10 @@ pub enum Error { TaskError { source: crate::tasks::TaskError, }, + + UnknownLayerCollectionId { + id: LayerCollectionId, + }, } impl actix_web::error::ResponseError for Error { diff --git a/services/src/pro/datasets/external/sentinel_s2_l2a_cogs.rs b/services/src/pro/datasets/external/sentinel_s2_l2a_cogs.rs index 3d12b577b..ad49878a9 100644 --- a/services/src/pro/datasets/external/sentinel_s2_l2a_cogs.rs +++ b/services/src/pro/datasets/external/sentinel_s2_l2a_cogs.rs @@ -34,7 +34,7 @@ use geoengine_operators::source::{ use log::debug; use reqwest::Client; use serde::{Deserialize, Serialize}; -use snafu::ResultExt; +use snafu::{ensure, ResultExt}; use std::collections::HashMap; use std::convert::TryInto; use std::fmt::Debug; @@ -227,10 +227,15 @@ impl ExternalLayerProvider for SentinelS2L2aCogsDataProvider { impl LayerCollectionProvider for SentinelS2L2aCogsDataProvider { async fn collection_items( &self, - _collection: &LayerCollectionId, + collection: &LayerCollectionId, _options: Validated, ) -> Result> { - // TODO: check collection id + ensure!( + *collection == self.root_collection_id().await?, + error::UnknownLayerCollectionId { + id: collection.clone() + } + ); // TODO: options let mut x = self diff --git a/services/src/pro/datasets/in_memory.rs b/services/src/pro/datasets/in_memory.rs index 9cb08c07f..7e6a6f4e2 100644 --- a/services/src/pro/datasets/in_memory.rs +++ b/services/src/pro/datasets/in_memory.rs @@ -542,10 +542,15 @@ impl UploadDb for ProHashMapDatasetDb { impl LayerCollectionProvider for ProHashMapDatasetDb { async fn collection_items( &self, - _collection: &LayerCollectionId, + collection: &LayerCollectionId, options: Validated, ) -> Result> { - // TODO: check collection id + ensure!( + *collection == self.root_collection_id().await?, + error::UnknownLayerCollectionId { + id: collection.clone() + } + ); let options = options.user_input; diff --git a/services/src/pro/datasets/postgres.rs b/services/src/pro/datasets/postgres.rs index 14bb85df0..a5e151d79 100644 --- a/services/src/pro/datasets/postgres.rs +++ b/services/src/pro/datasets/postgres.rs @@ -681,10 +681,15 @@ where { async fn collection_items( &self, - _collection: &LayerCollectionId, + collection: &LayerCollectionId, options: Validated, ) -> Result> { - // TODO: check collection id + ensure!( + *collection == self.root_collection_id().await?, + error::UnknownLayerCollectionId { + 
id: collection.clone() + } + ); let conn = self.conn_pool.get().await?; From c2d3b516be941ecd3e67acc00c91ba08250ce81a Mon Sep 17 00:00:00 2001 From: Michael Mattig Date: Wed, 29 Jun 2022 11:21:22 +0200 Subject: [PATCH 12/21] fix external dataset handling for dataset db --- services/src/datasets/in_memory.rs | 28 +++++++---------- services/src/pro/datasets/in_memory.rs | 42 +++++++++++--------------- 2 files changed, 29 insertions(+), 41 deletions(-) diff --git a/services/src/datasets/in_memory.rs b/services/src/datasets/in_memory.rs index 9fa25d009..0205581da 100644 --- a/services/src/datasets/in_memory.rs +++ b/services/src/datasets/in_memory.rs @@ -259,23 +259,17 @@ impl DatasetProvider for HashMapDatasetDb { _session: &SimpleSession, dataset: &DatasetId, ) -> Result { - match dataset { - DatasetId::Internal { dataset_id: _ } => self - .backend - .read() - .await - .datasets - .iter() - .find(|d| d.id == *dataset) - .map(|d| ProvenanceOutput { - dataset: d.id.clone(), - provenance: d.provenance.clone(), - }) - .ok_or(error::Error::UnknownDatasetId), - DatasetId::External(_id) => { - todo!() // TODO: throw error - } - } + self.backend + .read() + .await + .datasets + .iter() + .find(|d| d.id == *dataset) + .map(|d| ProvenanceOutput { + dataset: d.id.clone(), + provenance: d.provenance.clone(), + }) + .ok_or(error::Error::UnknownDatasetId) } } diff --git a/services/src/pro/datasets/in_memory.rs b/services/src/pro/datasets/in_memory.rs index 7e6a6f4e2..5e7d38ad6 100644 --- a/services/src/pro/datasets/in_memory.rs +++ b/services/src/pro/datasets/in_memory.rs @@ -323,31 +323,25 @@ impl DatasetProvider for ProHashMapDatasetDb { dataset: &DatasetId, ) -> Result { let backend = self.backend.read().await; - match dataset { - DatasetId::Internal { dataset_id: _ } => { - ensure!( - backend - .dataset_permissions - .iter() - .any(|p| session.roles.contains(&p.role)), - error::DatasetPermissionDenied { - dataset: dataset.clone(), - } - ); - - backend - .datasets - .get(dataset) - .map(|d| ProvenanceOutput { - dataset: d.id.clone(), - provenance: d.provenance.clone(), - }) - .ok_or(error::Error::UnknownDatasetId) - } - DatasetId::External(_id) => { - todo!() // throw error + + ensure!( + backend + .dataset_permissions + .iter() + .any(|p| session.roles.contains(&p.role)), + error::DatasetPermissionDenied { + dataset: dataset.clone(), } - } + ); + + backend + .datasets + .get(dataset) + .map(|d| ProvenanceOutput { + dataset: d.id.clone(), + provenance: d.provenance.clone(), + }) + .ok_or(error::Error::UnknownDatasetId) } } From 3a902a70ea60922d2150ecde33ad076960b198c6 Mon Sep 17 00:00:00 2001 From: Michael Mattig Date: Wed, 29 Jun 2022 11:48:54 +0200 Subject: [PATCH 13/21] cleanup --- datatypes/src/dataset.rs | 3 ++ output.gpkg | 0 .../src/datasets/external/netcdfcf/mod.rs | 1 - services/src/datasets/external/nfdi/mod.rs | 1 - services/src/datasets/storage.rs | 1 - services/src/handlers/datasets.rs | 36 +------------------ services/src/handlers/layers.rs | 2 -- services/src/layers/external.rs | 2 +- services/src/layers/layer.rs | 1 - services/src/pro/datasets/in_memory.rs | 2 +- 10 files changed, 6 insertions(+), 43 deletions(-) delete mode 100644 output.gpkg diff --git a/datatypes/src/dataset.rs b/datatypes/src/dataset.rs index 9b2caf3c7..42d58ee3b 100644 --- a/datatypes/src/dataset.rs +++ b/datatypes/src/dataset.rs @@ -9,6 +9,9 @@ identifier!(StagingDatasetId); #[derive(Debug, Clone, Hash, Eq, PartialEq, Deserialize, Serialize)] #[serde(rename_all = "camelCase", tag = "type")] +// TODO: distinguish 
between Datasets (stuff MANAGED by Geo Engine) and Data (stuff LOADABLE by Geo Engine) +// DatasetId should be used to refer to local dataset and DataId (or similar) should differentiate +// between local datasets and external providers pub enum DatasetId { #[serde(rename_all = "camelCase")] Internal { diff --git a/output.gpkg b/output.gpkg deleted file mode 100644 index e69de29bb..000000000 diff --git a/services/src/datasets/external/netcdfcf/mod.rs b/services/src/datasets/external/netcdfcf/mod.rs index 052506f68..b0fb3dc5d 100644 --- a/services/src/datasets/external/netcdfcf/mod.rs +++ b/services/src/datasets/external/netcdfcf/mod.rs @@ -844,7 +844,6 @@ impl ExternalLayerProvider for NetCdfCfDataProvider { } #[async_trait] -// TODO: replace the custom dataset API with this one impl LayerCollectionProvider for NetCdfCfDataProvider { async fn collection_items( &self, diff --git a/services/src/datasets/external/nfdi/mod.rs b/services/src/datasets/external/nfdi/mod.rs index 7555c0b36..2cfb127e9 100644 --- a/services/src/datasets/external/nfdi/mod.rs +++ b/services/src/datasets/external/nfdi/mod.rs @@ -584,7 +584,6 @@ impl LayerCollectionProvider for NFDIDataProvider { async fn get_layer(&self, id: &LayerId) -> Result { let mut project_stub = self.project_stub.clone(); - // TODO: avoid loading ALL project datasets let resp = project_stub .get_project_datasets(GetProjectDatasetsRequest { id: self.project_id.clone(), diff --git a/services/src/datasets/storage.rs b/services/src/datasets/storage.rs index dfb602c25..0f7e598bb 100644 --- a/services/src/datasets/storage.rs +++ b/services/src/datasets/storage.rs @@ -25,7 +25,6 @@ use uuid::Uuid; use super::listing::Provenance; -// TODO: where to put these constants? pub const DATASET_DB_LAYER_PROVIDER_ID: LayerProviderId = LayerProviderId::from_u128(0xac50_ed0d_c9a0_41f8_9ce8_35fc_9e38_299b); diff --git a/services/src/handlers/datasets.rs b/services/src/handlers/datasets.rs index 91fef155d..ad5f2d772 100644 --- a/services/src/handlers/datasets.rs +++ b/services/src/handlers/datasets.rs @@ -56,43 +56,9 @@ where web::resource("/suggest").route(web::get().to(suggest_meta_data_handler::)), ), ) - // .service(web::resource("/providers").route(web::get().to(list_providers_handler::))) - .service(web::resource("/datasets").route(web::get().to(list_datasets_handler::))) - // .service( - // web::resource("/datasets/external/{provider}") - // .route(web::get().to(list_external_datasets_handler::)), - // ) - ; + .service(web::resource("/datasets").route(web::get().to(list_datasets_handler::))); } -// async fn list_providers_handler( -// session: C::Session, -// ctx: web::Data, -// options: web::Query, -// ) -> Result { -// let list = ctx -// .dataset_db_ref() -// .list_dataset_providers(&session, options.into_inner().validated()?) -// .await?; -// Ok(web::Json(list)) -// } - -// async fn list_external_datasets_handler( -// provider: web::Path, -// session: C::Session, -// ctx: web::Data, -// options: web::Query, -// ) -> Result { -// let options = options.into_inner().validated()?; -// let list = ctx -// .dataset_db_ref() -// .dataset_provider(&session, provider.into_inner()) -// .await? -// .list(options) // TODO: authorization -// .await?; -// Ok(web::Json(list)) -// } - /// Lists available [Datasets](crate::datasets::listing::DatasetListing). 
/// /// # Example diff --git a/services/src/handlers/layers.rs b/services/src/handlers/layers.rs index 7bd85729b..cfd4594fe 100644 --- a/services/src/handlers/layers.rs +++ b/services/src/handlers/layers.rs @@ -31,8 +31,6 @@ async fn list_root_collections_handler( ) -> Result { let mut providers = vec![]; - // TODO: add dataset db as provider - if options.offset == 0 && options.limit > 0 { providers.push(CollectionItem::Collection(LayerCollectionListing { id: ProviderLayerCollectionId { diff --git a/services/src/layers/external.rs b/services/src/layers/external.rs index 4913fef5d..2739defa5 100644 --- a/services/src/layers/external.rs +++ b/services/src/layers/external.rs @@ -49,7 +49,7 @@ impl Clone for Box { } } -/// A provider of datasets that are not hosted by Geo Engine itself but some external party +/// A provider of layers that are not hosted by Geo Engine itself but some external party // TODO: Authorization: the provider needs to accept credentials for the external data source. // The credentials should be generic s.t. they are independent of the Session type and // extensible to new provider types. E.g. a key-value map of strings where the provider diff --git a/services/src/layers/layer.rs b/services/src/layers/layer.rs index 1d5363653..58e42d3ad 100644 --- a/services/src/layers/layer.rs +++ b/services/src/layers/layer.rs @@ -22,7 +22,6 @@ pub struct ProviderLayerCollectionId { #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] pub struct Layer { - // TODO: add provider, also need a separate struct for import and API output pub id: ProviderLayerId, pub name: String, pub description: String, diff --git a/services/src/pro/datasets/in_memory.rs b/services/src/pro/datasets/in_memory.rs index 5e7d38ad6..616716607 100644 --- a/services/src/pro/datasets/in_memory.rs +++ b/services/src/pro/datasets/in_memory.rs @@ -559,7 +559,7 @@ impl LayerCollectionProvider for ProHashMapDatasetDb { CollectionItem::Layer(LayerListing { id: ProviderLayerId { provider: DATASET_DB_LAYER_PROVIDER_ID, - // use the dataset id also as layer id, TODO: maybe prefix it? 
+ // use the dataset id also as layer id item: LayerId( d.id.internal() .expect("Dataset DB contains only internal datasets") From 158facb5874b18d868b667ce7e4061a33f518ea3 Mon Sep 17 00:00:00 2001 From: Michael Mattig Date: Wed, 6 Jul 2022 16:05:29 +0200 Subject: [PATCH 14/21] add trait for OperatorNames and convenience method creating source operator --- operators/src/engine/mod.rs | 2 +- operators/src/engine/operator.rs | 4 ++ .../src/mock/mock_dataset_data_source.rs | 8 ++- operators/src/source/gdal_source/mod.rs | 6 ++- operators/src/source/ogr_source/mod.rs | 6 ++- services/src/datasets/in_memory.rs | 44 +++------------ services/src/pro/datasets/in_memory.rs | 44 +++------------ services/src/pro/datasets/postgres.rs | 53 ++++--------------- services/src/util/mod.rs | 1 + services/src/util/operators.rs | 46 ++++++++++++++++ 10 files changed, 91 insertions(+), 123 deletions(-) create mode 100644 services/src/util/operators.rs diff --git a/operators/src/engine/mod.rs b/operators/src/engine/mod.rs index 53d608ea4..b8628dc74 100644 --- a/operators/src/engine/mod.rs +++ b/operators/src/engine/mod.rs @@ -7,7 +7,7 @@ pub use execution_context::{ }; pub use operator::{ InitializedPlotOperator, InitializedRasterOperator, InitializedVectorOperator, - OperatorDatasets, PlotOperator, RasterOperator, TypedOperator, VectorOperator, + OperatorDatasets, OperatorName, PlotOperator, RasterOperator, TypedOperator, VectorOperator, }; pub use operator_impl::{ MultipleRasterOrSingleVectorSource, MultipleRasterSources, MultipleVectorSources, Operator, diff --git a/operators/src/engine/operator.rs b/operators/src/engine/operator.rs index 144e9f937..f87ab7fcd 100644 --- a/operators/src/engine/operator.rs +++ b/operators/src/engine/operator.rs @@ -273,3 +273,7 @@ impl OperatorDatasets for TypedOperator { } } } + +pub trait OperatorName { + const TYPE_NAME: &'static str; +} diff --git a/operators/src/mock/mock_dataset_data_source.rs b/operators/src/mock/mock_dataset_data_source.rs index 90ecb1d95..3fb41cbc5 100644 --- a/operators/src/mock/mock_dataset_data_source.rs +++ b/operators/src/mock/mock_dataset_data_source.rs @@ -1,6 +1,6 @@ use crate::engine::{ - ExecutionContext, InitializedVectorOperator, MetaData, OperatorDatasets, QueryContext, - ResultDescriptor, SourceOperator, TypedVectorQueryProcessor, VectorOperator, + ExecutionContext, InitializedVectorOperator, MetaData, OperatorDatasets, OperatorName, + QueryContext, ResultDescriptor, SourceOperator, TypedVectorQueryProcessor, VectorOperator, VectorQueryProcessor, VectorResultDescriptor, }; use crate::util::Result; @@ -111,6 +111,10 @@ pub struct MockDatasetDataSourceParams { pub type MockDatasetDataSource = SourceOperator; +impl OperatorName for MockDatasetDataSource { + const TYPE_NAME: &'static str = "MockDatasetDataSource"; +} + #[typetag::serde] #[async_trait] impl VectorOperator for MockDatasetDataSource { diff --git a/operators/src/source/gdal_source/mod.rs b/operators/src/source/gdal_source/mod.rs index 4dd26c7d3..a43623593 100755 --- a/operators/src/source/gdal_source/mod.rs +++ b/operators/src/source/gdal_source/mod.rs @@ -1,5 +1,5 @@ use crate::adapters::SparseTilesFillAdapter; -use crate::engine::{MetaData, OperatorDatasets, QueryProcessor}; +use crate::engine::{MetaData, OperatorDatasets, OperatorName, QueryProcessor}; use crate::util::gdal::gdal_open_dataset_ex; use crate::util::input::float_option_with_nan; use crate::{ @@ -557,6 +557,10 @@ where pub type GdalSource = SourceOperator; +impl OperatorName for GdalSource { + const TYPE_NAME: 
&'static str = "GdalSource"; +} + #[typetag::serde] #[async_trait] impl RasterOperator for GdalSource { diff --git a/operators/src/source/ogr_source/mod.rs b/operators/src/source/ogr_source/mod.rs index 5e63e7820..1e0d94bd7 100644 --- a/operators/src/source/ogr_source/mod.rs +++ b/operators/src/source/ogr_source/mod.rs @@ -36,7 +36,7 @@ use geoengine_datatypes::primitives::{ }; use geoengine_datatypes::util::arrow::ArrowTyped; -use crate::engine::{OperatorDatasets, QueryProcessor}; +use crate::engine::{OperatorDatasets, OperatorName, QueryProcessor}; use crate::error::Error; use crate::util::input::StringOrNumberRange; use crate::util::Result; @@ -78,6 +78,10 @@ impl OperatorDatasets for OgrSourceParameters { pub type OgrSource = SourceOperator; +impl OperatorName for OgrSource { + const TYPE_NAME: &'static str = "OgrSource"; +} + /// - `file_name`: path to the input file /// - `layer_name`: name of the layer to load /// - `time`: the type of the time attribute(s) diff --git a/services/src/datasets/in_memory.rs b/services/src/datasets/in_memory.rs index 0205581da..927c9d36e 100644 --- a/services/src/datasets/in_memory.rs +++ b/services/src/datasets/in_memory.rs @@ -7,6 +7,7 @@ use crate::layers::layer::{ CollectionItem, Layer, LayerCollectionListOptions, LayerListing, ProviderLayerId, }; use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider, LayerId}; +use crate::util::operators::source_operator_from_dataset; use crate::util::user_input::Validated; use crate::workflows::workflow::Workflow; use async_trait::async_trait; @@ -14,13 +15,12 @@ use geoengine_datatypes::dataset::{DatasetId, InternalDatasetId}; use geoengine_datatypes::primitives::{RasterQueryRectangle, VectorQueryRectangle}; use geoengine_datatypes::util::Identifier; use geoengine_operators::engine::{ - MetaData, RasterOperator, RasterResultDescriptor, StaticMetaData, TypedOperator, - TypedResultDescriptor, VectorOperator, VectorResultDescriptor, + MetaData, RasterResultDescriptor, StaticMetaData, + TypedResultDescriptor, VectorResultDescriptor, }; -use geoengine_operators::mock::{MockDatasetDataSource, MockDatasetDataSourceParams}; + use geoengine_operators::source::{ - GdalLoadingInfo, GdalMetaDataList, GdalMetaDataRegular, GdalMetadataNetCdfCf, GdalSource, - GdalSourceParameters, OgrSource, OgrSourceDataset, OgrSourceParameters, + GdalLoadingInfo, GdalMetaDataList, GdalMetaDataRegular, GdalMetadataNetCdfCf, OgrSourceDataset, }; use geoengine_operators::{mock::MockDatasetDataSourceLoadingInfo, source::GdalMetaDataStatic}; use snafu::ensure; @@ -452,39 +452,7 @@ impl LayerCollectionProvider for HashMapDatasetDb { .find(|d| d.id == dataset_id) .ok_or(error::Error::UnknownDatasetId)?; - let operator = match dataset.source_operator.as_str() { - "OgrSource" => TypedOperator::Vector( - OgrSource { - params: OgrSourceParameters { - dataset: dataset.id.clone(), - attribute_projection: None, - attribute_filters: None, - }, - } - .boxed(), - ), - "GdalSource" => TypedOperator::Raster( - GdalSource { - params: GdalSourceParameters { - dataset: dataset.id.clone(), - }, - } - .boxed(), - ), - "MockDatasetDataSource" => TypedOperator::Vector( - MockDatasetDataSource { - params: MockDatasetDataSourceParams { - dataset: dataset.id.clone(), - }, - } - .boxed(), - ), - s => { - return Err(crate::error::Error::UnknownOperator { - operator: s.to_owned(), - }) - } - }; + let operator = source_operator_from_dataset(&dataset.source_operator, &dataset.id)?; Ok(Layer { id: ProviderLayerId { diff --git 
a/services/src/pro/datasets/in_memory.rs b/services/src/pro/datasets/in_memory.rs index 616716607..16f723fca 100644 --- a/services/src/pro/datasets/in_memory.rs +++ b/services/src/pro/datasets/in_memory.rs @@ -16,6 +16,7 @@ use crate::layers::layer::{ use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider, LayerId}; use crate::pro::datasets::Permission; use crate::pro::users::{UserId, UserSession}; +use crate::util::operators::source_operator_from_dataset; use crate::util::user_input::Validated; use crate::workflows::workflow::Workflow; use async_trait::async_trait; @@ -25,13 +26,12 @@ use geoengine_datatypes::{ util::Identifier, }; use geoengine_operators::engine::{ - MetaData, RasterOperator, RasterResultDescriptor, StaticMetaData, TypedOperator, - TypedResultDescriptor, VectorOperator, VectorResultDescriptor, + MetaData, RasterResultDescriptor, StaticMetaData, + TypedResultDescriptor, VectorResultDescriptor, }; -use geoengine_operators::mock::{MockDatasetDataSource, MockDatasetDataSourceParams}; + use geoengine_operators::source::{ - GdalLoadingInfo, GdalMetaDataList, GdalMetaDataRegular, GdalMetadataNetCdfCf, GdalSource, - GdalSourceParameters, OgrSource, OgrSourceDataset, OgrSourceParameters, + GdalLoadingInfo, GdalMetaDataList, GdalMetaDataRegular, GdalMetadataNetCdfCf, OgrSourceDataset, }; use geoengine_operators::{mock::MockDatasetDataSourceLoadingInfo, source::GdalMetaDataStatic}; use log::{info, warn}; @@ -592,39 +592,7 @@ impl LayerCollectionProvider for ProHashMapDatasetDb { .find(|(_id, d)| d.id == dataset_id) .ok_or(error::Error::UnknownDatasetId)?; - let operator = match dataset.source_operator.as_str() { - "OgrSource" => TypedOperator::Vector( - OgrSource { - params: OgrSourceParameters { - dataset: dataset.id.clone(), - attribute_projection: None, - attribute_filters: None, - }, - } - .boxed(), - ), - "GdalSource" => TypedOperator::Raster( - GdalSource { - params: GdalSourceParameters { - dataset: dataset.id.clone(), - }, - } - .boxed(), - ), - "MockDatasetDataSource" => TypedOperator::Vector( - MockDatasetDataSource { - params: MockDatasetDataSourceParams { - dataset: dataset.id.clone(), - }, - } - .boxed(), - ), - s => { - return Err(crate::error::Error::UnknownOperator { - operator: s.to_owned(), - }) - } - }; + let operator = source_operator_from_dataset(&dataset.source_operator, &dataset.id)?; Ok(Layer { id: ProviderLayerId { diff --git a/services/src/pro/datasets/postgres.rs b/services/src/pro/datasets/postgres.rs index a5e151d79..d8fd94608 100644 --- a/services/src/pro/datasets/postgres.rs +++ b/services/src/pro/datasets/postgres.rs @@ -21,6 +21,7 @@ use crate::layers::listing::LayerId; use crate::pro::datasets::storage::UpdateDatasetPermissions; use crate::pro::datasets::RoleId; use crate::projects::Symbology; +use crate::util::operators::source_operator_from_dataset; use crate::util::user_input::Validated; use crate::workflows::workflow::Workflow; use crate::{ @@ -36,19 +37,19 @@ use geoengine_datatypes::dataset::{DatasetId, InternalDatasetId}; use geoengine_datatypes::primitives::RasterQueryRectangle; use geoengine_datatypes::primitives::VectorQueryRectangle; use geoengine_datatypes::util::Identifier; -use geoengine_operators::engine::RasterOperator; -use geoengine_operators::engine::TypedOperator; -use geoengine_operators::engine::VectorOperator; + + + use geoengine_operators::engine::{ MetaData, RasterResultDescriptor, StaticMetaData, TypedResultDescriptor, VectorResultDescriptor, }; -use geoengine_operators::mock::MockDatasetDataSource; + use 
geoengine_operators::mock::MockDatasetDataSourceLoadingInfo; -use geoengine_operators::mock::MockDatasetDataSourceParams; -use geoengine_operators::source::GdalSource; -use geoengine_operators::source::GdalSourceParameters; -use geoengine_operators::source::OgrSource; -use geoengine_operators::source::OgrSourceParameters; + + + + + use geoengine_operators::source::{GdalLoadingInfo, OgrSourceDataset}; use log::info; use postgres_types::{FromSql, ToSql}; @@ -778,39 +779,7 @@ where let source_operator: String = row.get(2); let symbology: Option = serde_json::from_value(row.get(3))?; - let operator = match source_operator.as_str() { - "OgrSource" => TypedOperator::Vector( - OgrSource { - params: OgrSourceParameters { - dataset: dataset_id.clone(), - attribute_projection: None, - attribute_filters: None, - }, - } - .boxed(), - ), - "GdalSource" => TypedOperator::Raster( - GdalSource { - params: GdalSourceParameters { - dataset: dataset_id.clone(), - }, - } - .boxed(), - ), - "MockDatasetDataSource" => TypedOperator::Vector( - MockDatasetDataSource { - params: MockDatasetDataSourceParams { - dataset: dataset_id.clone(), - }, - } - .boxed(), - ), - s => { - return Err(crate::error::Error::UnknownOperator { - operator: s.to_owned(), - }) - } - }; + let operator = source_operator_from_dataset(&source_operator, &dataset_id)?; Ok(Layer { id: ProviderLayerId { diff --git a/services/src/util/mod.rs b/services/src/util/mod.rs index 93cbcfdf7..e1630afaa 100644 --- a/services/src/util/mod.rs +++ b/services/src/util/mod.rs @@ -7,6 +7,7 @@ pub use geoengine_datatypes::util::Identifier; pub use geoengine_operators::util::{spawn, spawn_blocking, spawn_blocking_with_thread_pool}; pub mod config; +pub mod operators; pub mod parsing; pub mod retry; pub mod tests; diff --git a/services/src/util/operators.rs b/services/src/util/operators.rs new file mode 100644 index 000000000..af9643f00 --- /dev/null +++ b/services/src/util/operators.rs @@ -0,0 +1,46 @@ +use crate::error::Result; +use geoengine_datatypes::dataset::DatasetId; +use geoengine_operators::{ + engine::{OperatorName, RasterOperator, TypedOperator, VectorOperator}, + mock::{MockDatasetDataSource, MockDatasetDataSourceParams}, + source::{GdalSource, GdalSourceParameters, OgrSource, OgrSourceParameters}, +}; + +pub fn source_operator_from_dataset( + source_operator_name: &str, + dataset: &DatasetId, +) -> Result { + Ok(match source_operator_name { + OgrSource::TYPE_NAME => TypedOperator::Vector( + OgrSource { + params: OgrSourceParameters { + dataset: dataset.clone(), + attribute_projection: None, + attribute_filters: None, + }, + } + .boxed(), + ), + GdalSource::TYPE_NAME => TypedOperator::Raster( + GdalSource { + params: GdalSourceParameters { + dataset: dataset.clone(), + }, + } + .boxed(), + ), + MockDatasetDataSource::TYPE_NAME => TypedOperator::Vector( + MockDatasetDataSource { + params: MockDatasetDataSourceParams { + dataset: dataset.clone(), + }, + } + .boxed(), + ), + s => { + return Err(crate::error::Error::UnknownOperator { + operator: s.to_owned(), + }) + } + }) +} From ddfafc8adfbc119ea5a6f0c02257dae460ce2468 Mon Sep 17 00:00:00 2001 From: Michael Mattig Date: Wed, 6 Jul 2022 18:23:39 +0200 Subject: [PATCH 15/21] add an "unsorted collection" for leftover items --- services/src/datasets/in_memory.rs | 3 +- services/src/layers/add_from_directory.rs | 42 +++++++++++++++---- services/src/layers/storage.rs | 14 ++++++- services/src/pro/datasets/in_memory.rs | 3 +- services/src/pro/datasets/postgres.rs | 6 --- .../no_parent_collection.json | 7 ++++ 
.../root_collection.json | 10 +++++ 7 files changed, 66 insertions(+), 19 deletions(-) create mode 100644 test_data/layer_collection_defs/no_parent_collection.json create mode 100644 test_data/layer_collection_defs/root_collection.json diff --git a/services/src/datasets/in_memory.rs b/services/src/datasets/in_memory.rs index 927c9d36e..97d668160 100644 --- a/services/src/datasets/in_memory.rs +++ b/services/src/datasets/in_memory.rs @@ -15,8 +15,7 @@ use geoengine_datatypes::dataset::{DatasetId, InternalDatasetId}; use geoengine_datatypes::primitives::{RasterQueryRectangle, VectorQueryRectangle}; use geoengine_datatypes::util::Identifier; use geoengine_operators::engine::{ - MetaData, RasterResultDescriptor, StaticMetaData, - TypedResultDescriptor, VectorResultDescriptor, + MetaData, RasterResultDescriptor, StaticMetaData, TypedResultDescriptor, VectorResultDescriptor, }; use geoengine_operators::source::{ diff --git a/services/src/layers/add_from_directory.rs b/services/src/layers/add_from_directory.rs index 44c8ae361..c33041436 100644 --- a/services/src/layers/add_from_directory.rs +++ b/services/src/layers/add_from_directory.rs @@ -13,6 +13,9 @@ use crate::{error::Result, layers::listing::LayerCollectionId}; use crate::{layers::storage::LayerDb, util::user_input::UserInput}; use log::{info, warn}; +use uuid::Uuid; + +pub const UNSORTED_COLLECTION_ID: Uuid = Uuid::from_u128(0xffb2_dd9e_f5ad_427c_b7f1_c9a0_c7a0_ae3f); pub async fn add_layers_from_directory(layer_db: &mut L, file_path: PathBuf) { async fn add_layer_from_dir_entry( @@ -22,7 +25,6 @@ pub async fn add_layers_from_directory(layer_db: &mut L, file_path: let def: LayerDefinition = serde_json::from_reader(BufReader::new(File::open(entry.path())?))?; - // TODO: only add layer to root collection that are not contained in any other collection layer_db .add_layer_with_id( &def.id, @@ -33,7 +35,7 @@ pub async fn add_layers_from_directory(layer_db: &mut L, file_path: symbology: def.symbology, } .validated()?, - &layer_db.root_collection_id().await?, + &LayerCollectionId(UNSORTED_COLLECTION_ID.to_string()), ) .await?; @@ -83,9 +85,12 @@ pub async fn add_layer_collections_from_directory(db: &mut L, file_p } .validated()?; - // TODO: add only collections that aren't contained in any other collection to the root collection? - db.add_collection_with_id(&def.id, collection, &db.root_collection_id().await?) 
- .await?; + db.add_collection_with_id( + &def.id, + collection, + &LayerCollectionId(UNSORTED_COLLECTION_ID.to_string()), + ) + .await?; for layer in &def.layers { db.add_layer_to_collection(layer, &def.id).await?; @@ -126,13 +131,36 @@ pub async fn add_layer_collections_from_directory(db: &mut L, file_p } } + let root_id = db + .root_collection_id() + .await + .expect("root id must be resolved"); let mut collection_children: HashMap> = HashMap::new(); + let unsorted = AddLayerCollection { + name: "Unsorted".to_string(), + description: "Unsorted Layers".to_string(), + } + .validated() + .expect("unsorted collection is valid"); + + db.add_collection_with_id( + &LayerCollectionId(UNSORTED_COLLECTION_ID.to_string()), + unsorted, + &root_id, + ) + .await + .expect("unsorted collection should always be added"); + for def in collection_defs { - let collection = add_collection_to_db(db, &def).await; + let ok = if def.id == root_id { + Ok(()) + } else { + add_collection_to_db(db, &def).await + }; - match collection { + match ok { Ok(_) => { collection_children.insert(def.id, def.collections); } diff --git a/services/src/layers/storage.rs b/services/src/layers/storage.rs index 1ad3a5a4e..c649b2035 100644 --- a/services/src/layers/storage.rs +++ b/services/src/layers/storage.rs @@ -1,3 +1,4 @@ +use std::cmp::Ordering; use std::collections::HashMap; use std::sync::Arc; @@ -324,11 +325,20 @@ impl LayerCollectionProvider for HashMapLayerDb { }) }); - Ok(collections + let mut listing = collections .chain(layers) .skip(options.offset as usize) .take(options.limit as usize) - .collect()) + .collect::>(); + + listing.sort_by(|a, b| match (a, b) { + (CollectionItem::Collection(a), CollectionItem::Collection(b)) => a.name.cmp(&b.name), + (CollectionItem::Layer(a), CollectionItem::Layer(b)) => a.name.cmp(&b.name), + (CollectionItem::Collection(_), CollectionItem::Layer(_)) => Ordering::Less, + (CollectionItem::Layer(_), CollectionItem::Collection(_)) => Ordering::Greater, + }); + + Ok(listing) } async fn root_collection_id(&self) -> Result { diff --git a/services/src/pro/datasets/in_memory.rs b/services/src/pro/datasets/in_memory.rs index 16f723fca..541929b79 100644 --- a/services/src/pro/datasets/in_memory.rs +++ b/services/src/pro/datasets/in_memory.rs @@ -26,8 +26,7 @@ use geoengine_datatypes::{ util::Identifier, }; use geoengine_operators::engine::{ - MetaData, RasterResultDescriptor, StaticMetaData, - TypedResultDescriptor, VectorResultDescriptor, + MetaData, RasterResultDescriptor, StaticMetaData, TypedResultDescriptor, VectorResultDescriptor, }; use geoengine_operators::source::{ diff --git a/services/src/pro/datasets/postgres.rs b/services/src/pro/datasets/postgres.rs index d8fd94608..5efff865c 100644 --- a/services/src/pro/datasets/postgres.rs +++ b/services/src/pro/datasets/postgres.rs @@ -38,18 +38,12 @@ use geoengine_datatypes::primitives::RasterQueryRectangle; use geoengine_datatypes::primitives::VectorQueryRectangle; use geoengine_datatypes::util::Identifier; - - use geoengine_operators::engine::{ MetaData, RasterResultDescriptor, StaticMetaData, TypedResultDescriptor, VectorResultDescriptor, }; use geoengine_operators::mock::MockDatasetDataSourceLoadingInfo; - - - - use geoengine_operators::source::{GdalLoadingInfo, OgrSourceDataset}; use log::info; use postgres_types::{FromSql, ToSql}; diff --git a/test_data/layer_collection_defs/no_parent_collection.json b/test_data/layer_collection_defs/no_parent_collection.json new file mode 100644 index 000000000..4a3c07450 --- /dev/null +++ 
b/test_data/layer_collection_defs/no_parent_collection.json @@ -0,0 +1,7 @@ +{ + "id": "12555d50-194e-450f-acb0-a2f571987e32", + "name": "An collection without a parent", + "description": "There is nothing here", + "collections": [], + "layers": [] +} diff --git a/test_data/layer_collection_defs/root_collection.json b/test_data/layer_collection_defs/root_collection.json new file mode 100644 index 000000000..3866b08c3 --- /dev/null +++ b/test_data/layer_collection_defs/root_collection.json @@ -0,0 +1,10 @@ +{ + "id": "05102bb3-a855-4a37-8a8a-30026a91fef1", + "name": "LayerDB", + "description": "Root collection for LayerDB", + "collections": [ + "272bf675-2e27-4412-824c-287c1e6841ac", + "a29f77cc-51ce-466b-86ef-d0ab2170bc0a" + ], + "layers": [] +} From c3a4f3f179b76ae5f4c54d96f3b6055c3498c961 Mon Sep 17 00:00:00 2001 From: Michael Mattig Date: Tue, 12 Jul 2022 16:41:08 +0200 Subject: [PATCH 16/21] rename ids and providers --- datatypes/src/dataset.rs | 51 +- operators/benches/expression.rs | 2 +- operators/benches/workflows.rs | 27 +- operators/src/engine/execution_context.rs | 23 +- operators/src/engine/mod.rs | 4 +- operators/src/engine/operator.rs | 29 +- operators/src/engine/operator_impl.rs | 64 +-- operators/src/error.rs | 12 +- .../src/mock/mock_dataset_data_source.rs | 22 +- .../mock/mock_feature_collection_source.rs | 11 +- operators/src/mock/mock_point_source.rs | 8 +- operators/src/mock/mock_raster_source.rs | 8 +- operators/src/plot/class_histogram.rs | 8 +- operators/src/plot/histogram.rs | 8 +- .../src/plot/temporal_raster_mean_plot.rs | 6 +- operators/src/processing/expression/mod.rs | 10 +- operators/src/processing/meteosat/mod.rs | 8 +- operators/src/processing/point_in_polygon.rs | 12 +- .../src/processing/raster_vector_join/mod.rs | 6 +- .../raster_vector_join/non_aggregated.rs | 8 +- operators/src/processing/reprojection.rs | 18 +- operators/src/processing/time_shift.rs | 14 +- operators/src/processing/vector_join/mod.rs | 14 +- operators/src/source/csv.rs | 8 +- operators/src/source/gdal_source/mod.rs | 28 +- operators/src/source/ogr_source/mod.rs | 130 ++--- operators/src/util/gdal.rs | 6 +- .../src/util/input/multi_raster_or_vector.rs | 22 +- operators/src/util/input/raster_or_vector.rs | 28 +- services/src/contexts/mod.rs | 38 +- services/src/datasets/add_from_directory.rs | 4 +- services/src/datasets/external/gfbio.rs | 78 +-- services/src/datasets/external/mock.rs | 68 ++- services/src/datasets/external/nature40.rs | 101 ++-- .../src/datasets/external/netcdfcf/error.rs | 8 +- .../src/datasets/external/netcdfcf/mod.rs | 461 +++++++----------- services/src/datasets/external/nfdi/mod.rs | 178 ++++--- services/src/datasets/external/pangaea/mod.rs | 60 +-- services/src/datasets/in_memory.rs | 87 ++-- services/src/datasets/listing.rs | 6 +- services/src/datasets/storage.rs | 8 +- services/src/error.rs | 20 +- services/src/handlers/datasets.rs | 38 +- services/src/handlers/ebv.rs | 12 +- services/src/handlers/gfbio.rs | 28 +- services/src/handlers/layers.rs | 20 +- services/src/handlers/wfs.rs | 17 +- services/src/handlers/workflows.rs | 31 +- services/src/layers/external.rs | 31 +- services/src/layers/layer.rs | 14 +- services/src/layers/listing.rs | 10 +- services/src/layers/storage.rs | 46 +- services/src/pro/contexts/postgres.rs | 69 +-- .../src/pro/datasets/add_from_directory.rs | 4 +- .../datasets/external/sentinel_s2_l2a_cogs.rs | 164 +++---- services/src/pro/datasets/in_memory.rs | 166 +++---- services/src/pro/datasets/postgres.rs | 86 ++-- 
services/src/pro/handlers/drone_mapping.rs | 12 +- services/src/pro/layers/postgres_layer_db.rs | 27 +- services/src/util/operators.rs | 14 +- services/src/util/tests.rs | 4 +- test_data/dataset_defs/germany_polygon.json | 5 +- test_data/dataset_defs/landcover.json | 5 +- test_data/dataset_defs/mock.json | 5 +- test_data/dataset_defs/ndvi (3587).json | 5 +- test_data/dataset_defs/ndvi.json | 5 +- test_data/dataset_defs/ndvi_list.json | 5 +- .../dataset_defs/ne_10m_ports (3857).json | 5 +- test_data/dataset_defs/ne_10m_ports.json | 5 +- test_data/dataset_defs/points_with_time.json | 5 +- test_data/layer_defs/ports_in_germany.json | 4 +- 71 files changed, 1137 insertions(+), 1417 deletions(-) diff --git a/datatypes/src/dataset.rs b/datatypes/src/dataset.rs index 42d58ee3b..ab8e70e62 100644 --- a/datatypes/src/dataset.rs +++ b/datatypes/src/dataset.rs @@ -1,34 +1,41 @@ use crate::identifier; use serde::{Deserialize, Serialize}; -identifier!(LayerProviderId); +identifier!(DataProviderId); -identifier!(InternalDatasetId); // TODO: rename to DatasetId as there are no external datasets anymore - -identifier!(StagingDatasetId); +// Identifier for datasets managed by Geo Engine +identifier!(DatasetId); #[derive(Debug, Clone, Hash, Eq, PartialEq, Deserialize, Serialize)] #[serde(rename_all = "camelCase", tag = "type")] -// TODO: distinguish between Datasets (stuff MANAGED by Geo Engine) and Data (stuff LOADABLE by Geo Engine) -// DatasetId should be used to refer to local dataset and DataId (or similar) should differentiate -// between local datasets and external providers -pub enum DatasetId { +/// The identifier for loadable data. It is used in the source operators to get the loading info (aka parametrization) +/// for accessing the data. Internal data is loaded from datasets, external from `DataProvider`s. 
+pub enum DataId { #[serde(rename_all = "camelCase")] Internal { - dataset_id: InternalDatasetId, + dataset_id: DatasetId, }, - External(ExternalDatasetId), + External(ExternalDataId), +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Hash)] +pub struct LayerId(pub String); + +impl std::fmt::Display for LayerId { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}", self.0) + } } #[derive(Debug, Clone, Hash, Eq, PartialEq, Deserialize, Serialize)] #[serde(rename_all = "camelCase")] -pub struct ExternalDatasetId { - pub provider_id: LayerProviderId, - pub dataset_id: String, +pub struct ExternalDataId { + pub provider_id: DataProviderId, + pub layer_id: LayerId, } -impl DatasetId { - pub fn internal(&self) -> Option { +impl DataId { + pub fn internal(&self) -> Option { if let Self::Internal { dataset_id: dataset, } = self @@ -38,7 +45,7 @@ impl DatasetId { None } - pub fn external(&self) -> Option { + pub fn external(&self) -> Option { if let Self::External(id) = self { return Some(id.clone()); } @@ -46,14 +53,14 @@ impl DatasetId { } } -impl From for DatasetId { - fn from(value: InternalDatasetId) -> Self { - DatasetId::Internal { dataset_id: value } +impl From for DataId { + fn from(value: DatasetId) -> Self { + DataId::Internal { dataset_id: value } } } -impl From for DatasetId { - fn from(value: ExternalDatasetId) -> Self { - DatasetId::External(value) +impl From for DataId { + fn from(value: ExternalDataId) -> Self { + DataId::External(value) } } diff --git a/operators/benches/expression.rs b/operators/benches/expression.rs index a3c4edcec..61a0eb893 100644 --- a/operators/benches/expression.rs +++ b/operators/benches/expression.rs @@ -43,7 +43,7 @@ fn ndvi_source(execution_context: &mut MockExecutionContext) -> Box where R: ResultDescriptor, { - async fn meta_data(&self, dataset: &DatasetId) -> Result>>; + async fn meta_data(&self, id: &DataId) -> Result>>; } #[async_trait] @@ -60,7 +60,7 @@ where pub struct MockExecutionContext { pub thread_pool: Arc, - pub meta_data: HashMap>, + pub meta_data: HashMap>, pub tiling_specification: TilingSpecification, } @@ -94,17 +94,14 @@ impl MockExecutionContext { } } - pub fn add_meta_data( - &mut self, - dataset: DatasetId, - meta_data: Box>, - ) where + pub fn add_meta_data(&mut self, data: DataId, meta_data: Box>) + where L: Send + Sync + 'static, R: Send + Sync + 'static + ResultDescriptor, Q: Send + Sync + 'static, { self.meta_data - .insert(dataset, Box::new(meta_data) as Box); + .insert(data, Box::new(meta_data) as Box); } pub fn mock_query_context(&self, chunk_byte_size: ChunkByteSize) -> MockQueryContext { @@ -132,13 +129,13 @@ where R: 'static + ResultDescriptor, Q: 'static, { - async fn meta_data(&self, dataset: &DatasetId) -> Result>> { + async fn meta_data(&self, id: &DataId) -> Result>> { let meta_data = self .meta_data - .get(dataset) - .ok_or(Error::UnknownDatasetId)? + .get(id) + .ok_or(Error::UnknownDataId)? 
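
For orientation, a minimal sketch (not part of the patch itself) of how the renamed identifier types compose, based on the `From` conversions and constructors shown in the `datatypes/src/dataset.rs` hunk above; the provider UUID and layer id used here are arbitrary placeholders:

use geoengine_datatypes::dataset::{DataId, DataProviderId, DatasetId, ExternalDataId, LayerId};

fn main() {
    // A dataset managed by Geo Engine: a `DatasetId` converts into `DataId::Internal`.
    let internal: DataId = DatasetId::from_u128(42).into();

    // Data exposed by an external provider: the provider id plus a provider-specific layer id.
    let external: DataId = ExternalDataId {
        provider_id: DataProviderId::from_u128(0x0123_4567_89ab_cdef_0123_4567_89ab_cdef), // placeholder
        layer_id: LayerId("1".to_string()),
    }
    .into();

    // Source operators hand either variant to the execution context to resolve loading info.
    assert!(internal.internal().is_some());
    assert!(external.external().is_some());
}
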
.downcast_ref::>>() - .ok_or(Error::DatasetLoadingInfoProviderMismatch)?; + .ok_or(Error::InvalidMetaDataType)?; Ok(meta_data.clone()) } diff --git a/operators/src/engine/mod.rs b/operators/src/engine/mod.rs index b8628dc74..0a7f505e3 100644 --- a/operators/src/engine/mod.rs +++ b/operators/src/engine/mod.rs @@ -6,8 +6,8 @@ pub use execution_context::{ ExecutionContext, MetaData, MetaDataProvider, MockExecutionContext, StaticMetaData, }; pub use operator::{ - InitializedPlotOperator, InitializedRasterOperator, InitializedVectorOperator, - OperatorDatasets, OperatorName, PlotOperator, RasterOperator, TypedOperator, VectorOperator, + InitializedPlotOperator, InitializedRasterOperator, InitializedVectorOperator, OperatorData, + OperatorName, PlotOperator, RasterOperator, TypedOperator, VectorOperator, }; pub use operator_impl::{ MultipleRasterOrSingleVectorSource, MultipleRasterSources, MultipleVectorSources, Operator, diff --git a/operators/src/engine/operator.rs b/operators/src/engine/operator.rs index f87ab7fcd..4fc1f0739 100644 --- a/operators/src/engine/operator.rs +++ b/operators/src/engine/operator.rs @@ -3,7 +3,7 @@ use serde::{Deserialize, Serialize}; use crate::error; use crate::util::Result; use async_trait::async_trait; -use geoengine_datatypes::dataset::DatasetId; +use geoengine_datatypes::dataset::DataId; use super::{ query_processor::{TypedRasterQueryProcessor, TypedVectorQueryProcessor}, @@ -11,23 +11,22 @@ use super::{ PlotResultDescriptor, RasterResultDescriptor, TypedPlotQueryProcessor, VectorResultDescriptor, }; -pub trait OperatorDatasets { - /// Get the dataset ids of all the datasets involoved in this operator and its sources - fn datasets(&self) -> Vec { +pub trait OperatorData { + /// Get the ids of all the data involoved in this operator and its sources + fn data_ids(&self) -> Vec { let mut datasets = vec![]; - self.datasets_collect(&mut datasets); + self.data_ids_collect(&mut datasets); datasets } - #[allow(clippy::ptr_arg)] // must allow `push` on `datasets` - fn datasets_collect(&self, datasets: &mut Vec); + fn data_ids_collect(&self, data_ids: &mut Vec); } /// Common methods for `RasterOperator`s #[typetag::serde(tag = "type")] #[async_trait] pub trait RasterOperator: - CloneableRasterOperator + OperatorDatasets + Send + Sync + std::fmt::Debug + CloneableRasterOperator + OperatorData + Send + Sync + std::fmt::Debug { async fn initialize( self: Box, @@ -47,7 +46,7 @@ pub trait RasterOperator: #[typetag::serde(tag = "type")] #[async_trait] pub trait VectorOperator: - CloneableVectorOperator + OperatorDatasets + Send + Sync + std::fmt::Debug + CloneableVectorOperator + OperatorData + Send + Sync + std::fmt::Debug { async fn initialize( self: Box, @@ -67,7 +66,7 @@ pub trait VectorOperator: #[typetag::serde(tag = "type")] #[async_trait] pub trait PlotOperator: - CloneablePlotOperator + OperatorDatasets + Send + Sync + std::fmt::Debug + CloneablePlotOperator + OperatorData + Send + Sync + std::fmt::Debug { async fn initialize( self: Box, @@ -264,12 +263,12 @@ macro_rules! 
call_on_typed_operator { }; } -impl OperatorDatasets for TypedOperator { - fn datasets_collect(&self, datasets: &mut Vec) { +impl OperatorData for TypedOperator { + fn data_ids_collect(&self, data_ids: &mut Vec) { match self { - TypedOperator::Vector(v) => v.datasets_collect(datasets), - TypedOperator::Raster(r) => r.datasets_collect(datasets), - TypedOperator::Plot(p) => p.datasets_collect(datasets), + TypedOperator::Vector(v) => v.data_ids_collect(data_ids), + TypedOperator::Raster(r) => r.data_ids_collect(data_ids), + TypedOperator::Plot(p) => p.data_ids_collect(data_ids), } } } diff --git a/operators/src/engine/operator_impl.rs b/operators/src/engine/operator_impl.rs index 49c7f7bfc..ef666b197 100644 --- a/operators/src/engine/operator_impl.rs +++ b/operators/src/engine/operator_impl.rs @@ -1,9 +1,9 @@ -use geoengine_datatypes::dataset::DatasetId; +use geoengine_datatypes::dataset::DataId; use serde::{Deserialize, Serialize}; use crate::util::input::{MultiRasterOrVectorOperator, RasterOrVectorOperator}; -use super::{OperatorDatasets, RasterOperator, VectorOperator}; +use super::{OperatorData, RasterOperator, VectorOperator}; #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(rename_all = "camelCase")] @@ -125,69 +125,69 @@ impl From>> for MultipleRasterOrSingleVectorSource { } } -impl OperatorDatasets for Operator +impl OperatorData for Operator where - Sources: OperatorDatasets, + Sources: OperatorData, { - fn datasets_collect(&self, datasets: &mut Vec) { - self.sources.datasets_collect(datasets); + fn data_ids_collect(&self, data_ids: &mut Vec) { + self.sources.data_ids_collect(data_ids); } } -impl OperatorDatasets for SourceOperator +impl OperatorData for SourceOperator where - Params: OperatorDatasets, + Params: OperatorData, { - fn datasets_collect(&self, datasets: &mut Vec) { - self.params.datasets_collect(datasets); + fn data_ids_collect(&self, data_ids: &mut Vec) { + self.params.data_ids_collect(data_ids); } } -impl OperatorDatasets for SingleRasterOrVectorSource { - fn datasets_collect(&self, datasets: &mut Vec) { - self.source.datasets_collect(datasets); +impl OperatorData for SingleRasterOrVectorSource { + fn data_ids_collect(&self, data_ids: &mut Vec) { + self.source.data_ids_collect(data_ids); } } -impl OperatorDatasets for MultipleRasterOrSingleVectorSource { - fn datasets_collect(&self, datasets: &mut Vec) { - self.source.datasets_collect(datasets); +impl OperatorData for MultipleRasterOrSingleVectorSource { + fn data_ids_collect(&self, data_ids: &mut Vec) { + self.source.data_ids_collect(data_ids); } } -impl OperatorDatasets for SingleVectorSource { - fn datasets_collect(&self, datasets: &mut Vec) { - self.vector.datasets_collect(datasets); +impl OperatorData for SingleVectorSource { + fn data_ids_collect(&self, data_ids: &mut Vec) { + self.vector.data_ids_collect(data_ids); } } -impl OperatorDatasets for SingleRasterSource { - fn datasets_collect(&self, datasets: &mut Vec) { - self.raster.datasets_collect(datasets); +impl OperatorData for SingleRasterSource { + fn data_ids_collect(&self, data_ids: &mut Vec) { + self.raster.data_ids_collect(data_ids); } } -impl OperatorDatasets for MultipleRasterSources { - fn datasets_collect(&self, datasets: &mut Vec) { +impl OperatorData for MultipleRasterSources { + fn data_ids_collect(&self, data_ids: &mut Vec) { for source in &self.rasters { - source.datasets_collect(datasets); + source.data_ids_collect(data_ids); } } } -impl OperatorDatasets for MultipleVectorSources { - fn datasets_collect(&self, datasets: &mut Vec) { +impl 
OperatorData for MultipleVectorSources { + fn data_ids_collect(&self, data_ids: &mut Vec) { for source in &self.vectors { - source.datasets_collect(datasets); + source.data_ids_collect(data_ids); } } } -impl OperatorDatasets for SingleVectorMultipleRasterSources { - fn datasets_collect(&self, datasets: &mut Vec) { - self.vector.datasets_collect(datasets); +impl OperatorData for SingleVectorMultipleRasterSources { + fn data_ids_collect(&self, data_ids: &mut Vec) { + self.vector.data_ids_collect(data_ids); for source in &self.rasters { - source.datasets_collect(datasets); + source.data_ids_collect(data_ids); } } } diff --git a/operators/src/error.rs b/operators/src/error.rs index 79ca635b4..633e0849a 100644 --- a/operators/src/error.rs +++ b/operators/src/error.rs @@ -1,5 +1,5 @@ use crate::util::statistics::StatisticsError; -use geoengine_datatypes::dataset::DatasetId; +use geoengine_datatypes::dataset::DataId; use geoengine_datatypes::error::ErrorSource; use geoengine_datatypes::primitives::FeatureDataType; use snafu::prelude::*; @@ -165,15 +165,15 @@ pub enum Error { source: arrow::error::ArrowError, }, - NoDatasetWithGivenId { - id: DatasetId, + NoDataWithGivenId { + id: DataId, }, RasterRootPathNotConfigured, // TODO: remove when GdalSource uses LoadingInfo - InvalidDatasetId, - DatasetLoadingInfoProviderMismatch, - UnknownDatasetId, + InvalidDataId, + InvalidMetaDataType, + UnknownDataId, // TODO: this error should not be propagated to user #[snafu(display("Could not open gdal dataset for file path {:?}", file_path))] diff --git a/operators/src/mock/mock_dataset_data_source.rs b/operators/src/mock/mock_dataset_data_source.rs index 3fb41cbc5..86627ed45 100644 --- a/operators/src/mock/mock_dataset_data_source.rs +++ b/operators/src/mock/mock_dataset_data_source.rs @@ -1,5 +1,5 @@ use crate::engine::{ - ExecutionContext, InitializedVectorOperator, MetaData, OperatorDatasets, OperatorName, + ExecutionContext, InitializedVectorOperator, MetaData, OperatorData, OperatorName, QueryContext, ResultDescriptor, SourceOperator, TypedVectorQueryProcessor, VectorOperator, VectorQueryProcessor, VectorResultDescriptor, }; @@ -9,7 +9,7 @@ use futures::stream; use futures::stream::BoxStream; use futures::StreamExt; use geoengine_datatypes::collections::{MultiPointCollection, VectorDataType}; -use geoengine_datatypes::dataset::DatasetId; +use geoengine_datatypes::dataset::DataId; use geoengine_datatypes::primitives::{Coordinate2D, TimeInterval, VectorQueryRectangle}; use geoengine_datatypes::spatial_reference::SpatialReferenceOption; use serde::{Deserialize, Serialize}; @@ -106,7 +106,7 @@ impl VectorQueryProcessor for MockDatasetDataSourceProcessor { #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct MockDatasetDataSourceParams { - pub dataset: DatasetId, + pub data: DataId, } pub type MockDatasetDataSource = SourceOperator; @@ -122,7 +122,7 @@ impl VectorOperator for MockDatasetDataSource { self: Box, context: &dyn ExecutionContext, ) -> Result> { - let loading_info = context.meta_data(&self.params.dataset).await?; + let loading_info = context.meta_data(&self.params.data).await?; Ok(InitializedMockDatasetDataSource { result_descriptor: loading_info.result_descriptor().await?, @@ -132,9 +132,9 @@ impl VectorOperator for MockDatasetDataSource { } } -impl OperatorDatasets for MockDatasetDataSource { - fn datasets_collect(&self, datasets: &mut Vec) { - datasets.push(self.params.dataset.clone()); +impl OperatorData for MockDatasetDataSource { + fn data_ids_collect(&self, data_ids: 
&mut Vec) { + data_ids.push(self.params.data.clone()); } } @@ -167,7 +167,7 @@ mod tests { use crate::engine::{MockExecutionContext, MockQueryContext}; use futures::executor::block_on_stream; use geoengine_datatypes::collections::FeatureCollectionInfos; - use geoengine_datatypes::dataset::InternalDatasetId; + use geoengine_datatypes::dataset::{DataId, DatasetId}; use geoengine_datatypes::primitives::{BoundingBox2D, SpatialResolution}; use geoengine_datatypes::util::test::TestDefault; use geoengine_datatypes::util::Identifier; @@ -176,9 +176,7 @@ mod tests { async fn test() { let mut execution_context = MockExecutionContext::test_default(); - let id = DatasetId::Internal { - dataset_id: InternalDatasetId::new(), - }; + let id: DataId = DatasetId::new().into(); execution_context.add_meta_data( id.clone(), Box::new(MockDatasetDataSourceLoadingInfo { @@ -187,7 +185,7 @@ mod tests { ); let mps = MockDatasetDataSource { - params: MockDatasetDataSourceParams { dataset: id }, + params: MockDatasetDataSourceParams { data: id }, } .boxed(); let initialized = mps.initialize(&execution_context).await.unwrap(); diff --git a/operators/src/mock/mock_feature_collection_source.rs b/operators/src/mock/mock_feature_collection_source.rs index a1f715cae..0c1c6564d 100644 --- a/operators/src/mock/mock_feature_collection_source.rs +++ b/operators/src/mock/mock_feature_collection_source.rs @@ -2,9 +2,8 @@ use std::collections::HashMap; use crate::engine::QueryContext; use crate::engine::{ - ExecutionContext, InitializedVectorOperator, OperatorDatasets, ResultDescriptor, - SourceOperator, TypedVectorQueryProcessor, VectorOperator, VectorQueryProcessor, - VectorResultDescriptor, + ExecutionContext, InitializedVectorOperator, OperatorData, ResultDescriptor, SourceOperator, + TypedVectorQueryProcessor, VectorOperator, VectorQueryProcessor, VectorResultDescriptor, }; use crate::util::Result; use async_trait::async_trait; @@ -12,7 +11,7 @@ use futures::stream::{self, BoxStream, StreamExt}; use geoengine_datatypes::collections::{ FeatureCollection, FeatureCollectionInfos, FeatureCollectionModifications, }; -use geoengine_datatypes::dataset::DatasetId; +use geoengine_datatypes::dataset::DataId; use geoengine_datatypes::primitives::{ Geometry, Measurement, MultiLineString, MultiPoint, MultiPolygon, NoGeometry, TimeInterval, VectorQueryRectangle, @@ -84,11 +83,11 @@ where pub type MockFeatureCollectionSource = SourceOperator>; -impl OperatorDatasets for MockFeatureCollectionSource +impl OperatorData for MockFeatureCollectionSource where G: Geometry + ArrowTyped, { - fn datasets_collect(&self, _datasets: &mut Vec) {} + fn data_ids_collect(&self, _data_ids: &mut Vec) {} } impl MockFeatureCollectionSource diff --git a/operators/src/mock/mock_point_source.rs b/operators/src/mock/mock_point_source.rs index 49cad69bc..0830a5015 100644 --- a/operators/src/mock/mock_point_source.rs +++ b/operators/src/mock/mock_point_source.rs @@ -1,4 +1,4 @@ -use crate::engine::{OperatorDatasets, QueryContext}; +use crate::engine::{OperatorData, QueryContext}; use crate::{ engine::{ ExecutionContext, InitializedVectorOperator, SourceOperator, TypedVectorQueryProcessor, @@ -9,7 +9,7 @@ use crate::{ use async_trait::async_trait; use futures::stream::{self, BoxStream, StreamExt}; use geoengine_datatypes::collections::VectorDataType; -use geoengine_datatypes::dataset::DatasetId; +use geoengine_datatypes::dataset::DataId; use geoengine_datatypes::primitives::VectorQueryRectangle; use geoengine_datatypes::{ collections::MultiPointCollection, @@ -52,8 
+52,8 @@ pub struct MockPointSourceParams { pub type MockPointSource = SourceOperator; -impl OperatorDatasets for MockPointSource { - fn datasets_collect(&self, _datasets: &mut Vec) {} +impl OperatorData for MockPointSource { + fn data_ids_collect(&self, _data_ids: &mut Vec) {} } #[typetag::serde] diff --git a/operators/src/mock/mock_raster_source.rs b/operators/src/mock/mock_raster_source.rs index a643cefc7..9765926bb 100644 --- a/operators/src/mock/mock_raster_source.rs +++ b/operators/src/mock/mock_raster_source.rs @@ -1,12 +1,12 @@ use crate::adapters::SparseTilesFillAdapter; use crate::engine::{ - InitializedRasterOperator, OperatorDatasets, RasterOperator, RasterQueryProcessor, + InitializedRasterOperator, OperatorData, RasterOperator, RasterQueryProcessor, RasterResultDescriptor, SourceOperator, TypedRasterQueryProcessor, }; use crate::util::Result; use async_trait::async_trait; use futures::{stream, stream::StreamExt}; -use geoengine_datatypes::dataset::DatasetId; +use geoengine_datatypes::dataset::DataId; use geoengine_datatypes::primitives::{RasterQueryRectangle, SpatialPartitioned}; use geoengine_datatypes::raster::{ GridShape2D, GridShapeAccess, GridSize, Pixel, RasterTile2D, TilingSpecification, @@ -156,8 +156,8 @@ pub struct MockRasterSourceParams { pub type MockRasterSource = SourceOperator>; -impl OperatorDatasets for MockRasterSource { - fn datasets_collect(&self, _datasets: &mut Vec) {} +impl OperatorData for MockRasterSource { + fn data_ids_collect(&self, _data_ids: &mut Vec) {} } /// Implement a mock raster source with typetag for a specific generic type diff --git a/operators/src/plot/class_histogram.rs b/operators/src/plot/class_histogram.rs index 3e0d043e1..df8d8fc20 100644 --- a/operators/src/plot/class_histogram.rs +++ b/operators/src/plot/class_histogram.rs @@ -386,7 +386,7 @@ mod tests { OgrSourceColumnSpec, OgrSourceDataset, OgrSourceDatasetTimeType, OgrSourceErrorSpec, }; use crate::test_data; - use geoengine_datatypes::dataset::{DatasetId, InternalDatasetId}; + use geoengine_datatypes::dataset::{DataId, DatasetId}; use geoengine_datatypes::primitives::{ BoundingBox2D, DateTime, FeatureData, NoGeometry, SpatialResolution, TimeInterval, }; @@ -721,7 +721,7 @@ mod tests { #[tokio::test] #[allow(clippy::too_many_lines)] async fn text_attribute() { - let dataset_id = InternalDatasetId::new(); + let dataset_id = DatasetId::new(); let workflow = serde_json::json!({ "type": "Histogram", @@ -732,7 +732,7 @@ mod tests { "source": { "type": "OgrSource", "params": { - "dataset": { + "data": { "type": "internal", "datasetId": dataset_id }, @@ -745,7 +745,7 @@ mod tests { let mut execution_context = MockExecutionContext::test_default(); execution_context.add_meta_data::<_, _, VectorQueryRectangle>( - DatasetId::Internal { dataset_id }, + DataId::Internal { dataset_id }, Box::new(StaticMetaData { loading_info: OgrSourceDataset { file_name: test_data!("vector/data/ne_10m_ports/ne_10m_ports.shp").into(), diff --git a/operators/src/plot/histogram.rs b/operators/src/plot/histogram.rs index 5d56d6df0..71cfb5e76 100644 --- a/operators/src/plot/histogram.rs +++ b/operators/src/plot/histogram.rs @@ -609,7 +609,7 @@ mod tests { OgrSourceColumnSpec, OgrSourceDataset, OgrSourceDatasetTimeType, OgrSourceErrorSpec, }; use crate::test_data; - use geoengine_datatypes::dataset::{DatasetId, InternalDatasetId}; + use geoengine_datatypes::dataset::{DataId, DatasetId}; use geoengine_datatypes::primitives::{ BoundingBox2D, DateTime, FeatureData, NoGeometry, SpatialResolution, TimeInterval, }; @@ 
-997,7 +997,7 @@ mod tests { #[tokio::test] #[allow(clippy::too_many_lines)] async fn text_attribute() { - let dataset_id = InternalDatasetId::new(); + let dataset_id = DatasetId::new(); let workflow = serde_json::json!({ "type": "Histogram", @@ -1009,7 +1009,7 @@ mod tests { "source": { "type": "OgrSource", "params": { - "dataset": { + "data": { "type": "internal", "datasetId": dataset_id }, @@ -1022,7 +1022,7 @@ mod tests { let mut execution_context = MockExecutionContext::test_default(); execution_context.add_meta_data::<_, _, VectorQueryRectangle>( - DatasetId::Internal { dataset_id }, + DataId::Internal { dataset_id }, Box::new(StaticMetaData { loading_info: OgrSourceDataset { file_name: test_data!("vector/data/ne_10m_ports/ne_10m_ports.shp").into(), diff --git a/operators/src/plot/temporal_raster_mean_plot.rs b/operators/src/plot/temporal_raster_mean_plot.rs index 707058f2c..834ba578b 100644 --- a/operators/src/plot/temporal_raster_mean_plot.rs +++ b/operators/src/plot/temporal_raster_mean_plot.rs @@ -259,7 +259,7 @@ mod tests { source::GdalSourceParameters, }; use geoengine_datatypes::{ - dataset::InternalDatasetId, + dataset::DatasetId, plots::{PlotData, PlotMetaData}, primitives::DateTime, }; @@ -285,7 +285,7 @@ mod tests { sources: SingleRasterSource { raster: GdalSource { params: GdalSourceParameters { - dataset: InternalDatasetId::new().into(), + data: DatasetId::new().into(), }, } .boxed(), @@ -302,7 +302,7 @@ mod tests { "raster": { "type": "GdalSource", "params": { - "dataset": { + "data": { "type": "internal", "datasetId": "a626c880-1c41-489b-9e19-9596d129859c" } diff --git a/operators/src/processing/expression/mod.rs b/operators/src/processing/expression/mod.rs index c33ac109f..2396a8f1e 100644 --- a/operators/src/processing/expression/mod.rs +++ b/operators/src/processing/expression/mod.rs @@ -1,7 +1,7 @@ use self::{codegen::ExpressionAst, compiled::LinkedExpression, parser::ExpressionParser}; use crate::{ engine::{ - ExecutionContext, InitializedRasterOperator, Operator, OperatorDatasets, RasterOperator, + ExecutionContext, InitializedRasterOperator, Operator, OperatorData, RasterOperator, RasterQueryProcessor, RasterResultDescriptor, TypedRasterQueryProcessor, }, processing::expression::{codegen::Parameter, query_processor::ExpressionQueryProcessor}, @@ -10,7 +10,7 @@ use crate::{ use async_trait::async_trait; use futures::try_join; use geoengine_datatypes::{ - dataset::DatasetId, + dataset::DataId, primitives::{partitions_extent, time_interval_extent, Measurement}, raster::RasterDataType, }; @@ -65,10 +65,10 @@ pub struct ExpressionSources { h: Option>, } -impl OperatorDatasets for ExpressionSources { - fn datasets_collect(&self, datasets: &mut Vec) { +impl OperatorData for ExpressionSources { + fn data_ids_collect(&self, data_ids: &mut Vec) { for source in self.iter() { - source.datasets_collect(datasets); + source.data_ids_collect(data_ids); } } } diff --git a/operators/src/processing/meteosat/mod.rs b/operators/src/processing/meteosat/mod.rs index 8bba05096..ef0878db1 100644 --- a/operators/src/processing/meteosat/mod.rs +++ b/operators/src/processing/meteosat/mod.rs @@ -42,7 +42,7 @@ mod test_util { use geoengine_datatypes::util::test::TestDefault; use num_traits::AsPrimitive; - use geoengine_datatypes::dataset::{DatasetId, InternalDatasetId}; + use geoengine_datatypes::dataset::{DataId, DatasetId}; use geoengine_datatypes::primitives::{ ContinuousMeasurement, DateTime, DateTimeParseFormat, Measurement, RasterQueryRectangle, SpatialPartition2D, SpatialResolution, 
TimeGranularity, TimeInstance, TimeInterval, @@ -209,7 +209,7 @@ mod test_util { } pub(crate) fn _create_gdal_src(ctx: &mut MockExecutionContext) -> GdalSource { - let dataset_id: DatasetId = InternalDatasetId::new().into(); + let dataset_id: DataId = DatasetId::new().into(); let no_data_value = Some(0.); let meta = GdalMetaDataRegular { @@ -276,9 +276,7 @@ mod test_util { ctx.add_meta_data(dataset_id.clone(), Box::new(meta)); GdalSource { - params: GdalSourceParameters { - dataset: dataset_id, - }, + params: GdalSourceParameters { data: dataset_id }, } } } diff --git a/operators/src/processing/point_in_polygon.rs b/operators/src/processing/point_in_polygon.rs index 72baa9d07..6db8cbdbd 100644 --- a/operators/src/processing/point_in_polygon.rs +++ b/operators/src/processing/point_in_polygon.rs @@ -6,7 +6,7 @@ use std::sync::Arc; use futures::stream::BoxStream; use futures::{StreamExt, TryStreamExt}; -use geoengine_datatypes::dataset::DatasetId; +use geoengine_datatypes::dataset::DataId; use geoengine_datatypes::primitives::VectorQueryRectangle; use rayon::ThreadPool; use serde::{Deserialize, Serialize}; @@ -17,7 +17,7 @@ use crate::engine::{ ExecutionContext, InitializedVectorOperator, Operator, QueryContext, TypedVectorQueryProcessor, VectorOperator, VectorQueryProcessor, VectorResultDescriptor, }; -use crate::engine::{OperatorDatasets, QueryProcessor}; +use crate::engine::{OperatorData, QueryProcessor}; use crate::error; use crate::util::Result; use arrow::array::BooleanArray; @@ -44,10 +44,10 @@ pub struct PointInPolygonFilterSource { pub polygons: Box, } -impl OperatorDatasets for PointInPolygonFilterSource { - fn datasets_collect(&self, datasets: &mut Vec) { - self.points.datasets_collect(datasets); - self.polygons.datasets_collect(datasets); +impl OperatorData for PointInPolygonFilterSource { + fn data_ids_collect(&self, data_ids: &mut Vec) { + self.points.data_ids_collect(data_ids); + self.polygons.data_ids_collect(data_ids); } } diff --git a/operators/src/processing/raster_vector_join/mod.rs b/operators/src/processing/raster_vector_join/mod.rs index 192b49c91..ce35369a4 100644 --- a/operators/src/processing/raster_vector_join/mod.rs +++ b/operators/src/processing/raster_vector_join/mod.rs @@ -275,7 +275,7 @@ mod tests { use crate::util::gdal::add_ndvi_dataset; use futures::StreamExt; use geoengine_datatypes::collections::{FeatureCollectionInfos, MultiPointCollection}; - use geoengine_datatypes::dataset::DatasetId; + use geoengine_datatypes::dataset::DataId; use geoengine_datatypes::primitives::{ BoundingBox2D, DataRef, DateTime, FeatureDataRef, MultiPoint, SpatialResolution, TimeInterval, VectorQueryRectangle, @@ -324,9 +324,9 @@ mod tests { assert_eq!(deserialized.params, raster_vector_join.params); } - fn ndvi_source(id: DatasetId) -> Box { + fn ndvi_source(id: DataId) -> Box { let gdal_source = GdalSource { - params: GdalSourceParameters { dataset: id }, + params: GdalSourceParameters { data: id }, }; gdal_source.boxed() diff --git a/operators/src/processing/raster_vector_join/non_aggregated.rs b/operators/src/processing/raster_vector_join/non_aggregated.rs index c9ba563bf..5dc05d2d3 100644 --- a/operators/src/processing/raster_vector_join/non_aggregated.rs +++ b/operators/src/processing/raster_vector_join/non_aggregated.rs @@ -323,7 +323,7 @@ mod tests { let raster_source = GdalSource { params: GdalSourceParameters { - dataset: add_ndvi_dataset(&mut execution_context), + data: add_ndvi_dataset(&mut execution_context), }, } .boxed(); @@ -412,7 +412,7 @@ mod tests { let 
raster_source = GdalSource { params: GdalSourceParameters { - dataset: add_ndvi_dataset(&mut execution_context), + data: add_ndvi_dataset(&mut execution_context), }, } .boxed(); @@ -511,7 +511,7 @@ mod tests { let raster_source = GdalSource { params: GdalSourceParameters { - dataset: add_ndvi_dataset(&mut execution_context), + data: add_ndvi_dataset(&mut execution_context), }, } .boxed(); @@ -614,7 +614,7 @@ mod tests { let raster_source = GdalSource { params: GdalSourceParameters { - dataset: add_ndvi_dataset(&mut execution_context), + data: add_ndvi_dataset(&mut execution_context), }, } .boxed(); diff --git a/operators/src/processing/reprojection.rs b/operators/src/processing/reprojection.rs index b7323f090..a576351d3 100644 --- a/operators/src/processing/reprojection.rs +++ b/operators/src/processing/reprojection.rs @@ -524,7 +524,7 @@ mod tests { GeometryCollection, MultiLineStringCollection, MultiPointCollection, MultiPolygonCollection, }, - dataset::{DatasetId, InternalDatasetId}, + dataset::{DataId, DatasetId}, hashmap, primitives::{ BoundingBox2D, Measurement, MultiLineString, MultiPoint, MultiPolygon, QueryRectangle, @@ -885,9 +885,7 @@ mod tests { // 2014-01-01 let gdal_op = GdalSource { - params: GdalSourceParameters { - dataset: id.clone(), - }, + params: GdalSourceParameters { data: id.clone() }, } .boxed(); @@ -1031,7 +1029,7 @@ mod tests { }, }; - let id: DatasetId = InternalDatasetId::new().into(); + let id: DataId = DatasetId::new().into(); exe_ctx.add_meta_data(id.clone(), Box::new(m)); exe_ctx.tiling_specification = TilingSpecification::new((0.0, 0.0).into(), [60, 60].into()); @@ -1042,9 +1040,7 @@ mod tests { // 2014-04-01 let gdal_op = GdalSource { - params: GdalSourceParameters { - dataset: id.clone(), - }, + params: GdalSourceParameters { data: id.clone() }, } .boxed(); @@ -1162,7 +1158,7 @@ mod tests { }, }; - let id: DatasetId = InternalDatasetId::new().into(); + let id: DataId = DatasetId::new().into(); exe_ctx.add_meta_data(id.clone(), Box::new(m)); exe_ctx.tiling_specification = @@ -1174,9 +1170,7 @@ mod tests { let time_interval = TimeInterval::new_instant(1_388_534_400_000).unwrap(); // 2014-01-01 let gdal_op = GdalSource { - params: GdalSourceParameters { - dataset: id.clone(), - }, + params: GdalSourceParameters { data: id.clone() }, } .boxed(); diff --git a/operators/src/processing/time_shift.rs b/operators/src/processing/time_shift.rs index 5f57a2f5c..51cdc2a1f 100644 --- a/operators/src/processing/time_shift.rs +++ b/operators/src/processing/time_shift.rs @@ -472,7 +472,7 @@ mod tests { use futures::StreamExt; use geoengine_datatypes::{ collections::MultiPointCollection, - dataset::InternalDatasetId, + dataset::DatasetId, primitives::{ BoundingBox2D, DateTime, Measurement, MultiPoint, SpatialPartition2D, SpatialResolution, TimeGranularity, @@ -490,7 +490,7 @@ mod tests { source: RasterOrVectorOperator::Raster( GdalSource { params: GdalSourceParameters { - dataset: InternalDatasetId::from_u128(1337).into(), + data: DatasetId::from_u128(1337).into(), }, } .boxed(), @@ -520,7 +520,7 @@ mod tests { "source": { "type": "GdalSource", "params": { - "dataset": { + "data": { "type": "internal", "datasetId": "00000000-0000-0000-0000-000000000539" } @@ -542,7 +542,7 @@ mod tests { source: RasterOrVectorOperator::Raster( GdalSource { params: GdalSourceParameters { - dataset: InternalDatasetId::from_u128(1337).into(), + data: DatasetId::from_u128(1337).into(), }, } .boxed(), @@ -568,7 +568,7 @@ mod tests { "source": { "type": "GdalSource", "params": { - "dataset": { + 
"data": { "type": "internal", "datasetId": "00000000-0000-0000-0000-000000000539" } @@ -1095,7 +1095,7 @@ mod tests { let ndvi_source = GdalSource { params: GdalSourceParameters { - dataset: add_ndvi_dataset(&mut execution_context), + data: add_ndvi_dataset(&mut execution_context), }, } .boxed(); @@ -1173,7 +1173,7 @@ mod tests { let ndvi_source = GdalSource { params: GdalSourceParameters { - dataset: add_ndvi_dataset(&mut execution_context), + data: add_ndvi_dataset(&mut execution_context), }, } .boxed(); diff --git a/operators/src/processing/vector_join/mod.rs b/operators/src/processing/vector_join/mod.rs index 6585299fd..7974318b9 100644 --- a/operators/src/processing/vector_join/mod.rs +++ b/operators/src/processing/vector_join/mod.rs @@ -1,12 +1,12 @@ -use geoengine_datatypes::dataset::DatasetId; +use geoengine_datatypes::dataset::DataId; use serde::{Deserialize, Serialize}; use snafu::ensure; use geoengine_datatypes::collections::VectorDataType; use crate::engine::{ - ExecutionContext, InitializedVectorOperator, Operator, OperatorDatasets, - TypedVectorQueryProcessor, VectorOperator, VectorQueryProcessor, VectorResultDescriptor, + ExecutionContext, InitializedVectorOperator, Operator, OperatorData, TypedVectorQueryProcessor, + VectorOperator, VectorQueryProcessor, VectorResultDescriptor, }; use crate::error; use crate::util::Result; @@ -37,10 +37,10 @@ pub struct VectorJoinSources { right: Box, } -impl OperatorDatasets for VectorJoinSources { - fn datasets_collect(&self, datasets: &mut Vec) { - self.left.datasets_collect(datasets); - self.right.datasets_collect(datasets); +impl OperatorData for VectorJoinSources { + fn data_ids_collect(&self, data_ids: &mut Vec) { + self.left.data_ids_collect(data_ids); + self.right.data_ids_collect(data_ids); } } diff --git a/operators/src/source/csv.rs b/operators/src/source/csv.rs index 08ab2b723..7a887a530 100644 --- a/operators/src/source/csv.rs +++ b/operators/src/source/csv.rs @@ -7,7 +7,7 @@ use csv::{Position, Reader, StringRecord}; use futures::stream::BoxStream; use futures::task::{Context, Poll}; use futures::{Stream, StreamExt}; -use geoengine_datatypes::dataset::DatasetId; +use geoengine_datatypes::dataset::DataId; use geoengine_datatypes::primitives::VectorQueryRectangle; use serde::{Deserialize, Serialize}; use snafu::{ensure, OptionExt, ResultExt}; @@ -22,7 +22,7 @@ use geoengine_datatypes::{ use crate::engine::QueryProcessor; use crate::engine::{ - InitializedVectorOperator, OperatorDatasets, QueryContext, SourceOperator, + InitializedVectorOperator, OperatorData, QueryContext, SourceOperator, TypedVectorQueryProcessor, VectorOperator, VectorQueryProcessor, VectorResultDescriptor, }; use crate::error; @@ -145,8 +145,8 @@ pub struct CsvSourceStream { pub type CsvSource = SourceOperator; -impl OperatorDatasets for CsvSourceParameters { - fn datasets_collect(&self, _datasets: &mut Vec) {} +impl OperatorData for CsvSourceParameters { + fn data_ids_collect(&self, _data_ids: &mut Vec) {} } #[typetag::serde] diff --git a/operators/src/source/gdal_source/mod.rs b/operators/src/source/gdal_source/mod.rs index a43623593..de1777436 100755 --- a/operators/src/source/gdal_source/mod.rs +++ b/operators/src/source/gdal_source/mod.rs @@ -1,5 +1,5 @@ use crate::adapters::SparseTilesFillAdapter; -use crate::engine::{MetaData, OperatorDatasets, OperatorName, QueryProcessor}; +use crate::engine::{MetaData, OperatorData, OperatorName, QueryProcessor}; use crate::util::gdal::gdal_open_dataset_ex; use crate::util::input::float_option_with_nan; use 
crate::{ @@ -27,7 +27,7 @@ use geoengine_datatypes::raster::{ RasterTile2D, TilingStrategy, }; use geoengine_datatypes::util::test::TestDefault; -use geoengine_datatypes::{dataset::DatasetId, raster::TileInformation}; +use geoengine_datatypes::{dataset::DataId, raster::TileInformation}; use geoengine_datatypes::{ primitives::TimeInterval, raster::{ @@ -57,7 +57,7 @@ mod loading_info; /// ```rust /// use serde_json::{Result, Value}; /// use geoengine_operators::source::{GdalSource, GdalSourceParameters}; -/// use geoengine_datatypes::dataset::InternalDatasetId; +/// use geoengine_datatypes::dataset::DataId; /// use geoengine_datatypes::util::Identifier; /// use std::str::FromStr; /// @@ -65,7 +65,7 @@ mod loading_info; /// { /// "type": "GdalSource", /// "params": { -/// "dataset": { +/// "data": { /// "type": "internal", /// "datasetId": "a626c880-1c41-489b-9e19-9596d129859c" /// } @@ -76,18 +76,18 @@ mod loading_info; /// /// assert_eq!(operator, GdalSource { /// params: GdalSourceParameters { -/// dataset: InternalDatasetId::from_str("a626c880-1c41-489b-9e19-9596d129859c").unwrap().into() +/// data: DatasetId::from_str("a626c880-1c41-489b-9e19-9596d129859c").unwrap().into() /// }, /// }); /// ``` #[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)] pub struct GdalSourceParameters { - pub dataset: DatasetId, + pub data: DataId, } -impl OperatorDatasets for GdalSourceParameters { - fn datasets_collect(&self, datasets: &mut Vec) { - datasets.push(self.dataset.clone()); +impl OperatorData for GdalSourceParameters { + fn data_ids_collect(&self, data_ids: &mut Vec) { + data_ids.push(self.data.clone()); } } @@ -568,9 +568,9 @@ impl RasterOperator for GdalSource { self: Box, context: &dyn crate::engine::ExecutionContext, ) -> Result> { - let meta_data: GdalMetaData = context.meta_data(&self.params.dataset).await?; + let meta_data: GdalMetaData = context.meta_data(&self.params.data).await?; - debug!("Initializing GdalSource for {:?}.", &self.params.dataset); + debug!("Initializing GdalSource for {:?}.", &self.params.data); Ok(InitializedGdalSourceOperator { result_descriptor: meta_data.result_descriptor().await?, @@ -915,15 +915,13 @@ mod tests { async fn query_gdal_source( exe_ctx: &mut MockExecutionContext, query_ctx: &MockQueryContext, - id: DatasetId, + id: DataId, output_shape: GridShape2D, output_bounds: SpatialPartition2D, time_interval: TimeInterval, ) -> Vec>> { let op = GdalSource { - params: GdalSourceParameters { - dataset: id.clone(), - }, + params: GdalSourceParameters { data: id.clone() }, } .boxed(); diff --git a/operators/src/source/ogr_source/mod.rs b/operators/src/source/ogr_source/mod.rs index 1e0d94bd7..9e8ba56f7 100644 --- a/operators/src/source/ogr_source/mod.rs +++ b/operators/src/source/ogr_source/mod.rs @@ -36,7 +36,7 @@ use geoengine_datatypes::primitives::{ }; use geoengine_datatypes::util::arrow::ArrowTyped; -use crate::engine::{OperatorDatasets, OperatorName, QueryProcessor}; +use crate::engine::{OperatorData, OperatorName, QueryProcessor}; use crate::error::Error; use crate::util::input::StringOrNumberRange; use crate::util::Result; @@ -49,7 +49,7 @@ use crate::{ }; use async_trait::async_trait; use gdal::errors::GdalError; -use geoengine_datatypes::dataset::DatasetId; +use geoengine_datatypes::dataset::DataId; use std::convert::{TryFrom, TryInto}; use self::dataset_iterator::OgrDatasetIterator; @@ -57,7 +57,7 @@ use self::dataset_iterator::OgrDatasetIterator; #[derive(Clone, Debug, PartialEq, Deserialize, Serialize)] #[serde(rename_all = "camelCase")] pub 
struct OgrSourceParameters { - pub dataset: DatasetId, + pub data: DataId, pub attribute_projection: Option>, pub attribute_filters: Option>, } @@ -70,9 +70,9 @@ pub struct AttributeFilter { pub keep_nulls: bool, } -impl OperatorDatasets for OgrSourceParameters { - fn datasets_collect(&self, datasets: &mut Vec) { - datasets.push(self.dataset.clone()); +impl OperatorData for OgrSourceParameters { + fn data_ids_collect(&self, data_ids: &mut Vec) { + data_ids.push(self.data.clone()); } } @@ -356,7 +356,7 @@ impl VectorOperator for OgrSource { let info: Box< dyn MetaData, - > = context.meta_data(&self.params.dataset).await?; + > = context.meta_data(&self.params.data).await?; let result_descriptor = info.result_descriptor().await?; @@ -1438,7 +1438,7 @@ mod tests { DataCollection, FeatureCollectionInfos, GeometryCollection, MultiPointCollection, MultiPolygonCollection, }; - use geoengine_datatypes::dataset::InternalDatasetId; + use geoengine_datatypes::dataset::{DataId, DatasetId}; use geoengine_datatypes::primitives::{ BoundingBox2D, FeatureData, Measurement, SpatialResolution, TimeGranularity, }; @@ -1794,12 +1794,10 @@ mod tests { #[tokio::test] async fn ne_10m_ports_bbox_filter() -> Result<()> { - let dataset = DatasetId::Internal { - dataset_id: InternalDatasetId::new(), - }; + let id: DataId = DatasetId::new().into(); let mut exe_ctx = MockExecutionContext::test_default(); exe_ctx.add_meta_data::( - dataset.clone(), + id.clone(), Box::new(StaticMetaData { loading_info: OgrSourceDataset { file_name: test_data!("vector/data/ne_10m_ports/ne_10m_ports.shp").into(), @@ -1827,7 +1825,7 @@ mod tests { let source = OgrSource { params: OgrSourceParameters { - dataset, + data: id, attribute_projection: None, attribute_filters: None, }, @@ -1892,12 +1890,10 @@ mod tests { #[tokio::test] async fn ne_10m_ports_force_spatial_filter() -> Result<()> { - let dataset = DatasetId::Internal { - dataset_id: InternalDatasetId::new(), - }; + let id: DataId = DatasetId::new().into(); let mut exe_ctx = MockExecutionContext::test_default(); exe_ctx.add_meta_data::( - dataset.clone(), + id.clone(), Box::new(StaticMetaData { loading_info: OgrSourceDataset { file_name: test_data!("vector/data/ne_10m_ports/ne_10m_ports.shp").into(), @@ -1925,7 +1921,7 @@ mod tests { let source = OgrSource { params: OgrSourceParameters { - dataset, + data: id, attribute_projection: None, attribute_filters: None, }, @@ -1990,12 +1986,10 @@ mod tests { #[tokio::test] async fn ne_10m_ports_fast_spatial_filter() -> Result<()> { - let dataset = DatasetId::Internal { - dataset_id: InternalDatasetId::new(), - }; + let id: DataId = DatasetId::new().into(); let mut exe_ctx = MockExecutionContext::test_default(); exe_ctx.add_meta_data::( - dataset.clone(), + id.clone(), Box::new(StaticMetaData { loading_info: OgrSourceDataset { file_name: test_data!( @@ -2026,7 +2020,7 @@ mod tests { let source = OgrSource { params: OgrSourceParameters { - dataset, + data: id, attribute_projection: None, attribute_filters: None, }, @@ -2092,9 +2086,7 @@ mod tests { #[tokio::test] #[allow(clippy::too_many_lines)] async fn ne_10m_ports_columns() -> Result<()> { - let id = DatasetId::Internal { - dataset_id: InternalDatasetId::new(), - }; + let id: DataId = DatasetId::new().into(); let mut exe_ctx = MockExecutionContext::test_default(); exe_ctx.add_meta_data::( id.clone(), @@ -2178,7 +2170,7 @@ mod tests { let source = OgrSource { params: OgrSourceParameters { - dataset: id.clone(), + data: id.clone(), attribute_projection: None, attribute_filters: None, }, @@ 
-2318,9 +2310,7 @@ mod tests { #[tokio::test] #[allow(clippy::too_many_lines)] async fn ne_10m_ports() -> Result<()> { - let id = DatasetId::Internal { - dataset_id: InternalDatasetId::new(), - }; + let id: DataId = DatasetId::new().into(); let mut exe_ctx = MockExecutionContext::test_default(); exe_ctx.add_meta_data::( id.clone(), @@ -2351,7 +2341,7 @@ mod tests { let source = OgrSource { params: OgrSourceParameters { - dataset: id.clone(), + data: id.clone(), attribute_projection: None, attribute_filters: None, }, @@ -3725,9 +3715,7 @@ mod tests { #[tokio::test] #[allow(clippy::too_many_lines)] async fn chunked() -> Result<()> { - let id = DatasetId::Internal { - dataset_id: InternalDatasetId::new(), - }; + let id: DataId = DatasetId::new().into(); let mut exe_ctx = MockExecutionContext::test_default(); exe_ctx.add_meta_data::( id.clone(), @@ -3758,7 +3746,7 @@ mod tests { let source = OgrSource { params: OgrSourceParameters { - dataset: id.clone(), + data: id.clone(), attribute_projection: None, attribute_filters: None, }, @@ -3975,12 +3963,10 @@ mod tests { #[tokio::test] async fn empty() { - let dataset = DatasetId::Internal { - dataset_id: InternalDatasetId::new(), - }; + let id: DataId = DatasetId::new().into(); let mut exe_ctx = MockExecutionContext::test_default(); exe_ctx.add_meta_data::( - dataset.clone(), + id.clone(), Box::new(StaticMetaData { loading_info: OgrSourceDataset { file_name: test_data!("vector/data/ne_10m_ports/ne_10m_ports.shp").into(), @@ -4008,7 +3994,7 @@ mod tests { let source = OgrSource { params: OgrSourceParameters { - dataset, + data: id, attribute_projection: None, attribute_filters: None, }, @@ -4054,12 +4040,10 @@ mod tests { #[tokio::test] async fn polygon_gpkg() { - let dataset = DatasetId::Internal { - dataset_id: InternalDatasetId::new(), - }; + let id: DataId = DatasetId::new().into(); let mut exe_ctx = MockExecutionContext::test_default(); exe_ctx.add_meta_data::( - dataset.clone(), + id.clone(), Box::new(StaticMetaData { loading_info: OgrSourceDataset { file_name: test_data!("vector/data/germany_polygon.gpkg").into(), @@ -4097,7 +4081,7 @@ mod tests { let source = OgrSource { params: OgrSourceParameters { - dataset, + data: id, attribute_projection: None, attribute_filters: None, }, @@ -4153,12 +4137,10 @@ mod tests { #[tokio::test] #[allow(clippy::too_many_lines)] async fn points_csv() { - let dataset = DatasetId::Internal { - dataset_id: InternalDatasetId::new(), - }; + let id: DataId = DatasetId::new().into(); let mut exe_ctx = MockExecutionContext::test_default(); exe_ctx.add_meta_data::( - dataset.clone(), + id.clone(), Box::new(StaticMetaData { loading_info: OgrSourceDataset { file_name: test_data!("vector/data/points.csv").into(), @@ -4216,7 +4198,7 @@ mod tests { let source = OgrSource { params: OgrSourceParameters { - dataset, + data: id, attribute_projection: None, attribute_filters: None, }, @@ -4278,12 +4260,10 @@ mod tests { #[tokio::test] #[allow(clippy::too_many_lines)] async fn points_date_csv() { - let dataset = DatasetId::Internal { - dataset_id: InternalDatasetId::new(), - }; + let id: DataId = DatasetId::new().into(); let mut exe_ctx = MockExecutionContext::test_default(); exe_ctx.add_meta_data::( - dataset.clone(), + id.clone(), Box::new(StaticMetaData { loading_info: OgrSourceDataset { file_name: test_data!("vector/data/lonlat_date.csv").into(), @@ -4341,7 +4321,7 @@ mod tests { let source = OgrSource { params: OgrSourceParameters { - dataset, + data: id, attribute_projection: None, attribute_filters: None, }, @@ -4399,12 
+4379,10 @@ mod tests { #[tokio::test] #[allow(clippy::too_many_lines)] async fn points_date_time_csv() { - let dataset = DatasetId::Internal { - dataset_id: InternalDatasetId::new(), - }; + let id: DataId = DatasetId::new().into(); let mut exe_ctx = MockExecutionContext::test_default(); exe_ctx.add_meta_data::( - dataset.clone(), + id.clone(), Box::new(StaticMetaData { loading_info: OgrSourceDataset { file_name: test_data!("vector/data/lonlat_date_time.csv").into(), @@ -4464,7 +4442,7 @@ mod tests { let source = OgrSource { params: OgrSourceParameters { - dataset, + data: id, attribute_projection: None, attribute_filters: None, }, @@ -4522,12 +4500,10 @@ mod tests { #[tokio::test] #[allow(clippy::too_many_lines)] async fn points_date_time_tz_csv() { - let dataset = DatasetId::Internal { - dataset_id: InternalDatasetId::new(), - }; + let id: DataId = DatasetId::new().into(); let mut exe_ctx = MockExecutionContext::test_default(); exe_ctx.add_meta_data::( - dataset.clone(), + id.clone(), Box::new(StaticMetaData { loading_info: OgrSourceDataset { file_name: test_data!("vector/data/lonlat_date_time_tz.csv").into(), @@ -4587,7 +4563,7 @@ mod tests { let source = OgrSource { params: OgrSourceParameters { - dataset, + data: id, attribute_projection: None, attribute_filters: None, }, @@ -4645,12 +4621,10 @@ mod tests { #[tokio::test] #[allow(clippy::too_many_lines)] async fn points_unix_date() { - let dataset = DatasetId::Internal { - dataset_id: InternalDatasetId::new(), - }; + let id: DataId = DatasetId::new().into(); let mut exe_ctx = MockExecutionContext::test_default(); exe_ctx.add_meta_data::( - dataset.clone(), + id.clone(), Box::new(StaticMetaData { loading_info: OgrSourceDataset { file_name: test_data!("vector/data/lonlat_unix_date.csv").into(), @@ -4706,7 +4680,7 @@ mod tests { let source = OgrSource { params: OgrSourceParameters { - dataset, + data: id, attribute_projection: None, attribute_filters: None, }, @@ -4764,12 +4738,10 @@ mod tests { #[tokio::test] #[allow(clippy::too_many_lines)] async fn vector_date_time_csv() { - let dataset = DatasetId::Internal { - dataset_id: InternalDatasetId::new(), - }; + let id: DataId = DatasetId::new().into(); let mut exe_ctx = MockExecutionContext::test_default(); exe_ctx.add_meta_data::( - dataset.clone(), + id.clone(), Box::new(StaticMetaData { loading_info: OgrSourceDataset { file_name: test_data!("vector/data/lonlat_date_time.csv").into(), @@ -4838,7 +4810,7 @@ mod tests { let source = OgrSource { params: OgrSourceParameters { - dataset, + data: id, attribute_projection: None, attribute_filters: None, }, @@ -4902,12 +4874,10 @@ mod tests { #[tokio::test] #[allow(clippy::too_many_lines)] async fn points_bool_csv() { - let dataset = DatasetId::Internal { - dataset_id: InternalDatasetId::new(), - }; + let id: DataId = DatasetId::new().into(); let mut exe_ctx = MockExecutionContext::test_default(); exe_ctx.add_meta_data::( - dataset.clone(), + id.clone(), Box::new(StaticMetaData { loading_info: OgrSourceDataset { file_name: test_data!("vector/data/points_with_bool.csv").into(), @@ -4956,7 +4926,7 @@ mod tests { let source = OgrSource { params: OgrSourceParameters { - dataset, + data: id, attribute_projection: None, attribute_filters: None, }, diff --git a/operators/src/util/gdal.rs b/operators/src/util/gdal.rs index 258fa0e2a..8a9751de7 100644 --- a/operators/src/util/gdal.rs +++ b/operators/src/util/gdal.rs @@ -6,7 +6,7 @@ use std::{ use gdal::{raster::GDALDataType, Dataset, DatasetOptions}; use geoengine_datatypes::{ - dataset::{DatasetId, 
InternalDatasetId}, + dataset::{DataId, DatasetId}, hashmap, primitives::{ DateTimeParseFormat, Measurement, SpatialPartition2D, TimeGranularity, TimeInstance, @@ -82,8 +82,8 @@ pub fn create_ndvi_meta_data() -> GdalMetaDataRegular { } // TODO: move test helper somewhere else? -pub fn add_ndvi_dataset(ctx: &mut MockExecutionContext) -> DatasetId { - let id: DatasetId = InternalDatasetId::new().into(); +pub fn add_ndvi_dataset(ctx: &mut MockExecutionContext) -> DataId { + let id: DataId = DatasetId::new().into(); ctx.add_meta_data(id.clone(), Box::new(create_ndvi_meta_data())); id } diff --git a/operators/src/util/input/multi_raster_or_vector.rs b/operators/src/util/input/multi_raster_or_vector.rs index 028d4e9d9..a98cb3ad1 100644 --- a/operators/src/util/input/multi_raster_or_vector.rs +++ b/operators/src/util/input/multi_raster_or_vector.rs @@ -1,5 +1,5 @@ -use crate::engine::{OperatorDatasets, RasterOperator, VectorOperator}; -use geoengine_datatypes::dataset::DatasetId; +use crate::engine::{OperatorData, RasterOperator, VectorOperator}; +use geoengine_datatypes::dataset::DataId; use serde::{Deserialize, Serialize}; /// It is either a set of `RasterOperator` or a single `VectorOperator` @@ -44,15 +44,15 @@ impl From> for MultiRasterOrVectorOperator { } } -impl OperatorDatasets for MultiRasterOrVectorOperator { - fn datasets_collect(&self, datasets: &mut Vec) { +impl OperatorData for MultiRasterOrVectorOperator { + fn data_ids_collect(&self, data_ids: &mut Vec) { match self { Self::Raster(rs) => { for r in rs { - r.datasets_collect(datasets); + r.data_ids_collect(data_ids); } } - Self::Vector(v) => v.datasets_collect(datasets), + Self::Vector(v) => v.data_ids_collect(data_ids), } } } @@ -60,7 +60,7 @@ impl OperatorDatasets for MultiRasterOrVectorOperator { #[cfg(test)] mod tests { use crate::source::{GdalSource, GdalSourceParameters}; - use geoengine_datatypes::dataset::InternalDatasetId; + use geoengine_datatypes::dataset::DatasetId; use std::str::FromStr; use super::*; @@ -69,7 +69,7 @@ mod tests { fn it_serializes() { let operator = MultiRasterOrVectorOperator::Raster(vec![GdalSource { params: GdalSourceParameters { - dataset: InternalDatasetId::from_str("fc734022-61e0-49da-b327-257ba9d602a7") + data: DatasetId::from_str("fc734022-61e0-49da-b327-257ba9d602a7") .unwrap() .into(), }, @@ -81,7 +81,7 @@ mod tests { serde_json::json!([{ "type": "GdalSource", "params": { - "dataset": { + "data": { "type": "internal", "datasetId": "fc734022-61e0-49da-b327-257ba9d602a7" } @@ -95,7 +95,7 @@ mod tests { let workflow = serde_json::json!([{ "type": "GdalSource", "params": { - "dataset": { + "data": { "type": "internal", "datasetId": "fc734022-61e0-49da-b327-257ba9d602a7" } @@ -115,7 +115,7 @@ mod tests { let workflow = serde_json::json!({ "type": "OgrSource", "params": { - "dataset": { + "data": { "type": "internal", "datasetId": "fc734022-61e0-49da-b327-257ba9d602a7" }, diff --git a/operators/src/util/input/raster_or_vector.rs b/operators/src/util/input/raster_or_vector.rs index e556f6f5b..6c6b26b18 100644 --- a/operators/src/util/input/raster_or_vector.rs +++ b/operators/src/util/input/raster_or_vector.rs @@ -1,5 +1,5 @@ -use crate::engine::{OperatorDatasets, RasterOperator, TypedOperator, VectorOperator}; -use geoengine_datatypes::dataset::DatasetId; +use crate::engine::{OperatorData, RasterOperator, TypedOperator, VectorOperator}; +use geoengine_datatypes::dataset::DataId; use serde::{Deserialize, Serialize}; /// It is either a `RasterOperator` or a `VectorOperator` @@ -47,11 +47,11 @@ impl 
From> for RasterOrVectorOperator { } } -impl OperatorDatasets for RasterOrVectorOperator { - fn datasets_collect(&self, datasets: &mut Vec) { +impl OperatorData for RasterOrVectorOperator { + fn data_ids_collect(&self, data_ids: &mut Vec) { match self { - RasterOrVectorOperator::Raster(r) => r.datasets_collect(datasets), - RasterOrVectorOperator::Vector(v) => v.datasets_collect(datasets), + RasterOrVectorOperator::Raster(r) => r.data_ids_collect(data_ids), + RasterOrVectorOperator::Vector(v) => v.data_ids_collect(data_ids), } } } @@ -59,7 +59,7 @@ impl OperatorDatasets for RasterOrVectorOperator { #[cfg(test)] mod tests { use crate::source::{GdalSource, GdalSourceParameters}; - use geoengine_datatypes::dataset::InternalDatasetId; + use geoengine_datatypes::dataset::DatasetId; use std::str::FromStr; use super::*; @@ -70,11 +70,9 @@ mod tests { GdalSource { params: GdalSourceParameters { - dataset: InternalDatasetId::from_str( - "fc734022-61e0-49da-b327-257ba9d602a7", - ) - .unwrap() - .into(), + data: DatasetId::from_str("fc734022-61e0-49da-b327-257ba9d602a7") + .unwrap() + .into(), }, } .boxed(), @@ -85,7 +83,7 @@ mod tests { serde_json::json!({ "type": "GdalSource", "params": { - "dataset": { + "data": { "type": "internal", "datasetId": "fc734022-61e0-49da-b327-257ba9d602a7" } @@ -99,7 +97,7 @@ mod tests { let workflow = serde_json::json!({ "type": "GdalSource", "params": { - "dataset": { + "data": { "type": "internal", "datasetId": "fc734022-61e0-49da-b327-257ba9d602a7" } @@ -119,7 +117,7 @@ mod tests { let workflow = serde_json::json!({ "type": "OgrSource", "params": { - "dataset": { + "data": { "type": "internal", "datasetId": "fc734022-61e0-49da-b327-257ba9d602a7" }, diff --git a/services/src/contexts/mod.rs b/services/src/contexts/mod.rs index fdf6b2272..8a78b764d 100644 --- a/services/src/contexts/mod.rs +++ b/services/src/contexts/mod.rs @@ -14,7 +14,7 @@ mod simple_context; use crate::datasets::storage::DatasetDb; -use geoengine_datatypes::dataset::DatasetId; +use geoengine_datatypes::dataset::DataId; use geoengine_datatypes::raster::TilingSpecification; use geoengine_operators::engine::{ @@ -171,7 +171,7 @@ where { async fn meta_data( &self, - dataset_id: &DatasetId, + data_id: &DataId, ) -> Result< Box< dyn MetaData< @@ -182,22 +182,22 @@ where >, geoengine_operators::error::Error, > { - match dataset_id { - DatasetId::Internal { dataset_id: _ } => self + match data_id { + DataId::Internal { dataset_id: _ } => self .dataset_db - .session_meta_data(&self.session, dataset_id) + .session_meta_data(&self.session, data_id) .await .map_err(|e| geoengine_operators::error::Error::LoadingInfo { source: Box::new(e), }), - DatasetId::External(external) => { + DataId::External(external) => { self.layer_provider_db .layer_provider(external.provider_id) .await .map_err(|e| geoengine_operators::error::Error::DatasetMetaData { source: Box::new(e), })? 
- .meta_data(dataset_id) + .meta_data(data_id) .await } } @@ -216,27 +216,27 @@ where { async fn meta_data( &self, - dataset_id: &DatasetId, + data_id: &DataId, ) -> Result< Box>, geoengine_operators::error::Error, > { - match dataset_id { - DatasetId::Internal { dataset_id: _ } => self + match data_id { + DataId::Internal { dataset_id: _ } => self .dataset_db - .session_meta_data(&self.session, dataset_id) + .session_meta_data(&self.session, data_id) .await .map_err(|e| geoengine_operators::error::Error::LoadingInfo { source: Box::new(e), }), - DatasetId::External(external) => { + DataId::External(external) => { self.layer_provider_db .layer_provider(external.provider_id) .await .map_err(|e| geoengine_operators::error::Error::DatasetMetaData { source: Box::new(e), })? - .meta_data(dataset_id) + .meta_data(data_id) .await } } @@ -255,27 +255,27 @@ where { async fn meta_data( &self, - dataset_id: &DatasetId, + data_id: &DataId, ) -> Result< Box>, geoengine_operators::error::Error, > { - match dataset_id { - DatasetId::Internal { dataset_id: _ } => self + match data_id { + DataId::Internal { dataset_id: _ } => self .dataset_db - .session_meta_data(&self.session, dataset_id) + .session_meta_data(&self.session, data_id) .await .map_err(|e| geoengine_operators::error::Error::LoadingInfo { source: Box::new(e), }), - DatasetId::External(external) => { + DataId::External(external) => { self.layer_provider_db .layer_provider(external.provider_id) .await .map_err(|e| geoengine_operators::error::Error::DatasetMetaData { source: Box::new(e), })? - .meta_data(dataset_id) + .meta_data(data_id) .await } } diff --git a/services/src/datasets/add_from_directory.rs b/services/src/datasets/add_from_directory.rs index 5b81eaa27..9494da175 100644 --- a/services/src/datasets/add_from_directory.rs +++ b/services/src/datasets/add_from_directory.rs @@ -6,7 +6,7 @@ use std::{ }; use crate::error::Result; -use crate::layers::external::ExternalLayerProviderDefinition; +use crate::layers::external::DataProviderDefinition; use crate::layers::storage::LayerProviderDb; use crate::util::user_input::UserInput; use crate::{contexts::MockableSession, datasets::storage::DatasetDb}; @@ -67,7 +67,7 @@ pub async fn add_providers_from_directory(db: &mut D, file_p db: &mut D, entry: &DirEntry, ) -> Result<()> { - let def: Box = + let def: Box = serde_json::from_reader(BufReader::new(File::open(entry.path())?))?; db.add_layer_provider(def).await?; // TODO: add as system user diff --git a/services/src/datasets/external/gfbio.rs b/services/src/datasets/external/gfbio.rs index 5417a24d5..ba33a783a 100644 --- a/services/src/datasets/external/gfbio.rs +++ b/services/src/datasets/external/gfbio.rs @@ -4,11 +4,11 @@ use std::marker::PhantomData; use crate::datasets::listing::{Provenance, ProvenanceOutput}; use crate::error::Result; use crate::error::{self, Error}; -use crate::layers::external::{ExternalLayerProvider, ExternalLayerProviderDefinition}; +use crate::layers::external::{DataProvider, DataProviderDefinition}; use crate::layers::layer::{ CollectionItem, Layer, LayerCollectionListOptions, LayerListing, ProviderLayerId, }; -use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider, LayerId}; +use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider}; use crate::util::user_input::Validated; use crate::workflows::workflow::Workflow; use async_trait::async_trait; @@ -16,7 +16,7 @@ use bb8_postgres::bb8::{Pool, PooledConnection}; use bb8_postgres::tokio_postgres::{Config, NoTls}; use 
bb8_postgres::PostgresConnectionManager; use geoengine_datatypes::collections::VectorDataType; -use geoengine_datatypes::dataset::{DatasetId, ExternalDatasetId, LayerProviderId}; +use geoengine_datatypes::dataset::{DataId, DataProviderId, ExternalDataId, LayerId}; use geoengine_datatypes::primitives::{ FeatureDataType, Measurement, RasterQueryRectangle, VectorQueryRectangle, }; @@ -36,8 +36,8 @@ use geoengine_operators::{ use serde::{Deserialize, Serialize}; use snafu::ensure; -pub const GFBIO_PROVIDER_ID: LayerProviderId = - LayerProviderId::from_u128(0x907f_9f5b_0304_4a0e_a5ef_28de_62d1_c0f9); +pub const GFBIO_PROVIDER_ID: DataProviderId = + DataProviderId::from_u128(0x907f_9f5b_0304_4a0e_a5ef_28de_62d1_c0f9); #[derive(Clone, Debug, Serialize, Deserialize)] struct DatabaseConnectionConfig { @@ -77,8 +77,8 @@ pub struct GfbioDataProviderDefinition { #[typetag::serde] #[async_trait] -impl ExternalLayerProviderDefinition for GfbioDataProviderDefinition { - async fn initialize(self: Box) -> Result> { +impl DataProviderDefinition for GfbioDataProviderDefinition { + async fn initialize(self: Box) -> Result> { Ok(Box::new(GfbioDataProvider::new(self.db_config).await?)) } @@ -90,7 +90,7 @@ impl ExternalLayerProviderDefinition for GfbioDataProviderDefinition { self.name.clone() } - fn id(&self) -> LayerProviderId { + fn id(&self) -> DataProviderId { GFBIO_PROVIDER_ID } } @@ -222,8 +222,8 @@ impl LayerCollectionProvider for GfbioDataProvider { .map(|row| { CollectionItem::Layer(LayerListing { id: ProviderLayerId { - provider: GFBIO_PROVIDER_ID, - item: LayerId(row.get::(0).to_string()), + provider_id: GFBIO_PROVIDER_ID, + layer_id: LayerId(row.get::(0).to_string()), }, name: row.get(1), description: row.try_get(2).unwrap_or_else(|_| "".to_owned()), @@ -239,7 +239,7 @@ impl LayerCollectionProvider for GfbioDataProvider { } async fn get_layer(&self, id: &LayerId) -> Result { - let surrogate_key: i32 = id.0.parse().map_err(|_| Error::InvalidDatasetId)?; + let surrogate_key: i32 = id.0.parse().map_err(|_| Error::InvalidDataId)?; let conn = self.pool.get().await?; @@ -265,8 +265,8 @@ impl LayerCollectionProvider for GfbioDataProvider { Ok(Layer { id: ProviderLayerId { - provider: GFBIO_PROVIDER_ID, - item: id.clone(), + provider_id: GFBIO_PROVIDER_ID, + layer_id: id.clone(), }, name: row.get(0), description: row.try_get(1).unwrap_or_else(|_| "".to_owned()), @@ -274,9 +274,9 @@ impl LayerCollectionProvider for GfbioDataProvider { operator: TypedOperator::Vector( OgrSource { params: OgrSourceParameters { - dataset: DatasetId::External(ExternalDatasetId { + data: DataId::External(ExternalDataId { provider_id: GFBIO_PROVIDER_ID, - dataset_id: id.0.clone(), + layer_id: id.clone(), }), attribute_projection: None, attribute_filters: None, @@ -291,15 +291,16 @@ impl LayerCollectionProvider for GfbioDataProvider { } #[async_trait] -impl ExternalLayerProvider for GfbioDataProvider { - async fn provenance(&self, dataset: &DatasetId) -> Result { - let surrogate_key: i32 = dataset +impl DataProvider for GfbioDataProvider { + async fn provenance(&self, id: &DataId) -> Result { + let surrogate_key: i32 = id .external() - .ok_or(Error::InvalidDatasetId) + .ok_or(Error::InvalidDataId) .map_err(|e| geoengine_operators::error::Error::LoadingInfo { source: Box::new(e), })? 
- .dataset_id + .layer_id + .0 .parse() .map_err(|e| geoengine_operators::error::Error::LoadingInfo { source: Box::new(e), @@ -331,7 +332,7 @@ impl ExternalLayerProvider for GfbioDataProvider { let row = conn.query_one(&stmt, &[&surrogate_key]).await?; Ok(ProvenanceOutput { - dataset: dataset.clone(), + data: id.clone(), provenance: Some(Provenance { citation: row.try_get(0).unwrap_or_else(|_| "".to_owned()), license: row.try_get(1).unwrap_or_else(|_| "".to_owned()), @@ -351,18 +352,19 @@ impl MetaDataProvider Result< Box>, geoengine_operators::error::Error, > { - let surrogate_key: i32 = dataset + let surrogate_key: i32 = id .external() - .ok_or(Error::InvalidDatasetId) + .ok_or(Error::InvalidDataId) .map_err(|e| geoengine_operators::error::Error::LoadingInfo { source: Box::new(e), })? - .dataset_id + .layer_id + .0 .parse() .map_err(|e| geoengine_operators::error::Error::LoadingInfo { source: Box::new(e), @@ -436,7 +438,7 @@ impl MetaDataProvider Result< Box>, geoengine_operators::error::Error, @@ -452,7 +454,7 @@ impl { async fn meta_data( &self, - _dataset: &DatasetId, + _id: &DataId, ) -> Result< Box< dyn MetaData< @@ -472,7 +474,7 @@ mod tests { use bb8_postgres::bb8::ManageConnection; use futures::StreamExt; use geoengine_datatypes::collections::MultiPointCollection; - use geoengine_datatypes::dataset::ExternalDatasetId; + use geoengine_datatypes::dataset::{ExternalDataId, LayerId}; use geoengine_datatypes::primitives::{ BoundingBox2D, FeatureData, MultiPoint, SpatialResolution, TimeInterval, }; @@ -576,8 +578,8 @@ mod tests { listing, vec![CollectionItem::Layer(LayerListing { id: ProviderLayerId { - provider: GFBIO_PROVIDER_ID, - item: LayerId("1".to_string()), + provider_id: GFBIO_PROVIDER_ID, + layer_id: LayerId("1".to_string()), }, name: "Example Title".to_string(), description: "".to_string(), @@ -611,9 +613,9 @@ mod tests { let meta: Box< dyn MetaData, > = provider - .meta_data(&DatasetId::External(ExternalDatasetId { + .meta_data(&DataId::External(ExternalDataId { provider_id: GFBIO_PROVIDER_ID, - dataset_id: "1".to_string(), + layer_id: LayerId("1".to_string()), })) .await .map_err(|e| e.to_string())?; @@ -788,9 +790,9 @@ mod tests { let meta: Box< dyn MetaData, > = provider - .meta_data(&DatasetId::External(ExternalDatasetId { + .meta_data(&DataId::External(ExternalDataId { provider_id: GFBIO_PROVIDER_ID, - dataset_id: "1".to_string(), + layer_id: LayerId("1".to_string()), })) .await .map_err(|e| e.to_string())?; @@ -888,9 +890,9 @@ mod tests { .await .map_err(|e| e.to_string())?; - let dataset = DatasetId::External(ExternalDatasetId { + let dataset = DataId::External(ExternalDataId { provider_id: GFBIO_PROVIDER_ID, - dataset_id: "1".to_owned(), + layer_id: LayerId("1".to_owned()), }); let result = provider @@ -899,9 +901,9 @@ mod tests { .map_err(|e| e.to_string())?; let expected = ProvenanceOutput { - dataset: DatasetId::External(ExternalDatasetId { + data: DataId::External(ExternalDataId { provider_id: GFBIO_PROVIDER_ID, - dataset_id: "1".to_owned(), + layer_id: LayerId("1".to_owned()), }), provenance: Some(Provenance { citation: "Example Description".to_owned(), diff --git a/services/src/datasets/external/mock.rs b/services/src/datasets/external/mock.rs index 7167fc996..c88d53e9e 100644 --- a/services/src/datasets/external/mock.rs +++ b/services/src/datasets/external/mock.rs @@ -1,10 +1,10 @@ use crate::datasets::listing::ProvenanceOutput; use crate::error::Result; -use crate::layers::external::{ExternalLayerProvider, ExternalLayerProviderDefinition}; +use 
crate::layers::external::{DataProvider, DataProviderDefinition}; use crate::layers::layer::{ CollectionItem, Layer, LayerCollectionListOptions, LayerListing, ProviderLayerId, }; -use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider, LayerId}; +use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider}; use crate::workflows::workflow::Workflow; use crate::{ datasets::storage::{DatasetDefinition, MetaDataDefinition}, @@ -12,7 +12,7 @@ use crate::{ util::user_input::Validated, }; use async_trait::async_trait; -use geoengine_datatypes::dataset::{DatasetId, LayerProviderId}; +use geoengine_datatypes::dataset::{DataId, DataProviderId, LayerId}; use geoengine_datatypes::primitives::{RasterQueryRectangle, VectorQueryRectangle}; use geoengine_operators::engine::{TypedOperator, VectorOperator}; use geoengine_operators::mock::{MockDatasetDataSource, MockDatasetDataSourceParams}; @@ -29,14 +29,14 @@ pub const ROOT_COLLECTION_ID: Uuid = Uuid::from_u128(0xd630_e723_63d4_440c_9e15_ #[derive(Clone, Debug, Serialize, Deserialize)] pub struct MockExternalLayerProviderDefinition { - pub id: LayerProviderId, + pub id: DataProviderId, pub datasets: Vec, } #[typetag::serde] #[async_trait] -impl ExternalLayerProviderDefinition for MockExternalLayerProviderDefinition { - async fn initialize(self: Box) -> crate::error::Result> { +impl DataProviderDefinition for MockExternalLayerProviderDefinition { + async fn initialize(self: Box) -> crate::error::Result> { Ok(Box::new(MockExternalDataProvider { id: self.id, datasets: self.datasets, @@ -51,31 +51,22 @@ impl ExternalLayerProviderDefinition for MockExternalLayerProviderDefinition { "MockName".to_owned() } - fn id(&self) -> LayerProviderId { + fn id(&self) -> DataProviderId { self.id } } #[derive(Debug)] pub struct MockExternalDataProvider { - id: LayerProviderId, + id: DataProviderId, datasets: Vec, } -// this provider uses dataset and layer ids interchangably -// TODO: remove this when external dataset ids are reworked -fn layer_id_from_dataset_id(id: &DatasetId) -> LayerId { - match id { - DatasetId::Internal { dataset_id } => LayerId(dataset_id.to_string()), - DatasetId::External(s) => LayerId(s.dataset_id.clone()), - } -} - #[async_trait] -impl ExternalLayerProvider for MockExternalDataProvider { - async fn provenance(&self, dataset: &DatasetId) -> Result { +impl DataProvider for MockExternalDataProvider { + async fn provenance(&self, id: &DataId) -> Result { Ok(ProvenanceOutput { - dataset: dataset.clone(), + data: id.clone(), provenance: None, }) } @@ -105,13 +96,13 @@ impl LayerCollectionProvider for MockExternalDataProvider { for dataset in &self.datasets { listing.push(Ok(CollectionItem::Layer(LayerListing { id: ProviderLayerId { - provider: self.id, - item: dataset + provider_id: self.id, + layer_id: dataset .properties .id .as_ref() .ok_or(error::Error::MissingDatasetId) - .map(layer_id_from_dataset_id)?, + .map(|id| LayerId(id.to_string()))?, }, name: dataset.properties.name.clone(), description: dataset.properties.description.clone(), @@ -135,16 +126,16 @@ impl LayerCollectionProvider for MockExternalDataProvider { d.properties .id .as_ref() - .map(layer_id_from_dataset_id) + .map(|id| LayerId(id.to_string())) .as_ref() == Some(id) }) - .ok_or(error::Error::UnknownDatasetId) + .ok_or(error::Error::UnknownDataId) .and_then(|d| { Ok(Layer { id: ProviderLayerId { - provider: self.id, - item: id.clone(), + provider_id: self.id, + layer_id: id.clone(), }, name: d.properties.name.clone(), description: 
d.properties.description.clone(), @@ -152,11 +143,11 @@ impl LayerCollectionProvider for MockExternalDataProvider { operator: TypedOperator::Vector( MockDatasetDataSource { params: MockDatasetDataSourceParams { - dataset: d + data: d .properties .id - .clone() - .ok_or(error::Error::MissingDatasetId)?, + .ok_or(error::Error::MissingDatasetId)? + .into(), }, } .boxed(), @@ -175,7 +166,7 @@ impl { async fn meta_data( &self, - dataset: &DatasetId, + id: &DataId, ) -> Result< Box< dyn MetaData< @@ -186,19 +177,24 @@ impl >, geoengine_operators::error::Error, > { + let dataset = id + .internal() + .ok_or(geoengine_operators::error::Error::DatasetMetaData { + source: Box::new(error::Error::DataIdTypeMissMatch), + })?; let dataset_def = self .datasets .iter() - .find(|d| d.properties.id.as_ref() == Some(dataset)) + .find(|d| d.properties.id.as_ref() == Some(&dataset)) .ok_or(geoengine_operators::error::Error::DatasetMetaData { - source: Box::new(error::Error::UnknownDatasetId), + source: Box::new(error::Error::UnknownDataId), })?; if let MetaDataDefinition::MockMetaData(m) = &dataset_def.meta_data { Ok(Box::new(m.clone())) } else { Err(geoengine_operators::error::Error::DatasetMetaData { - source: Box::new(error::Error::DatasetIdTypeMissMatch), + source: Box::new(error::Error::DataIdTypeMissMatch), }) } } @@ -210,7 +206,7 @@ impl MetaDataProvider Result< Box>, geoengine_operators::error::Error, @@ -225,7 +221,7 @@ impl MetaDataProvider Result< Box>, geoengine_operators::error::Error, diff --git a/services/src/datasets/external/nature40.rs b/services/src/datasets/external/nature40.rs index a6182efa2..c3fe0f980 100644 --- a/services/src/datasets/external/nature40.rs +++ b/services/src/datasets/external/nature40.rs @@ -3,11 +3,11 @@ use std::path::Path; use crate::datasets::listing::ProvenanceOutput; use crate::error::Error; use crate::error::Result; -use crate::layers::external::{ExternalLayerProvider, ExternalLayerProviderDefinition}; +use crate::layers::external::{DataProvider, DataProviderDefinition}; use crate::layers::layer::{ CollectionItem, Layer, LayerCollectionListOptions, LayerListing, ProviderLayerId, }; -use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider, LayerId}; +use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider}; use crate::util::parsing::{deserialize_base_url, string_or_string_array}; use crate::util::retry::retry; use crate::workflows::workflow::Workflow; @@ -16,8 +16,9 @@ use async_trait::async_trait; use futures::future::join_all; use gdal::DatasetOptions; use gdal::Metadata; -use geoengine_datatypes::dataset::ExternalDatasetId; -use geoengine_datatypes::dataset::{DatasetId, LayerProviderId}; +use geoengine_datatypes::dataset::ExternalDataId; +use geoengine_datatypes::dataset::LayerId; +use geoengine_datatypes::dataset::{DataId, DataProviderId}; use geoengine_datatypes::primitives::{RasterQueryRectangle, VectorQueryRectangle}; use geoengine_operators::engine::RasterOperator; use geoengine_operators::engine::TypedOperator; @@ -44,7 +45,7 @@ use url::Url; #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct Nature40DataProviderDefinition { - id: LayerProviderId, + id: DataProviderId, name: String, #[serde(deserialize_with = "deserialize_base_url")] base_url: Url, @@ -75,8 +76,8 @@ impl Default for RequestRetries { #[typetag::serde] #[async_trait] -impl ExternalLayerProviderDefinition for Nature40DataProviderDefinition { - async fn initialize(self: Box) -> crate::error::Result> { +impl 
DataProviderDefinition for Nature40DataProviderDefinition { + async fn initialize(self: Box) -> crate::error::Result> { Ok(Box::new(Nature40DataProvider { id: self.id, base_url: self.base_url, @@ -94,14 +95,14 @@ impl ExternalLayerProviderDefinition for Nature40DataProviderDefinition { self.name.clone() } - fn id(&self) -> LayerProviderId { + fn id(&self) -> DataProviderId { self.id } } #[derive(Debug)] pub struct Nature40DataProvider { - id: LayerProviderId, + id: DataProviderId, base_url: Url, user: String, password: String, @@ -140,10 +141,10 @@ struct RasterDbs { } #[async_trait] -impl ExternalLayerProvider for Nature40DataProvider { - async fn provenance(&self, dataset: &DatasetId) -> Result { +impl DataProvider for Nature40DataProvider { + async fn provenance(&self, id: &DataId) -> Result { Ok(ProvenanceOutput { - dataset: dataset.clone(), + data: id.clone(), provenance: None, }) } @@ -186,8 +187,8 @@ impl LayerCollectionProvider for Nature40DataProvider { for band_index in 1..=dataset.raster_count() { listing.push(Ok(CollectionItem::Layer(LayerListing { id: ProviderLayerId { - provider: self.id, - item: LayerId(format!("{}:{}", db.name.clone(), band_index)), + provider_id: self.id, + layer_id: LayerId(format!("{}:{}", db.name.clone(), band_index)), }, name: db.title.clone(), description: format!( @@ -224,11 +225,11 @@ impl LayerCollectionProvider for Nature40DataProvider { if let Ok(band_index) = band_index.parse::() { (db, band_index) } else { - return Err(Error::InvalidExternalDatasetId { provider: self.id }); + return Err(Error::InvalidExternalDataId { provider: self.id }); } } _ => { - return Err(Error::InvalidExternalDatasetId { provider: self.id }); + return Err(Error::InvalidExternalDataId { provider: self.id }); } }; @@ -248,8 +249,8 @@ impl LayerCollectionProvider for Nature40DataProvider { Ok(Layer { id: ProviderLayerId { - provider: self.id, - item: id.clone(), + provider_id: self.id, + layer_id: id.clone(), }, name: db.title.clone(), description: format!( @@ -263,9 +264,9 @@ impl LayerCollectionProvider for Nature40DataProvider { operator: TypedOperator::Raster( GdalSource { params: GdalSourceParameters { - dataset: DatasetId::External(ExternalDatasetId { + data: DataId::External(ExternalDataId { provider_id: self.id, - dataset_id: id.0.clone(), + layer_id: id.clone(), }), }, } @@ -389,17 +390,17 @@ impl MetaDataProvider Result< Box>, geoengine_operators::error::Error, > { - let dataset = dataset + let dataset = id .external() .ok_or(geoengine_operators::error::Error::LoadingInfo { - source: Box::new(Error::InvalidExternalDatasetId { provider: self.id }), + source: Box::new(Error::InvalidExternalDataId { provider: self.id }), })?; - let split: Vec<_> = dataset.dataset_id.split(':').collect(); + let split: Vec<_> = dataset.layer_id.0.split(':').collect(); let (db_name, band_index) = match *split.as_slice() { [db, band_index] => { @@ -407,13 +408,13 @@ impl MetaDataProvider { return Err(geoengine_operators::error::Error::LoadingInfo { - source: Box::new(Error::InvalidExternalDatasetId { provider: self.id }), + source: Box::new(Error::InvalidExternalDataId { provider: self.id }), }) } }; @@ -450,7 +451,7 @@ impl { async fn meta_data( &self, - _dataset: &DatasetId, + _id: &DataId, ) -> Result< Box< dyn MetaData< @@ -471,7 +472,7 @@ impl MetaDataProvider Result< Box>, geoengine_operators::error::Error, @@ -485,7 +486,7 @@ mod tests { use std::{fs::File, io::Read, path::PathBuf, str::FromStr}; use geoengine_datatypes::{ - dataset::ExternalDatasetId, + dataset::{ExternalDataId, 
LayerId}, primitives::{ Measurement, QueryRectangle, SpatialPartition2D, SpatialResolution, TimeInterval, }, @@ -757,7 +758,7 @@ mod tests { expect_lidar_requests(&mut server); let provider = Box::new(Nature40DataProviderDefinition { - id: LayerProviderId::from_str("2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd").unwrap(), + id: DataProviderId::from_str("2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd").unwrap(), name: "Nature40".to_owned(), base_url: Url::parse(&server.url_str("")).unwrap(), user: "geoengine".to_owned(), @@ -786,36 +787,44 @@ mod tests { vec![ CollectionItem::Layer(LayerListing { id: ProviderLayerId { - provider: LayerProviderId::from_str("2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd") - .unwrap(), - item: LayerId("geonode_ortho_muf_1m:1".to_owned()) + provider_id: DataProviderId::from_str( + "2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd" + ) + .unwrap(), + layer_id: LayerId("geonode_ortho_muf_1m:1".to_owned()) }, name: "MOF Luftbild".to_owned(), description: "Band 1: band1".to_owned(), }), CollectionItem::Layer(LayerListing { id: ProviderLayerId { - provider: LayerProviderId::from_str("2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd") - .unwrap(), - item: LayerId("geonode_ortho_muf_1m:2".to_owned()) + provider_id: DataProviderId::from_str( + "2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd" + ) + .unwrap(), + layer_id: LayerId("geonode_ortho_muf_1m:2".to_owned()) }, name: "MOF Luftbild".to_owned(), description: "Band 2: band2".to_owned(), }), CollectionItem::Layer(LayerListing { id: ProviderLayerId { - provider: LayerProviderId::from_str("2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd") - .unwrap(), - item: LayerId("geonode_ortho_muf_1m:3".to_owned()) + provider_id: DataProviderId::from_str( + "2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd" + ) + .unwrap(), + layer_id: LayerId("geonode_ortho_muf_1m:3".to_owned()) }, name: "MOF Luftbild".to_owned(), description: "Band 3: band3".to_owned(), }), CollectionItem::Layer(LayerListing { id: ProviderLayerId { - provider: LayerProviderId::from_str("2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd") - .unwrap(), - item: LayerId("lidar_2018_wetness_1m:1".to_owned()) + provider_id: DataProviderId::from_str( + "2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd" + ) + .unwrap(), + layer_id: LayerId("lidar_2018_wetness_1m:1".to_owned()) }, name: "Topografic Wetness index".to_owned(), description: "Band 1: wetness".to_owned(), @@ -832,7 +841,7 @@ mod tests { expect_lidar_requests(&mut server); let provider = Box::new(Nature40DataProviderDefinition { - id: LayerProviderId::from_str("2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd").unwrap(), + id: DataProviderId::from_str("2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd").unwrap(), name: "Nature40".to_owned(), base_url: Url::parse(&server.url_str("")).unwrap(), user: "geoengine".to_owned(), @@ -845,10 +854,10 @@ mod tests { let meta: Box> = provider - .meta_data(&DatasetId::External(ExternalDatasetId { - provider_id: LayerProviderId::from_str("2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd") + .meta_data(&DataId::External(ExternalDataId { + provider_id: DataProviderId::from_str("2cb964d5-b9fa-4f8f-ab6f-f6c7fb47d4cd") .unwrap(), - dataset_id: "lidar_2018_wetness_1m:1".to_owned(), + layer_id: LayerId("lidar_2018_wetness_1m:1".to_owned()), })) .await .unwrap(); diff --git a/services/src/datasets/external/netcdfcf/error.rs b/services/src/datasets/external/netcdfcf/error.rs index 6f46e23e8..05a69357a 100644 --- a/services/src/datasets/external/netcdfcf/error.rs +++ b/services/src/datasets/external/netcdfcf/error.rs @@ -3,7 +3,7 @@ use std::path::PathBuf; use gdal::errors::GdalError; use snafu::Snafu; -use 
geoengine_datatypes::{dataset::LayerProviderId, error::ErrorSource}; +use geoengine_datatypes::{dataset::DataProviderId, error::ErrorSource}; #[derive(Debug, Snafu)] #[snafu(visibility(pub(crate)))] @@ -85,10 +85,10 @@ pub enum NetCdfCf4DProviderError { CannotCalculateStepsInTimeCoverageInterval { source: geoengine_datatypes::error::Error, }, - InvalidExternalDatasetId { - provider: LayerProviderId, + InvalidExternalDataId { + provider: DataProviderId, }, - InvalidDatasetIdLength { + InvalidDataIdLength { length: usize, }, InvalidDatasetIdFile { diff --git a/services/src/datasets/external/netcdfcf/mod.rs b/services/src/datasets/external/netcdfcf/mod.rs index c24e04523..4fe1521cf 100644 --- a/services/src/datasets/external/netcdfcf/mod.rs +++ b/services/src/datasets/external/netcdfcf/mod.rs @@ -3,8 +3,8 @@ use self::gdalmd::MdGroup; pub use self::overviews::OverviewGeneration; use self::overviews::{create_overviews, METADATA_FILE_NAME}; use crate::datasets::listing::ProvenanceOutput; -use crate::layers::external::ExternalLayerProvider; -use crate::layers::external::ExternalLayerProviderDefinition; +use crate::layers::external::DataProvider; +use crate::layers::external::DataProviderDefinition; use crate::layers::layer::CollectionItem; use crate::layers::layer::Layer; use crate::layers::layer::LayerCollectionListOptions; @@ -12,14 +12,13 @@ use crate::layers::layer::LayerListing; use crate::layers::layer::ProviderLayerId; use crate::layers::listing::LayerCollectionId; use crate::layers::listing::LayerCollectionProvider; -use crate::layers::listing::LayerId; -use crate::projects::{RasterSymbology, Symbology}; +use crate::util::user_input::Validated; use crate::workflows::workflow::Workflow; -use crate::{datasets::listing::DatasetListing, util::user_input::Validated}; use async_trait::async_trait; use gdal::{DatasetOptions, GdalOpenFlags}; -use geoengine_datatypes::dataset::LayerProviderId; -use geoengine_datatypes::dataset::{DatasetId, ExternalDatasetId}; +use geoengine_datatypes::dataset::DataProviderId; +use geoengine_datatypes::dataset::LayerId; +use geoengine_datatypes::dataset::{DataId, ExternalDataId}; use geoengine_datatypes::operations::image::{Colorizer, RgbaColor}; use geoengine_datatypes::primitives::{ DateTime, DateTimeParseFormat, Measurement, RasterQueryRectangle, TimeGranularity, @@ -29,7 +28,6 @@ use geoengine_datatypes::raster::{GdalGeoTransform, RasterDataType}; use geoengine_datatypes::spatial_reference::SpatialReference; use geoengine_operators::engine::RasterOperator; use geoengine_operators::engine::TypedOperator; -use geoengine_operators::engine::TypedResultDescriptor; use geoengine_operators::source::GdalSource; use geoengine_operators::source::GdalSourceParameters; use geoengine_operators::source::{ @@ -59,8 +57,8 @@ mod overviews; type Result = std::result::Result; /// Singleton Provider with id `1690c483-b17f-4d98-95c8-00a64849cd0b` -pub const NETCDF_CF_PROVIDER_ID: LayerProviderId = - LayerProviderId::from_u128(0x1690_c483_b17f_4d98_95c8_00a6_4849_cd0b); +pub const NETCDF_CF_PROVIDER_ID: DataProviderId = + DataProviderId::from_u128(0x1690_c483_b17f_4d98_95c8_00a6_4849_cd0b); #[derive(Clone, Debug, Serialize, Deserialize)] pub struct NetCdfCfDataProviderDefinition { @@ -77,8 +75,8 @@ pub struct NetCdfCfDataProvider { #[typetag::serde] #[async_trait] -impl ExternalLayerProviderDefinition for NetCdfCfDataProviderDefinition { - async fn initialize(self: Box) -> crate::error::Result> { +impl DataProviderDefinition for NetCdfCfDataProviderDefinition { + async fn 
initialize(self: Box) -> crate::error::Result> { Ok(Box::new(NetCdfCfDataProvider { path: self.path, overviews: self.overviews, @@ -93,7 +91,7 @@ impl ExternalLayerProviderDefinition for NetCdfCfDataProviderDefinition { self.name.clone() } - fn id(&self) -> LayerProviderId { + fn id(&self) -> DataProviderId { NETCDF_CF_PROVIDER_ID } } @@ -294,12 +292,12 @@ impl NetCdfCfDataProvider { #[allow(dead_code)] pub(crate) fn listing_from_netcdf( - id: LayerProviderId, + id: DataProviderId, provider_path: &Path, overview_path: Option<&Path>, dataset_path: &Path, compute_stats: bool, - ) -> Result> { + ) -> Result> { let tree = Self::build_netcdf_tree(provider_path, overview_path, dataset_path, compute_stats)?; @@ -330,8 +328,6 @@ impl NetCdfCfDataProvider { let group_names = path.iter().map(|s| s.name.clone()).collect::>(); - let data_type = tail.data_type.context(error::MissingDataType)?; - for entity in &tree.entities { let dataset_id = NetCdfCf4DDatasetId { file_name: tree.file_name.clone(), @@ -339,31 +335,17 @@ impl NetCdfCfDataProvider { entity: entity.id, }; - listings.push(DatasetListing { - id: DatasetId::External(ExternalDatasetId { + listings.push(LayerListing { + id: ProviderLayerId { provider_id: id, - dataset_id: serde_json::to_string(&dataset_id).unwrap_or_default(), - }), + layer_id: LayerId(serde_json::to_string(&dataset_id).unwrap_or_default()), + }, name: format!( "{title}: {group_title_path} > {entity_name}", title = tree.title, entity_name = entity.name ), description: tree.summary.clone(), - tags: vec![], // TODO: where to get from file? - source_operator: "GdalSource".to_owned(), - result_descriptor: TypedResultDescriptor::Raster(RasterResultDescriptor { - data_type, - spatial_reference: tree.spatial_reference.into(), - measurement: derive_measurement(tail.unit.clone()), - no_data_value: None, // we don't want to open the dataset at this point. 
We should get rid of the result descriptor in the listing in general - time: None, // TODO: determine time - bbox: None, // TODO: determine bbox - }), - symbology: Some(Symbology::Raster(RasterSymbology { - opacity: 1.0, - colorizer: tree.colorizer.clone(), - })), }); } } @@ -375,18 +357,17 @@ impl NetCdfCfDataProvider { fn meta_data( path: &Path, overviews: &Path, - dataset: &DatasetId, + id: &DataId, ) -> Result>> { - let dataset = - dataset - .external() - .ok_or(NetCdfCf4DProviderError::InvalidExternalDatasetId { - provider: NETCDF_CF_PROVIDER_ID, - })?; + let dataset = id + .external() + .ok_or(NetCdfCf4DProviderError::InvalidExternalDataId { + provider: NETCDF_CF_PROVIDER_ID, + })?; let dataset_id: NetCdfCf4DDatasetId = - serde_json::from_str(&dataset.dataset_id).context(error::CannotParseDatasetId)?; + serde_json::from_str(&dataset.layer_id.0).context(error::CannotParseDatasetId)?; // try to load from overviews if let Some(loading_info) = Self::meta_data_from_overviews(overviews, &dataset_id) { @@ -397,7 +378,7 @@ impl NetCdfCfDataProvider { } let dataset_id: NetCdfCf4DDatasetId = - serde_json::from_str(&dataset.dataset_id).context(error::CannotParseDatasetId)?; + serde_json::from_str(&dataset.layer_id.0).context(error::CannotParseDatasetId)?; let path = path.join(&dataset_id.file_name); @@ -846,10 +827,10 @@ enum Metadata { } #[async_trait] -impl ExternalLayerProvider for NetCdfCfDataProvider { - async fn provenance(&self, dataset: &DatasetId) -> crate::error::Result { +impl DataProvider for NetCdfCfDataProvider { + async fn provenance(&self, id: &DataId) -> crate::error::Result { Ok(ProvenanceOutput { - dataset: dataset.clone(), + data: id.clone(), provenance: None, }) } @@ -902,14 +883,7 @@ impl LayerCollectionProvider for NetCdfCfDataProvider { l.into_iter() .map(|l| { CollectionItem::Layer(LayerListing { - id: crate::layers::layer::ProviderLayerId { - provider: NETCDF_CF_PROVIDER_ID, - item: LayerId( - l.id.external() - .expect("listing produces only external datasets") - .dataset_id, - ), - }, + id: l.id, name: l.name, description: l.description, }) @@ -943,8 +917,8 @@ impl LayerCollectionProvider for NetCdfCfDataProvider { async fn get_layer(&self, id: &LayerId) -> crate::error::Result { Ok(Layer { id: ProviderLayerId { - provider: NETCDF_CF_PROVIDER_ID, - item: id.clone(), + provider_id: NETCDF_CF_PROVIDER_ID, + layer_id: id.clone(), }, name: "".to_string(), // TODO: get from file or overview description: "".to_string(), // TODO: get from file or overview @@ -952,9 +926,9 @@ impl LayerCollectionProvider for NetCdfCfDataProvider { operator: TypedOperator::Raster( GdalSource { params: GdalSourceParameters { - dataset: DatasetId::External(ExternalDatasetId { + data: DataId::External(ExternalDataId { provider_id: NETCDF_CF_PROVIDER_ID, - dataset_id: id.0.clone(), + layer_id: id.clone(), }), }, } @@ -972,12 +946,12 @@ impl MetaDataProvider Result< Box>, geoengine_operators::error::Error, > { - let dataset = dataset.clone(); + let dataset = id.clone(); let path = self.path.clone(); let overviews = self.overviews.clone(); crate::util::spawn_blocking(move || { @@ -998,7 +972,7 @@ impl { async fn meta_data( &self, - _dataset: &DatasetId, + _id: &DataId, ) -> Result< Box< dyn MetaData< @@ -1019,7 +993,7 @@ impl MetaDataProvider Result< Box>, geoengine_operators::error::Error, @@ -1031,16 +1005,22 @@ impl MetaDataProvider entity01".into(), description: "CFake description of test dataset with metric.".into(), - tags: vec![], - source_operator: "GdalSource".into(), - result_descriptor: 
result_descriptor.clone(), - symbology: symbology.clone(), } ); assert_eq!( listing[1], - DatasetListing { - id: DatasetId::External(ExternalDatasetId { + LayerListing { + id: ProviderLayerId { provider_id, - dataset_id: serde_json::json!({ - "fileName": "dataset_m.nc", - "groupNames": ["metric_1"], - "entity": 1 - }) - .to_string(), - }), + layer_id: LayerId( + serde_json::json!({ + "fileName": "dataset_m.nc", + "groupNames": ["metric_1"], + "entity": 1 + }) + .to_string() + ), + }, name: "Test dataset metric: Random metric 1 > entity02".into(), description: "CFake description of test dataset with metric.".into(), - tags: vec![], - source_operator: "GdalSource".into(), - result_descriptor: result_descriptor.clone(), - symbology: symbology.clone(), } ); assert_eq!( listing[2], - DatasetListing { - id: DatasetId::External(ExternalDatasetId { + LayerListing { + id: ProviderLayerId { provider_id, - dataset_id: serde_json::json!({ - "fileName": "dataset_m.nc", - "groupNames": ["metric_1"], - "entity": 2 - }) - .to_string(), - }), + layer_id: LayerId( + serde_json::json!({ + "fileName": "dataset_m.nc", + "groupNames": ["metric_1"], + "entity": 2 + }) + .to_string() + ), + }, name: "Test dataset metric: Random metric 1 > entity03".into(), description: "CFake description of test dataset with metric.".into(), - tags: vec![], - source_operator: "GdalSource".into(), - result_descriptor: result_descriptor.clone(), - symbology: symbology.clone(), } ); assert_eq!( listing[3], - DatasetListing { - id: DatasetId::External(ExternalDatasetId { + LayerListing { + id: ProviderLayerId { provider_id, - dataset_id: serde_json::json!({ - "fileName": "dataset_m.nc", - "groupNames": ["metric_2"], - "entity": 0 - }) - .to_string(), - }), + layer_id: LayerId( + serde_json::json!({ + "fileName": "dataset_m.nc", + "groupNames": ["metric_2"], + "entity": 0 + }) + .to_string() + ), + }, name: "Test dataset metric: Random metric 2 > entity01".into(), description: "CFake description of test dataset with metric.".into(), - tags: vec![], - source_operator: "GdalSource".into(), - result_descriptor: result_descriptor.clone(), - symbology: symbology.clone(), } ); assert_eq!( listing[4], - DatasetListing { - id: DatasetId::External(ExternalDatasetId { + LayerListing { + id: ProviderLayerId { provider_id, - dataset_id: serde_json::json!({ - "fileName": "dataset_m.nc", - "groupNames": ["metric_2"], - "entity": 1 - }) - .to_string(), - }), + layer_id: LayerId( + serde_json::json!({ + "fileName": "dataset_m.nc", + "groupNames": ["metric_2"], + "entity": 1 + }) + .to_string() + ), + }, name: "Test dataset metric: Random metric 2 > entity02".into(), description: "CFake description of test dataset with metric.".into(), - tags: vec![], - source_operator: "GdalSource".into(), - result_descriptor: result_descriptor.clone(), - symbology: symbology.clone(), } ); assert_eq!( listing[5], - DatasetListing { - id: DatasetId::External(ExternalDatasetId { + LayerListing { + id: ProviderLayerId { provider_id, - dataset_id: serde_json::json!({ - "fileName": "dataset_m.nc", - "groupNames": ["metric_2"], - "entity": 2 - }) - .to_string(), - }), + layer_id: LayerId( + serde_json::json!({ + "fileName": "dataset_m.nc", + "groupNames": ["metric_2"], + "entity": 2 + }) + .to_string() + ), + }, name: "Test dataset metric: Random metric 2 > entity03".into(), description: "CFake description of test dataset with metric.".into(), - tags: vec![], - source_operator: "GdalSource".into(), - result_descriptor, - symbology, } ); } #[tokio::test] async fn 
test_listing_from_netcdf_sm() { - let provider_id = - LayerProviderId::from_str("bf6bb6ea-5d5d-467d-bad1-267bf3a54470").unwrap(); + let provider_id = DataProviderId::from_str("bf6bb6ea-5d5d-467d-bad1-267bf3a54470").unwrap(); let listing = NetCdfCfDataProvider::listing_from_netcdf( provider_id, @@ -1336,69 +1241,40 @@ mod tests { assert_eq!(listing.len(), 20); - let result_descriptor: TypedResultDescriptor = RasterResultDescriptor { - data_type: RasterDataType::I16, - spatial_reference: SpatialReference::new(SpatialReferenceAuthority::Epsg, 3035).into(), - measurement: Measurement::Unitless, - no_data_value: None, - time: None, - bbox: None, - } - .into(); - - let symbology = Some(Symbology::Raster(RasterSymbology { - opacity: 1.0, - colorizer: Colorizer::LinearGradient { - breakpoints: vec![ - (0.0.try_into().unwrap(), RgbaColor::new(68, 1, 84, 255)).into(), - (50.0.try_into().unwrap(), RgbaColor::new(33, 145, 140, 255)).into(), - (100.0.try_into().unwrap(), RgbaColor::new(253, 231, 37, 255)).into(), - ], - no_data_color: RgbaColor::new(0, 0, 0, 0), - default_color: RgbaColor::new(0, 0, 0, 0), - }, - })); - assert_eq!( listing[0], - DatasetListing { - id: DatasetId::External(ExternalDatasetId { + LayerListing { + id: ProviderLayerId { provider_id, - dataset_id: serde_json::json!({ - "fileName": "dataset_sm.nc", - "groupNames": ["scenario_1", "metric_1"], - "entity": 0 - }) - .to_string(), - }), + layer_id: LayerId( + serde_json::json!({ + "fileName": "dataset_sm.nc", + "groupNames": ["scenario_1", "metric_1"], + "entity": 0 + }) + .to_string() + ), + }, name: "Test dataset metric and scenario: Sustainability > Random metric 1 > entity01" .into(), description: "Fake description of test dataset with metric and scenario.".into(), - tags: vec![], - source_operator: "GdalSource".into(), - result_descriptor: result_descriptor.clone(), - symbology: symbology.clone(), } ); assert_eq!( listing[19], - DatasetListing { - id: DatasetId::External(ExternalDatasetId { + LayerListing { + id: ProviderLayerId { provider_id, - dataset_id: serde_json::json!({ + layer_id: LayerId(serde_json::json!({ "fileName": "dataset_sm.nc", "groupNames": ["scenario_5", "metric_2"], "entity": 1 }) - .to_string(), - }), + .to_string()), + }, name: "Test dataset metric and scenario: Fossil-fueled Development > Random metric 2 > entity02".into(), description: "Fake description of test dataset with metric and scenario.".into(), - tags: vec![], - source_operator: "GdalSource".into(), - result_descriptor, - symbology, } ); } @@ -1411,14 +1287,16 @@ mod tests { }; let metadata = provider - .meta_data(&DatasetId::External(ExternalDatasetId { + .meta_data(&DataId::External(ExternalDataId { provider_id: NETCDF_CF_PROVIDER_ID, - dataset_id: serde_json::json!({ - "fileName": "dataset_sm.nc", - "groupNames": ["scenario_5", "metric_2"], - "entity": 1 - }) - .to_string(), + layer_id: LayerId( + serde_json::json!({ + "fileName": "dataset_sm.nc", + "groupNames": ["scenario_5", "metric_2"], + "entity": 1 + }) + .to_string(), + ), })) .await .unwrap(); @@ -1522,14 +1400,16 @@ mod tests { .unwrap(); let metadata = provider - .meta_data(&DatasetId::External(ExternalDatasetId { + .meta_data(&DataId::External(ExternalDataId { provider_id: NETCDF_CF_PROVIDER_ID, - dataset_id: serde_json::json!({ - "fileName": "dataset_sm.nc", - "groupNames": ["scenario_5", "metric_2"], - "entity": 1 - }) - .to_string(), + layer_id: LayerId( + serde_json::json!({ + "fileName": "dataset_sm.nc", + "groupNames": ["scenario_5", "metric_2"], + "entity": 1 + }) + 
.to_string(), + ), })) .await .unwrap(); @@ -1613,8 +1493,7 @@ mod tests { .create_overviews(Path::new("dataset_sm.nc")) .unwrap(); - let provider_id = - LayerProviderId::from_str("bf6bb6ea-5d5d-467d-bad1-267bf3a54470").unwrap(); + let provider_id = DataProviderId::from_str("bf6bb6ea-5d5d-467d-bad1-267bf3a54470").unwrap(); let listing = NetCdfCfDataProvider::listing_from_netcdf( provider_id, @@ -1627,7 +1506,7 @@ mod tests { assert_eq!(listing.len(), 20); - let result_descriptor: TypedResultDescriptor = RasterResultDescriptor { + let _result_descriptor: TypedResultDescriptor = RasterResultDescriptor { data_type: RasterDataType::I16, spatial_reference: SpatialReference::new(SpatialReferenceAuthority::Epsg, 3035).into(), measurement: Measurement::Unitless, @@ -1637,7 +1516,7 @@ mod tests { } .into(); - let symbology = Some(Symbology::Raster(RasterSymbology { + let _symbology = Some(Symbology::Raster(RasterSymbology { opacity: 1.0, colorizer: Colorizer::LinearGradient { breakpoints: vec![ @@ -1652,44 +1531,38 @@ mod tests { assert_eq!( listing[0], - DatasetListing { - id: DatasetId::External(ExternalDatasetId { + LayerListing { + id: ProviderLayerId { provider_id, - dataset_id: serde_json::json!({ - "fileName": "dataset_sm.nc", - "groupNames": ["scenario_1", "metric_1"], - "entity": 0 - }) - .to_string(), - }), + layer_id: LayerId( + serde_json::json!({ + "fileName": "dataset_sm.nc", + "groupNames": ["scenario_1", "metric_1"], + "entity": 0 + }) + .to_string() + ), + }, name: "Test dataset metric and scenario: Sustainability > Random metric 1 > entity01" .into(), description: "Fake description of test dataset with metric and scenario.".into(), - tags: vec![], - source_operator: "GdalSource".into(), - result_descriptor: result_descriptor.clone(), - symbology: symbology.clone(), } ); assert_eq!( listing[19], - DatasetListing { - id: DatasetId::External(ExternalDatasetId { + LayerListing { + id: ProviderLayerId { provider_id, - dataset_id: serde_json::json!({ + layer_id: LayerId(serde_json::json!({ "fileName": "dataset_sm.nc", "groupNames": ["scenario_5", "metric_2"], "entity": 1 }) - .to_string(), - }), + .to_string()), + }, name: "Test dataset metric and scenario: Fossil-fueled Development > Random metric 2 > entity02".into(), description: "Fake description of test dataset with metric and scenario.".into(), - tags: vec![], - source_operator: "GdalSource".into(), - result_descriptor, - symbology, } ); } diff --git a/services/src/datasets/external/nfdi/mod.rs b/services/src/datasets/external/nfdi/mod.rs index 2cfb127e9..5373d0c89 100644 --- a/services/src/datasets/external/nfdi/mod.rs +++ b/services/src/datasets/external/nfdi/mod.rs @@ -2,15 +2,15 @@ use crate::datasets::external::nfdi::metadata::{DataType, GEMetadata, RasterInfo use crate::datasets::listing::{ ProvenanceOutput, }; -use crate::datasets::storage::{Dataset}; use crate::error::{Error, Result, self}; -use crate::layers::external::{ExternalLayerProviderDefinition, ExternalLayerProvider}; +use crate::layers::external::{DataProviderDefinition, DataProvider}; use crate::layers::layer::{LayerCollectionListOptions, CollectionItem, Layer, LayerListing, ProviderLayerId}; -use crate::layers::listing::{LayerCollectionProvider, LayerCollectionId, LayerId}; +use crate::layers::listing::{LayerCollectionProvider, LayerCollectionId}; +use crate::util::operators::source_operator_from_dataset; use crate::util::user_input::Validated; use crate::workflows::workflow::Workflow; use geoengine_datatypes::collections::VectorDataType; -use 
geoengine_datatypes::dataset::{DatasetId, ExternalDatasetId, LayerProviderId}; +use geoengine_datatypes::dataset::{DataId, ExternalDataId, DataProviderId, LayerId}; use geoengine_datatypes::primitives::{ FeatureDataType, Measurement, RasterQueryRectangle, VectorQueryRectangle, }; @@ -18,7 +18,7 @@ use geoengine_datatypes::spatial_reference::SpatialReferenceOption; use geoengine_operators::engine::{ MetaData, MetaDataProvider, RasterResultDescriptor, ResultDescriptor, TypedResultDescriptor, VectorResultDescriptor, VectorOperator, TypedOperator, RasterOperator, - VectorColumnInfo, + VectorColumnInfo, OperatorName, }; use geoengine_operators::mock::MockDatasetDataSourceLoadingInfo; use geoengine_operators::source::{ @@ -54,7 +54,7 @@ const URL_REPLACEMENT: &str = "%URL%"; #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct NFDIDataProviderDefinition { - id: LayerProviderId, + id: DataProviderId, name: String, api_url: String, project_id: String, @@ -63,8 +63,8 @@ pub struct NFDIDataProviderDefinition { #[typetag::serde] #[async_trait::async_trait] -impl ExternalLayerProviderDefinition for NFDIDataProviderDefinition { - async fn initialize(self: Box) -> Result> { +impl DataProviderDefinition for NFDIDataProviderDefinition { + async fn initialize(self: Box) -> Result> { Ok(Box::new(NFDIDataProvider::new(self).await?)) } @@ -76,7 +76,7 @@ impl ExternalLayerProviderDefinition for NFDIDataProviderDefinition { self.name.clone() } - fn id(&self) -> LayerProviderId { + fn id(&self) -> DataProviderId { self.id } } @@ -114,7 +114,7 @@ impl Interceptor for APITokenInterceptor { /// is cheap. #[derive(Debug)] pub struct NFDIDataProvider { - id: LayerProviderId, + id: DataProviderId, project_id: String, project_stub: ProjectServiceClient>, dataset_stub: DatasetServiceClient>, @@ -149,10 +149,10 @@ impl NFDIDataProvider { } /// Extracts the core store id from the given dataset id - fn dataset_nfdi_id(id: &DatasetId) -> Result { + fn dataset_nfdi_id(id: &DataId) -> Result { match id { - DatasetId::External(id) => Ok(id.dataset_id.clone()), - DatasetId::Internal { .. } => Err(Error::InvalidDatasetId), + DataId::External(id) => Ok(id.layer_id.0.clone()), + DataId::Internal { .. } => Err(Error::InvalidDataId), } } @@ -171,7 +171,7 @@ impl NFDIDataProvider { } /// Retrieves information for the datasat with the given id. - async fn dataset_info(&self, id: &DatasetId) -> Result<(Dataset, GEMetadata)> { + async fn dataset_info(&self, id: &DataId) -> Result<(Layer, GEMetadata)> { let id = Self::dataset_nfdi_id(id)?; let mut stub = self.dataset_stub.clone(); @@ -180,51 +180,57 @@ impl NFDIDataProvider { .await? .into_inner(); - resp.dataset.ok_or(Error::InvalidDatasetId).and_then(|ds| { + resp.dataset.ok_or(Error::InvalidDataId).and_then(|ds| { // Extract and parse geoengine metadata let md = Self::extract_metadata(&ds)?; - Ok((self.map_dataset(&ds, &md), md)) + Ok((self.map_layer(&ds, &md)?, md)) }) } /// Maps the `gRPC` dataset representation to geoengine's internal representation. 
- fn map_dataset( + fn map_layer( &self, ds: &scienceobjectsdb_rust_api::sciobjectsdb::sciobjsdb::api::storage::models::v1::Dataset, md: &GEMetadata, - ) -> Dataset { - let id = DatasetId::External(ExternalDatasetId { + ) -> Result { + let id = ProviderLayerId { provider_id: self.id, - dataset_id: ds.id.clone(), - }); + layer_id: LayerId(ds.id.clone()), + }; // Create type specific infos - let (result_descriptor, source_operator) = match &md.data_type { + let (_result_descriptor, source_operator) = match &md.data_type { DataType::SingleVectorFile(info) => ( TypedResultDescriptor::Vector(Self::create_vector_result_descriptor( md.crs.into(), info, )), - "OgrSource".to_string(), + OgrSource::TYPE_NAME, ), DataType::SingleRasterFile(info) => ( TypedResultDescriptor::Raster(Self::create_raster_result_descriptor( md.crs.into(), info, )), - "GdalSource".to_string(), + GdalSource::TYPE_NAME, ), }; - Dataset { + Ok(Layer { id, name: ds.name.clone(), description: ds.description.clone(), - source_operator, - result_descriptor, + workflow: Workflow { + operator: source_operator_from_dataset( + source_operator, + &DataId::External(ExternalDataId { + provider_id: self.id, + layer_id: LayerId(ds.id.clone()), + }), + )?, + }, symbology: None, - provenance: md.provenance.clone(), - } + }) } /// Creates a result descriptor for vector data @@ -275,7 +281,7 @@ impl NFDIDataProvider { /// Retrieves a file-object from the core-storage. It assumes, that the dataset consists /// only of a single object group with a single object (the file). - async fn get_single_file_object(&self, id: &DatasetId) -> Result { + async fn get_single_file_object(&self, id: &DataId) -> Result { let mut ds_stub = self.dataset_stub.clone(); let group = ds_stub @@ -420,7 +426,7 @@ impl { async fn meta_data( &self, - _dataset: &DatasetId, + _id: &DataId, ) -> geoengine_operators::util::Result< Box< dyn MetaData< @@ -440,17 +446,17 @@ impl MetaDataProvider geoengine_operators::util::Result< Box>, > { - let (_, md) = self.dataset_info(dataset).await.map_err(|e| { + let (_, md) = self.dataset_info(id).await.map_err(|e| { geoengine_operators::error::Error::DatasetMetaData { source: Box::new(e), } })?; - let object = self.get_single_file_object(dataset).await.map_err(|e| { + let object = self.get_single_file_object(id).await.map_err(|e| { geoengine_operators::error::Error::DatasetMetaData { source: Box::new(e), } @@ -484,17 +490,17 @@ impl MetaDataProvider geoengine_operators::util::Result< Box>, > { - let (_, md) = self.dataset_info(dataset).await.map_err(|e| { + let (_, md) = self.dataset_info(id).await.map_err(|e| { geoengine_operators::error::Error::DatasetMetaData { source: Box::new(e), } })?; - let object = self.get_single_file_object(dataset).await.map_err(|e| { + let object = self.get_single_file_object(id).await.map_err(|e| { geoengine_operators::error::Error::DatasetMetaData { source: Box::new(e), } @@ -523,13 +529,13 @@ impl MetaDataProvider Result { - let (ds, _) = self.dataset_info(dataset).await?; +impl DataProvider for NFDIDataProvider { + async fn provenance(&self, id: &DataId) -> Result { + let (_, metadata) = self.dataset_info(id).await?; Ok(ProvenanceOutput { - dataset: dataset.clone(), - provenance: ds.provenance, + data: id.clone(), + provenance: metadata.provenance, }) } @@ -567,8 +573,8 @@ impl LayerCollectionProvider for NFDIDataProvider { .map(|ds| { CollectionItem::Layer(LayerListing { id: ProviderLayerId { - provider: self.id, - item: LayerId(ds.id), + provider_id: self.id, + layer_id: LayerId(ds.id), }, name: 
ds.name, description: ds.description, @@ -595,7 +601,7 @@ impl LayerCollectionProvider for NFDIDataProvider { .datasets .into_iter() .find(|ds| ds.id == id.0) - .ok_or(Error::UnknownDatasetId)?; + .ok_or(Error::UnknownDataId)?; let meta_data = Self::extract_metadata(&dataset)?; @@ -603,9 +609,9 @@ impl LayerCollectionProvider for NFDIDataProvider { DataType::SingleVectorFile(_) => TypedOperator::Vector( OgrSource { params: OgrSourceParameters { - dataset: DatasetId::External(ExternalDatasetId { + data: DataId::External(ExternalDataId { provider_id: self.id, - dataset_id: id.0.clone(), + layer_id: id.clone(), }), attribute_projection: None, attribute_filters: None, @@ -616,9 +622,9 @@ impl LayerCollectionProvider for NFDIDataProvider { DataType::SingleRasterFile(_) => TypedOperator::Raster( GdalSource { params: GdalSourceParameters { - dataset: DatasetId::External(ExternalDatasetId { + data: DataId::External(ExternalDataId { provider_id: self.id, - dataset_id: id.0.clone(), + layer_id: id.clone(), }), }, } @@ -628,8 +634,8 @@ impl LayerCollectionProvider for NFDIDataProvider { Ok(Layer { id: ProviderLayerId { - provider: self.id, - item: id.clone(), + provider_id: self.id, + layer_id: id.clone(), }, name: dataset.name, description: dataset.description, @@ -782,11 +788,11 @@ mod tests { use crate::datasets::external::nfdi::{ ExpiringDownloadLink, NFDIDataProvider, NFDIDataProviderDefinition, }; - use crate::layers::external::ExternalLayerProvider; + use crate::layers::external::DataProvider; use crate::layers::layer::LayerCollectionListOptions; use crate::layers::listing::LayerCollectionProvider; use futures::StreamExt; - use geoengine_datatypes::dataset::{DatasetId, ExternalDatasetId, LayerProviderId}; + use geoengine_datatypes::dataset::{DataId, DataProviderId, ExternalDataId, LayerId}; use httptest::responders::status_code; use httptest::{Expectation, Server}; use scienceobjectsdb_rust_api::sciobjectsdb::sciobjsdb::api::storage::models::v1::{ @@ -796,7 +802,7 @@ mod tests { CreateDownloadLinkResponse, GetDatasetObjectGroupsResponse, GetDatasetResponse, GetProjectDatasetsResponse, }; - use serde_json::Value; + use serde_json::{json, Value}; use std::str::FromStr; use tokio::fs::File; use tokio::io::AsyncReadExt; @@ -827,7 +833,7 @@ mod tests { async fn new_provider_with_url(url: String) -> NFDIDataProvider { let def = NFDIDataProviderDefinition { - id: LayerProviderId::from_str(PROVIDER_ID).unwrap(), + id: DataProviderId::from_str(PROVIDER_ID).unwrap(), api_token: TOKEN.to_string(), api_url: url, project_id: PROJECT_ID.to_string(), @@ -977,12 +983,29 @@ mod tests { let provider = new_provider_with_url(addr).await; let md = NFDIDataProvider::extract_metadata(&ds).unwrap(); - let ds = provider.map_dataset(&ds, &md); + let layer = provider.map_layer(&ds, &md).unwrap(); assert!(matches!( md.data_type, super::metadata::DataType::SingleVectorFile(_) )); - assert_eq!("OgrSource".to_string(), ds.source_operator); + assert_eq!( + json!({ + "type": "Vector", + "operator": { + "type": "OgrSource", + "params": { + "data": { + "type": "external", + "providerId": "86a7f7ce-1bab-4ce9-a32b-172c0f958ee0", + "layerId": "C" + }, + "attributeProjection": null, + "attributeFilters": null + } + } + }), + serde_json::to_value(&layer.workflow.operator).unwrap() + ); } #[tokio::test] @@ -1015,9 +1038,24 @@ mod tests { let addr = format!("http://{}", server.address()); let provider = new_provider_with_url(addr).await; - let ds = provider.map_dataset(&ds, &md); - - assert_eq!("GdalSource".to_string(), 
ds.source_operator); + let layer = provider.map_layer(&ds, &md).unwrap(); + + assert_eq!( + json!({ + "type": "Raster", + "operator": { + "type": "GdalSource", + "params": { + "data": { + "type": "external", + "providerId": "86a7f7ce-1bab-4ce9-a32b-172c0f958ee0", + "layerId": "C" + } + } + } + }), + serde_json::to_value(&layer.workflow.operator).unwrap() + ); } #[tokio::test] @@ -1184,9 +1222,9 @@ mod tests { }), ); - let id = DatasetId::External(ExternalDatasetId { - provider_id: LayerProviderId::from_str(PROVIDER_ID).unwrap(), - dataset_id: DATASET_ID.to_string(), + let id = DataId::External(ExternalDataId { + provider_id: DataProviderId::from_str(PROVIDER_ID).unwrap(), + layer_id: LayerId(DATASET_ID.to_string()), }); let addr = format!("http://{}", server.address()); @@ -1268,9 +1306,9 @@ mod tests { }), ); - let id = DatasetId::External(ExternalDatasetId { - provider_id: LayerProviderId::from_str(PROVIDER_ID).unwrap(), - dataset_id: DATASET_ID.to_string(), + let id = DataId::External(ExternalDataId { + provider_id: DataProviderId::from_str(PROVIDER_ID).unwrap(), + layer_id: LayerId(DATASET_ID.to_string()), }); let addr = format!("http://{}", server.address()); @@ -1383,9 +1421,9 @@ mod tests { .respond_with(responder), ); - let id = DatasetId::External(ExternalDatasetId { - provider_id: LayerProviderId::from_str(PROVIDER_ID).unwrap(), - dataset_id: DATASET_ID.to_string(), + let id = DataId::External(ExternalDataId { + provider_id: DataProviderId::from_str(PROVIDER_ID).unwrap(), + layer_id: LayerId(DATASET_ID.to_string()), }); let provider = new_provider_with_url(addr).await; @@ -1399,7 +1437,7 @@ mod tests { let src = OgrSource { params: OgrSourceParameters { - dataset: id, + data: id, attribute_projection: None, attribute_filters: None, }, diff --git a/services/src/datasets/external/pangaea/mod.rs b/services/src/datasets/external/pangaea/mod.rs index afe7d3f85..5f5bf7c11 100644 --- a/services/src/datasets/external/pangaea/mod.rs +++ b/services/src/datasets/external/pangaea/mod.rs @@ -1,10 +1,10 @@ use crate::datasets::external::pangaea::meta::PangeaMetaData; use crate::datasets::listing::{Provenance, ProvenanceOutput}; -use crate::layers::external::{ExternalLayerProvider, ExternalLayerProviderDefinition}; +use crate::layers::external::{DataProvider, DataProviderDefinition}; use crate::layers::layer::{CollectionItem, Layer, LayerCollectionListOptions}; -use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider, LayerId}; +use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider}; use async_trait::async_trait; -use geoengine_datatypes::dataset::{DatasetId, LayerProviderId}; +use geoengine_datatypes::dataset::{DataId, DataProviderId, LayerId}; use geoengine_datatypes::primitives::{RasterQueryRectangle, VectorQueryRectangle}; use geoengine_operators::engine::{ MetaData, MetaDataProvider, RasterResultDescriptor, VectorResultDescriptor, @@ -19,8 +19,8 @@ use serde::{Deserialize, Serialize}; mod meta; -pub const PANGAEA_PROVIDER_ID: LayerProviderId = - LayerProviderId::from_u128(0xe3b9_3bf3_1bc1_48db_80e8_97cf_b068_5e8d); +pub const PANGAEA_PROVIDER_ID: DataProviderId = + DataProviderId::from_u128(0xe3b9_3bf3_1bc1_48db_80e8_97cf_b068_5e8d); /// The pangaea provider allows to include datasets from /// @@ -33,8 +33,8 @@ pub struct PangaeaDataProviderDefinition { #[typetag::serde] #[async_trait] -impl ExternalLayerProviderDefinition for PangaeaDataProviderDefinition { - async fn initialize(self: Box) -> Result> { +impl DataProviderDefinition for 
PangaeaDataProviderDefinition { + async fn initialize(self: Box) -> Result> { Ok(Box::new(PangaeaDataProvider::new(self.base_url))) } @@ -46,7 +46,7 @@ impl ExternalLayerProviderDefinition for PangaeaDataProviderDefinition { self.name.clone() } - fn id(&self) -> LayerProviderId { + fn id(&self) -> DataProviderId { PANGAEA_PROVIDER_ID } } @@ -67,15 +67,15 @@ impl PangaeaDataProvider { } #[async_trait] -impl ExternalLayerProvider for PangaeaDataProvider { - async fn provenance(&self, dataset: &DatasetId) -> Result { - let doi = dataset +impl DataProvider for PangaeaDataProvider { + async fn provenance(&self, id: &DataId) -> Result { + let doi = id .external() - .ok_or(Error::InvalidDatasetId) + .ok_or(Error::InvalidDataId) .map_err(|e| geoengine_operators::error::Error::LoadingInfo { source: Box::new(e), })? - .dataset_id; + .layer_id; let pmd: PangeaMetaData = self .client @@ -97,7 +97,7 @@ impl ExternalLayerProvider for PangaeaDataProvider { .await?; Ok(ProvenanceOutput { - dataset: dataset.clone(), + data: id.clone(), provenance: Some(Provenance { citation: citation_text, license: pmd.license.unwrap_or_else(|| "".to_string()), @@ -136,18 +136,18 @@ impl MetaDataProvider Result< Box>, geoengine_operators::error::Error, > { - let doi = dataset + let doi = id .external() - .ok_or(Error::InvalidDatasetId) + .ok_or(Error::InvalidDataId) .map_err(|e| geoengine_operators::error::Error::LoadingInfo { source: Box::new(e), })? - .dataset_id; + .layer_id; let pmd: PangeaMetaData = self .client @@ -179,7 +179,7 @@ impl MetaDataProvider Result< Box>, geoengine_operators::error::Error, @@ -195,7 +195,7 @@ impl { async fn meta_data( &self, - _dataset: &DatasetId, + _id: &DataId, ) -> Result< Box< dyn MetaData< @@ -214,13 +214,13 @@ impl mod tests { use crate::datasets::external::pangaea::{PangaeaDataProviderDefinition, PANGAEA_PROVIDER_ID}; use crate::error::Error; - use crate::layers::external::{ExternalLayerProvider, ExternalLayerProviderDefinition}; + use crate::layers::external::{DataProvider, DataProviderDefinition}; use futures::StreamExt; use geoengine_datatypes::collections::{ DataCollection, FeatureCollectionInfos, IntoGeometryIterator, MultiPointCollection, MultiPolygonCollection, VectorDataType, }; - use geoengine_datatypes::dataset::{DatasetId, ExternalDatasetId}; + use geoengine_datatypes::dataset::{DataId, ExternalDataId, LayerId}; use geoengine_datatypes::primitives::{ BoundingBox2D, Coordinate2D, MultiPointAccess, SpatialResolution, TimeInterval, VectorQueryRectangle, @@ -246,7 +246,7 @@ mod tests { crate::test_data!(String::from("pangaea/") + file_name).into() } - async fn create_provider(server: &Server) -> Result, Error> { + async fn create_provider(server: &Server) -> Result, Error> { Box::new(PangaeaDataProviderDefinition { name: "Pangaea".to_string(), base_url: server.url_str("").strip_suffix('/').unwrap().to_owned(), @@ -255,10 +255,10 @@ mod tests { .await } - fn create_id(doi: &str) -> DatasetId { - DatasetId::External(ExternalDatasetId { + fn create_id(doi: &str) -> DataId { + DataId::External(ExternalDataId { provider_id: PANGAEA_PROVIDER_ID, - dataset_id: doi.to_owned(), + layer_id: LayerId(doi.to_owned()), }) } @@ -434,7 +434,7 @@ mod tests { let src = OgrSource { params: OgrSourceParameters { - dataset: id, + data: id, attribute_projection: None, attribute_filters: None, }, @@ -490,7 +490,7 @@ mod tests { let src = OgrSource { params: OgrSourceParameters { - dataset: id, + data: id, attribute_projection: None, attribute_filters: None, }, @@ -558,7 +558,7 @@ mod tests { let 
src = OgrSource { params: OgrSourceParameters { - dataset: id, + data: id, attribute_projection: None, attribute_filters: None, }, @@ -622,7 +622,7 @@ mod tests { let src = OgrSource { params: OgrSourceParameters { - dataset: id, + data: id, attribute_projection: None, attribute_filters: None, }, diff --git a/services/src/datasets/in_memory.rs b/services/src/datasets/in_memory.rs index 97d668160..58c2ed0d4 100644 --- a/services/src/datasets/in_memory.rs +++ b/services/src/datasets/in_memory.rs @@ -6,12 +6,12 @@ use crate::error::Result; use crate::layers::layer::{ CollectionItem, Layer, LayerCollectionListOptions, LayerListing, ProviderLayerId, }; -use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider, LayerId}; +use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider}; use crate::util::operators::source_operator_from_dataset; use crate::util::user_input::Validated; use crate::workflows::workflow::Workflow; use async_trait::async_trait; -use geoengine_datatypes::dataset::{DatasetId, InternalDatasetId}; +use geoengine_datatypes::dataset::{DataId, DatasetId, LayerId}; use geoengine_datatypes::primitives::{RasterQueryRectangle, VectorQueryRectangle}; use geoengine_datatypes::util::Identifier; use geoengine_operators::engine::{ @@ -38,11 +38,11 @@ use super::{ struct HashMapDatasetDbBackend { datasets: Vec, ogr_datasets: HashMap< - InternalDatasetId, + DatasetId, StaticMetaData, >, mock_datasets: HashMap< - InternalDatasetId, + DatasetId, StaticMetaData< MockDatasetDataSourceLoadingInfo, VectorResultDescriptor, @@ -50,7 +50,7 @@ struct HashMapDatasetDbBackend { >, >, gdal_datasets: HashMap< - InternalDatasetId, + DatasetId, Box>, >, uploads: HashMap, @@ -65,7 +65,7 @@ impl DatasetDb for HashMapDatasetDb {} #[async_trait] pub trait HashMapStorable: Send + Sync { - async fn store(&self, id: InternalDatasetId, db: &HashMapDatasetDb) -> TypedResultDescriptor; + async fn store(&self, id: DatasetId, db: &HashMapDatasetDb) -> TypedResultDescriptor; } impl DatasetStorer for HashMapDatasetDb { @@ -74,7 +74,7 @@ impl DatasetStorer for HashMapDatasetDb { #[async_trait] impl HashMapStorable for MetaDataDefinition { - async fn store(&self, id: InternalDatasetId, db: &HashMapDatasetDb) -> TypedResultDescriptor { + async fn store(&self, id: DatasetId, db: &HashMapDatasetDb) -> TypedResultDescriptor { match self { MetaDataDefinition::MockMetaData(d) => d.store(id, db).await, MetaDataDefinition::OgrMetaData(d) => d.store(id, db).await, @@ -90,7 +90,7 @@ impl HashMapStorable for MetaDataDefinition { impl HashMapStorable for StaticMetaData { - async fn store(&self, id: InternalDatasetId, db: &HashMapDatasetDb) -> TypedResultDescriptor { + async fn store(&self, id: DatasetId, db: &HashMapDatasetDb) -> TypedResultDescriptor { db.backend .write() .await @@ -108,7 +108,7 @@ impl HashMapStorable VectorQueryRectangle, > { - async fn store(&self, id: InternalDatasetId, db: &HashMapDatasetDb) -> TypedResultDescriptor { + async fn store(&self, id: DatasetId, db: &HashMapDatasetDb) -> TypedResultDescriptor { db.backend .write() .await @@ -120,7 +120,7 @@ impl HashMapStorable #[async_trait] impl HashMapStorable for GdalMetaDataRegular { - async fn store(&self, id: InternalDatasetId, db: &HashMapDatasetDb) -> TypedResultDescriptor { + async fn store(&self, id: DatasetId, db: &HashMapDatasetDb) -> TypedResultDescriptor { db.backend .write() .await @@ -132,7 +132,7 @@ impl HashMapStorable for GdalMetaDataRegular { #[async_trait] impl HashMapStorable for GdalMetaDataStatic { - async fn 
store(&self, id: InternalDatasetId, db: &HashMapDatasetDb) -> TypedResultDescriptor { + async fn store(&self, id: DatasetId, db: &HashMapDatasetDb) -> TypedResultDescriptor { db.backend .write() .await @@ -144,7 +144,7 @@ impl HashMapStorable for GdalMetaDataStatic { #[async_trait] impl HashMapStorable for GdalMetadataNetCdfCf { - async fn store(&self, id: InternalDatasetId, db: &HashMapDatasetDb) -> TypedResultDescriptor { + async fn store(&self, id: DatasetId, db: &HashMapDatasetDb) -> TypedResultDescriptor { db.backend .write() .await @@ -156,7 +156,7 @@ impl HashMapStorable for GdalMetadataNetCdfCf { #[async_trait] impl HashMapStorable for GdalMetaDataList { - async fn store(&self, id: InternalDatasetId, db: &HashMapDatasetDb) -> TypedResultDescriptor { + async fn store(&self, id: DatasetId, db: &HashMapDatasetDb) -> TypedResultDescriptor { db.backend .write() .await @@ -175,15 +175,11 @@ impl DatasetStore for HashMapDatasetDb { meta_data: Box, ) -> Result { let dataset = dataset.user_input; - let id = dataset - .id - .unwrap_or_else(|| InternalDatasetId::new().into()); - let result_descriptor = meta_data - .store(id.internal().expect("from AddDataset"), self) - .await; + let id = dataset.id.unwrap_or_else(DatasetId::new); + let result_descriptor = meta_data.store(id, self).await; let d: Dataset = Dataset { - id: id.clone(), + id, name: dataset.name, description: dataset.description, result_descriptor, @@ -265,7 +261,7 @@ impl DatasetProvider for HashMapDatasetDb { .iter() .find(|d| d.id == *dataset) .map(|d| ProvenanceOutput { - dataset: d.id.clone(), + data: d.id.into(), provenance: d.provenance.clone(), }) .ok_or(error::Error::UnknownDatasetId) @@ -284,7 +280,7 @@ impl async fn session_meta_data( &self, _session: &SimpleSession, - dataset: &DatasetId, + id: &DataId, ) -> Result< Box< dyn MetaData< @@ -299,12 +295,8 @@ impl .read() .await .mock_datasets - .get( - &dataset - .internal() - .ok_or(error::Error::DatasetIdTypeMissMatch)?, - ) - .ok_or(error::Error::UnknownDatasetId)? + .get(&id.internal().ok_or(error::Error::DataIdTypeMissMatch)?) + .ok_or(error::Error::UnknownDataId)? .clone(), )) } @@ -322,7 +314,7 @@ impl async fn session_meta_data( &self, _session: &SimpleSession, - dataset: &DatasetId, + id: &DataId, ) -> Result>> { Ok(Box::new( @@ -330,11 +322,12 @@ impl .read() .await .ogr_datasets - .get(&dataset.internal().ok_or( - geoengine_operators::error::Error::DatasetMetaData { - source: Box::new(error::Error::DatasetIdTypeMissMatch), - }, - )?) + .get( + &id.internal() + .ok_or(geoengine_operators::error::Error::DatasetMetaData { + source: Box::new(error::Error::DataIdTypeMissMatch), + })?, + ) .ok_or(geoengine_operators::error::Error::DatasetMetaData { source: Box::new(error::Error::UnknownDatasetId), })? @@ -355,12 +348,10 @@ impl async fn session_meta_data( &self, _session: &SimpleSession, - dataset: &DatasetId, + id: &DataId, ) -> Result>> { - let id = dataset - .internal() - .ok_or(error::Error::DatasetIdTypeMissMatch)?; + let id = id.internal().ok_or(error::Error::DataIdTypeMissMatch)?; Ok(self .backend @@ -417,13 +408,9 @@ impl LayerCollectionProvider for HashMapDatasetDb { .map(|d| { CollectionItem::Layer(LayerListing { id: ProviderLayerId { - provider: DATASET_DB_LAYER_PROVIDER_ID, + provider_id: DATASET_DB_LAYER_PROVIDER_ID, // use the dataset id also as layer id, TODO: maybe prefix it? 
- item: LayerId( - d.id.internal() - .expect("Dataset DB contains only internal datasets") - .to_string(), - ), + layer_id: LayerId(d.id.to_string()), }, name: d.name.clone(), description: d.description.clone(), @@ -439,9 +426,7 @@ impl LayerCollectionProvider for HashMapDatasetDb { } async fn get_layer(&self, id: &LayerId) -> Result { - let dataset_id = DatasetId::Internal { - dataset_id: InternalDatasetId::from_str(&id.0)?, - }; + let dataset_id = DatasetId::from_str(&id.0)?; let backend = self.backend.read().await; @@ -451,12 +436,12 @@ impl LayerCollectionProvider for HashMapDatasetDb { .find(|d| d.id == dataset_id) .ok_or(error::Error::UnknownDatasetId)?; - let operator = source_operator_from_dataset(&dataset.source_operator, &dataset.id)?; + let operator = source_operator_from_dataset(&dataset.source_operator, &dataset.id.into())?; Ok(Layer { id: ProviderLayerId { - provider: DATASET_DB_LAYER_PROVIDER_ID, - item: id.clone(), + provider_id: DATASET_DB_LAYER_PROVIDER_ID, + layer_id: id.clone(), }, name: dataset.name.clone(), description: dataset.description.clone(), @@ -528,7 +513,7 @@ mod tests { let meta: Box< dyn MetaData, - > = exe_ctx.meta_data(&id).await?; + > = exe_ctx.meta_data(&id.into()).await?; assert_eq!( meta.result_descriptor().await?, diff --git a/services/src/datasets/listing.rs b/services/src/datasets/listing.rs index 7171d10d3..0881e4119 100644 --- a/services/src/datasets/listing.rs +++ b/services/src/datasets/listing.rs @@ -6,7 +6,7 @@ use crate::projects::Symbology; use crate::util::config::{get_config_element, DatasetService}; use crate::util::user_input::{UserInput, Validated}; use async_trait::async_trait; -use geoengine_datatypes::dataset::DatasetId; +use geoengine_datatypes::dataset::{DataId, DatasetId}; use geoengine_datatypes::primitives::{RasterQueryRectangle, VectorQueryRectangle}; use geoengine_operators::engine::{ MetaData, RasterResultDescriptor, ResultDescriptor, TypedResultDescriptor, @@ -80,7 +80,7 @@ where async fn session_meta_data( &self, session: &S, - dataset: &DatasetId, + id: &DataId, ) -> Result>>; } @@ -111,7 +111,7 @@ pub trait DatasetProvider: #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Hash)] pub struct ProvenanceOutput { - pub dataset: DatasetId, + pub data: DataId, pub provenance: Option, } diff --git a/services/src/datasets/storage.rs b/services/src/datasets/storage.rs index 0f7e598bb..802131996 100644 --- a/services/src/datasets/storage.rs +++ b/services/src/datasets/storage.rs @@ -8,7 +8,7 @@ use crate::layers::listing::LayerCollectionProvider; use crate::projects::Symbology; use crate::util::user_input::{UserInput, Validated}; use async_trait::async_trait; -use geoengine_datatypes::dataset::{DatasetId, LayerProviderId}; +use geoengine_datatypes::dataset::{DataProviderId, DatasetId}; use geoengine_datatypes::primitives::VectorQueryRectangle; use geoengine_operators::engine::MetaData; use geoengine_operators::source::{GdalMetaDataList, GdalMetadataNetCdfCf}; @@ -25,8 +25,8 @@ use uuid::Uuid; use super::listing::Provenance; -pub const DATASET_DB_LAYER_PROVIDER_ID: LayerProviderId = - LayerProviderId::from_u128(0xac50_ed0d_c9a0_41f8_9ce8_35fc_9e38_299b); +pub const DATASET_DB_LAYER_PROVIDER_ID: DataProviderId = + DataProviderId::from_u128(0xac50_ed0d_c9a0_41f8_9ce8_35fc_9e38_299b); pub const DATASET_DB_ROOT_COLLECTION_ID: Uuid = Uuid::from_u128(0x5460_73b6_d535_4205_b601_9967_5c9f_6dd7); @@ -46,7 +46,7 @@ pub struct Dataset { impl Dataset { pub fn listing(&self) -> DatasetListing { DatasetListing { - id: self.id.clone(), + id: 
self.id, name: self.name.clone(), description: self.description.clone(), tags: vec![], // TODO diff --git a/services/src/error.rs b/services/src/error.rs index 0377090ff..8a5b12389 100644 --- a/services/src/error.rs +++ b/services/src/error.rs @@ -2,10 +2,8 @@ use crate::{datasets::external::netcdfcf::NetCdfCf4DProviderError, handlers::Err use crate::{layers::listing::LayerCollectionId, workflows::workflow::WorkflowId}; use actix_web::http::StatusCode; use actix_web::HttpResponse; -use geoengine_datatypes::{ - dataset::{DatasetId, LayerProviderId}, - spatial_reference::SpatialReferenceOption, -}; +use geoengine_datatypes::dataset::DatasetId; +use geoengine_datatypes::{dataset::DataProviderId, spatial_reference::SpatialReferenceOption}; use snafu::prelude::*; use strum::IntoStaticStr; use tonic::Status; @@ -143,11 +141,13 @@ pub enum Error { MissingSettingsDirectory, - DatasetIdTypeMissMatch, - UnknownDatasetId, + DataIdTypeMissMatch, + UnknownDataId, UnknownProviderId, MissingDatasetId, + UnknownDatasetId, + #[snafu(display("Permission denied for dataset with id {:?}", dataset))] DatasetPermissionDenied { dataset: DatasetId, @@ -225,10 +225,11 @@ pub enum Error { PangaeaNoTsv, GfbioMissingAbcdField, - ExpectedExternalDatasetId, - InvalidExternalDatasetId { - provider: LayerProviderId, + ExpectedExternalDataId, + InvalidExternalDataId { + provider: DataProviderId, }, + InvalidDataId, #[cfg(feature = "nature40")] Nature40UnknownRasterDbname, @@ -318,7 +319,6 @@ pub enum Error { NetCdfCf4DProvider { source: NetCdfCf4DProviderError, }, - #[cfg(feature = "ebv")] #[snafu(context(false))] EbvHandler { diff --git a/services/src/handlers/datasets.rs b/services/src/handlers/datasets.rs index ad5f2d772..599da9796 100644 --- a/services/src/handlers/datasets.rs +++ b/services/src/handlers/datasets.rs @@ -26,7 +26,7 @@ use gdal::{vector::Layer, Dataset}; use gdal::{vector::OGRFieldType, DatasetOptions}; use geoengine_datatypes::{ collections::VectorDataType, - dataset::InternalDatasetId, + dataset::DatasetId, primitives::{FeatureDataType, Measurement, VectorQueryRectangle}, spatial_reference::{SpatialReference, SpatialReferenceOption}, }; @@ -125,13 +125,13 @@ async fn list_datasets_handler( /// } /// ``` async fn get_dataset_handler( - dataset: web::Path, + dataset: web::Path, session: C::Session, ctx: web::Data, ) -> Result { let dataset = ctx .dataset_db_ref() - .load(&session, &dataset.into_inner().into()) + .load(&session, &dataset.into_inner()) .await?; Ok(web::Json(dataset)) } @@ -710,7 +710,7 @@ mod tests { use geoengine_datatypes::collections::{ GeometryCollection, MultiPointCollection, VectorDataType, }; - use geoengine_datatypes::dataset::{DatasetId, InternalDatasetId}; + use geoengine_datatypes::dataset::DatasetId; use geoengine_datatypes::primitives::{BoundingBox2D, SpatialResolution}; use geoengine_datatypes::raster::{GridShape2D, TilingSpecification}; use geoengine_datatypes::spatial_reference::SpatialReferenceOption; @@ -740,10 +740,7 @@ mod tests { bbox: None, }; - let id = DatasetId::Internal { - dataset_id: InternalDatasetId::from_str("370e99ec-9fd8-401d-828d-d67b431a8742") - .unwrap(), - }; + let id = DatasetId::from_str("370e99ec-9fd8-401d-828d-d67b431a8742")?; let ds = AddDataset { id: Some(id), name: "OgrDataset".to_string(), @@ -776,10 +773,8 @@ mod tests { .add_dataset(&SimpleSession::default(), ds.validated()?, Box::new(meta)) .await?; - let id2 = DatasetId::Internal { - dataset_id: InternalDatasetId::from_str("370e99ec-9fd8-401d-828d-d67b431a8742") - .unwrap(), - }; + let id2 
= DatasetId::from_str("370e99ec-9fd8-401d-828d-d67b431a8742")?; + let ds = AddDataset { id: Some(id2), name: "OgrDataset2".to_string(), @@ -831,10 +826,7 @@ mod tests { assert_eq!( read_body_string(res).await, json!([{ - "id": { - "type": "internal", - "datasetId": "370e99ec-9fd8-401d-828d-d67b431a8742" - }, + "id": "370e99ec-9fd8-401d-828d-d67b431a8742", "name": "OgrDataset2", "description": "My Ogr dataset2", "tags": [], @@ -870,10 +862,7 @@ mod tests { "text": null } }, { - "id": { - "type": "internal", - "datasetId": "370e99ec-9fd8-401d-828d-d67b431a8742" - }, + "id": "370e99ec-9fd8-401d-828d-d67b431a8742", "name": "OgrDataset", "description": "My Ogr dataset", "tags": [], @@ -1018,7 +1007,7 @@ mod tests { ) -> Result> { OgrSource { params: OgrSourceParameters { - dataset: dataset_id, + data: dataset_id.into(), attribute_projection: None, attribute_filters: None, }, @@ -1602,7 +1591,7 @@ mod tests { .await?; let req = actix_web::test::TestRequest::get() - .uri(&format!("/dataset/internal/{}", id.internal().unwrap())) + .uri(&format!("/dataset/internal/{}", id)) .append_header((header::CONTENT_LENGTH, 0)) .append_header((header::AUTHORIZATION, Bearer::new(session_id.to_string()))); let res = send_test_request(req, ctx).await; @@ -1614,10 +1603,7 @@ mod tests { assert_eq!( res_body, json!({ - "id": { - "type": "internal", - "datasetId": id.internal().unwrap() - }, + "id": id, "name": "OgrDataset", "description": "My Ogr dataset", "resultDescriptor": { diff --git a/services/src/handlers/ebv.rs b/services/src/handlers/ebv.rs index 1c15ee070..0cf5f4e82 100644 --- a/services/src/handlers/ebv.rs +++ b/services/src/handlers/ebv.rs @@ -8,7 +8,7 @@ use crate::datasets::external::netcdfcf::{ NetCdfOverview, OverviewGeneration, NETCDF_CF_PROVIDER_ID, }; use crate::error::Result; -use crate::layers::external::ExternalLayerProvider; +use crate::layers::external::DataProvider; use crate::layers::storage::LayerProviderDb; use crate::tasks::{Task, TaskContext, TaskManager, TaskStatusInfo}; use crate::{contexts::Context, datasets::external::netcdfcf::NetCdfCfDataProvider}; @@ -16,7 +16,7 @@ use actix_web::{ web::{self, ServiceConfig}, FromRequest, Responder, }; -use geoengine_datatypes::dataset::LayerProviderId; +use geoengine_datatypes::dataset::DataProviderId; use geoengine_datatypes::error::{BoxedResultExt, ErrorSource}; use log::{debug, warn}; use serde::{Deserialize, Serialize}; @@ -118,9 +118,9 @@ pub enum EbvError { #[snafu(display("Cannot lookup dataset with id {id}"))] CannotLookupDataset { id: usize }, #[snafu(display("Cannot find NetCdfCf provider with id {id}"))] - NoNetCdfCfProviderForId { id: LayerProviderId }, + NoNetCdfCfProviderForId { id: DataProviderId }, #[snafu(display("NetCdfCf provider with id {id} cannot list files"))] - CdfCfProviderCannotListFiles { id: LayerProviderId }, + CdfCfProviderCannotListFiles { id: DataProviderId }, #[snafu(display("Internal server error"))] Internal { source: Box }, } @@ -267,7 +267,7 @@ async fn get_dataset_metadata(base_url: &BaseUrl, id: usize) -> Result Result + Send + 'static, { - let provider: Box = ctx + let provider: Box = ctx .layer_provider_db_ref() .layer_provider(NETCDF_CF_PROVIDER_ID) .await diff --git a/services/src/handlers/gfbio.rs b/services/src/handlers/gfbio.rs index 443bc83fc..ada743a9b 100644 --- a/services/src/handlers/gfbio.rs +++ b/services/src/handlers/gfbio.rs @@ -5,7 +5,7 @@ use crate::util::config::{get_config_element, GFBio}; use actix_web::{web, FromRequest, Responder}; use futures::stream::FuturesUnordered; use 
futures::stream::StreamExt; -use geoengine_datatypes::dataset::{DatasetId, ExternalDatasetId}; +use geoengine_datatypes::dataset::{DataId, ExternalDataId, LayerId}; use geoengine_datatypes::primitives::{DateTime, VectorQueryRectangle}; use geoengine_operators::engine::{ MetaDataProvider, TypedResultDescriptor, VectorResultDescriptor, @@ -142,9 +142,9 @@ impl Basket { }; } - let id = DatasetId::External(ExternalDatasetId { + let id = DataId::External(ExternalDataId { provider_id: PANGAEA_PROVIDER_ID, - dataset_id: entry.doi, + layer_id: LayerId(entry.doi), }); let mdp = ec as &dyn MetaDataProvider< OgrSourceDataset, @@ -215,9 +215,9 @@ impl Basket { } }; - let id = DatasetId::External(ExternalDatasetId { + let id = DataId::External(ExternalDataId { provider_id: GFBIO_PROVIDER_ID, - dataset_id: sg_id.to_string(), + layer_id: LayerId(sg_id.to_string()), }); let mdp = provider @@ -282,7 +282,7 @@ impl Basket { async fn generate_loading_info( title: String, - id: DatasetId, + id: DataId, mdp: &dyn MetaDataProvider, filter: Option>, ) -> BasketEntry { @@ -333,7 +333,7 @@ enum BasketEntryStatus { #[derive(Serialize, Debug)] #[serde(rename_all = "camelCase")] struct BasketEntryLoadingDetails { - dataset_id: DatasetId, + dataset_id: DataId, source_operator: String, result_descriptor: TypedResultDescriptor, #[serde(skip_serializing_if = "Option::is_none")] @@ -435,7 +435,7 @@ mod tests { BasketInternal, TypedBasketEntry, }; use geoengine_datatypes::collections::VectorDataType; - use geoengine_datatypes::dataset::{DatasetId, ExternalDatasetId, LayerProviderId}; + use geoengine_datatypes::dataset::{DataId, DataProviderId, ExternalDataId, LayerId}; use geoengine_datatypes::spatial_reference::{SpatialReference, SpatialReferenceOption}; use geoengine_operators::engine::{TypedResultDescriptor, VectorResultDescriptor}; use geoengine_operators::source::AttributeFilter; @@ -477,9 +477,9 @@ mod tests { #[test] fn basket_entry_serialization_ok() { - let id = DatasetId::External(ExternalDatasetId { - provider_id: LayerProviderId(Uuid::default()), - dataset_id: "1".to_string(), + let id = DataId::External(ExternalDataId { + provider_id: DataProviderId(Uuid::default()), + layer_id: LayerId("1".to_string()), }); let be = BasketEntry { @@ -520,9 +520,9 @@ mod tests { #[test] fn basket_entry_serialization_ok_with_filter() { - let id = DatasetId::External(ExternalDatasetId { - provider_id: LayerProviderId(Uuid::default()), - dataset_id: "1".to_string(), + let id = DataId::External(ExternalDataId { + provider_id: DataProviderId(Uuid::default()), + layer_id: LayerId("1".to_string()), }); let be = BasketEntry { diff --git a/services/src/handlers/layers.rs b/services/src/handlers/layers.rs index cfd4594fe..d9fd72314 100644 --- a/services/src/handlers/layers.rs +++ b/services/src/handlers/layers.rs @@ -1,10 +1,10 @@ use actix_web::{web, FromRequest, Responder}; -use geoengine_datatypes::dataset::LayerProviderId; +use geoengine_datatypes::dataset::{DataProviderId, LayerId}; use crate::error::Result; use crate::layers::layer::{CollectionItem, LayerCollectionListing, ProviderLayerCollectionId}; -use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider, LayerId}; +use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider}; use crate::layers::storage::{LayerProviderDb, LayerProviderListingOptions}; use crate::util::user_input::UserInput; use crate::{contexts::Context, layers::layer::LayerCollectionListOptions}; @@ -34,8 +34,8 @@ async fn list_root_collections_handler( if options.offset == 0 && 
options.limit > 0 { providers.push(CollectionItem::Collection(LayerCollectionListing { id: ProviderLayerCollectionId { - provider: crate::datasets::storage::DATASET_DB_LAYER_PROVIDER_ID, - item: LayerCollectionId( + provider_id: crate::datasets::storage::DATASET_DB_LAYER_PROVIDER_ID, + collection_id: LayerCollectionId( crate::datasets::storage::DATASET_DB_ROOT_COLLECTION_ID.to_string(), ), }, @@ -49,8 +49,8 @@ async fn list_root_collections_handler( if options.offset <= 1 && options.limit > 1 { providers.push(CollectionItem::Collection(LayerCollectionListing { id: ProviderLayerCollectionId { - provider: crate::layers::storage::INTERNAL_PROVIDER_ID, - item: LayerCollectionId( + provider_id: crate::layers::storage::INTERNAL_PROVIDER_ID, + collection_id: LayerCollectionId( crate::layers::storage::INTERNAL_LAYER_DB_ROOT_COLLECTION_ID.to_string(), ), }, @@ -77,8 +77,8 @@ async fn list_root_collections_handler( let provider = external.layer_provider(provider_listing.id).await?; providers.push(CollectionItem::Collection(LayerCollectionListing { id: ProviderLayerCollectionId { - provider: provider_listing.id, - item: provider.root_collection_id().await?, + provider_id: provider_listing.id, + collection_id: provider.root_collection_id().await?, }, name: provider_listing.name, description: provider_listing.description, @@ -90,7 +90,7 @@ async fn list_root_collections_handler( async fn list_collection_handler( ctx: web::Data, - path: web::Path<(LayerProviderId, LayerCollectionId)>, + path: web::Path<(DataProviderId, LayerCollectionId)>, options: web::Query, ) -> Result { let (provider, item) = path.into_inner(); @@ -125,7 +125,7 @@ async fn list_collection_handler( async fn layer_handler( ctx: web::Data, - path: web::Path<(LayerProviderId, LayerId)>, + path: web::Path<(DataProviderId, LayerId)>, ) -> Result { let (provider, item) = path.into_inner(); diff --git a/services/src/handlers/wfs.rs b/services/src/handlers/wfs.rs index d6bf7917e..26b7bc37f 100644 --- a/services/src/handlers/wfs.rs +++ b/services/src/handlers/wfs.rs @@ -603,7 +603,7 @@ mod tests { use actix_web::http::header; use actix_web::{http::Method, test}; use actix_web_httpauth::headers::authorization::Bearer; - use geoengine_datatypes::dataset::DatasetId; + use geoengine_datatypes::dataset::{DataId, DatasetId}; use geoengine_datatypes::raster::{GridShape2D, TilingSpecification}; use geoengine_datatypes::test_data; use geoengine_datatypes::util::test::TestDefault; @@ -1064,13 +1064,16 @@ x;y let session_id = ctx.default_session_ref().await.id(); - let ndvi_id = - add_dataset_definition_to_datasets(&ctx, test_data!("dataset_defs/ndvi.json")).await; - let ne_10m_ports_id = add_dataset_definition_to_datasets( + let ndvi_id: DataId = + add_dataset_definition_to_datasets(&ctx, test_data!("dataset_defs/ndvi.json")) + .await + .into(); + let ne_10m_ports_id: DataId = add_dataset_definition_to_datasets( &ctx, test_data!("dataset_defs/points_with_time.json"), ) - .await; + .await + .into(); let workflow = serde_json::json!({ "type": "Vector", @@ -1087,14 +1090,14 @@ x;y "vector": { "type": "OgrSource", "params": { - "dataset": ne_10m_ports_id, + "data": ne_10m_ports_id, "attributeProjection": null } }, "rasters": [{ "type": "GdalSource", "params": { - "dataset": ndvi_id, + "data": ndvi_id, } }], } diff --git a/services/src/handlers/workflows.rs b/services/src/handlers/workflows.rs index f417e3105..7b1a15e0d 100755 --- a/services/src/handlers/workflows.rs +++ b/services/src/handlers/workflows.rs @@ -6,6 +6,7 @@ use 
crate::datasets::upload::{UploadId, UploadRootPath}; use crate::error; use crate::error::Result; use crate::handlers::Context; +use crate::layers::storage::LayerProviderDb; use crate::util::config::get_config_element; use crate::util::user_input::UserInput; use crate::util::IdResponse; @@ -13,11 +14,11 @@ use crate::workflows::registry::WorkflowRegistry; use crate::workflows::workflow::{Workflow, WorkflowId}; use actix_web::{web, FromRequest, Responder}; use futures::future::join_all; -use geoengine_datatypes::dataset::{DatasetId, InternalDatasetId}; +use geoengine_datatypes::dataset::{DataId, DatasetId}; use geoengine_datatypes::primitives::{AxisAlignedRectangle, RasterQueryRectangle}; use geoengine_datatypes::spatial_reference::SpatialReference; use geoengine_datatypes::util::Identifier; -use geoengine_operators::engine::{OperatorDatasets, TypedOperator, TypedResultDescriptor}; +use geoengine_operators::engine::{OperatorData, TypedOperator, TypedResultDescriptor}; use geoengine_operators::source::{ FileNotFoundHandling, GdalDatasetGeoTransform, GdalDatasetParameters, GdalMetaDataStatic, }; @@ -225,7 +226,7 @@ async fn get_workflow_provenance_handler( ) -> Result { let workflow = ctx.workflow_registry_ref().load(&id.into_inner()).await?; - let datasets = workflow.operator.datasets(); + let datasets = workflow.operator.data_ids(); let db = ctx.dataset_db_ref(); let providers = ctx.layer_provider_db_ref(); @@ -246,12 +247,12 @@ async fn get_workflow_provenance_handler( async fn resolve_provenance( session: &C::Session, datasets: &C::DatasetDB, - _providers: &C::LayerProviderDB, - id: &DatasetId, + providers: &C::LayerProviderDB, + id: &DataId, ) -> Result { match id { - DatasetId::Internal { dataset_id: _ } => datasets.provenance(session, id).await, - DatasetId::External(_) => todo!(), + DataId::Internal { dataset_id } => datasets.provenance(session, dataset_id).await, + DataId::External(e) => providers.layer_provider(e.provider_id).await?.provenance(id).await, } } @@ -316,7 +317,7 @@ struct RasterDatasetFromWorkflowResult { /// ```text /// { /// "upload": "3086f494-d5a4-4b51-a14b-3b29f8bf7bb0", -/// "dataset": { +/// "data": { /// "type": "internal", /// "datasetId": "94230f0b-4e8a-4cba-9adc-3ace837fe5d4" /// } @@ -401,7 +402,7 @@ async fn create_dataset( ctx: &C, session: ::Session, ) -> Result { - let dataset_id = InternalDatasetId::new().into(); + let dataset_id = DatasetId::new(); let dataset_definition = DatasetDefinition { properties: AddDataset { id: Some(dataset_id), @@ -890,7 +891,7 @@ mod tests { operator: TypedOperator::Raster( GdalSource { params: GdalSourceParameters { - dataset: dataset.clone(), + data: dataset.into(), }, } .boxed(), @@ -915,9 +916,9 @@ mod tests { assert_eq!( serde_json::from_str::(&res_body).unwrap(), serde_json::json!([{ - "dataset": { + "data": { "type": "internal", - "datasetId": dataset.internal().unwrap().to_string() + "datasetId": dataset.to_string() }, "provenance": { "citation": "Sample Citation", @@ -950,7 +951,7 @@ mod tests { operator: TypedOperator::Raster( GdalSource { params: GdalSourceParameters { - dataset: dataset.clone(), + data: dataset.into(), }, } .boxed(), @@ -1007,7 +1008,7 @@ mod tests { // query the newly created dataset let op = GdalSource { params: GdalSourceParameters { - dataset: response.dataset.clone(), + data: response.dataset.into(), }, } .boxed(); @@ -1069,7 +1070,7 @@ mod tests { "source": { "type": "GdalSource", "params": { - "dataset": { + "data": { "type": "internal", "datasetId": "36574dc3-560a-4b09-9d22-d5945f2b8093" 
 }
diff --git a/services/src/layers/external.rs b/services/src/layers/external.rs
index 2739defa5..3a213706f 100644
--- a/services/src/layers/external.rs
+++ b/services/src/layers/external.rs
@@ -1,5 +1,5 @@
 use async_trait::async_trait;
-use geoengine_datatypes::dataset::{DatasetId, LayerProviderId};
+use geoengine_datatypes::dataset::{DataId, DataProviderId};
 use geoengine_datatypes::primitives::{RasterQueryRectangle, VectorQueryRectangle};
 use geoengine_operators::engine::{
     MetaDataProvider, RasterResultDescriptor, VectorResultDescriptor,
@@ -14,11 +14,11 @@ use super::listing::LayerCollectionProvider;

 #[typetag::serde(tag = "type")]
 #[async_trait]
-pub trait ExternalLayerProviderDefinition:
-    CloneableLayerProviderDefinition + Send + Sync + std::fmt::Debug
+pub trait DataProviderDefinition:
+    CloneableDataProviderDefinition + Send + Sync + std::fmt::Debug
 {
     /// create the actual provider for data listing and access
-    async fn initialize(self: Box<Self>) -> Result<Box<dyn ExternalLayerProvider>>;
+    async fn initialize(self: Box<Self>) -> Result<Box<dyn DataProvider>>;

     /// the type of the provider
     fn type_name(&self) -> String;
@@ -27,24 +27,24 @@ pub trait ExternalLayerProviderDefinition:
     fn name(&self) -> String;

     /// id of the provider
-    fn id(&self) -> LayerProviderId;
+    fn id(&self) -> DataProviderId;
 }

-pub trait CloneableLayerProviderDefinition {
-    fn clone_boxed_provider(&self) -> Box<dyn ExternalLayerProviderDefinition>;
+pub trait CloneableDataProviderDefinition {
+    fn clone_boxed_provider(&self) -> Box<dyn DataProviderDefinition>;
 }

-impl<T> CloneableLayerProviderDefinition for T
+impl<T> CloneableDataProviderDefinition for T
 where
-    T: 'static + ExternalLayerProviderDefinition + Clone,
+    T: 'static + DataProviderDefinition + Clone,
 {
-    fn clone_boxed_provider(&self) -> Box<dyn ExternalLayerProviderDefinition> {
+    fn clone_boxed_provider(&self) -> Box<dyn DataProviderDefinition> {
         Box::new(self.clone())
     }
 }

-impl Clone for Box<dyn ExternalLayerProviderDefinition> {
-    fn clone(&self) -> Box<dyn ExternalLayerProviderDefinition> {
+impl Clone for Box<dyn DataProviderDefinition> {
+    fn clone(&self) -> Box<dyn DataProviderDefinition> {
         self.clone_boxed_provider()
     }
 }
@@ -56,7 +56,7 @@ impl Clone for Box<dyn ExternalLayerProviderDefinition> {
 // checks that the necessary information is present and how they are incorporated in
 // the requests.
 #[async_trait]
-pub trait ExternalLayerProvider: LayerCollectionProvider
+pub trait DataProvider: LayerCollectionProvider
     + MetaDataProvider<MockDatasetDataSourceLoadingInfo, VectorResultDescriptor, VectorQueryRectangle>
     + MetaDataProvider<OgrSourceDataset, VectorResultDescriptor, VectorQueryRectangle>
     + MetaDataProvider<GdalLoadingInfo, RasterResultDescriptor, RasterQueryRectangle>
@@ -64,9 +64,8 @@ pub trait ExternalLayerProvider: LayerCollectionProvider
     + Sync
     + std::fmt::Debug
 {
-    // TODO: datasetId should be named something else because there are not external datasets anymore, only external layers
-    // TODO: rename trait ProvenanceProvider or smth?
-    async fn provenance(&self, dataset: &DatasetId) -> Result<ProvenanceOutput>;
+    // TODO: unify provenance method for internal and external provider as a separate trait. We need to figure out session handling first, though.
+    async fn provenance(&self, id: &DataId) -> Result<ProvenanceOutput>;

     /// Propagates `Any`-casting to the underlying provider
     fn as_any(&self) -> &dyn std::any::Any;
diff --git a/services/src/layers/layer.rs b/services/src/layers/layer.rs
index 58e42d3ad..d5ab98fcd 100644
--- a/services/src/layers/layer.rs
+++ b/services/src/layers/layer.rs
@@ -1,23 +1,25 @@
 use serde::{Deserialize, Serialize};

-use geoengine_datatypes::dataset::LayerProviderId;
+use geoengine_datatypes::dataset::{DataProviderId, LayerId};

 use crate::{
     error::Result, projects::Symbology, util::user_input::UserInput, workflows::workflow::Workflow,
 };

-use super::listing::{LayerCollectionId, LayerId};
+use super::listing::LayerCollectionId;

 #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
+#[serde(rename_all = "camelCase")]
 pub struct ProviderLayerId {
-    pub provider: LayerProviderId,
-    pub item: LayerId,
+    pub provider_id: DataProviderId,
+    pub layer_id: LayerId,
 }

 #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
+#[serde(rename_all = "camelCase")]
 pub struct ProviderLayerCollectionId {
-    pub provider: LayerProviderId,
-    pub item: LayerCollectionId,
+    pub provider_id: DataProviderId,
+    pub collection_id: LayerCollectionId,
 }

 #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
diff --git a/services/src/layers/listing.rs b/services/src/layers/listing.rs
index 8a4394f9c..434c4feac 100644
--- a/services/src/layers/listing.rs
+++ b/services/src/layers/listing.rs
@@ -1,6 +1,7 @@
 use std::fmt;

 use async_trait::async_trait;
+use geoengine_datatypes::dataset::LayerId;

 use crate::error::Result;
 use crate::util::user_input::Validated;
@@ -9,15 +10,6 @@ use super::layer::{CollectionItem, Layer, LayerCollectionListOptions};

 use serde::{Deserialize, Serialize};

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Hash)]
-pub struct LayerId(pub String);
-
-impl fmt::Display for LayerId {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{}", self.0)
-    }
-}
-
 #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Hash)]
 pub struct LayerCollectionId(pub String);

diff --git a/services/src/layers/storage.rs b/services/src/layers/storage.rs
index c649b2035..ac6d144b6 100644
--- a/services/src/layers/storage.rs
+++ b/services/src/layers/storage.rs
@@ -2,17 +2,17 @@ use std::cmp::Ordering;
 use std::collections::HashMap;
 use std::sync::Arc;

-use super::external::{ExternalLayerProvider, ExternalLayerProviderDefinition};
+use super::external::{DataProvider, DataProviderDefinition};
 use super::layer::{
     AddLayer, AddLayerCollection, CollectionItem, Layer, LayerCollectionListOptions,
     LayerCollectionListing, LayerListing, ProviderLayerCollectionId, ProviderLayerId,
 };
-use super::listing::{LayerCollectionId, LayerCollectionProvider, LayerId};
+use super::listing::{LayerCollectionId, LayerCollectionProvider};
 use crate::error::{Error, Result};
 use crate::util::user_input::UserInput;
 use crate::{contexts::Db, util::user_input::Validated};
 use async_trait::async_trait;
-use geoengine_datatypes::dataset::LayerProviderId;
+use geoengine_datatypes::dataset::{DataProviderId, LayerId};
 use serde::{Deserialize, Serialize};
 use snafu::Snafu;
 use tokio::sync::RwLock;
@@ -26,8 +26,8 @@ pub enum LayerDbError {
     NoLayerForGivenId { id: LayerId },
 }

-pub const INTERNAL_PROVIDER_ID: LayerProviderId =
-    LayerProviderId::from_u128(0xce5e_84db_cbf9_48a2_9a32_d4b7_cc56_ea74);
+pub const INTERNAL_PROVIDER_ID: DataProviderId =
+    DataProviderId::from_u128(0xce5e_84db_cbf9_48a2_9a32_d4b7_cc56_ea74);

 pub const
INTERNAL_LAYER_DB_ROOT_COLLECTION_ID: Uuid = Uuid::from_u128(0x0510_2bb3_a855_4a37_8a8a_3002_6a91_fef1); @@ -87,7 +87,7 @@ pub trait LayerDb: LayerCollectionProvider + Send + Sync { #[derive(Debug, Clone, PartialEq, Eq)] pub struct LayerProviderListing { - pub id: LayerProviderId, + pub id: DataProviderId, pub name: String, pub description: String, } @@ -108,15 +108,15 @@ impl UserInput for LayerProviderListingOptions { pub trait LayerProviderDb: Send + Sync + 'static { async fn add_layer_provider( &self, - provider: Box, - ) -> Result; + provider: Box, + ) -> Result; async fn list_layer_providers( &self, options: Validated, ) -> Result>; - async fn layer_provider(&self, id: LayerProviderId) -> Result>; + async fn layer_provider(&self, id: DataProviderId) -> Result>; // TODO: share/remove/update layer providers } @@ -294,8 +294,8 @@ impl LayerCollectionProvider for HashMapLayerDb { .expect("collections reference existing collections as children"); CollectionItem::Collection(LayerCollectionListing { id: ProviderLayerCollectionId { - provider: INTERNAL_PROVIDER_ID, - item: c.clone(), + provider_id: INTERNAL_PROVIDER_ID, + collection_id: c.clone(), }, name: collection.name.clone(), description: collection.description.clone(), @@ -317,8 +317,8 @@ impl LayerCollectionProvider for HashMapLayerDb { CollectionItem::Layer(LayerListing { id: ProviderLayerId { - provider: INTERNAL_PROVIDER_ID, - item: l.clone(), + provider_id: INTERNAL_PROVIDER_ID, + layer_id: l.clone(), }, name: layer.name.clone(), description: layer.description.clone(), @@ -357,8 +357,8 @@ impl LayerCollectionProvider for HashMapLayerDb { Ok(Layer { id: ProviderLayerId { - provider: INTERNAL_PROVIDER_ID, - item: id.clone(), + provider_id: INTERNAL_PROVIDER_ID, + layer_id: id.clone(), }, name: layer.name.clone(), description: layer.description.clone(), @@ -370,15 +370,15 @@ impl LayerCollectionProvider for HashMapLayerDb { #[derive(Default)] pub struct HashMapLayerProviderDb { - external_providers: Db>>, + external_providers: Db>>, } #[async_trait] impl LayerProviderDb for HashMapLayerProviderDb { async fn add_layer_provider( &self, - provider: Box, - ) -> Result { + provider: Box, + ) -> Result { let id = provider.id(); self.external_providers.write().await.insert(id, provider); @@ -414,7 +414,7 @@ impl LayerProviderDb for HashMapLayerProviderDb { .collect()) } - async fn layer_provider(&self, id: LayerProviderId) -> Result> { + async fn layer_provider(&self, id: DataProviderId) -> Result> { self.external_providers .read() .await @@ -496,16 +496,16 @@ mod tests { vec![ CollectionItem::Collection(LayerCollectionListing { id: ProviderLayerCollectionId { - provider: INTERNAL_PROVIDER_ID, - item: empty_c_id, + provider_id: INTERNAL_PROVIDER_ID, + collection_id: empty_c_id, }, name: "empty collection".to_string(), description: "description".to_string() }), CollectionItem::Layer(LayerListing { id: ProviderLayerId { - provider: INTERNAL_PROVIDER_ID, - item: l_id, + provider_id: INTERNAL_PROVIDER_ID, + layer_id: l_id, }, name: "layer".to_string(), description: "description".to_string(), diff --git a/services/src/pro/contexts/postgres.rs b/services/src/pro/contexts/postgres.rs index 5fa6a2fad..96ccab5b4 100644 --- a/services/src/pro/contexts/postgres.rs +++ b/services/src/pro/contexts/postgres.rs @@ -636,9 +636,7 @@ mod tests { use bb8_postgres::tokio_postgres::{self, NoTls}; use futures::Future; use geoengine_datatypes::collections::VectorDataType; - use geoengine_datatypes::dataset::{ - DatasetId, ExternalDatasetId, InternalDatasetId, 
LayerProviderId, - }; + use geoengine_datatypes::dataset::{DataProviderId, DatasetId}; use geoengine_datatypes::primitives::{ BoundingBox2D, Coordinate2D, FeatureDataType, Measurement, SpatialResolution, TimeInterval, VectorQueryRectangle, @@ -1067,10 +1065,7 @@ mod tests { #[tokio::test(flavor = "multi_thread", worker_threads = 1)] async fn it_persists_datasets() { with_temp_context(|ctx, _| async move { - let dataset_id = DatasetId::Internal { - dataset_id: InternalDatasetId::from_str("2e8af98d-3b98-4e2c-a35b-e487bffad7b6") - .unwrap(), - }; + let dataset_id = DatasetId::from_str("2e8af98d-3b98-4e2c-a35b-e487bffad7b6").unwrap(); let loading_info = OgrSourceDataset { file_name: PathBuf::from("test.csv"), @@ -1133,7 +1128,7 @@ mod tests { db.add_dataset( &session, AddDataset { - id: Some(dataset_id.clone()), + id: Some(dataset_id), name: "Ogr Test".to_owned(), description: "desc".to_owned(), source_operator: "OgrSource".to_owned(), @@ -1171,7 +1166,7 @@ mod tests { assert_eq!( datasets[0], DatasetListing { - id: dataset_id.clone(), + id: dataset_id, name: "Ogr Test".to_owned(), description: "desc".to_owned(), source_operator: "OgrSource".to_owned(), @@ -1200,7 +1195,7 @@ mod tests { assert_eq!( provenance, ProvenanceOutput { - dataset: dataset_id.clone(), + data: dataset_id.into(), provenance: Some(Provenance { citation: "citation".to_owned(), license: "license".to_owned(), @@ -1209,8 +1204,10 @@ mod tests { } ); - let meta_data: Box> = - db.session_meta_data(&session, &dataset_id).await.unwrap(); + let meta_data: Box> = db + .session_meta_data(&session, &dataset_id.into()) + .await + .unwrap(); assert_eq!( meta_data @@ -1262,7 +1259,7 @@ mod tests { let db = ctx.layer_provider_db_ref(); let provider_id = - LayerProviderId::from_str("7b20c8d7-d754-4f8f-ad44-dddd25df22d2").unwrap(); + DataProviderId::from_str("7b20c8d7-d754-4f8f-ad44-dddd25df22d2").unwrap(); let loading_info = OgrSourceDataset { file_name: PathBuf::from("test.csv"), @@ -1322,10 +1319,7 @@ mod tests { id: provider_id, datasets: vec![DatasetDefinition { properties: AddDataset { - id: Some(DatasetId::External(ExternalDatasetId { - provider_id, - dataset_id: "test".to_owned(), - })), + id: Some(DatasetId::new()), name: "test".to_owned(), description: "desc".to_owned(), source_operator: "MockPointSource".to_owned(), @@ -1597,7 +1591,7 @@ mod tests { &session1, DatasetPermission { role: session2.user.id.into(), - dataset: id.clone(), + dataset: id, permission: Permission::Read, }, ) @@ -1669,7 +1663,7 @@ mod tests { &session1, DatasetPermission { role: session2.user.id.into(), - dataset: id.clone(), + dataset: id, permission: Permission::Read, }, ) @@ -1734,13 +1728,19 @@ mod tests { let meta: Result< Box>, - > = ctx.dataset_db_ref().session_meta_data(&session1, &id).await; + > = ctx + .dataset_db_ref() + .session_meta_data(&session1, &id.into()) + .await; assert!(meta.is_ok()); let meta: Result< Box>, - > = ctx.dataset_db_ref().session_meta_data(&session2, &id).await; + > = ctx + .dataset_db_ref() + .session_meta_data(&session2, &id.into()) + .await; assert!(meta.is_err()); @@ -1749,7 +1749,7 @@ mod tests { &session1, DatasetPermission { role: session2.user.id.into(), - dataset: id.clone(), + dataset: id, permission: Permission::Read, }, ) @@ -1758,7 +1758,10 @@ mod tests { let meta: Result< Box>, - > = ctx.dataset_db_ref().session_meta_data(&session2, &id).await; + > = ctx + .dataset_db_ref() + .session_meta_data(&session2, &id.into()) + .await; assert!(meta.is_ok()); }) @@ -1840,8 +1843,8 @@ mod tests { 
layer_db.get_layer(&layer1).await.unwrap(), crate::layers::layer::Layer { id: ProviderLayerId { - provider: INTERNAL_PROVIDER_ID, - item: layer1.clone(), + provider_id: INTERNAL_PROVIDER_ID, + layer_id: layer1.clone(), }, name: "Layer1".to_string(), description: "Layer 1".to_string(), @@ -1914,16 +1917,16 @@ mod tests { vec![ CollectionItem::Collection(LayerCollectionListing { id: ProviderLayerCollectionId { - provider: INTERNAL_PROVIDER_ID, - item: collection1.clone(), + provider_id: INTERNAL_PROVIDER_ID, + collection_id: collection1.clone(), }, name: "Collection1".to_string(), description: "Collection 1".to_string(), }), CollectionItem::Layer(LayerListing { id: ProviderLayerId { - provider: INTERNAL_PROVIDER_ID, - item: layer1, + provider_id: INTERNAL_PROVIDER_ID, + layer_id: layer1, }, name: "Layer1".to_string(), description: "Layer 1".to_string(), @@ -1949,16 +1952,16 @@ mod tests { vec![ CollectionItem::Collection(LayerCollectionListing { id: ProviderLayerCollectionId { - provider: INTERNAL_PROVIDER_ID, - item: collection2, + provider_id: INTERNAL_PROVIDER_ID, + collection_id: collection2, }, name: "Collection2".to_string(), description: "Collection 2".to_string(), }), CollectionItem::Layer(LayerListing { id: ProviderLayerId { - provider: INTERNAL_PROVIDER_ID, - item: layer2, + provider_id: INTERNAL_PROVIDER_ID, + layer_id: layer2, }, name: "Layer2".to_string(), description: "Layer 2".to_string(), diff --git a/services/src/pro/datasets/add_from_directory.rs b/services/src/pro/datasets/add_from_directory.rs index 06cbcd239..2ea375785 100644 --- a/services/src/pro/datasets/add_from_directory.rs +++ b/services/src/pro/datasets/add_from_directory.rs @@ -44,7 +44,7 @@ pub async fn add_datasets_from_directory + UpdateDatas system_session, DatasetPermission { role: Role::user_role_id(), - dataset: dataset_id.clone(), + dataset: dataset_id, permission: Permission::Read, }, ) @@ -55,7 +55,7 @@ pub async fn add_datasets_from_directory + UpdateDatas system_session, DatasetPermission { role: Role::anonymous_role_id(), - dataset: dataset_id.clone(), + dataset: dataset_id, permission: Permission::Read, }, ) diff --git a/services/src/pro/datasets/external/sentinel_s2_l2a_cogs.rs b/services/src/pro/datasets/external/sentinel_s2_l2a_cogs.rs index ad49878a9..5cc59c6fc 100644 --- a/services/src/pro/datasets/external/sentinel_s2_l2a_cogs.rs +++ b/services/src/pro/datasets/external/sentinel_s2_l2a_cogs.rs @@ -1,17 +1,18 @@ -use crate::datasets::listing::{DatasetListing, ProvenanceOutput}; +use crate::datasets::listing::ProvenanceOutput; use crate::error::{self, Error, Result}; -use crate::layers::external::{ExternalLayerProvider, ExternalLayerProviderDefinition}; +use crate::layers::external::{DataProvider, DataProviderDefinition}; use crate::layers::layer::{ CollectionItem, Layer, LayerCollectionListOptions, LayerListing, ProviderLayerId, }; -use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider, LayerId}; +use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider}; use crate::projects::{RasterSymbology, Symbology}; use crate::stac::{Feature as StacFeature, FeatureCollection as StacCollection, StacAsset}; +use crate::util::operators::source_operator_from_dataset; use crate::util::retry::retry; use crate::util::user_input::Validated; use crate::workflows::workflow::Workflow; use async_trait::async_trait; -use geoengine_datatypes::dataset::{DatasetId, ExternalDatasetId, LayerProviderId}; +use geoengine_datatypes::dataset::{DataId, DataProviderId, ExternalDataId, LayerId}; 
use geoengine_datatypes::operations::image::{Colorizer, RgbaColor}; use geoengine_datatypes::operations::reproject::{ CoordinateProjection, CoordinateProjector, ReprojectClipped, @@ -23,8 +24,8 @@ use geoengine_datatypes::primitives::{ use geoengine_datatypes::raster::RasterDataType; use geoengine_datatypes::spatial_reference::{SpatialReference, SpatialReferenceAuthority}; use geoengine_operators::engine::{ - MetaData, MetaDataProvider, RasterOperator, RasterResultDescriptor, TypedOperator, - VectorResultDescriptor, + MetaData, MetaDataProvider, OperatorName, RasterOperator, RasterResultDescriptor, + TypedOperator, VectorResultDescriptor, }; use geoengine_operators::mock::MockDatasetDataSourceLoadingInfo; use geoengine_operators::source::{ @@ -44,7 +45,7 @@ use std::path::PathBuf; #[serde(rename_all = "camelCase")] pub struct SentinelS2L2ACogsProviderDefinition { name: String, - id: LayerProviderId, + id: DataProviderId, api_url: String, bands: Vec, zones: Vec, @@ -73,8 +74,8 @@ impl Default for StacApiRetries { #[typetag::serde] #[async_trait] -impl ExternalLayerProviderDefinition for SentinelS2L2ACogsProviderDefinition { - async fn initialize(self: Box) -> crate::error::Result> { +impl DataProviderDefinition for SentinelS2L2ACogsProviderDefinition { + async fn initialize(self: Box) -> crate::error::Result> { Ok(Box::new(SentinelS2L2aCogsDataProvider::new( self.id, self.api_url, @@ -92,7 +93,7 @@ impl ExternalLayerProviderDefinition for SentinelS2L2ACogsProviderDefinition { self.name.clone() } - fn id(&self) -> LayerProviderId { + fn id(&self) -> DataProviderId { self.id } } @@ -115,23 +116,23 @@ pub struct Zone { pub struct SentinelDataset { band: Band, zone: Zone, - listing: DatasetListing, + listing: Layer, } #[derive(Debug)] pub struct SentinelS2L2aCogsDataProvider { - id: LayerProviderId, + id: DataProviderId, api_url: String, - datasets: HashMap, + datasets: HashMap, stac_api_retries: StacApiRetries, } impl SentinelS2L2aCogsDataProvider { pub fn new( - id: LayerProviderId, + id: DataProviderId, api_url: String, bands: &[Band], zones: &[Zone], @@ -146,38 +147,32 @@ impl SentinelS2L2aCogsDataProvider { } fn create_datasets( - id: &LayerProviderId, + id: &DataProviderId, bands: &[Band], zones: &[Zone], - ) -> HashMap { + ) -> HashMap { zones .iter() .flat_map(|zone| { bands.iter().map(move |band| { - let dataset_id: DatasetId = ExternalDatasetId { - provider_id: *id, - dataset_id: format!("{}:{}", zone.name, band.name), - } - .into(); - let listing = DatasetListing { - id: dataset_id.clone(), + let layer_id = LayerId(format!("{}:{}", zone.name, band.name)); + let listing = Layer { + id: ProviderLayerId { + provider_id: *id, + layer_id: layer_id.clone(), + }, name: format!("Sentinel S2 L2A COGS {}:{}", zone.name, band.name), description: "".to_owned(), - tags: vec![], - source_operator: "GdalSource".to_owned(), - result_descriptor: RasterResultDescriptor { - data_type: band.data_type, - spatial_reference: SpatialReference::new( - SpatialReferenceAuthority::Epsg, - zone.epsg, + workflow: Workflow { + operator: source_operator_from_dataset( + GdalSource::TYPE_NAME, + &DataId::External(ExternalDataId { + provider_id: *id, + layer_id: layer_id.clone(), + }), ) - .into(), - measurement: Measurement::Unitless, // TODO: add measurement - no_data_value: band.no_data_value, - time: None, // TODO: determine time - bbox: None, // TODO: determine bbox - } - .into(), + .expect("gdal source is a valid operator"), + }, symbology: Some(Symbology::Raster(RasterSymbology { opacity: 1.0, colorizer: 
Colorizer::linear_gradient( @@ -202,7 +197,7 @@ impl SentinelS2L2aCogsDataProvider { listing, }; - (dataset_id, dataset) + (layer_id, dataset) }) }) .collect() @@ -210,10 +205,10 @@ impl SentinelS2L2aCogsDataProvider { } #[async_trait] -impl ExternalLayerProvider for SentinelS2L2aCogsDataProvider { - async fn provenance(&self, dataset: &DatasetId) -> Result { +impl DataProvider for SentinelS2L2aCogsDataProvider { + async fn provenance(&self, id: &DataId) -> Result { Ok(ProvenanceOutput { - dataset: dataset.clone(), + data: id.clone(), provenance: None, // TODO }) } @@ -242,12 +237,8 @@ impl LayerCollectionProvider for SentinelS2L2aCogsDataProvider { .datasets .values() .map(|d| { - let id = d.listing.id.external().ok_or(Error::InvalidDatasetId)?; Ok(CollectionItem::Layer(LayerListing { - id: ProviderLayerId { - provider: id.provider_id, - item: LayerId(id.dataset_id), - }, + id: d.listing.id.clone(), name: d.listing.name.clone(), description: d.listing.description.clone(), })) @@ -262,20 +253,12 @@ impl LayerCollectionProvider for SentinelS2L2aCogsDataProvider { } async fn get_layer(&self, id: &LayerId) -> Result { - let dataset_id = DatasetId::External(ExternalDatasetId { - provider_id: self.id, - dataset_id: id.0.clone(), - }); - - let dataset = self - .datasets - .get(&dataset_id) - .ok_or(Error::UnknownDatasetId)?; + let dataset = self.datasets.get(id).ok_or(Error::UnknownDataId)?; Ok(Layer { id: ProviderLayerId { - provider: self.id, - item: id.clone(), + provider_id: self.id, + layer_id: id.clone(), }, name: dataset.listing.name.clone(), description: dataset.listing.description.clone(), @@ -283,7 +266,10 @@ impl LayerCollectionProvider for SentinelS2L2aCogsDataProvider { operator: TypedOperator::Raster( GdalSource { params: GdalSourceParameters { - dataset: dataset_id, + data: DataId::External(ExternalDataId { + provider_id: self.id, + layer_id: id.clone(), + }), }, } .boxed(), @@ -596,15 +582,21 @@ impl MetaDataProvider Result< Box>, geoengine_operators::error::Error, > { let dataset = self .datasets - .get(dataset) - .ok_or(geoengine_operators::error::Error::UnknownDatasetId)?; + .get( + &id.external() + .ok_or(geoengine_operators::error::Error::LoadingInfo { + source: Box::new(error::Error::DataIdTypeMissMatch), + })? 
+ .layer_id, + ) + .ok_or(geoengine_operators::error::Error::UnknownDataId)?; Ok(Box::new(SentinelS2L2aCogsMetaData { api_url: self.api_url.clone(), @@ -622,7 +614,7 @@ impl { async fn meta_data( &self, - _dataset: &DatasetId, + _id: &DataId, ) -> Result< Box< dyn MetaData< @@ -643,7 +635,7 @@ impl MetaDataProvider Result< Box>, geoengine_operators::error::Error, @@ -679,21 +671,20 @@ mod tests { async fn loading_info() -> Result<()> { // TODO: mock STAC endpoint - let def: Box = - serde_json::from_reader(BufReader::new(File::open(test_data!( - "provider_defs/pro/sentinel_s2_l2a_cogs.json" - ))?))?; + let def: Box = serde_json::from_reader(BufReader::new( + File::open(test_data!("provider_defs/pro/sentinel_s2_l2a_cogs.json"))?, + ))?; let provider = def.initialize().await?; let meta: Box> = provider .meta_data( - &ExternalDatasetId { - provider_id: LayerProviderId::from_str( + &ExternalDataId { + provider_id: DataProviderId::from_str( "5779494c-f3a2-48b3-8a2d-5fbba8c5b6c5", )?, - dataset_id: "UTM32N:B01".to_owned(), + layer_id: LayerId("UTM32N:B01".to_owned()), } .into(), ) @@ -751,30 +742,29 @@ mod tests { let mut exe = MockExecutionContext::test_default(); - let def: Box = - serde_json::from_reader(BufReader::new(File::open(test_data!( - "provider_defs/pro/sentinel_s2_l2a_cogs.json" - ))?))?; + let def: Box = serde_json::from_reader(BufReader::new( + File::open(test_data!("provider_defs/pro/sentinel_s2_l2a_cogs.json"))?, + ))?; let provider = def.initialize().await?; let meta: Box> = provider .meta_data( - &ExternalDatasetId { - provider_id: LayerProviderId::from_str( + &ExternalDataId { + provider_id: DataProviderId::from_str( "5779494c-f3a2-48b3-8a2d-5fbba8c5b6c5", )?, - dataset_id: "UTM32N:B01".to_owned(), + layer_id: LayerId("UTM32N:B01".to_owned()), } .into(), ) .await?; exe.add_meta_data( - ExternalDatasetId { - provider_id: LayerProviderId::from_str("5779494c-f3a2-48b3-8a2d-5fbba8c5b6c5")?, - dataset_id: "UTM32N:B01".to_owned(), + ExternalDataId { + provider_id: DataProviderId::from_str("5779494c-f3a2-48b3-8a2d-5fbba8c5b6c5")?, + layer_id: LayerId("UTM32N:B01".to_owned()), } .into(), meta, @@ -782,9 +772,9 @@ mod tests { let op = GdalSource { params: GdalSourceParameters { - dataset: ExternalDatasetId { - provider_id: LayerProviderId::from_str("5779494c-f3a2-48b3-8a2d-5fbba8c5b6c5")?, - dataset_id: "UTM32N:B01".to_owned(), + data: ExternalDataId { + provider_id: DataProviderId::from_str("5779494c-f3a2-48b3-8a2d-5fbba8c5b6c5")?, + layer_id: LayerId("UTM32N:B01".to_owned()), } .into(), }, @@ -861,9 +851,9 @@ mod tests { ]), ); - let provider_id: LayerProviderId = "5779494c-f3a2-48b3-8a2d-5fbba8c5b6c5".parse().unwrap(); + let provider_id: DataProviderId = "5779494c-f3a2-48b3-8a2d-5fbba8c5b6c5".parse().unwrap(); - let provider_def: Box = + let provider_def: Box = Box::new(SentinelS2L2ACogsProviderDefinition { name: "Element 84 AWS STAC".into(), id: provider_id, @@ -885,9 +875,9 @@ mod tests { let meta: Box> = provider .meta_data( - &ExternalDatasetId { + &ExternalDataId { provider_id, - dataset_id: "UTM36S:B04".to_owned(), + layer_id: LayerId("UTM36S:B04".to_owned()), } .into(), ) diff --git a/services/src/pro/datasets/in_memory.rs b/services/src/pro/datasets/in_memory.rs index 541929b79..28e43ffd4 100644 --- a/services/src/pro/datasets/in_memory.rs +++ b/services/src/pro/datasets/in_memory.rs @@ -13,16 +13,17 @@ use crate::error::Result; use crate::layers::layer::{ CollectionItem, Layer, LayerCollectionListOptions, LayerListing, ProviderLayerId, }; -use 
crate::layers::listing::{LayerCollectionId, LayerCollectionProvider, LayerId}; +use crate::layers::listing::{LayerCollectionId, LayerCollectionProvider}; use crate::pro::datasets::Permission; use crate::pro::users::{UserId, UserSession}; use crate::util::operators::source_operator_from_dataset; use crate::util::user_input::Validated; use crate::workflows::workflow::Workflow; use async_trait::async_trait; +use geoengine_datatypes::dataset::LayerId; use geoengine_datatypes::primitives::{RasterQueryRectangle, VectorQueryRectangle}; use geoengine_datatypes::{ - dataset::{DatasetId, InternalDatasetId}, + dataset::{DataId, DatasetId}, util::Identifier, }; use geoengine_operators::engine::{ @@ -46,11 +47,11 @@ pub struct ProHashMapDatasetDbBackend { datasets: HashMap, dataset_permissions: Vec, ogr_datasets: HashMap< - InternalDatasetId, + DatasetId, StaticMetaData, >, mock_datasets: HashMap< - InternalDatasetId, + DatasetId, StaticMetaData< MockDatasetDataSourceLoadingInfo, VectorResultDescriptor, @@ -58,7 +59,7 @@ pub struct ProHashMapDatasetDbBackend { >, >, gdal_datasets: HashMap< - InternalDatasetId, + DatasetId, Box>, >, uploads: HashMap>, @@ -73,8 +74,7 @@ impl DatasetDb for ProHashMapDatasetDb {} #[async_trait] pub trait ProHashMapStorable: Send + Sync { - async fn store(&self, id: InternalDatasetId, db: &ProHashMapDatasetDb) - -> TypedResultDescriptor; + async fn store(&self, id: DatasetId, db: &ProHashMapDatasetDb) -> TypedResultDescriptor; } impl DatasetStorer for ProHashMapDatasetDb { @@ -83,11 +83,7 @@ impl DatasetStorer for ProHashMapDatasetDb { #[async_trait] impl ProHashMapStorable for MetaDataDefinition { - async fn store( - &self, - id: InternalDatasetId, - db: &ProHashMapDatasetDb, - ) -> TypedResultDescriptor { + async fn store(&self, id: DatasetId, db: &ProHashMapDatasetDb) -> TypedResultDescriptor { match self { MetaDataDefinition::MockMetaData(d) => d.store(id, db).await, MetaDataDefinition::OgrMetaData(d) => d.store(id, db).await, @@ -103,11 +99,7 @@ impl ProHashMapStorable for MetaDataDefinition { impl ProHashMapStorable for StaticMetaData { - async fn store( - &self, - id: InternalDatasetId, - db: &ProHashMapDatasetDb, - ) -> TypedResultDescriptor { + async fn store(&self, id: DatasetId, db: &ProHashMapDatasetDb) -> TypedResultDescriptor { db.backend .write() .await @@ -125,11 +117,7 @@ impl ProHashMapStorable VectorQueryRectangle, > { - async fn store( - &self, - id: InternalDatasetId, - db: &ProHashMapDatasetDb, - ) -> TypedResultDescriptor { + async fn store(&self, id: DatasetId, db: &ProHashMapDatasetDb) -> TypedResultDescriptor { db.backend .write() .await @@ -141,11 +129,7 @@ impl ProHashMapStorable #[async_trait] impl ProHashMapStorable for GdalMetaDataRegular { - async fn store( - &self, - id: InternalDatasetId, - db: &ProHashMapDatasetDb, - ) -> TypedResultDescriptor { + async fn store(&self, id: DatasetId, db: &ProHashMapDatasetDb) -> TypedResultDescriptor { db.backend .write() .await @@ -157,11 +141,7 @@ impl ProHashMapStorable for GdalMetaDataRegular { #[async_trait] impl ProHashMapStorable for GdalMetaDataStatic { - async fn store( - &self, - id: InternalDatasetId, - db: &ProHashMapDatasetDb, - ) -> TypedResultDescriptor { + async fn store(&self, id: DatasetId, db: &ProHashMapDatasetDb) -> TypedResultDescriptor { db.backend .write() .await @@ -173,11 +153,7 @@ impl ProHashMapStorable for GdalMetaDataStatic { #[async_trait] impl ProHashMapStorable for GdalMetadataNetCdfCf { - async fn store( - &self, - id: InternalDatasetId, - db: &ProHashMapDatasetDb, - ) -> 
TypedResultDescriptor { + async fn store(&self, id: DatasetId, db: &ProHashMapDatasetDb) -> TypedResultDescriptor { db.backend .write() .await @@ -189,11 +165,7 @@ impl ProHashMapStorable for GdalMetadataNetCdfCf { #[async_trait] impl ProHashMapStorable for GdalMetaDataList { - async fn store( - &self, - id: InternalDatasetId, - db: &ProHashMapDatasetDb, - ) -> TypedResultDescriptor { + async fn store(&self, id: DatasetId, db: &ProHashMapDatasetDb) -> TypedResultDescriptor { db.backend .write() .await @@ -214,15 +186,11 @@ impl DatasetStore for ProHashMapDatasetDb { info!("Add dataset {:?}", dataset.user_input.name); let dataset = dataset.user_input; - let id = dataset - .id - .unwrap_or_else(|| InternalDatasetId::new().into()); - let result_descriptor = meta_data - .store(id.internal().expect("from AddDataset"), self) - .await; + let id = dataset.id.unwrap_or_else(DatasetId::new); + let result_descriptor = meta_data.store(id, self).await; let d: Dataset = Dataset { - id: id.clone(), + id, name: dataset.name, description: dataset.description, result_descriptor, @@ -230,7 +198,7 @@ impl DatasetStore for ProHashMapDatasetDb { symbology: dataset.symbology, provenance: dataset.provenance, }; - self.backend.write().await.datasets.insert(id.clone(), d); + self.backend.write().await.datasets.insert(id, d); self.backend .write() @@ -238,7 +206,7 @@ impl DatasetStore for ProHashMapDatasetDb { .dataset_permissions .push(DatasetPermission { role: session.user.id.into(), - dataset: id.clone(), + dataset: id, permission: Permission::Owner, }); @@ -304,9 +272,7 @@ impl DatasetProvider for ProHashMapDatasetDb { .dataset_permissions .iter() .any(|p| session.roles.contains(&p.role)), - error::DatasetPermissionDenied { - dataset: dataset.clone(), - } + error::DatasetPermissionDenied { dataset: *dataset } ); backend @@ -328,16 +294,14 @@ impl DatasetProvider for ProHashMapDatasetDb { .dataset_permissions .iter() .any(|p| session.roles.contains(&p.role)), - error::DatasetPermissionDenied { - dataset: dataset.clone(), - } + error::DatasetPermissionDenied { dataset: *dataset } ); backend .datasets .get(dataset) .map(|d| ProvenanceOutput { - dataset: d.id.clone(), + data: d.id.into(), provenance: d.provenance.clone(), }) .ok_or(error::Error::UnknownDatasetId) @@ -394,7 +358,7 @@ impl async fn session_meta_data( &self, session: &UserSession, - dataset: &DatasetId, + id: &DataId, ) -> Result< Box< dyn MetaData< @@ -405,24 +369,19 @@ impl >, > { let backend = self.backend.read().await; + let id = id.internal().ok_or(error::Error::DataIdTypeMissMatch)?; ensure!( backend .dataset_permissions .iter() - .any(|p| p.dataset == *dataset && session.roles.contains(&p.role)), - error::DatasetPermissionDenied { - dataset: dataset.clone(), - } + .any(|p| p.dataset == id && session.roles.contains(&p.role)), + error::DatasetPermissionDenied { dataset: id } ); Ok(Box::new( backend .mock_datasets - .get( - &dataset - .internal() - .ok_or(error::Error::DatasetIdTypeMissMatch)?, - ) + .get(&id) .ok_or(error::Error::UnknownDatasetId)? 
.clone(), )) @@ -441,28 +400,24 @@ impl async fn session_meta_data( &self, session: &UserSession, - dataset: &DatasetId, + id: &DataId, ) -> Result>> { let backend = self.backend.read().await; + + let id = id.internal().ok_or(error::Error::DataIdTypeMissMatch)?; ensure!( backend .dataset_permissions .iter() - .any(|p| p.dataset == *dataset && session.roles.contains(&p.role)), - error::DatasetPermissionDenied { - dataset: dataset.clone(), - } + .any(|p| p.dataset == id && session.roles.contains(&p.role)), + error::DatasetPermissionDenied { dataset: id } ); Ok(Box::new( backend .ogr_datasets - .get( - &dataset - .internal() - .ok_or(error::Error::DatasetIdTypeMissMatch)?, - ) + .get(&id) .ok_or(error::Error::UnknownDatasetId)? .clone(), )) @@ -481,24 +436,20 @@ impl async fn session_meta_data( &self, session: &UserSession, - dataset: &DatasetId, + id: &DataId, ) -> Result>> { let backend = self.backend.read().await; + + let id = id.internal().ok_or(error::Error::DataIdTypeMissMatch)?; ensure!( backend .dataset_permissions .iter() - .any(|p| p.dataset == *dataset && session.roles.contains(&p.role)), - error::DatasetPermissionDenied { - dataset: dataset.clone(), - } + .any(|p| p.dataset == id && session.roles.contains(&p.role)), + error::DatasetPermissionDenied { dataset: id } ); - let id = dataset - .internal() - .ok_or(error::Error::DatasetIdTypeMissMatch)?; - Ok(backend .gdal_datasets .get(&id) @@ -557,13 +508,9 @@ impl LayerCollectionProvider for ProHashMapDatasetDb { .map(|(_id, d)| { CollectionItem::Layer(LayerListing { id: ProviderLayerId { - provider: DATASET_DB_LAYER_PROVIDER_ID, + provider_id: DATASET_DB_LAYER_PROVIDER_ID, // use the dataset id also as layer id - item: LayerId( - d.id.internal() - .expect("Dataset DB contains only internal datasets") - .to_string(), - ), + layer_id: LayerId(d.id.to_string()), }, name: d.name.clone(), description: d.description.clone(), @@ -579,9 +526,7 @@ impl LayerCollectionProvider for ProHashMapDatasetDb { } async fn get_layer(&self, id: &LayerId) -> Result { - let dataset_id = DatasetId::Internal { - dataset_id: InternalDatasetId::from_str(&id.0)?, - }; + let dataset_id = DatasetId::from_str(&id.0)?; let backend = self.backend.read().await; @@ -591,12 +536,12 @@ impl LayerCollectionProvider for ProHashMapDatasetDb { .find(|(_id, d)| d.id == dataset_id) .ok_or(error::Error::UnknownDatasetId)?; - let operator = source_operator_from_dataset(&dataset.source_operator, &dataset.id)?; + let operator = source_operator_from_dataset(&dataset.source_operator, &dataset.id.into())?; Ok(Layer { id: ProviderLayerId { - provider: DATASET_DB_LAYER_PROVIDER_ID, - item: id.clone(), + provider_id: DATASET_DB_LAYER_PROVIDER_ID, + layer_id: id.clone(), }, name: dataset.name.clone(), description: dataset.description.clone(), @@ -671,7 +616,7 @@ mod tests { let meta: Box< dyn MetaData, - > = exe_ctx.meta_data(&id).await?; + > = exe_ctx.meta_data(&id.into()).await?; assert_eq!( meta.result_descriptor().await?, @@ -916,7 +861,7 @@ mod tests { &session1, DatasetPermission { role: session2.user.id.into(), - dataset: id.clone(), + dataset: id, permission: Permission::Read, }, ) @@ -983,7 +928,7 @@ mod tests { &session1, DatasetPermission { role: Role::user_role_id(), - dataset: id.clone(), + dataset: id, permission: Permission::Read, }, ) @@ -1043,13 +988,19 @@ mod tests { let meta: Result< Box>, - > = ctx.dataset_db_ref().session_meta_data(&session1, &id).await; + > = ctx + .dataset_db_ref() + .session_meta_data(&session1, &id.into()) + .await; assert!(meta.is_ok()); let 
meta: Result< Box>, - > = ctx.dataset_db_ref().session_meta_data(&session2, &id).await; + > = ctx + .dataset_db_ref() + .session_meta_data(&session2, &id.into()) + .await; assert!(meta.is_err()); @@ -1058,7 +1009,7 @@ mod tests { &session1, DatasetPermission { role: Role::user_role_id(), - dataset: id.clone(), + dataset: id, permission: Permission::Read, }, ) @@ -1066,7 +1017,10 @@ mod tests { let meta: Result< Box>, - > = ctx.dataset_db_ref().session_meta_data(&session2, &id).await; + > = ctx + .dataset_db_ref() + .session_meta_data(&session2, &id.into()) + .await; assert!(meta.is_ok()); diff --git a/services/src/pro/datasets/postgres.rs b/services/src/pro/datasets/postgres.rs index 5efff865c..3ab94719d 100644 --- a/services/src/pro/datasets/postgres.rs +++ b/services/src/pro/datasets/postgres.rs @@ -17,7 +17,6 @@ use crate::layers::layer::LayerListing; use crate::layers::layer::ProviderLayerId; use crate::layers::listing::LayerCollectionId; use crate::layers::listing::LayerCollectionProvider; -use crate::layers::listing::LayerId; use crate::pro::datasets::storage::UpdateDatasetPermissions; use crate::pro::datasets::RoleId; use crate::projects::Symbology; @@ -33,7 +32,9 @@ use bb8_postgres::bb8::Pool; use bb8_postgres::tokio_postgres::tls::{MakeTlsConnect, TlsConnect}; use bb8_postgres::tokio_postgres::Socket; use bb8_postgres::PostgresConnectionManager; -use geoengine_datatypes::dataset::{DatasetId, InternalDatasetId}; +use geoengine_datatypes::dataset::DataId; +use geoengine_datatypes::dataset::DatasetId; +use geoengine_datatypes::dataset::LayerId; use geoengine_datatypes::primitives::RasterQueryRectangle; use geoengine_datatypes::primitives::VectorQueryRectangle; use geoengine_datatypes::util::Identifier; @@ -124,9 +125,7 @@ where .iter() .map(|row| { Result::::Ok(DatasetListing { - id: DatasetId::Internal { - dataset_id: row.get(0), - }, + id: row.get(0), name: row.get(1), description: row.get(2), tags: row.get::<_, Option<_>>(3).unwrap_or_default(), @@ -140,8 +139,6 @@ where } async fn load(&self, session: &UserSession, dataset: &DatasetId) -> Result { - let id = dataset.internal().ok_or(Error::InvalidDatasetId)?; - let conn = self.conn_pool.get().await?; let stmt = conn .prepare( @@ -165,12 +162,10 @@ where .await?; // TODO: throw proper dataset does not exist/no permission error - let row = conn.query_one(&stmt, &[&session.user.id, &id]).await?; + let row = conn.query_one(&stmt, &[&session.user.id, dataset]).await?; Ok(Dataset { - id: DatasetId::Internal { - dataset_id: row.get(0), - }, + id: row.get(0), name: row.get(1), description: row.get(2), result_descriptor: serde_json::from_value(row.get(3))?, @@ -185,8 +180,6 @@ where session: &UserSession, dataset: &DatasetId, ) -> Result { - let id = dataset.internal().ok_or(Error::InvalidDatasetId)?; - let conn = self.conn_pool.get().await?; let stmt = conn @@ -202,10 +195,10 @@ where ) .await?; - let row = conn.query_one(&stmt, &[&session.user.id, &id]).await?; + let row = conn.query_one(&stmt, &[&session.user.id, dataset]).await?; Ok(ProvenanceOutput { - dataset: dataset.clone(), + data: (*dataset).into(), provenance: serde_json::from_value(row.get(0)).context(error::SerdeJson)?, }) } @@ -228,7 +221,7 @@ where async fn session_meta_data( &self, _session: &UserSession, - _dataset: &DatasetId, + _id: &DataId, ) -> Result< Box< dyn MetaData< @@ -259,10 +252,10 @@ where async fn session_meta_data( &self, session: &UserSession, - dataset: &DatasetId, + id: &DataId, ) -> Result>> { - let id = 
dataset.internal().ok_or(Error::InvalidDatasetId)?; + let id = id.internal().ok_or(error::Error::DataIdTypeMissMatch)?; let conn = self.conn_pool.get().await?; let stmt = conn @@ -307,10 +300,10 @@ where async fn session_meta_data( &self, session: &UserSession, - dataset: &DatasetId, + id: &DataId, ) -> Result>> { - let id = dataset.internal().ok_or(Error::InvalidDatasetId)?; + let id = id.internal().ok_or(error::Error::DataIdTypeMissMatch)?; let conn = self.conn_pool.get().await?; let stmt = conn @@ -333,7 +326,7 @@ where Ok(match meta_data { MetaDataDefinition::GdalMetaDataRegular(m) => Box::new(m), MetaDataDefinition::GdalStatic(m) => Box::new(m), - _ => return Err(Error::DatasetIdTypeMissMatch), + _ => return Err(Error::DataIdTypeMissMatch), }) } } @@ -428,10 +421,7 @@ where meta_data: Box>, ) -> Result { let dataset = dataset.user_input; - let id = dataset - .id - .unwrap_or_else(|| InternalDatasetId::new().into()); - let internal_id = id.internal().ok_or(Error::InvalidDatasetId)?; + let id = dataset.id.unwrap_or_else(DatasetId::new); let meta_data_json = meta_data.to_json()?; @@ -459,7 +449,7 @@ where tx.execute( &stmt, &[ - &internal_id, + &id, &dataset.name, &dataset.description, &dataset.source_operator, @@ -485,11 +475,7 @@ where tx.execute( &stmt, - &[ - &RoleId::from(session.user.id), - &internal_id, - &Permission::Owner, - ], + &[&RoleId::from(session.user.id), &id, &Permission::Owner], ) .await?; @@ -521,11 +507,7 @@ where session, permission ); - let internal_id = permission.dataset.internal().ok_or( - geoengine_operators::error::Error::DatasetMetaData { - source: Box::new(error::Error::DatasetIdTypeMissMatch), - }, - )?; + let id = permission.dataset; let mut conn = self.conn_pool.get().await?; @@ -546,11 +528,7 @@ where let auth = tx .query_one( &stmt, - &[ - &RoleId::from(session.user.id), - &internal_id, - &Permission::Owner, - ], + &[&RoleId::from(session.user.id), &id, &Permission::Owner], ) .await; @@ -576,10 +554,7 @@ where .await?; let duplicate = tx - .query_one( - &stmt, - &[&permission.role, &internal_id, &permission.permission], - ) + .query_one(&stmt, &[&permission.role, &id, &permission.permission]) .await?; ensure!( @@ -603,11 +578,8 @@ where ) .await?; - tx.execute( - &stmt, - &[&permission.role, &internal_id, &permission.permission], - ) - .await?; + tx.execute(&stmt, &[&permission.role, &id, &permission.permission]) + .await?; tx.commit().await?; @@ -719,8 +691,8 @@ where .map(|row| { Result::::Ok(CollectionItem::Layer(LayerListing { id: ProviderLayerId { - provider: DATASET_DB_LAYER_PROVIDER_ID, - item: LayerId(row.get(0)), + provider_id: DATASET_DB_LAYER_PROVIDER_ID, + layer_id: LayerId(row.get(0)), }, name: row.get(1), description: row.get(2), @@ -735,9 +707,7 @@ where } async fn get_layer(&self, id: &LayerId) -> Result { - let dataset_id = DatasetId::Internal { - dataset_id: InternalDatasetId::from_str(&id.0)?, - }; + let dataset_id = DatasetId::from_str(&id.0)?; let conn = self.conn_pool.get().await?; @@ -773,12 +743,12 @@ where let source_operator: String = row.get(2); let symbology: Option = serde_json::from_value(row.get(3))?; - let operator = source_operator_from_dataset(&source_operator, &dataset_id)?; + let operator = source_operator_from_dataset(&source_operator, &dataset_id.into())?; Ok(Layer { id: ProviderLayerId { - provider: DATASET_DB_LAYER_PROVIDER_ID, - item: id.clone(), + provider_id: DATASET_DB_LAYER_PROVIDER_ID, + layer_id: id.clone(), }, name, description, diff --git a/services/src/pro/handlers/drone_mapping.rs 
b/services/src/pro/handlers/drone_mapping.rs index 694d0f3d7..aeaa8a45e 100644 --- a/services/src/pro/handlers/drone_mapping.rs +++ b/services/src/pro/handlers/drone_mapping.rs @@ -14,7 +14,7 @@ use crate::util::IdResponse; use actix_web::{web, Responder}; use futures_util::StreamExt; -use geoengine_datatypes::dataset::{DatasetId, InternalDatasetId}; +use geoengine_datatypes::dataset::DatasetId; use geoengine_datatypes::primitives::Measurement; use geoengine_datatypes::raster::RasterDataType; use geoengine_datatypes::spatial_reference::SpatialReference; @@ -192,9 +192,7 @@ where /// ```text /// { /// "upload": "3086f494-d5a4-4b51-a14b-3b29f8bf7bb0", -/// "dataset": { -/// "type": "internal", -/// "datasetId": "94230f0b-4e8a-4cba-9adc-3ace837fe5d4" +/// "dataset": "94230f0b-4e8a-4cba-9adc-3ace837fe5d4" /// } /// } /// ``` @@ -297,7 +295,7 @@ async fn dataset_definition_from_geotiff( Ok(DatasetDefinition { properties: AddDataset { - id: Some(InternalDatasetId::new().into()), + id: Some(DatasetId::new()), name: "ODM Result".to_owned(), // TODO: more info description: "".to_owned(), // TODO: more info source_operator: "GdalSource".to_owned(), @@ -500,7 +498,7 @@ mod tests { let meta: Box> = ctx.execution_context(session.clone()) .unwrap() - .meta_data(&dataset_id) + .meta_data(&dataset_id.into()) .await .unwrap(); @@ -570,7 +568,7 @@ mod tests { // test if the data can be loaded let op = GdalSource { params: GdalSourceParameters { - dataset: dataset_id, + data: dataset_id.into(), }, } .boxed(); diff --git a/services/src/pro/layers/postgres_layer_db.rs b/services/src/pro/layers/postgres_layer_db.rs index ea1a44e5d..22f730b59 100644 --- a/services/src/pro/layers/postgres_layer_db.rs +++ b/services/src/pro/layers/postgres_layer_db.rs @@ -9,19 +9,19 @@ use bb8_postgres::{ }, PostgresConnectionManager, }; -use geoengine_datatypes::dataset::LayerProviderId; +use geoengine_datatypes::dataset::{DataProviderId, LayerId}; use snafu::ResultExt; use uuid::Uuid; use crate::{ error::{self, Result}, layers::{ - external::{ExternalLayerProvider, ExternalLayerProviderDefinition}, + external::{DataProvider, DataProviderDefinition}, layer::{ AddLayer, AddLayerCollection, CollectionItem, Layer, LayerCollectionListOptions, LayerCollectionListing, LayerListing, ProviderLayerCollectionId, ProviderLayerId, }, - listing::{LayerCollectionId, LayerCollectionProvider, LayerId}, + listing::{LayerCollectionId, LayerCollectionProvider}, storage::{ LayerDb, LayerDbError, LayerProviderDb, LayerProviderListing, LayerProviderListingOptions, INTERNAL_LAYER_DB_ROOT_COLLECTION_ID, @@ -401,8 +401,8 @@ where if is_layer { CollectionItem::Layer(LayerListing { id: ProviderLayerId { - provider: INTERNAL_PROVIDER_ID, - item: LayerId(row.get(0)), + provider_id: INTERNAL_PROVIDER_ID, + layer_id: LayerId(row.get(0)), }, name: row.get(1), description: row.get(2), @@ -410,8 +410,8 @@ where } else { CollectionItem::Collection(LayerCollectionListing { id: ProviderLayerCollectionId { - provider: INTERNAL_PROVIDER_ID, - item: LayerCollectionId(row.get(0)), + provider_id: INTERNAL_PROVIDER_ID, + collection_id: LayerCollectionId(row.get(0)), }, name: row.get(1), description: row.get(2), @@ -454,8 +454,8 @@ where Ok(Layer { id: ProviderLayerId { - provider: INTERNAL_PROVIDER_ID, - item: id.clone(), + provider_id: INTERNAL_PROVIDER_ID, + layer_id: id.clone(), }, name: row.get(0), description: row.get(1), @@ -497,8 +497,8 @@ where { async fn add_layer_provider( &self, - provider: Box, - ) -> Result { + provider: Box, + ) -> Result { // TODO: 
permissions let conn = self.conn_pool.get().await?; @@ -570,7 +570,7 @@ where .collect()) } - async fn layer_provider(&self, id: LayerProviderId) -> Result> { + async fn layer_provider(&self, id: DataProviderId) -> Result> { // TODO: permissions let conn = self.conn_pool.get().await?; @@ -588,8 +588,7 @@ where let row = conn.query_one(&stmt, &[&id]).await?; - let definition = - serde_json::from_value::>(row.get(0))?; + let definition = serde_json::from_value::>(row.get(0))?; definition.initialize().await } diff --git a/services/src/util/operators.rs b/services/src/util/operators.rs index af9643f00..4bbb0ca0d 100644 --- a/services/src/util/operators.rs +++ b/services/src/util/operators.rs @@ -1,5 +1,5 @@ use crate::error::Result; -use geoengine_datatypes::dataset::DatasetId; +use geoengine_datatypes::dataset::DataId; use geoengine_operators::{ engine::{OperatorName, RasterOperator, TypedOperator, VectorOperator}, mock::{MockDatasetDataSource, MockDatasetDataSourceParams}, @@ -8,13 +8,13 @@ use geoengine_operators::{ pub fn source_operator_from_dataset( source_operator_name: &str, - dataset: &DatasetId, + id: &DataId, ) -> Result { Ok(match source_operator_name { OgrSource::TYPE_NAME => TypedOperator::Vector( OgrSource { params: OgrSourceParameters { - dataset: dataset.clone(), + data: id.clone(), attribute_projection: None, attribute_filters: None, }, @@ -23,17 +23,13 @@ pub fn source_operator_from_dataset( ), GdalSource::TYPE_NAME => TypedOperator::Raster( GdalSource { - params: GdalSourceParameters { - dataset: dataset.clone(), - }, + params: GdalSourceParameters { data: id.clone() }, } .boxed(), ), MockDatasetDataSource::TYPE_NAME => TypedOperator::Vector( MockDatasetDataSource { - params: MockDatasetDataSourceParams { - dataset: dataset.clone(), - }, + params: MockDatasetDataSourceParams { data: id.clone() }, } .boxed(), ), diff --git a/services/src/util/tests.rs b/services/src/util/tests.rs index f866497bf..d3a82fe02 100644 --- a/services/src/util/tests.rs +++ b/services/src/util/tests.rs @@ -91,7 +91,9 @@ pub async fn register_ndvi_workflow_helper(ctx: &InMemoryContext) -> (Workflow, let workflow = Workflow { operator: TypedOperator::Raster( GdalSource { - params: GdalSourceParameters { dataset }, + params: GdalSourceParameters { + data: dataset.into(), + }, } .boxed(), ), diff --git a/test_data/dataset_defs/germany_polygon.json b/test_data/dataset_defs/germany_polygon.json index fd117a3ad..ab6f2a225 100644 --- a/test_data/dataset_defs/germany_polygon.json +++ b/test_data/dataset_defs/germany_polygon.json @@ -1,9 +1,6 @@ { "properties": { - "id": { - "type": "internal", - "datasetId": "b6191257-6d61-4c6b-90a4-ebfb1b23899d" - }, + "id": "b6191257-6d61-4c6b-90a4-ebfb1b23899d", "name": "Germany", "description": "Boundaries of Germany", "sourceOperator": "OgrSource" diff --git a/test_data/dataset_defs/landcover.json b/test_data/dataset_defs/landcover.json index fd4a515fa..35cf9104f 100644 --- a/test_data/dataset_defs/landcover.json +++ b/test_data/dataset_defs/landcover.json @@ -1,9 +1,6 @@ { "properties": { - "id": { - "type": "internal", - "datasetId": "9ee3619e-d0f9-4ced-9c44-3d407c3aed69" - }, + "id": "9ee3619e-d0f9-4ced-9c44-3d407c3aed69", "name": "Land Cover", "description": "Land Cover derived from MODIS/Terra+Aqua Land Cover", "sourceOperator": "GdalSource", diff --git a/test_data/dataset_defs/mock.json b/test_data/dataset_defs/mock.json index 35d5a10e3..57576e5ef 100644 --- a/test_data/dataset_defs/mock.json +++ b/test_data/dataset_defs/mock.json @@ -1,9 +1,6 @@ { "properties": { 
- "id": { - "type": "internal", - "datasetId": "5c2e620c-0c25-49f4-a3dd-9ef518c95709" - }, + "id": "5c2e620c-0c25-49f4-a3dd-9ef518c95709", "name": "Mock", "description": "A mock dataset", "sourceOperator": "MockDatasetDataSource" diff --git a/test_data/dataset_defs/ndvi (3587).json b/test_data/dataset_defs/ndvi (3587).json index be4c5ea95..8f5ef2891 100644 --- a/test_data/dataset_defs/ndvi (3587).json +++ b/test_data/dataset_defs/ndvi (3587).json @@ -1,9 +1,6 @@ { "properties": { - "id": { - "type": "internal", - "datasetId": "a3f6955e-53cc-4752-9e4a-e9f0493add80" - }, + "id": "a3f6955e-53cc-4752-9e4a-e9f0493add80", "name": "NDVI3857", "description": "NDVI data from MODIS reprojected to EPSG:3857", "sourceOperator": "GdalSource", diff --git a/test_data/dataset_defs/ndvi.json b/test_data/dataset_defs/ndvi.json index 29f1c6f25..e541b1a5e 100644 --- a/test_data/dataset_defs/ndvi.json +++ b/test_data/dataset_defs/ndvi.json @@ -1,9 +1,6 @@ { "properties": { - "id": { - "type": "internal", - "datasetId": "36574dc3-560a-4b09-9d22-d5945f2b8093" - }, + "id": "36574dc3-560a-4b09-9d22-d5945f2b8093", "name": "NDVI", "description": "NDVI data from MODIS", "sourceOperator": "GdalSource", diff --git a/test_data/dataset_defs/ndvi_list.json b/test_data/dataset_defs/ndvi_list.json index c6f81f3ea..e466fe98b 100644 --- a/test_data/dataset_defs/ndvi_list.json +++ b/test_data/dataset_defs/ndvi_list.json @@ -1,9 +1,6 @@ { "properties": { - "id": { - "type": "internal", - "datasetId": "5c512b9c-776a-4a05-b47a-c3c8b052ecfa" - }, + "id": "5c512b9c-776a-4a05-b47a-c3c8b052ecfa", "name": "NDVI as GdalMetaDataList", "description": "NDVI data from MODIS", "sourceOperator": "GdalSource", diff --git a/test_data/dataset_defs/ne_10m_ports (3857).json b/test_data/dataset_defs/ne_10m_ports (3857).json index a0095cf1b..a7bb4228a 100644 --- a/test_data/dataset_defs/ne_10m_ports (3857).json +++ b/test_data/dataset_defs/ne_10m_ports (3857).json @@ -1,9 +1,6 @@ { "properties": { - "id": { - "type": "internal", - "datasetId": "f47fc99c-c085-4fe3-90f3-120491facb28" - }, + "id": "f47fc99c-c085-4fe3-90f3-120491facb28", "name": "Natural Earth 10m Ports 3857", "description": "Ports from Natural Earth reprojected to EPSG:3857", "sourceOperator": "OgrSource", diff --git a/test_data/dataset_defs/ne_10m_ports.json b/test_data/dataset_defs/ne_10m_ports.json index c00d898ea..7a922cef7 100644 --- a/test_data/dataset_defs/ne_10m_ports.json +++ b/test_data/dataset_defs/ne_10m_ports.json @@ -1,9 +1,6 @@ { "properties": { - "id": { - "type": "internal", - "datasetId": "a9623a5b-b6c5-404b-bc5a-313ff72e4e75" - }, + "id": "a9623a5b-b6c5-404b-bc5a-313ff72e4e75", "name": "Natural Earth 10m Ports", "description": "Ports from Natural Earth", "sourceOperator": "OgrSource", diff --git a/test_data/dataset_defs/points_with_time.json b/test_data/dataset_defs/points_with_time.json index 755ee2b4e..ab277e71d 100644 --- a/test_data/dataset_defs/points_with_time.json +++ b/test_data/dataset_defs/points_with_time.json @@ -1,9 +1,6 @@ { "properties": { - "id": { - "type": "internal", - "datasetId": "b526b59c-19f1-4d99-8773-5364b757f6d8" - }, + "id": "b526b59c-19f1-4d99-8773-5364b757f6d8", "name": "Points with Time", "description": "Some points in Europe in 2014", "sourceOperator": "OgrSource" diff --git a/test_data/layer_defs/ports_in_germany.json b/test_data/layer_defs/ports_in_germany.json index 778bdecc7..b14aabca1 100644 --- a/test_data/layer_defs/ports_in_germany.json +++ b/test_data/layer_defs/ports_in_germany.json @@ -11,7 +11,7 @@ "points": { "type": 
"OgrSource", "params": { - "dataset": { + "data": { "type": "internal", "datasetId": "a9623a5b-b6c5-404b-bc5a-313ff72e4e75" }, @@ -22,7 +22,7 @@ "polygons": { "type": "OgrSource", "params": { - "dataset": { + "data": { "type": "internal", "datasetId": "b6191257-6d61-4c6b-90a4-ebfb1b23899d" }, From 9cbb82b5bf19cc71e15d89276113ebb648371c5f Mon Sep 17 00:00:00 2001 From: Michael Mattig Date: Tue, 12 Jul 2022 18:54:32 +0200 Subject: [PATCH 17/21] change dataset route --- services/src/handlers/datasets.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/services/src/handlers/datasets.rs b/services/src/handlers/datasets.rs index 599da9796..3ef6ae945 100644 --- a/services/src/handlers/datasets.rs +++ b/services/src/handlers/datasets.rs @@ -49,9 +49,7 @@ where web::scope("/dataset") .service(web::resource("").route(web::post().to(create_dataset_handler::))) .service(web::resource("/auto").route(web::post().to(auto_create_dataset_handler::))) - .service( - web::resource("/internal/{dataset}").route(web::get().to(get_dataset_handler::)), - ) + .service(web::resource("/{dataset}").route(web::get().to(get_dataset_handler::))) .service( web::resource("/suggest").route(web::get().to(suggest_meta_data_handler::)), ), From e7f6ffdd196a0adc324e247641fedad25f9fdbf2 Mon Sep 17 00:00:00 2001 From: Michael Mattig Date: Wed, 13 Jul 2022 15:10:03 +0200 Subject: [PATCH 18/21] update changelog --- CHANGELOG.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9af172527..536253767 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Added a layers API that allows browsing datasets, stored layers and external data in a uniform fashion + + - https://github.com/geo-engine/geoengine/pull/554 + - Added a `ClassHistogram` plot operator for creating histograms of categorical data - https://github.com/geo-engine/geoengine/pull/560 @@ -21,6 +25,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- Refactored dataset ids and external provders + + - https://github.com/geo-engine/geoengine/pull/554 + - **breaking** the parameters of the source operators changed which makes old workflow jsons incompatible + - **breaking** the id of datasets changed which makes old dataset definition jsons incompatible + - Added `Measurement`s to vector data workflows - https://github.com/geo-engine/geoengine/pull/557 From 38dc9a378d58617ef645a020a0ef708d772cb72b Mon Sep 17 00:00:00 2001 From: Michael Mattig Date: Thu, 14 Jul 2022 15:02:20 +0200 Subject: [PATCH 19/21] fix fmt --- services/src/handlers/workflows.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/services/src/handlers/workflows.rs b/services/src/handlers/workflows.rs index 7b1a15e0d..45a0933e7 100755 --- a/services/src/handlers/workflows.rs +++ b/services/src/handlers/workflows.rs @@ -252,7 +252,13 @@ async fn resolve_provenance( ) -> Result { match id { DataId::Internal { dataset_id } => datasets.provenance(session, dataset_id).await, - DataId::External(e) => providers.layer_provider(e.provider_id).await?.provenance(id).await, + DataId::External(e) => { + providers + .layer_provider(e.provider_id) + .await? 
+ .provenance(id) + .await + } } } From a3453eeb956e7d6b4eaec26c62b765771acffb36 Mon Sep 17 00:00:00 2001 From: Michael Mattig Date: Thu, 14 Jul 2022 17:11:27 +0200 Subject: [PATCH 20/21] fix test --- services/src/handlers/datasets.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/src/handlers/datasets.rs b/services/src/handlers/datasets.rs index 3ef6ae945..f96e33e63 100644 --- a/services/src/handlers/datasets.rs +++ b/services/src/handlers/datasets.rs @@ -1589,7 +1589,7 @@ mod tests { .await?; let req = actix_web::test::TestRequest::get() - .uri(&format!("/dataset/internal/{}", id)) + .uri(&format!("/dataset/{}", id)) .append_header((header::CONTENT_LENGTH, 0)) .append_header((header::AUTHORIZATION, Bearer::new(session_id.to_string()))); let res = send_test_request(req, ctx).await; From cd031d8e53276d44a0cbc5274585814b468876bc Mon Sep 17 00:00:00 2001 From: Michael Mattig Date: Thu, 14 Jul 2022 17:55:19 +0200 Subject: [PATCH 21/21] fix doctest --- operators/src/source/gdal_source/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operators/src/source/gdal_source/mod.rs b/operators/src/source/gdal_source/mod.rs index de1777436..520621dbb 100755 --- a/operators/src/source/gdal_source/mod.rs +++ b/operators/src/source/gdal_source/mod.rs @@ -57,7 +57,7 @@ mod loading_info; /// ```rust /// use serde_json::{Result, Value}; /// use geoengine_operators::source::{GdalSource, GdalSourceParameters}; -/// use geoengine_datatypes::dataset::DataId; +/// use geoengine_datatypes::dataset::{DatasetId, DataId}; /// use geoengine_datatypes::util::Identifier; /// use std::str::FromStr; ///
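

The changelog entry for https://github.com/geo-engine/geoengine/pull/554 above flags two breaking changes. As a minimal, hedged sketch — not taken verbatim from the patches; the wrapper function and variable names are illustrative, while the types, field names, and `.into()` conversion mirror the updated `register_ndvi_workflow_helper` and drone-mapping test in this series — constructing a raster source now looks roughly like this:

```rust
use geoengine_datatypes::dataset::DatasetId;
use geoengine_operators::engine::RasterOperator;
use geoengine_operators::source::{GdalSource, GdalSourceParameters};

// Illustrative only: builds a GdalSource for an internal dataset after the id refactoring.
fn gdal_source_for(dataset_id: DatasetId) -> Box<dyn RasterOperator> {
    GdalSource {
        params: GdalSourceParameters {
            // Formerly `dataset: DatasetId`; the field is now `data: DataId`,
            // so internal dataset ids are converted with `.into()`.
            data: dataset_id.into(),
        },
    }
    .boxed()
}
```

The same rename applies to serialized workflows: `"params": { "dataset": { "type": "internal", "datasetId": "…" } }` becomes `"params": { "data": { … } }`, and the `id` of a dataset definition is now a bare UUID string rather than a tagged object, as the updated files under `test_data/` in this series show.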