Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

copernicus data provider #972

Open
wants to merge 24 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
868306a
initial version of a copernicus data provider
michaelmattig Jul 19, 2024
8c049be
refactor ids and prepare sentinel2 metadata
michaelmattig Jul 22, 2024
540af42
stac access
michaelmattig Jul 23, 2024
70a33df
prepare gdal dataset parameters for loading info
michaelmattig Jul 24, 2024
b3cb979
add drivers for copernicus Sentinel-2
michaelmattig Jul 26, 2024
c77da4b
add gitignore for provider definition with access keys
michaelmattig Jul 26, 2024
b99cd81
data loading (wip)
michaelmattig Jul 26, 2024
3781d44
clean up
michaelmattig Jul 31, 2024
09276e9
Merge branch 'main' of github.com:geo-engine/geoengine into copernicu…
michaelmattig Jul 31, 2024
a14f44c
Merge branch 'main' of github.com:geo-engine/geoengine into copernicu…
michaelmattig Jul 31, 2024
be7b815
Merge branch 'copernicus-provider' of github.com:geo-engine/geoengine…
michaelmattig Jul 31, 2024
ee986a5
fix tests
michaelmattig Jul 31, 2024
d3cf035
testing wip
michaelmattig Jul 31, 2024
9e9b5d8
complete test for loading info
michaelmattig Aug 2, 2024
3636a4c
Merge branch 'main' of github.com:geo-engine/geoengine into copernicu…
michaelmattig Sep 6, 2024
8ef4eaf
simplify test
michaelmattig Sep 6, 2024
668ccbe
build dependencies manually instead of using cargo
michaelmattig Sep 9, 2024
5c4538e
remove vsi curl chunk size and fix tests
michaelmattig Sep 10, 2024
8d1a08c
Merge branch 'main' of github.com:geo-engine/geoengine into copernicu…
michaelmattig Sep 11, 2024
f1e31d5
configurable gdal config for copernicus provider
michaelmattig Sep 12, 2024
fb2de76
Merge branch 'main' of github.com:geo-engine/geoengine into copernicu…
michaelmattig Sep 12, 2024
a7fbe6d
distinguish between Sentinel 2 products
michaelmattig Oct 23, 2024
78cc4ae
add time and space bounds
michaelmattig Oct 23, 2024
6951b5e
Merge branch 'main' of github.com:geo-engine/geoengine into copernicu…
michaelmattig Oct 23, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,499 changes: 33 additions & 1,466 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions Settings-default.toml
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ allowed_drivers = [
"HDF4",
"HDF5Image",
"HTTP",
"JP2OpenJPEG",
"KML",
"MEM",
"Memory",
Expand All @@ -126,6 +127,7 @@ allowed_drivers = [
"PostGISRaster",
"PostgreSQL",
"SAFE",
"SENTINEL2",
"SQLite",
"STACIT",
"TopoJSON",
Expand Down
16 changes: 15 additions & 1 deletion datatypes/src/plots/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,27 @@ pub trait Plot {
// fn to_png(&self, width_px: u16, height_px: u16) -> Vec<u8>;
}

#[derive(Debug, Clone, Deserialize, PartialEq, Eq, Serialize)]
/// A rendered plot: a Vega specification (as a string, typically JSON)
/// together with accompanying metadata.
///
/// `PartialEq` is implemented manually further down in this module so that
/// two plots whose `vega_string`s are equivalent JSON compare equal even if
/// their textual formatting differs; `Eq` is derived on top of that.
#[derive(Debug, Clone, Deserialize, Eq, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct PlotData {
    /// The Vega specification serialized as a string (usually JSON).
    pub vega_string: String,
    /// Additional metadata describing the plot.
    pub metadata: PlotMetaData,
}

impl PartialEq for PlotData {
    /// Two plots are equal when their metadata matches and their Vega
    /// specifications are equivalent — structurally, if both parse as JSON.
    fn eq(&self, other: &Self) -> bool {
        // Metadata must agree no matter how the vega strings compare.
        if self.metadata != other.metadata {
            return false;
        }

        let lhs = serde_json::from_str::<serde_json::Value>(&self.vega_string);
        let rhs = serde_json::from_str::<serde_json::Value>(&other.vega_string);

        if let (Ok(lhs_json), Ok(rhs_json)) = (lhs, rhs) {
            // Both sides are valid JSON: compare the parsed values so that
            // pure formatting differences do not break equality.
            lhs_json == rhs_json
        } else {
            // At least one side is not valid JSON: fall back to a plain
            // string comparison.
            self.vega_string == other.vega_string
        }
    }
}

#[derive(Debug, Clone, Deserialize, PartialEq, Eq, Serialize, Default)]
#[serde(untagged)]
pub enum PlotMetaData {
Expand Down
10 changes: 10 additions & 0 deletions datatypes/src/util/db_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,16 @@ impl<S: std::hash::BuildHasher + std::default::Default> From<HashMapTextTextDbTy
#[derive(PartialEq, Eq, Debug, Clone)]
pub struct StringPair((String, String));

impl StringPair {
pub fn new(a: String, b: String) -> Self {
Self((a, b))
}

pub fn into_inner(self) -> (String, String) {
self.0
}
}

impl ToSql for StringPair {
fn to_sql(
&self,
Expand Down
1 change: 0 additions & 1 deletion expression/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ documentation.workspace = true
repository.workspace = true

[dependencies]
cargo = "0.76" # upgrade includes `libsqlite3-sys` which leads to SIGSEGV with GDAL
geoengine-expression-deps = { path = "deps-workspace" }
libloading = "0.8"
log = "0.4"
Expand Down
75 changes: 19 additions & 56 deletions expression/src/dependencies.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,5 @@
use crate::error::{self, ExpressionExecutionError};
use cargo::{
core::{
compiler::{BuildConfig, MessageFormat},
Shell, Workspace,
},
ops::CompileOptions,
util::command_prelude::CompileMode,
};
use snafu::{OptionExt, ResultExt, Whatever};
use snafu::ResultExt;
use std::path::{Path, PathBuf};

pub type Result<T, E = ExpressionExecutionError> = std::result::Result<T, E>;
Expand All @@ -33,11 +25,25 @@ impl ExpressionDependencies {

Self::copy_deps_workspace(cargo_workspace.path()).context(error::DepsWorkspace)?;

let dependencies = Self::cargo_build(cargo_workspace.path()).map_err(|e| {
ExpressionExecutionError::DepsBuild {
// build the dependencies using cargo through a subprocess
// note that we do not use the cargo crate here, because it led to a deadlock in tests
let output = std::process::Command::new("cargo")
.current_dir(cargo_workspace.path())
.arg("build")
.arg("--release")
.arg("--frozen")
.output()
.map_err(|e| ExpressionExecutionError::DepsBuild {
debug: format!("{e:?}"),
}
})?;
})?;

if !output.status.success() {
return Err(ExpressionExecutionError::DepsBuild {
debug: String::from_utf8_lossy(&output.stderr).to_string(),
});
}

let dependencies = cargo_workspace.path().join("target/release/deps/");

Ok(Self {
_cargo_workspace: cargo_workspace,
Expand All @@ -55,49 +61,6 @@ impl ExpressionDependencies {
std::fs::write(cargo_workspace.join("lib.rs"), DEPS_LIB_RS)?;
Ok(())
}

/// Builds the dependencies workspace.
/// We will use the libraries in the `target/release/deps/` folder.
///
/// We return [`Whatever`] since [`cargo::util::errors::CargoResult`] has a lifetime.
///
fn cargo_build(cargo_workspace: &Path) -> Result<PathBuf, Whatever> {
let homedir = cargo::util::homedir(cargo_workspace)
.whatever_context("Could not find home directory, e.g. $HOME")?;

// TODO: make shell output configurable?
let dev_null_shell = Shell::from_write(Box::new(std::io::empty()));
let cargo_config =
cargo::util::config::Config::new(dev_null_shell, cargo_workspace.into(), homedir);

let workspace = Workspace::new(&cargo_workspace.join("Cargo.toml"), &cargo_config)
.whatever_context("Invalid workspace")?;

let mut build_config = BuildConfig::new(&cargo_config, None, true, &[], CompileMode::Build)
.whatever_context("Invalid cargo build config")?;
build_config.requested_profile = "release".into();
build_config.message_format = MessageFormat::Short;

let mut compile_options = CompileOptions::new(&cargo_config, CompileMode::Build)
.whatever_context("Invalid compile options")?;
compile_options.build_config = build_config;

let compilation_result = cargo::ops::compile(&workspace, &compile_options)
.whatever_context("Compilation failed")?;

debug_assert_eq!(
compilation_result.deps_output.keys().len(),
1,
"Expected only one deps output"
);

compilation_result
.deps_output
.values()
.next()
.cloned()
.whatever_context("Missing deps output")
}
}

#[cfg(test)]
Expand Down
12 changes: 4 additions & 8 deletions operators/src/plot/statistics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -812,7 +812,7 @@ mod tests {
.unwrap();

assert_eq!(
result.to_string(),
result,
json!({
"Raster-1": {
"valueCount": 66_246, // 362*183 Note: this is caused by the inclusive nature of the bounding box. Since the right and lower bounds are included this wraps to a new row/column of tiles. In this test the tiles are 3x2 pixels in size.
Expand All @@ -833,7 +833,6 @@ mod tests {
"percentiles": [],
},
})
.to_string()
);
}

Expand Down Expand Up @@ -934,7 +933,7 @@ mod tests {
.unwrap();

assert_eq!(
result.to_string(),
result,
json!({
"A": {
"valueCount": 66_246, // 362*183 Note: this is caused by the inclusive nature of the bounding box. Since the right and lower bounds are included this wraps to a new row/column of tiles. In this test the tiles are 3x2 pixels in size.
Expand All @@ -955,7 +954,6 @@ mod tests {
"percentiles": [],
},
})
.to_string()
);
}

Expand Down Expand Up @@ -1117,7 +1115,7 @@ mod tests {
.unwrap();

assert_eq!(
result.to_string(),
result,
json!({
"foo": {
"valueCount": 7,
Expand All @@ -1138,7 +1136,6 @@ mod tests {
"percentiles": [],
},
})
.to_string()
);
}

Expand Down Expand Up @@ -1308,7 +1305,7 @@ mod tests {
.unwrap();

assert_eq!(
result.to_string(),
result,
json!({
"foo": {
"valueCount": 7,
Expand All @@ -1329,7 +1326,6 @@ mod tests {
"percentiles": [],
},
})
.to_string()
);
}

Expand Down
6 changes: 2 additions & 4 deletions operators/src/util/gdal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ use geoengine_datatypes::{
util::Identifier,
};
use itertools::Itertools;
use log::Level::Debug;
use log::{debug, log_enabled};
use snafu::ResultExt;

use crate::{
Expand Down Expand Up @@ -375,10 +373,10 @@ pub fn register_gdal_drivers_from_list<S: BuildHasher>(mut drivers: HashSet<Stri
}
}

if !drivers.is_empty() && log_enabled!(Debug) {
if !drivers.is_empty() {
let mut drivers: Vec<String> = drivers.into_iter().collect();
drivers.sort();
let remaining_drivers = drivers.into_iter().join(", ");
debug!("Could not register drivers: {remaining_drivers}");
log::warn!("Could not register drivers: {remaining_drivers}");
}
}
2 changes: 2 additions & 0 deletions services/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,10 @@ serde_json = "1.0"
serde_urlencoded = "0.7"
serde_with = "3.6"
snafu = "0.8"
stac = "0.7"
stream-cancel = "0.8"
strum = { version = "0.26", features = ["derive"] }
strum_macros = "0.26"
time = "0.3"
tokio = { version = "1.36", features = [
"macros",
Expand Down
6 changes: 6 additions & 0 deletions services/src/api/model/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1894,6 +1894,12 @@ impl From<StringPair> for (String, String) {
}
}

impl From<StringPair> for geoengine_datatypes::util::StringPair {
fn from(value: StringPair) -> Self {
Self::new(value.0 .0, value.0 .1)
}
}

#[derive(Debug, Clone, Deserialize, PartialEq, Eq, Serialize, ToSchema)]
pub enum PlotOutputFormat {
JsonPlain,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
use async_trait::async_trait;
use tokio_postgres::Transaction;

use crate::error::Result;

use super::database_migration::{DatabaseVersion, Migration};

/// This migration adds the Copernicus provider.
pub struct Migration0013CopernicusProvider;

#[async_trait]
impl Migration for Migration0013CopernicusProvider {
    /// This migration directly follows the ML-model-DB migration.
    fn prev_version(&self) -> Option<DatabaseVersion> {
        Some("0012_ml_model_db".into())
    }

    fn version(&self) -> DatabaseVersion {
        "0013_copernicus_provider".into()
    }

    /// Intentionally a no-op in the open-source schema.
    async fn migrate(&self, _tx: &Transaction<'_>) -> Result<()> {
        // provider only exists in pro
        Ok(())
    }
}
3 changes: 3 additions & 0 deletions services/src/contexts/migrations/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ pub use crate::contexts::migrations::{
migration_0010_s2_stack_time_buffers::Migration0010S2StacTimeBuffers,
migration_0011_remove_xgb::Migration0011RemoveXgb,
migration_0012_ml_model_db::Migration0012MlModelDb,
migration_0013_copernicus_provider::Migration0013CopernicusProvider,
};
pub use database_migration::{
initialize_database, migrate_database, DatabaseVersion, Migration, MigrationResult,
Expand All @@ -32,6 +33,7 @@ pub mod migration_0009_oidc_tokens;
pub mod migration_0010_s2_stack_time_buffers;
pub mod migration_0011_remove_xgb;
pub mod migration_0012_ml_model_db;
pub mod migration_0013_copernicus_provider;

#[cfg(test)]
mod schema_info;
Expand All @@ -58,6 +60,7 @@ pub fn all_migrations() -> Vec<Box<dyn Migration>> {
Box::new(Migration0010S2StacTimeBuffers),
Box::new(Migration0011RemoveXgb),
Box::new(Migration0012MlModelDb),
Box::new(Migration0013CopernicusProvider),
]
}

Expand Down
2 changes: 1 addition & 1 deletion services/src/contexts/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ pub use migrations::{
Migration0004DatasetListingProviderPrio, Migration0005GbifColumnSelection,
Migration0006EbvProvider, Migration0007OwnerRole, Migration0008BandNames,
Migration0009OidcTokens, Migration0010S2StacTimeBuffers, Migration0011RemoveXgb,
Migration0012MlModelDb, MigrationResult,
Migration0012MlModelDb, Migration0013CopernicusProvider, MigrationResult,
};
pub use postgres::{PostgresContext, PostgresDb, PostgresSessionContext};
pub use session::{MockableSession, Session, SessionId, SimpleSession};
Expand Down
Loading
Loading