diff --git a/Cargo.toml b/Cargo.toml index 1b530e9..55fa3a4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,10 @@ authors = [] edition = "2021" publish = false +[[bin]] +name = "migrate" +path = "src/bin/migrate.rs" + [dependencies] anyhow = { version = "1.0.81", features = ["backtrace"] } async-trait = "0.1.82" diff --git a/src/bin/migrate.rs b/src/bin/migrate.rs new file mode 100644 index 0000000..430ad4c --- /dev/null +++ b/src/bin/migrate.rs @@ -0,0 +1,299 @@ +//! This binary is used to migrate user IDs from base64 to hex encoding. +use std::{path::Path, str::FromStr}; + +use anyhow::{Context, Result}; +use famedly_sync::{ + get_next_zitadel_user, + user::{ExternalIdEncoding, User as SyncUser}, + zitadel::Zitadel as SyncZitadel, + Config, +}; +use tracing::level_filters::LevelFilter; + +#[tokio::main] +async fn main() -> Result<()> { + // Config + let config_path = + std::env::var("FAMEDLY_SYNC_CONFIG").unwrap_or_else(|_| "./config.yaml".to_owned()); + let config = Config::new(Path::new(&config_path))?; + + // Tracing + let subscriber = tracing_subscriber::FmtSubscriber::builder() + .with_max_level( + config + .log_level + .as_ref() + .map_or(Ok(LevelFilter::INFO), |s| LevelFilter::from_str(s))?, + ) + .finish(); + tracing::subscriber::set_global_default(subscriber) + .context("Setting default tracing subscriber failed")?; + + tracing::info!("Starting migration"); + tracing::debug!("Old external IDs will be base64 decoded and re-encoded as hex"); + tracing::debug!("Note: External IDs are stored in the nick_name field of the user's profile in Zitadel, often referred to as uid."); + + // Zitadel + let mut zitadel = SyncZitadel::new(&config).await?; + + // Detect external ID encoding based on a sample of users + let users_sample = zitadel.get_users_sample().await?; + let encoding = detect_database_encoding(users_sample); + + // Get a stream of all users + let mut stream = zitadel.list_users()?; + + // Process each user + while let Some((user, zitadel_id)) = 
get_next_zitadel_user(&mut stream, &mut zitadel).await? {
+        tracing::info!(?user, "Starting migration for user");
+
+        // Convert uid (=external ID, =nick_name) in Zitadel
+        let updated_user = user.create_user_with_converted_external_id(encoding)?;
+        tracing::debug!(?updated_user, "User updated");
+
+        zitadel.update_user(&zitadel_id, &user, &updated_user).await?;
+
+        tracing::info!(?user, ?updated_user, "User migrated");
+    }
+
+    tracing::info!("Migration completed.");
+    Ok(())
+}
+
+/// Detects the dominant external ID encoding (>90% of non-empty IDs) in a sample of users
+fn detect_database_encoding(users: Vec<SyncUser>) -> ExternalIdEncoding {
+    // Count various encoding signatures
+    let mut hex_count = 0;
+    let mut base64_count = 0;
+    let mut total = 0;
+
+    for user in users {
+        let nick_name = user.get_external_id();
+
+        if nick_name.is_empty() {
+            continue;
+        }
+        total += 1;
+
+        // Check hex signature (independent of base64: one ID may count for both)
+        if nick_name.chars().all(|c| c.is_ascii_hexdigit()) && nick_name.len() % 2 == 0 {
+            hex_count += 1;
+        }
+
+        // Check base64 signature
+        if nick_name.len() % 4 == 0
+            && nick_name
+                .chars()
+                .all(|c| c.is_ascii_alphanumeric() || c == '+' || c == '/' || c == '=')
+        {
+            base64_count += 1;
+        }
+    }
+
+    // Return early if no valid samples
+    if total == 0 {
+        return ExternalIdEncoding::Ambiguous;
+    }
+
+    // Use thresholds to determine encoding
+    let hex_ratio = f64::from(hex_count) / f64::from(total);
+    let base64_ratio = f64::from(base64_count) / f64::from(total);
+
+    // Require a strong majority (90%) for a format to be considered dominant.
+    // Hex is checked first because hex-looking IDs often match base64 as well.
+    // Anything else (mixed formats, or no dominant one) is ambiguous.
+    match (hex_ratio, base64_ratio) {
+        (h, _) if h > 0.9 => ExternalIdEncoding::Hex,
+        (_, b) if b > 0.9 => ExternalIdEncoding::Base64,
+        _ => ExternalIdEncoding::Ambiguous,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::{ExternalIdEncoding, SyncUser};
+
+    fn create_test_user(external_user_id: &str) -> SyncUser {
+        SyncUser::new(
+            "first name".to_owned(),
+            "last name".to_owned(),
+            "email@example.com".to_owned(),
+            None,
+            true,
+            None,
+            external_user_id.to_owned(),
+        )
+    }
+
+    fn run_detection_test(user_ids: Vec<&str>, expected_encoding: ExternalIdEncoding) {
+        let users: Vec<SyncUser> = user_ids
+            .into_iter()
+            .map(create_test_user)
+            .collect();
+
+        let detected = detect_database_encoding(users);
+        assert_eq!(
+            detected, expected_encoding,
+            "Expected {:?} but got {:?}",
+            expected_encoding, detected
+        );
+    }
+
+    fn run_conversion_test(
+        original_id: &str,
+        expected_encoding: ExternalIdEncoding,
+        expected_result: &str,
+    ) {
+        let user = create_test_user(original_id);
+        let migrated_user = user
+            .create_user_with_converted_external_id(expected_encoding)
+            .expect("Should successfully convert user");
+        assert_eq!(
+            migrated_user.get_external_id(),
+            expected_result,
+            "Unexpected conversion result"
+        );
+    }
+
+    #[test]
+    fn test_all_hex() {
+        // All users look like hex: "deadbeef", "cafebabe", "0123456789abcdef"
+        let user_ids = vec!["deadbeef", "cafebabe", "0123456789abcdef"];
+        run_detection_test(user_ids, ExternalIdEncoding::Hex);
+    }
+
+    #[test]
+    fn test_all_base64() {
+        // All users look like base64: "Y2FmZQ==", "Zm9v", "YmFy"
+        // "Y2FmZQ==" decodes to "cafe"
+        // "Zm9v" decodes to "foo"
+        // "YmFy" decodes to "bar"
+        // All are valid base64 and length % 4 == 0
+        let user_ids = vec!["Y2FmZQ==", "Zm9v", "YmFy"];
+        run_detection_test(user_ids, ExternalIdEncoding::Base64);
+    }
+
+    #[test]
+    fn test_mixed_ambiguous() {
+        // Half look hex (50%), all look base64 (100%) => detected as Base64
+        let user_ids = vec!["cafebabe", "deadbeef", "beefcafe", "Y2FmZQ==", "Zm9v", "YmFy"];
+        run_detection_test(user_ids, ExternalIdEncoding::Base64);
+    }
+
+    #[test]
+    fn test_edge_length_cases() {
+        // "cafe" is ambiguous (valid hex and base64)
+        // "cafeb" length is 5, not divisible by 2 or 4, so neither hex nor base64
+        // "abc" length is 3, not divisible by 4, and 'c' is hex valid but odd length ->
+        // not hex.
+        let user_ids = vec!["cafe", "cafeb", "abc"];
+        // "cafe" counts for both hex and base64, but "cafeb" and "abc" won't count
+        // for either. Out of 3, 1 counts as hex/base64 and 2 are plain. Ratios:
+        // hex = 1/3 ≈ 0.33, base64 = 1/3 ≈ 0.33, both below the 0.9 threshold.
+        run_detection_test(user_ids, ExternalIdEncoding::Ambiguous);
+    }
+
+    #[test]
+    fn test_invalid_characters() {
+        // "zzz" is not hex. It's also not base64-safe (though 'z' is alphanumeric,
+        // length=3 %4!=0) "+++" is not hex and length=3 not multiple of 4 for base64.
+        let user_ids = vec!["zzz", "+++"];
+        run_detection_test(user_ids, ExternalIdEncoding::Ambiguous);
+    }
+
+    #[test]
+    fn test_both_formats_significant() {
+        // 10 total users:
+        // - 3 hex-looking (30%); these also match the base64 signature
+        // - 4 base64-only (40%), so the base64 signature matches 7 (70%)
+        // - 3 plain (30%)
+        let user_ids = vec![
+            // Hex format users (30%)
+            "deadbeef", "cafebabe", "12345678",
+            // Base64 format users (40%)
+            "Y2FmZQ==", // "cafe"
+            "Zm9vYmFy", // "foobar"
+            "aGVsbG8=", // "hello"
+            "d29ybGQ=", // "world"
+            // Plain format users (30%)
+            "plain_1", "plain_2", "plain_3",
+        ];
+
+        // Hex ratio is 30% and base64 ratio is 70% (see above)
+        // Neither > 90% threshold
+        // Should detect as Ambiguous
+        run_detection_test(user_ids, ExternalIdEncoding::Ambiguous);
+    }
+
+    #[test]
+    fn test_near_threshold_hex() {
+        // Testing near 90% threshold for hex
+        // 9 hex users and 1 plain = 90% exactly
+        let user_ids = vec![
+            "deadbeef", "cafebabe", "beefcafe", "12345678", "87654321", "abcdef12", "34567890",
+            "98765432", "fedcba98", "plain_id",
+        ];
+        // hex_ratio = 9/10 = 0.9
+        // Code requires > 0.9, not >=, so this should be Ambiguous
+        run_detection_test(user_ids, ExternalIdEncoding::Ambiguous);
+    }
+
+    #[test]
+    fn test_near_threshold_base64() {
+        // Testing near 90% threshold for base64
+        // 9 base64 users and 1 plain = 90% exactly
+        let user_ids = vec![
+            "Y2FmZQ==", // cafe
+            "Zm9vYmFy", // foobar
+            "aGVsbG8=", // hello
+            "d29ybGQ=", // world
+            "dGVzdA==", // test
+            "YWJjZA==", // abcd
+            "eHl6Nzg=", // xyz78
+            "cXdlcnQ=", // qwert
+            "MTIzNDU=", // 12345
+            "plain_id",
+        ];
+        // base64_ratio = 9/10 = 0.9
+        // Code requires > 0.9, not >=, so this should be Ambiguous
+        run_detection_test(user_ids, ExternalIdEncoding::Ambiguous);
+    }
+
+    #[test]
+    fn test_empty_ids() {
+        // Empty IDs should be skipped. Only one non-empty user which is hex.
+        // hex_count=1, total=1 => ratio=1.0 > 0.9 => Hex
+        let user_ids = vec!["", "", "cafebabe"];
+        run_detection_test(user_ids, ExternalIdEncoding::Hex);
+    }
+
+    //
+    // Conversion Tests
+    //
+
+    #[test]
+    fn test_conversion_hex_to_hex() {
+        let original_id = "deadbeef";
+        // Expected hex, no changes should be made.
+        run_conversion_test(original_id, ExternalIdEncoding::Hex, "deadbeef");
+    }
+
+    #[test]
+    fn test_conversion_base64_to_hex() {
+        let original_id = "Y2FmZQ=="; // "cafe"
+
+        // Expected base64, we decode base64 => "cafe" and then hex encode the bytes of
+        // "cafe". "cafe" as ASCII: 0x63 0x61 0x66 0x65 in hex is "63616665"
+        run_conversion_test(original_id, ExternalIdEncoding::Base64, "63616665");
+    }
+
+    #[test]
+    fn test_conversion_plain_to_hex() {
+        let original_id = "plain_id";
+        // Expected plain without encoding, so just hex-encode the ASCII.
+ // 'p' = 0x70, 'l' = 0x6c, 'a' = 0x61, 'i' = 0x69, 'n' = 0x6e, '_'=0x5f, + // 'i'=0x69, 'd'=0x64 => "706c61696e5f6964" + run_conversion_test(original_id, ExternalIdEncoding::Plain, "706c61696e5f6964"); + } +} diff --git a/src/lib.rs b/src/lib.rs index 939b603..de0be42 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,8 +6,8 @@ use zitadel::Zitadel; mod config; mod sources; -mod user; -mod zitadel; +pub mod user; +pub mod zitadel; use std::collections::VecDeque; @@ -21,7 +21,7 @@ use sources::{csv::CsvSource, ldap::LdapSource, ukt::UktSource, Source}; /// Helper function to add metadata to streamed zitadel users // TODO: If async closures become a reality, this should be factored // into the `zitadel::search_result_to_user` function -async fn get_next_zitadel_user( +pub async fn get_next_zitadel_user( stream: &mut (impl Stream> + Send + Unpin), zitadel: &mut Zitadel, ) -> Result> { diff --git a/src/user.rs b/src/user.rs index 19d2abd..b975e77 100644 --- a/src/user.rs +++ b/src/user.rs @@ -1,14 +1,28 @@ //! User data helpers use anyhow::{anyhow, Context, Result}; +use base64::{engine::general_purpose, Engine as _}; use uuid::{uuid, Uuid}; use zitadel_rust_client::v2::users::HumanUser; /// The Famedly UUID namespace to use to generate v5 UUIDs. 
const FAMEDLY_NAMESPACE: Uuid = uuid!("d9979cff-abee-4666-bc88-1ec45a843fb8"); +/// The encoding of the external ID in the database +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum ExternalIdEncoding { + /// The external ID is stored as a hex string + Hex, + /// The external ID is stored as a base64 string + Base64, + /// The external ID is stored as a plain string + Plain, + /// The encoding could not be determined + Ambiguous, +} + /// Source-agnostic representation of a user #[derive(Clone)] -pub(crate) struct User { +pub struct User { /// The user's first name pub(crate) first_name: String, /// The user's last name @@ -26,6 +40,20 @@ pub(crate) struct User { } impl User { + /// Create a new user instance, used in tests + #[allow(clippy::must_use_candidate)] + pub fn new( + first_name: String, + last_name: String, + email: String, + phone: Option, + enabled: bool, + preferred_username: Option, + external_user_id: String, + ) -> Self { + Self { first_name, last_name, email, phone, enabled, preferred_username, external_user_id } + } + /// Convert a Zitadel user to our internal representation pub fn try_from_zitadel_user(user: HumanUser, external_id: String) -> Result { let first_name = user @@ -60,10 +88,17 @@ impl User { } /// Get a display name for this user + #[must_use] pub fn get_display_name(&self) -> String { format!("{}, {}", self.last_name, self.first_name) } + /// Get the external user ID + #[must_use] + pub fn get_external_id(&self) -> &str { + &self.external_user_id + } + /// Get the external user ID in raw byte form pub fn get_external_id_bytes(&self) -> Result> { // This looks ugly at a glance, since we get the original @@ -78,6 +113,92 @@ impl User { pub fn get_famedly_uuid(&self) -> Result { Ok(Uuid::new_v5(&FAMEDLY_NAMESPACE, self.get_external_id_bytes()?.as_slice()).to_string()) } + + /// Convert external user ID to a new format based on the detected encoding + pub fn create_user_with_converted_external_id( + &self, + expected_encoding: 
ExternalIdEncoding, + ) -> Result { + // Double check the encoding + let detected_encoding = match &self.external_user_id { + s if s.is_empty() => { + tracing::warn!(?self, "Skipping user due to empty uid"); + return Ok(self.clone()); + } + s if s.chars().all(|c| c.is_ascii_hexdigit()) && s.len() % 2 == 0 => { + // Looks like hex encoding + if expected_encoding != ExternalIdEncoding::Hex { + tracing::warn!( + ?self, + ?expected_encoding, + detected_encoding = ?ExternalIdEncoding::Hex, + "Encoding mismatch detected" + ); + } + ExternalIdEncoding::Hex + } + s if s + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '+' || c == '/' || c == '=') + && s.len() % 4 == 0 => + { + // Looks like base64 encoding + if expected_encoding != ExternalIdEncoding::Base64 { + tracing::warn!( + ?self, + ?expected_encoding, + detected_encoding = ?ExternalIdEncoding::Base64, + "Encoding mismatch detected" + ); + } + ExternalIdEncoding::Base64 + } + _ => { + // Plain or unknown encoding + if expected_encoding != ExternalIdEncoding::Plain { + tracing::warn!( + ?self, + ?expected_encoding, + detected_encoding = ?ExternalIdEncoding::Plain, + "Encoding mismatch detected" + ); + } + ExternalIdEncoding::Plain + } + }; + + let new_external_id = match expected_encoding { + ExternalIdEncoding::Hex => self.external_user_id.clone(), + ExternalIdEncoding::Base64 => decode_base64_or_fallback( + &self.external_user_id, + "Failed to decode base64 ID despite database heuristic", + ), + ExternalIdEncoding::Plain => hex::encode(self.external_user_id.as_bytes()), + ExternalIdEncoding::Ambiguous => { + tracing::warn!( + ?self, + "Using case-by-case detected encoding due to ambiguous expected encoding" + ); + match detected_encoding { + ExternalIdEncoding::Hex => self.external_user_id.clone(), + ExternalIdEncoding::Base64 => decode_base64_or_fallback( + &self.external_user_id, + "Failed to decode base64 ID despite case-by-case handling", + ), + ExternalIdEncoding::Plain => 
hex::encode(self.external_user_id.as_bytes()), + ExternalIdEncoding::Ambiguous => { + tracing::error!( + ?self, + "Unreachable code? Ambiguous encoding detected despite case-by-case handling." + ); + unreachable!("Ambiguous encoding should not be detected here"); + } + } + } + }; + + Ok(Self { external_user_id: new_external_id, ..self.clone() }) + } } impl PartialEq for User { @@ -105,3 +226,14 @@ impl std::fmt::Debug for User { .finish() } } + +/// Helper function for base64 decoding with fallback +fn decode_base64_or_fallback(id: &str, warning_message: &str) -> String { + match general_purpose::STANDARD.decode(id) { + Ok(decoded) => hex::encode(decoded), + Err(_) => { + tracing::warn!(?id, "{}", warning_message); + hex::encode(id.as_bytes()) + } + } +} diff --git a/src/zitadel.rs b/src/zitadel.rs index 957d743..9c72748 100644 --- a/src/zitadel.rs +++ b/src/zitadel.rs @@ -4,7 +4,7 @@ use std::path::PathBuf; use anyhow::{anyhow, Context, Result}; use base64::prelude::{Engine, BASE64_STANDARD}; use futures::{Stream, StreamExt}; -use serde::Deserialize; +use serde::{Deserialize, Serialize}; use url::Url; use zitadel_rust_client::{ v1::Zitadel as ZitadelClientV1, @@ -20,6 +20,7 @@ use zitadel_rust_client::{ use crate::{ config::{Config, FeatureFlags}, + get_next_zitadel_user, user::User, FeatureFlag, }; @@ -27,9 +28,12 @@ use crate::{ /// The Zitadel project role to assign to users. 
const FAMEDLY_USER_ROLE: &str = "User";
 
+/// The number of users to sample for encoding detection
+const USER_SAMPLE_SIZE: usize = 50;
+
 /// A very high-level Zitadel zitadel_client
-#[derive(Clone)]
-pub(crate) struct Zitadel {
+#[derive(Clone, Debug)]
+pub struct Zitadel {
     /// Zitadel configuration
     zitadel_config: ZitadelConfig,
     /// Optional set of features
@@ -43,7 +47,7 @@ pub(crate) struct Zitadel {
 
 impl Zitadel {
     /// Construct the Zitadel instance
-    pub(crate) async fn new(config: &Config) -> Result<Self> {
+    pub async fn new(config: &Config) -> Result<Self> {
         let zitadel_client =
             ZitadelClient::new(config.zitadel.url.clone(), config.zitadel.key_file.clone())
                 .await
@@ -106,6 +110,36 @@ impl Zitadel {
         })
     }
 
+    /// Return up to `USER_SAMPLE_SIZE` human users, ordered by nick name (ascending).
+    /// This deterministic (not random) sample is used to detect the external ID encoding.
+    pub async fn get_users_sample(&mut self) -> Result<Vec<User>> {
+        let mut stream = self
+            .zitadel_client
+            .list_users(
+                ListUsersRequest::new(vec![
+                    SearchQuery::new().with_type_query(TypeQuery::new(Userv2Type::Human))
+                ])
+                .with_asc(true)
+                .with_sorting_column(UserFieldName::NickName)
+                .with_page_size(USER_SAMPLE_SIZE),
+            )
+            .map(|stream| {
+                // The page size presumably bounds only one request; cap the sample explicitly
+                stream.take(USER_SAMPLE_SIZE).map(|user| {
+                    let id = user.user_id().ok_or(anyhow!("Missing Zitadel user ID"))?.clone();
+                    let user = search_result_to_user(user)?;
+                    Ok((user, id))
+                })
+            })?;
+
+        let mut users = Vec::new();
+        while let Some((user, _)) = get_next_zitadel_user(&mut stream, self).await? {
+            users.push(user);
+        }
+
+        Ok(users)
+    }
+
     /// Delete a Zitadel user
     pub async fn delete_user(&mut self, zitadel_id: &str) -> Result<()> {
         tracing::info!("Deleting user with Zitadel ID: {}", zitadel_id);
@@ -163,10 +197,7 @@ impl Zitadel {
 
         if self.feature_flags.is_enabled(FeatureFlag::SsoLogin) {
             user.set_idp_links(vec![IdpLink::new()
-                .with_user_id(
-                    get_zitadel_encoded_id(imported_user.get_external_id_bytes()?)
- .context("Failed to set IDP user ID")?, - ) + .with_user_id(get_zitadel_encoded_id(imported_user.get_external_id_bytes()?)) .with_idp_id(self.zitadel_config.idp_id.clone()) .with_user_name(imported_user.email.clone())]); } @@ -236,13 +267,15 @@ impl Zitadel { if old_user.first_name != updated_user.first_name || old_user.last_name != updated_user.last_name + || old_user.external_user_id != updated_user.external_user_id { request.set_profile( SetHumanProfile::new( updated_user.first_name.clone(), updated_user.last_name.clone(), ) - .with_display_name(updated_user.get_display_name()), + .with_display_name(updated_user.get_display_name()) + .with_nick_name(updated_user.external_user_id.clone()), ); } @@ -295,7 +328,7 @@ impl Zitadel { } /// Convert a Zitadel search result to a user -fn search_result_to_user(user: ZitadelUser) -> Result { +pub fn search_result_to_user(user: ZitadelUser) -> Result { let human_user = user.human().ok_or(anyhow!("Machine user found in human user search"))?; let nick_name = human_user .profile() @@ -319,16 +352,14 @@ fn search_result_to_user(user: ZitadelUser) -> Result { /// create collisions (although this is unlikely). /// /// Only use this for Zitadel support. 
-pub fn get_zitadel_encoded_id(external_id_bytes: Vec) -> Result { - Ok(if let Ok(encoded_id) = String::from_utf8(external_id_bytes.clone()) { - encoded_id - } else { - BASE64_STANDARD.encode(external_id_bytes) - }) +#[allow(clippy::must_use_candidate)] +pub fn get_zitadel_encoded_id(external_id_bytes: Vec) -> String { + String::from_utf8(external_id_bytes.clone()) + .unwrap_or_else(|_| BASE64_STANDARD.encode(external_id_bytes)) } /// Configuration related to Famedly Zitadel -#[derive(Debug, Clone, Deserialize, PartialEq)] +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)] pub struct ZitadelConfig { /// The URL for Famedly Zitadel authentication pub url: Url, diff --git a/tests/e2e.rs b/tests/e2e.rs index a489b9f..f7923a5 100644 --- a/tests/e2e.rs +++ b/tests/e2e.rs @@ -4,12 +4,14 @@ /// E2E integration tests use std::{collections::HashSet, path::Path, time::Duration}; +use base64::{engine::general_purpose, Engine as _}; use famedly_sync::{ csv_test_helpers::temp_csv_file, - perform_sync, + get_next_zitadel_user, perform_sync, ukt_test_helpers::{ get_mock_server_url, prepare_endpoint_mock, prepare_oauth2_mock, ENDPOINT_PATH, OAUTH2_PATH, }, + zitadel::Zitadel as SyncZitadel, AttributeMapping, Config, FeatureFlag, }; use ldap3::{Ldap as LdapClient, LdapConnAsync, LdapConnSettings, Mod}; @@ -1395,6 +1397,213 @@ async fn test_e2e_sso_linking() { ); } +#[test(tokio::test)] +#[test_log(default_log_filter = "debug")] +async fn test_e2e_migrate_base64_id() { + let config = ldap_config().await; + cleanup_test_users(config).await; + + // The uid for this test must be such that encodes to such base64 string that + // doesn't look like hex. Otherwise, we need to have a sample of users so the + // script determines encoding heuristically. 
This is tested later in + // test_e2e_migrate_ambiguous_id + let uid = "base64_test"; + let email = "migrate_test@famedly.de"; + let user_name = "migrate_user"; + + // Base64-encoded External ID + let base64_id = general_purpose::STANDARD.encode(uid); + + run_migration_test(config, email, user_name, base64_id.clone(), hex::encode(uid.as_bytes())) + .await; +} + +#[test(tokio::test)] +#[test_log(default_log_filter = "debug")] +async fn test_e2e_migrate_plain_id() { + let config = ldap_config().await; + cleanup_test_users(config).await; + + let uid = "plain_test"; + let email = "plain_test@famedly.de"; + let user_name = "plain_user"; + + // Plain unencoded External ID + let plain_id = uid.to_owned(); + + run_migration_test(config, email, user_name, plain_id.clone(), hex::encode(uid.as_bytes())) + .await; +} + +#[test(tokio::test)] +#[test_log(default_log_filter = "debug")] +async fn test_e2e_migrate_hex_id() { + let config = ldap_config().await; + cleanup_test_users(config).await; + + let uid = "hex_test"; + let email = "hex_test@famedly.de"; + let user_name = "hex_user"; + + // Already hex-encoded External ID + let hex_id = hex::encode(uid.as_bytes()); + + run_migration_test(config, email, user_name, hex_id.clone(), hex_id.clone()).await; +} + +#[test(tokio::test)] +#[test_log(default_log_filter = "debug")] +async fn test_e2e_migrate_empty_id() { + let config = ldap_config().await; + + let email = "empty_id@famedly.de"; + let user_name = "empty_user"; + + // Empty External ID + let empty_id = "".to_owned(); + + run_migration_test(config, email, user_name, empty_id.clone(), empty_id.clone()).await; +} + +#[test(tokio::test)] +#[test_log(default_log_filter = "debug")] +async fn test_e2e_migrate_ambiguous_id_as_base64() { + let config = ldap_config().await; + cleanup_test_users(config).await; + + let email = "ambiguous_id@famedly.de"; + let user_name = "ambiguous_user_one"; + + // "cafe" is hex (ca fe) and also appears as valid base64 + // (all alphanumeric and length 
% 4 == 0) + let ambiguous_id = "cafe".to_owned(); + + // The migration logic should decide to treat it as hex when looking at it on + // its own, because we check for hex first (it's a subset of base64 and thus + // more restrictive) + let expected_id = ambiguous_id.clone(); + run_migration_test(config, email, user_name, ambiguous_id, expected_id).await; + + // When we create some base64-only encoded values in the database, the migration + // logic should heuristically find out, that the DB has external IDs encoded + // with base64 and thus treat the ambiguous ID as base64 even though it can be + // both base64 and hex + let base_64_user = ImportHumanUserRequest { + user_name: "another_test".to_owned(), + profile: Some(Profile { + first_name: "Test".to_owned(), + last_name: "User".to_owned(), + display_name: "User, Test".to_owned(), + gender: Gender::Unspecified.into(), + nick_name: "Z9FmZQ==".to_owned(), // base64 encoded + preferred_language: String::default(), + }), + email: Some(Email { + email: "another_test@example.com".to_owned(), + is_email_verified: true, + }), + phone: Some(Phone { phone: "+12345678901".to_owned(), is_phone_verified: true }), + password: String::default(), + hashed_password: None, + password_change_required: false, + request_passwordless_registration: false, + otp_code: String::default(), + idps: vec![], + }; + + let zitadel = open_zitadel_connection().await; + let temp_user = zitadel + .create_human_user(&config.zitadel.organization_id, base_64_user) + .await + .expect("Failed to create user"); + + let user_name = "ambiguous_user_two"; + + // "beefcafe" appears both as a valid hex and base64 + let ambiguous_id = "beefcafe".to_owned(); + + let decoded = + general_purpose::STANDARD.decode(&ambiguous_id).expect("Test ID should be valid base64"); + let expected_id = hex::encode(decoded); + + run_migration_test(config, email, user_name, ambiguous_id, expected_id).await; + + zitadel.remove_user(temp_user).await.expect("Failed to delete user"); 
+}
+
+#[test(tokio::test)]
+#[test_log(default_log_filter = "debug")]
+async fn test_e2e_migrate_then_ldap_sync() {
+    let config = ldap_config().await;
+    cleanup_test_users(config).await;
+
+    let uid = "migrate_sync_test_ldap";
+    let email = "migrate_sync_ldap@famedly.de";
+    let user_name = "migrate_sync_user_ldap";
+
+    // Base64-encoded ID
+    let base64_id = general_purpose::STANDARD.encode(uid);
+
+    run_migration_test(config, email, user_name, base64_id.clone(), hex::encode(uid.as_bytes()))
+        .await;
+
+    // LDAP with updated First Name
+    let config = ldap_config().await;
+    let mut ldap = Ldap::new().await;
+    ldap.create_user(
+        "New First Name",
+        "User",
+        "User, Test", // !NOTE: Display name from LDAP isn't picked up by the sync
+        email,
+        Some("+12345678901"),
+        uid,
+        false,
+    )
+    .await;
+
+    perform_sync(config).await.expect("LDAP sync failed");
+
+    // Verify both External ID encoding and updated First Name
+    let zitadel = open_zitadel_connection().await;
+    let user = zitadel
+        .get_user_by_login_name(user_name)
+        .await
+        .expect("Failed to get user after LDAP sync")
+        .expect("User not found after LDAP sync");
+
+    match user.r#type {
+        Some(UserType::Human(human)) => {
+            let profile = human.profile.expect("User lacks profile after LDAP sync");
+            let expected_hex_id = hex::encode(uid.as_bytes());
+            assert_eq!(
+                profile.nick_name, expected_hex_id,
+                "External ID not in hex encoding after LDAP sync for user '{}'",
+                email
+            );
+            assert_eq!(
+                profile.first_name, "New First Name",
+                "First name was not updated by LDAP sync for user '{}'",
+                email
+            );
+        }
+        _ => panic!("User lacks human details after LDAP sync for user '{}'", email),
+    }
+}
+
+#[test(tokio::test)]
+#[test_log(default_log_filter = "debug")]
+async fn test_e2e_migrate_dry_run() {
+    let mut dry_run_config = ldap_config().await.clone();
+    dry_run_config.feature_flags.push(FeatureFlag::DryRun);
+
+    let uid = "plain_test_dry_run";
+    let email = "plain_test_dry_run@famedly.de";
+    let user_name =
"plain_user_dry_run"; + let plain_id = uid.to_owned(); + + run_migration_test(&dry_run_config, email, user_name, plain_id.clone(), plain_id).await; +} + struct Ldap { client: LdapClient, } @@ -1529,6 +1738,122 @@ async fn open_zitadel_connection() -> Zitadel { .expect("failed to set up Zitadel client") } +/// Helper function to create a user, run migration, and verify the encoding. +async fn run_migration_test( + config: &Config, + email: &str, + user_name: &str, + initial_nick_name: String, + expected_nick_name: String, +) { + // Prepare Zitadel client + let zitadel = open_zitadel_connection().await; + + // Create user in Zitadel + let user = ImportHumanUserRequest { + user_name: user_name.to_owned(), + profile: Some(Profile { + first_name: "Test".to_owned(), + last_name: "User".to_owned(), + display_name: "User, Test".to_owned(), + gender: Gender::Unspecified.into(), + nick_name: initial_nick_name.clone(), + preferred_language: String::default(), + }), + email: Some(Email { email: email.to_owned(), is_email_verified: true }), + phone: Some(Phone { phone: "+12345678901".to_owned(), is_phone_verified: true }), + password: String::default(), + hashed_password: None, + password_change_required: false, + request_passwordless_registration: false, + otp_code: String::default(), + idps: vec![], + }; + + zitadel + .create_human_user(&config.zitadel.organization_id, user) + .await + .expect("Failed to create user"); + + // Run migration + run_migration_binary(config.feature_flags.contains(&FeatureFlag::DryRun)); + + // Verify External ID after migration + let user = zitadel + .get_user_by_login_name(user_name) + .await + .expect("Failed to get user") + .expect("User not found"); + + match user.r#type { + Some(user_type) => { + if let UserType::Human(human) = user_type { + let profile = human.profile.expect("User lacks profile"); + assert_eq!( + profile.nick_name, expected_nick_name, + "Nickname encoding mismatch for user '{}'", + email + ); + } else { + panic!("User is not 
of type Human for user '{}'", email);
+            }
+        }
+        None => panic!("User type is None for user '{}'", email),
+    }
+}
+
+/// Run the `migrate` binary against a temp copy of the test config, optionally as dry run.
+fn run_migration_binary(is_dry_run: bool) {
+    let temp_dir = tempfile::tempdir().unwrap();
+
+    // Copy service-user.json to temp location
+    let mut key_file_path = temp_dir.path().to_path_buf();
+    key_file_path.push("zitadel");
+    std::fs::create_dir_all(&key_file_path).unwrap();
+    key_file_path.push("service-user.json");
+
+    std::fs::copy("tests/environment/zitadel/service-user.json", &key_file_path).unwrap();
+
+    // Read and modify config
+    let mut config_path = std::env::current_dir().unwrap();
+    config_path.push("tests/environment/config.yaml");
+    let mut config_content = std::fs::read_to_string(&config_path).unwrap();
+
+    // Update key_file path to be relative to temp config
+    config_content = config_content.replace(
+        "key_file: tests/environment/zitadel/service-user.json",
+        &format!("key_file: {}", key_file_path.to_str().unwrap()),
+    );
+
+    // Add dry run flag if needed
+    if is_dry_run {
+        config_content = config_content.replace("feature_flags:", "feature_flags:\n - dry_run");
+    }
+
+    // Write config to temp dir
+    let config_file = temp_dir.path().join("config.yaml");
+    std::fs::write(&config_file, &config_content).unwrap();
+
+    // Run migration with temp config. The variable is set on the child process only:
+    // mutating this process's environment would race with tests on other threads.
+    let status = std::process::Command::new(env!("CARGO_BIN_EXE_migrate"))
+        .env("FAMEDLY_SYNC_CONFIG", &config_file)
+        .status()
+        .expect("Failed to execute migration binary");
+    assert!(status.success(), "Migration binary exited with status: {}", status);
+}
+
+async fn cleanup_test_users(config: &Config) {
+    let mut zitadel = SyncZitadel::new(config).await.expect("failed to set up Zitadel client");
+    let mut stream = zitadel.list_users().expect("failed to list users");
+
+    while let Some(zitadel_user) =
+        get_next_zitadel_user(&mut stream, &mut zitadel).await.expect("failed to get next user")
+    {
+
zitadel.delete_user(&zitadel_user.1).await.expect("failed to delete user"); + } +} + /// Get the module's test environment config async fn ldap_config() -> &'static Config { CONFIG_WITH_LDAP