
feat: Migration script for LDAP sync #93

Merged: 2 commits, Dec 10, 2024
4 changes: 4 additions & 0 deletions Cargo.toml
@@ -5,6 +5,10 @@ authors = []
edition = "2021"
publish = false

[[bin]]
name = "migrate"
path = "src/bin/migrate.rs"

[dependencies]
anyhow = { version = "1.0.81", features = ["backtrace"] }
async-trait = "0.1.82"
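With the new [[bin]] target, the migration is presumably run as a one-off via cargo run --bin migrate (or the installed migrate binary), picking up its configuration from the FAMEDLY_SYNC_CONFIG environment variable, or ./config.yaml by default, as shown in main below.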
257 changes: 257 additions & 0 deletions src/bin/migrate.rs
@@ -0,0 +1,257 @@
//! This binary is used to migrate user IDs from base64 to hex encoding.
use std::{path::Path, str::FromStr};

use anyhow::{Context, Result};
use famedly_sync::{
get_next_zitadel_user,
user::{ExternalIdEncoding, User as SyncUser},
zitadel::Zitadel as SyncZitadel,
Config,
};
use tracing::level_filters::LevelFilter;

#[tokio::main]
async fn main() -> Result<()> {
// Config
let config_path =
std::env::var("FAMEDLY_SYNC_CONFIG").unwrap_or_else(|_| "./config.yaml".to_owned());
let config = Config::new(Path::new(&config_path))?;

// Tracing
let subscriber = tracing_subscriber::FmtSubscriber::builder()
.with_max_level(
config
.log_level
.as_ref()
.map_or(Ok(LevelFilter::INFO), |s| LevelFilter::from_str(s))?,
)
.finish();
tracing::subscriber::set_global_default(subscriber)
.context("Setting default tracing subscriber failed")?;

tracing::info!("Starting migration");
tracing::debug!("Old external IDs will be base64 decoded and re-encoded as hex");
tracing::debug!("Note: External IDs are stored in the nick_name field of the user's profile in Zitadel, often referred to as uid.");

// Zitadel
let mut zitadel = SyncZitadel::new(&config).await?;

// Detect external ID encoding based on a sample of users
let users_sample = zitadel.get_users_sample().await?;
let encoding = detect_database_encoding(users_sample);

// Get a stream of all users
let mut stream = zitadel.list_users()?;

// Process each user
while let Some((user, zitadel_id)) = get_next_zitadel_user(&mut stream, &mut zitadel).await? {
tracing::info!(?user, "Starting migration for user");

// Convert uid (=external ID, =nick_name) in Zitadel
let updated_user = user.create_user_with_converted_external_id(encoding)?;
tracing::debug!(?updated_user, "User updated");

zitadel.update_user(&zitadel_id, &user, &updated_user).await?;

tracing::info!(?user, ?updated_user, "User migrated");
}

tracing::info!("Migration completed.");
Ok(())
}

/// Detects the most likely encoding scheme used across all user IDs
fn detect_database_encoding(users: Vec<SyncUser>) -> ExternalIdEncoding {
// Count various encoding signatures
let mut hex_count = 0;
let mut base64_count = 0;
let mut total = 0;

for user in users {
let nick_name = user.get_external_id();

if nick_name.is_empty() {
continue;
}
total += 1;

// Check hex first (more restrictive)
if nick_name.chars().all(|c| c.is_ascii_hexdigit()) && nick_name.len() % 2 == 0 {
hex_count += 1;
}

// Check base64 signature
if nick_name.len() % 4 == 0
&& nick_name
.chars()
.all(|c| c.is_ascii_alphanumeric() || c == '+' || c == '/' || c == '=')
{
base64_count += 1;
}
}

// Use thresholds to determine encoding
let hex_ratio = f64::from(hex_count) / f64::from(total);
let base64_ratio = f64::from(base64_count) / f64::from(total);
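	// If every sampled user had an empty ID, `total` is 0 and both ratios are NaN;
	// NaN comparisons are always false, so the fallback below is Plain.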

if hex_ratio > 0.8 {
ExternalIdEncoding::Hex
} else if base64_ratio > 0.8 {
ExternalIdEncoding::Base64
} else {
ExternalIdEncoding::Plain
}
}

#[cfg(test)]
mod tests {
use super::*;
use crate::{ExternalIdEncoding, SyncUser};

fn create_test_user(external_user_id: &str) -> SyncUser {
SyncUser::new(
"first name".to_owned(),
"last name".to_owned(),
"email@example.com".to_owned(),
None,
true,
None,
external_user_id.to_owned(),
)
}

fn run_detection_test(user_ids: Vec<&str>, expected_encoding: ExternalIdEncoding) {
let users: Vec<SyncUser> = user_ids
.into_iter()
.map(create_test_user) // build a SyncUser for each raw external ID
.collect();

let detected = detect_database_encoding(users);
assert_eq!(
detected, expected_encoding,
"Expected {:?} but got {:?}",
expected_encoding, detected
);
}

fn run_conversion_test(
original_id: &str,
expected_encoding: ExternalIdEncoding,
expected_result: &str,
) {
let user = create_test_user(original_id);
let migrated_user = user
.create_user_with_converted_external_id(expected_encoding)
.expect("Should successfully convert user");
assert_eq!(
migrated_user.get_external_id(),
expected_result,
"Unexpected conversion result"
);
}

#[tokio::test]
async fn test_all_hex() {
// All users look like hex: "deadbeef", "cafebabe", "0123456789abcdef"
let user_ids = vec!["deadbeef", "cafebabe", "0123456789abcdef"];
run_detection_test(user_ids, ExternalIdEncoding::Hex);
}

#[tokio::test]
async fn test_all_base64() {
// All users look like base64: "Y2FmZQ==", "Zm9v", "YmFy"
// "Y2FmZQ==" decodes to "cafe"
// "Zm9v" decodes to "foo"
// "YmFy" decodes to "bar"
// All are valid base64 and length % 4 == 0
let user_ids = vec!["Y2FmZQ==", "Zm9v", "YmFy"];
run_detection_test(user_ids, ExternalIdEncoding::Base64);
}

#[tokio::test]
async fn test_mixed_ambiguous() {
// Some look hex, all look base64
let user_ids = vec!["cafebabe", "deadbeef", "beefcafe", "Y2FmZQ==", "Zm9v", "YmFy"];
run_detection_test(user_ids, ExternalIdEncoding::Base64);
}

#[tokio::test]
async fn test_edge_length_cases() {
// "cafe" is ambiguous (valid hex and base64)
// "cafeb" length is 5, not divisible by 2 or 4, so neither hex nor base64
// "abc" length is 3, not divisible by 4, and 'c' is hex valid but odd length ->
// not hex.
let user_ids = vec!["cafe", "cafeb", "abc"];
// "cafe" might count for both hex and base64, but "cafeb" and "abc" won't count
// for either. Out of 3, maybe 1 counts as hex/base64 and 2 are plain. Ratios:
// hex = 1/3 ≈ 0.33, base64 = 1/3 ≈ 0.33, both < 0.8.
run_detection_test(user_ids, ExternalIdEncoding::Plain);
}

#[tokio::test]
async fn test_invalid_characters() {
// "zzz" is not hex. It's also not base64-safe (though 'z' is alphanumeric,
// length=3 %4!=0) "+++" is not hex and length=3 not multiple of 4 for base64.
let user_ids = vec!["zzz", "+++"];
run_detection_test(user_ids, ExternalIdEncoding::Plain);
}

#[tokio::test]
async fn test_near_threshold_hex() {
// We want a scenario where hex ratio just hits 0.8.
// Suppose we have 5 users total, 4 of which are hex. 4/5 = 0.8
// If 4 pass as hex, and maybe 1 is something else.
let user_ids = vec!["deadbeef", "cafebabe", "beefcafe", "0123456789abcdef", "plain_id"];
// The 4 hex IDs will count, "plain_id" won't count for either.
// hex_ratio = 4/5=0.8. The code uses `>` 0.8 not `>=`, so 0.8 is NOT greater
// than 0.8. This test checks that boundary condition. Expected = Plain since
// not strictly greater.
run_detection_test(user_ids, ExternalIdEncoding::Plain);
}

#[tokio::test]
async fn test_near_threshold_base64() {
// Similar scenario for base64
// 5 users, 4 are valid base64. 4/5=0.8 exactly.
let user_ids = vec!["Y2FmZQ==", "Zm9v", "YmFy", "YQ==", "plain_id"];
// Again hits exactly 0.8, not greater, expect Plain
run_detection_test(user_ids, ExternalIdEncoding::Plain);
}

#[tokio::test]
async fn test_empty_ids() {
// Empty IDs should be skipped. Only one non-empty user which is hex.
// hex_count=1, total=1 => ratio=1.0 > 0.8 => Hex
let user_ids = vec!["", "", "cafebabe"];
run_detection_test(user_ids, ExternalIdEncoding::Hex);
}

//
// Conversion Tests
//

#[tokio::test]
async fn test_conversion_hex_to_hex() {
let original_id = "deadbeef";
// Expected hex, no changes should be made.
run_conversion_test(original_id, ExternalIdEncoding::Hex, "deadbeef");
}

#[tokio::test]
async fn test_conversion_base64_to_hex() {
let original_id = "Y2FmZQ=="; // "cafe"

// Expected base64, we decode base64 => "cafe" and then hex encode the bytes of
// "cafe". "cafe" as ASCII: 0x63 0x61 0x66 0x65 in hex is "63616665"
run_conversion_test(original_id, ExternalIdEncoding::Base64, "63616665");
}

#[tokio::test]
async fn test_conversion_plain_to_hex() {
let original_id = "plain_id";
// Expected plain without encoding, so just hex-encode the ASCII.
// 'p' = 0x70, 'l' = 0x6c, 'a' = 0x61, 'i' = 0x69, 'n' = 0x6e, '_'=0x5f,
// 'i'=0x69, 'd'=0x64 => "706c61696e5f6964"
run_conversion_test(original_id, ExternalIdEncoding::Plain, "706c61696e5f6964");
}
}
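The conversion itself lives in create_user_with_converted_external_id in user.rs, which this diff does not touch. As a minimal sketch of what the Base64 arm would amount to (an assumption here, including the use of the base64 and hex crates, neither of which appears in the dependency hunk above):

use base64::{engine::general_purpose::STANDARD, Engine as _};

// Illustrative only: decode the stored base64 ID, then re-encode the raw bytes as hex.
fn base64_id_to_hex(id: &str) -> anyhow::Result<String> {
	let bytes = STANDARD.decode(id)?; // "Y2FmZQ==" -> [0x63, 0x61, 0x66, 0x65]
	Ok(hex::encode(bytes)) // -> "63616665"
}

This is exactly the mapping that test_conversion_base64_to_hex above checks.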
6 changes: 3 additions & 3 deletions src/lib.rs
@@ -6,8 +6,8 @@ use zitadel::Zitadel;

mod config;
mod sources;
mod user;
mod zitadel;
pub mod user;
pub mod zitadel;

use std::collections::VecDeque;

@@ -21,7 +21,7 @@ use sources::{csv::CsvSource, ldap::LdapSource, ukt::UktSource, Source};
/// Helper function to add metadata to streamed zitadel users
// TODO: If async closures become a reality, this should be factored
// into the `zitadel::search_result_to_user` function
async fn get_next_zitadel_user(
pub async fn get_next_zitadel_user(
stream: &mut (impl Stream<Item = Result<(User, String)>> + Send + Unpin),
zitadel: &mut Zitadel,
) -> Result<Option<(User, String)>> {