diff --git a/.vscode/cspell.dictionaries/jargon.wordlist.txt b/.vscode/cspell.dictionaries/jargon.wordlist.txt index 7ba13ab80c1..e4ab23afd80 100644 --- a/.vscode/cspell.dictionaries/jargon.wordlist.txt +++ b/.vscode/cspell.dictionaries/jargon.wordlist.txt @@ -227,3 +227,8 @@ ENOTSUP enotsup SETFL tmpfs + +Hijri +Nowruz +charmap +hijri diff --git a/Cargo.lock b/Cargo.lock index 5e10d0d54a2..ae4d7f73a5f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3383,6 +3383,7 @@ dependencies = [ "codspeed-divan-compat", "fluent", "icu_calendar", + "icu_locale", "jiff", "nix", "parse_datetime", diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 424578d1dfd..4094c806bf4 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -1718,6 +1718,8 @@ version = "0.6.0" dependencies = [ "clap", "fluent", + "icu_calendar", + "icu_locale", "jiff", "nix", "parse_datetime", diff --git a/src/uu/date/Cargo.toml b/src/uu/date/Cargo.toml index 9b927b1e2dd..8820d96b98c 100644 --- a/src/uu/date/Cargo.toml +++ b/src/uu/date/Cargo.toml @@ -19,12 +19,14 @@ workspace = true path = "src/date.rs" [features] -i18n-datetime = ["uucore/i18n-datetime", "icu_calendar"] +default = ["i18n-datetime"] +i18n-datetime = ["uucore/i18n-datetime", "dep:icu_calendar", "dep:icu_locale"] [dependencies] clap = { workspace = true } fluent = { workspace = true } icu_calendar = { workspace = true, optional = true } +icu_locale = { workspace = true, optional = true } jiff = { workspace = true, features = [ "tzdb-bundle-platform", "tzdb-zoneinfo", diff --git a/src/uu/date/src/date.rs b/src/uu/date/src/date.rs index cf6732aaa49..f82fe1c38bf 100644 --- a/src/uu/date/src/date.rs +++ b/src/uu/date/src/date.rs @@ -20,8 +20,10 @@ use std::sync::OnceLock; use uucore::display::Quotable; use uucore::error::FromIo; use uucore::error::{UResult, USimpleError}; +#[cfg(feature = "i18n-datetime")] use uucore::i18n::datetime::{ - get_localized_day_name, get_localized_month_name, should_use_icu_locale, + get_era_year, get_localized_day_name, 
get_localized_month_name, get_time_locale, + should_use_icu_locale, }; use uucore::translate; use uucore::{format_usage, show}; @@ -618,14 +620,14 @@ fn format_date_with_locale_aware_months( format_string: &str, config: &Config, ) -> Result { - // Only use ICU for non-default locales and when format string contains month or day specifiers - let use_icu = should_use_icu_locale(); - + // Only use ICU for non-English locales and when format string contains month, day, or era year specifiers if (format_string.contains("%B") || format_string.contains("%b") || format_string.contains("%A") - || format_string.contains("%a")) - && use_icu + || format_string.contains("%a") + || format_string.contains("%Y") + || format_string.contains("%Ey")) + && should_use_icu_locale() { let broken_down = BrokenDownTime::from(date); // Get localized month names if needed @@ -665,37 +667,58 @@ fn format_date_with_locale_aware_months( (String::new(), String::new()) }; - // Replace format specifiers with placeholders for successful ICU translations only + // Get era year if needed + let era_year = if format_string.contains("%Y") || format_string.contains("%Ey") { + if let (Some(year), Some(month), Some(day)) = + (broken_down.year(), broken_down.month(), broken_down.day()) + { + let (locale, _encoding) = get_time_locale(); + get_era_year(year.into(), month as u8, day as u8, locale) + } else { + None + } + } else { + None + }; + + // Replace format specifiers with NULL-byte placeholders for successful ICU translations only + // Use NULL bytes to avoid collision with user format strings let mut temp_format = format_string.to_string(); if !full_month.is_empty() { - temp_format = temp_format.replace("%B", "<<>>"); + temp_format = temp_format.replace("%B", "\0FULL_MONTH\0"); } if !abbrev_month.is_empty() { - temp_format = temp_format.replace("%b", "<<>>"); + temp_format = temp_format.replace("%b", "\0ABBREV_MONTH\0"); } if !full_day.is_empty() { - temp_format = temp_format.replace("%A", "<<>>"); + 
temp_format = temp_format.replace("%A", "\0FULL_DAY\0"); } if !abbrev_day.is_empty() { - temp_format = temp_format.replace("%a", "<<>>"); + temp_format = temp_format.replace("%a", "\0ABBREV_DAY\0"); + } + if era_year.is_some() { + temp_format = temp_format.replace("%Y", "\0ERA_YEAR\0"); } // Format with the temporary string let temp_result = broken_down.to_string_with_config(config, &temp_format)?; - // Replace placeholders with localized names + // Replace NULL-byte placeholders with localized names let mut final_result = temp_result; if !full_month.is_empty() { - final_result = final_result.replace("<<>>", &full_month); + final_result = final_result.replace("\0FULL_MONTH\0", &full_month); } if !abbrev_month.is_empty() { - final_result = final_result.replace("<<>>", &abbrev_month); + final_result = final_result.replace("\0ABBREV_MONTH\0", &abbrev_month); } if !full_day.is_empty() { - final_result = final_result.replace("<<>>", &full_day); + final_result = final_result.replace("\0FULL_DAY\0", &full_day); } if !abbrev_day.is_empty() { - final_result = final_result.replace("<<>>", &abbrev_day); + final_result = final_result.replace("\0ABBREV_DAY\0", &abbrev_day); + } + if let Some(era_year_val) = era_year { + final_result = final_result.replace("\0ERA_YEAR\0", &era_year_val.to_string()); } return Ok(final_result); diff --git a/src/uucore/src/lib/features/i18n/datetime.rs b/src/uucore/src/lib/features/i18n/datetime.rs index 4cca5ed2251..e5d6a666286 100644 --- a/src/uucore/src/lib/features/i18n/datetime.rs +++ b/src/uucore/src/lib/features/i18n/datetime.rs @@ -116,6 +116,96 @@ pub fn get_localized_day_name(year: i32, month: u8, day: u8, full: bool) -> Stri formatted.trim().to_string() } +/// Determine the appropriate calendar system for a given locale +pub fn get_locale_calendar_type(locale: &Locale) -> CalendarType { + let locale_str = locale.to_string(); + + match locale_str.as_str() { + // Thai locales use Buddhist calendar + s if s.starts_with("th") => 
CalendarType::Buddhist, + // Persian/Farsi locales use Persian calendar (Solar Hijri) + s if s.starts_with("fa") => CalendarType::Persian, + // Amharic (Ethiopian) locales use Ethiopian calendar + s if s.starts_with("am") => CalendarType::Ethiopian, + // Default to Gregorian for all other locales + _ => CalendarType::Gregorian, + } +} + +/// Calendar types supported for locale-aware formatting +#[derive(Debug, Clone, PartialEq)] +pub enum CalendarType { + /// Gregorian calendar (used by most locales) + Gregorian, + /// Buddhist calendar (Thai locales) - adds 543 years to Gregorian year + Buddhist, + /// Persian Solar Hijri calendar (Persian/Farsi locales) - subtracts 621/622 years + Persian, + /// Ethiopian calendar (Amharic locales) - subtracts 7/8 years + Ethiopian, +} + +/// Convert a Gregorian date to the appropriate calendar system for a locale +/// +/// # Arguments +/// * `year` - Gregorian year +/// * `month` - Month (1-12) +/// * `day` - Day (1-31) +/// * `calendar_type` - Target calendar system +/// +/// # Returns +/// * `Some((era_year, month, day))` - Date in target calendar system +/// * `None` - If conversion fails +pub fn convert_date_to_locale_calendar( + year: i32, + month: u8, + day: u8, + calendar_type: &CalendarType, +) -> Option<(i32, u8, u8)> { + match calendar_type { + CalendarType::Gregorian => Some((year, month, day)), + CalendarType::Buddhist => { + // Buddhist calendar: Gregorian year + 543 + Some((year + 543, month, day)) + } + CalendarType::Persian => { + // Persian calendar conversion (Solar Hijri) + // March 21 (Nowruz) is roughly the start of the Persian year + let persian_year = if month > 3 || (month == 3 && day >= 21) { + year - 621 // After March 21 + } else { + year - 622 // Before March 21 + }; + Some((persian_year, month, day)) + } + CalendarType::Ethiopian => { + // Ethiopian calendar conversion + // September 11/12 is roughly the start of the Ethiopian year + let ethiopian_year = if month > 9 || (month == 9 && day >= 11) { + 
year - 7 // After September 11 + } else { + year - 8 // Before September 11 + }; + Some((ethiopian_year, month, day)) + } + } +} + +/// Get the era year for a given date and locale +pub fn get_era_year(year: i32, month: u8, day: u8, locale: &Locale) -> Option<i32> { + // Validate input date + if !(1..=12).contains(&month) || !(1..=31).contains(&day) { + return None; + } + + let calendar_type = get_locale_calendar_type(locale); + match calendar_type { + CalendarType::Gregorian => None, + _ => convert_date_to_locale_calendar(year, month, day, &calendar_type) + .map(|(era_year, _, _)| era_year), + } +} + #[cfg(test)] mod tests { use super::*; @@ -128,4 +218,47 @@ mod tests { // The caller (date.rs) will handle this by falling back to jiff assert!(name.is_empty() || name.len() >= 3); } + + #[test] + fn test_calendar_type_detection() { + let thai_locale = icu_locale::locale!("th-TH"); + let persian_locale = icu_locale::locale!("fa-IR"); + let amharic_locale = icu_locale::locale!("am-ET"); + let english_locale = icu_locale::locale!("en-US"); + + assert_eq!( + get_locale_calendar_type(&thai_locale), + CalendarType::Buddhist + ); + assert_eq!( + get_locale_calendar_type(&persian_locale), + CalendarType::Persian + ); + assert_eq!( + get_locale_calendar_type(&amharic_locale), + CalendarType::Ethiopian + ); + assert_eq!( + get_locale_calendar_type(&english_locale), + CalendarType::Gregorian + ); + } + + #[test] + fn test_era_year_conversion() { + let thai_locale = icu_locale::locale!("th-TH"); + let persian_locale = icu_locale::locale!("fa-IR"); + let amharic_locale = icu_locale::locale!("am-ET"); + + // Test Thai Buddhist calendar (2026 + 543 = 2569) + assert_eq!(get_era_year(2026, 6, 15, &thai_locale), Some(2569)); + + // Test Persian calendar (rough approximation) + assert_eq!(get_era_year(2026, 3, 22, &persian_locale), Some(1405)); + assert_eq!(get_era_year(2026, 3, 19, &persian_locale), Some(1404)); + + // Test Ethiopian calendar (rough approximation) + 
assert_eq!(get_era_year(2026, 9, 12, &amharic_locale), Some(2019)); + assert_eq!(get_era_year(2026, 9, 10, &amharic_locale), Some(2018)); + } } diff --git a/tests/by-util/test_date.rs b/tests/by-util/test_date.rs index ee2e0addd92..1034dfdfc54 100644 --- a/tests/by-util/test_date.rs +++ b/tests/by-util/test_date.rs @@ -1627,3 +1627,242 @@ fn test_date_parenthesis_vs_other_special_chars() { .stderr_contains("invalid date"); } } + +#[test] +#[cfg(unix)] +fn test_date_iranian_locale_solar_hijri_calendar() { + // Test Iranian locale uses Solar Hijri calendar + // Verify the Solar Hijri calendar is used in the Iranian locale + use std::process::Command; + + // Check if Iranian locale is available + let locale_check = Command::new("locale") + .env("LC_ALL", "fa_IR.UTF-8") + .arg("charmap") + .output(); + + let locale_available = match locale_check { + Ok(output) => String::from_utf8_lossy(&output.stdout).trim() == "UTF-8", + Err(_) => false, + }; + + if !locale_available { + println!("Skipping Iranian locale test - fa_IR.UTF-8 locale not available"); + return; + } + + // Get current year in Gregorian calendar + let current_year: i32 = new_ucmd!() + .env("LC_ALL", "C") + .arg("+%Y") + .succeeds() + .stdout_str() + .trim() + .parse() + .unwrap(); + + // 03-19 and 03-22 of the same Gregorian year are in different years in the + // Solar Hijri calendar + let year_march_19: i32 = new_ucmd!() + .env("LC_ALL", "fa_IR.UTF-8") + .arg("-d") + .arg(format!("{current_year}-03-19")) + .arg("+%Y") + .succeeds() + .stdout_str() + .trim() + .parse() + .unwrap(); + + let year_march_22: i32 = new_ucmd!() + .env("LC_ALL", "fa_IR.UTF-8") + .arg("-d") + .arg(format!("{current_year}-03-22")) + .arg("+%Y") + .succeeds() + .stdout_str() + .trim() + .parse() + .unwrap(); + + // Years should differ by 1 + assert_eq!(year_march_19, year_march_22 - 1); + + // The difference between the Gregorian year is 621 or 622 years + assert_eq!(year_march_19, current_year - 622); + assert_eq!(year_march_22, 
current_year - 621); + + // Check that --iso-8601 and --rfc-3339 use the Gregorian calendar + let iso_result = new_ucmd!() + .env("LC_ALL", "fa_IR.UTF-8") + .arg("--iso-8601=hours") + .succeeds(); + let iso_output = iso_result.stdout_str(); + assert!(iso_output.starts_with(&current_year.to_string())); + + let rfc_result = new_ucmd!() + .env("LC_ALL", "fa_IR.UTF-8") + .arg("--rfc-3339=date") + .succeeds(); + let rfc_output = rfc_result.stdout_str(); + assert!(rfc_output.starts_with(&current_year.to_string())); +} + +#[test] +#[cfg(unix)] +fn test_date_ethiopian_locale_calendar() { + // Test Ethiopian locale uses Ethiopian calendar + // Verify the Ethiopian calendar is used in the Ethiopian locale + use std::process::Command; + + // Check if Ethiopian locale is available + let locale_check = Command::new("locale") + .env("LC_ALL", "am_ET.UTF-8") + .arg("charmap") + .output(); + + let locale_available = match locale_check { + Ok(output) => String::from_utf8_lossy(&output.stdout).trim() == "UTF-8", + Err(_) => false, + }; + + if !locale_available { + println!("Skipping Ethiopian locale test - am_ET.UTF-8 locale not available"); + return; + } + + // Get current year in Gregorian calendar + let current_year: i32 = new_ucmd!() + .env("LC_ALL", "C") + .arg("+%Y") + .succeeds() + .stdout_str() + .trim() + .parse() + .unwrap(); + + // 09-10 and 09-12 of the same Gregorian year are in different years in the + // Ethiopian calendar + let year_september_10: i32 = new_ucmd!() + .env("LC_ALL", "am_ET.UTF-8") + .arg("-d") + .arg(format!("{current_year}-09-10")) + .arg("+%Y") + .succeeds() + .stdout_str() + .trim() + .parse() + .unwrap(); + + let year_september_12: i32 = new_ucmd!() + .env("LC_ALL", "am_ET.UTF-8") + .arg("-d") + .arg(format!("{current_year}-09-12")) + .arg("+%Y") + .succeeds() + .stdout_str() + .trim() + .parse() + .unwrap(); + + // Years should differ by 1 + assert_eq!(year_september_10, year_september_12 - 1); + + // The difference between the Gregorian year is 7 or 8 years 
+ assert_eq!(year_september_10, current_year - 8); + assert_eq!(year_september_12, current_year - 7); + + // Check that --iso-8601 and --rfc-3339 use the Gregorian calendar + let iso_result = new_ucmd!() + .env("LC_ALL", "am_ET.UTF-8") + .arg("--iso-8601=hours") + .succeeds(); + let iso_output = iso_result.stdout_str(); + assert!(iso_output.starts_with(&current_year.to_string())); + + let rfc_result = new_ucmd!() + .env("LC_ALL", "am_ET.UTF-8") + .arg("--rfc-3339=date") + .succeeds(); + let rfc_output = rfc_result.stdout_str(); + assert!(rfc_output.starts_with(&current_year.to_string())); +} + +#[test] +#[cfg(unix)] +fn test_date_thai_locale_solar_calendar() { + // Test Thai locale uses Thai solar calendar + // Verify the Thai solar calendar is used with the Thai locale + use std::process::Command; + + // Check if Thai locale is available + let locale_check = Command::new("locale") + .env("LC_ALL", "th_TH.UTF-8") + .arg("charmap") + .output(); + + let locale_available = match locale_check { + Ok(output) => String::from_utf8_lossy(&output.stdout).trim() == "UTF-8", + Err(_) => false, + }; + + if !locale_available { + println!("Skipping Thai locale test - th_TH.UTF-8 locale not available"); + return; + } + + // Get current year in Gregorian calendar + let current_year: i32 = new_ucmd!() + .env("LC_ALL", "C") + .arg("+%Y") + .succeeds() + .stdout_str() + .trim() + .parse() + .unwrap(); + + // Since 1941, the year in the Thai solar calendar is the Gregorian year plus 543 + let thai_year: i32 = new_ucmd!() + .env("LC_ALL", "th_TH.UTF-8") + .arg("+%Y") + .succeeds() + .stdout_str() + .trim() + .parse() + .unwrap(); + + assert_eq!(thai_year, current_year + 543); + + // All months that have 31 days have names that end with "คม" (Thai characters) + let days_31_suffix = "\u{0E04}\u{0E21}"; // "คม" in Unicode + + for month in ["01", "03", "05", "07", "08", "10", "12"] { + let month_result = new_ucmd!() + .env("LC_ALL", "th_TH.UTF-8") + .arg("--date") + 
.arg(format!("{current_year}-{month}-01")) + .arg("+%B") + .succeeds(); + let month_name = month_result.stdout_str(); + + assert!( + month_name.trim().ends_with(days_31_suffix), + "Month {month} should end with 'คม', got: {month_name}" + ); + } + + // Check that --iso-8601 and --rfc-3339 use the Gregorian calendar + let iso_result = new_ucmd!() + .env("LC_ALL", "th_TH.UTF-8") + .arg("--iso-8601=hours") + .succeeds(); + let iso_output = iso_result.stdout_str(); + assert!(iso_output.starts_with(&current_year.to_string())); + + let rfc_result = new_ucmd!() + .env("LC_ALL", "th_TH.UTF-8") + .arg("--rfc-3339=date") + .succeeds(); + let rfc_output = rfc_result.stdout_str(); + assert!(rfc_output.starts_with(&current_year.to_string())); +}