From f8ef3f3e6447665af670f9ec399665be55763f42 Mon Sep 17 00:00:00 2001 From: Christopher Dryden Date: Fri, 19 Dec 2025 16:05:40 +0000 Subject: [PATCH 1/4] sort: add locale-aware month parsing using ICU --- .../cspell.dictionaries/jargon.wordlist.txt | 1 + Cargo.lock | 58 ++++----- fuzz/Cargo.lock | 119 +++++++++--------- src/uu/sort/Cargo.toml | 2 + src/uu/sort/src/sort.rs | 46 +------ src/uucore/Cargo.toml | 3 +- src/uucore/src/lib/features/i18n/mod.rs | 9 ++ src/uucore/src/lib/features/i18n/month.rs | 78 ++++++++++++ tests/by-util/test_sort.rs | 47 +++++++ 9 files changed, 231 insertions(+), 132 deletions(-) create mode 100644 src/uucore/src/lib/features/i18n/month.rs diff --git a/.vscode/cspell.dictionaries/jargon.wordlist.txt b/.vscode/cspell.dictionaries/jargon.wordlist.txt index 7ba13ab80c1..6a6a1c7d065 100644 --- a/.vscode/cspell.dictionaries/jargon.wordlist.txt +++ b/.vscode/cspell.dictionaries/jargon.wordlist.txt @@ -1,3 +1,4 @@ +janv AFAICT asimd ASIMD diff --git a/Cargo.lock b/Cargo.lock index df301dbd7cf..879d4fed7fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -77,7 +77,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -88,7 +88,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -284,9 +284,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.52" +version = "1.2.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd4932aefd12402b36c60956a4fe0035421f544799057659ff86f923657aada3" +checksum = "755d2fce177175ffca841e9a06afdb2c4ab0f593d53b4dee48147dfaade85932" dependencies = [ "find-msvc-tools", "shlex", @@ -384,9 +384,9 @@ dependencies = [ [[package]] name = "codspeed" -version = "4.3.0" +version = "4.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38c2eb3388ebe26b5a0ab6bf4969d9c4840143d7f6df07caa3cc851b0606cef6" +checksum = "5f0d98d97fd75ca4489a1a0997820a6521531085e7c8a98941bd0e1264d567dd" dependencies = [ "anyhow", "cc", @@ -402,9 +402,9 @@ dependencies = [ [[package]] name = "codspeed-divan-compat" -version = "4.3.0" +version = "4.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2de65b7489a59709724d489070c6d05b7744039e4bf751d0a2006b90bb5593d" +checksum = "4179ec5518e79efcd02ed50aa483ff807902e43c85146e87fff58b9cffc06078" dependencies = [ "clap", "codspeed", @@ -415,9 +415,9 @@ dependencies = [ [[package]] name = "codspeed-divan-compat-macros" -version = "4.3.0" +version = "4.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56ca01ce4fd22b8dcc6c770dcd6b74343642e842482b94e8920d14e10c57638d" +checksum = "15eaee97aa5bceb32cc683fe25cd6373b7fc48baee5c12471996b58b6ddf0d7c" dependencies = [ "divan-macros", "itertools 0.14.0", @@ -429,9 +429,9 @@ dependencies = [ [[package]] name = "codspeed-divan-compat-walltime" -version = "4.3.0" +version = "4.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "720ab9d0714718afe5f5832be6e5f5eb5ce97836e24ca7bf7042eea4308b9fb8" +checksum = "c38671153aa73be075d6019cab5ab1e6b31d36644067c1ac4cef73bf9723ce33" dependencies = [ "cfg-if", "clap", @@ -1001,7 +1001,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -1052,9 +1052,9 @@ dependencies = [ [[package]] name = "find-msvc-tools" -version = "0.1.7" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f449e6c6c08c865631d4890cfacf252b3d396c9bcc83adb6623cdb02a8336c41" +checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" [[package]] name = "fixed_decimal" @@ -1713,7 +1713,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -1996,7 +1996,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -2317,9 +2317,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.106" +version = "1.0.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7" dependencies = [ "unicode-ident", ] @@ -2350,9 +2350,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.44" +version = "1.0.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a" dependencies = [ "proc-macro2", ] @@ -2586,7 +2586,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -2742,9 +2742,9 @@ dependencies = [ [[package]] name = "signal-hook" -version = "0.4.3" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b57709da74f9ff9f4a27dce9526eec25ca8407c45a7887243b031a58935fb8e" +checksum = "2a37d01603c37b5466f808de79f845c7116049b0579adb70a6b7d47c1fa3a952" dependencies = [ "libc", "signal-hook-registry", @@ -2896,7 +2896,7 @@ dependencies = [ "getrandom 0.3.4", "once_cell", "rustix", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -3363,7 +3363,7 @@ dependencies = [ "gcd", "libc", "nix", - "signal-hook 0.4.3", + "signal-hook 0.4.1", "tempfile", "thiserror 2.0.18", "uucore", @@ -4545,7 +4545,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -4994,9 +4994,9 @@ checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3" [[package]] name = "zmij" -version = "1.0.14" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd8f3f50b848df28f887acb68e41201b5aea6bc8a8dacc00fb40635ff9a72fea" +checksum = "dfcd145825aace48cff44a8844de64bf75feec3080e0aa5cdbde72961ae51a65" [[package]] name = "zopfli" diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 424578d1dfd..17b9c9e3c67 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -131,9 +131,9 @@ checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" [[package]] name = "blake2b_simd" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06e903a20b159e944f91ec8499fe1e55651480c541ea0a584f5d967c49ad9d99" +checksum = "b79834656f71332577234b50bfc009996f7449e0c056884e6a02492ded0ca2f3" dependencies = [ "arrayref", "arrayvec", @@ -142,15 +142,16 @@ dependencies = [ [[package]] name = "blake3" -version = "1.8.2" +version = "1.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" +checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" dependencies = [ "arrayref", "arrayvec", "cc", "cfg-if", "constant_time_eq", + "cpufeatures", ] [[package]] @@ -206,9 +207,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.51" +version = "1.2.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a0aeaff4ff1a90589618835a598e545176939b97874f7abc7851caa0618f203" +checksum = "6354c81bbfd62d9cfa9cb3c773c2b7b2a3a482d569de977fd0e961f6e7c00583" dependencies = [ "find-msvc-tools", "jobserver", @@ -230,9 +231,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.42" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" dependencies = [ "iana-time-zone", "num-traits", @@ -263,9 +264,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.6" +version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" +checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" [[package]] name = "colorchoice" @@ -307,16 +308,16 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", "once_cell", "tiny-keccak", ] [[package]] name = "constant_time_eq" -version = "0.3.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" [[package]] name = "core-foundation-sys" @@ -415,15 +416,15 @@ dependencies = [ [[package]] name = "data-encoding" -version = "2.9.0" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" +checksum = "d7a1e2f27636f116493b8b860f5546edb47c8d8f8ea73e1d2a20be88e28d1fea" [[package]] name = "data-encoding-macro" -version = "0.1.18" +version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47ce6c96ea0102f01122a185683611bd5ac8d99e62bc59dd12e6bda344ee673d" +checksum = "8142a83c17aa9461d637e649271eae18bf2edd00e91f2e105df36c3c16355bdb" dependencies = [ "data-encoding", "data-encoding-macro-internal", @@ -431,9 +432,9 @@ dependencies = [ [[package]] name = "data-encoding-macro-internal" -version = "0.1.16" +version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d162beedaa69905488a8da94f5ac3edb4dd4788b732fadb7bd120b2625c1976" +checksum = "7ab67060fc6b8ef687992d439ca0fa36e7ed17e9a0b16b25b601e8757df720de" dependencies = [ "data-encoding", "syn", @@ -517,9 +518,9 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "find-msvc-tools" -version = "0.1.6" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff" +checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" [[package]] name = "fixed_decimal" @@ -534,9 +535,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.1.5" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" +checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369" dependencies = [ "crc32fast", "miniz_oxide", @@ -605,9 +606,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if", "libc", @@ -810,9 +811,9 @@ dependencies = [ [[package]] name = "icu_locale_data" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f03e2fcaefecdf05619f3d6f91740e79ab969b4dd54f77cbf546b1d0d28e3147" +checksum = "1c5f1d16b4c3a2642d3a719f18f6b06070ab0aef246a6418130c955ae08aa831" [[package]] name = "icu_normalizer" @@ -1023,9 +1024,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.83" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" dependencies = [ "once_cell", "wasm-bindgen", @@ -1058,9 +1059,9 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "linux-raw-sys" @@ -1283,9 +1284,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.105" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] @@ -1316,9 +1317,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.43" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" dependencies = [ "proc-macro2", ] @@ -1351,9 +1352,9 @@ dependencies = [ [[package]] name = "rand_core" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" dependencies = [ "getrandom 0.3.4", ] @@ -1540,9 +1541,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.113" +version = "2.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "678faa00651c9eb72dd2020cbdf275d92eccb2400d568e419efdd64838145cb4" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" dependencies = [ "proc-macro2", "quote", @@ -1585,18 +1586,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", @@ -1970,18 +1971,18 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasip2" -version = "1.0.1+wasi-0.2.4" +version = "1.0.2+wasi-0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" dependencies = [ "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" +checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" dependencies = [ "cfg-if", "once_cell", @@ -1992,9 +1993,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" +checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2002,9 +2003,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" +checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" dependencies = [ "bumpalo", "proc-macro2", @@ -2015,9 +2016,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" +checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" dependencies = [ "unicode-ident", ] @@ -2193,9 +2194,9 @@ dependencies = [ [[package]] name = "wit-bindgen" -version = "0.46.0" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" [[package]] name = "write16" @@ -2243,18 +2244,18 @@ checksum = "9b3a41ce106832b4da1c065baa4c31cf640cf965fa1483816402b7f6b96f0a64" [[package]] name = "zerocopy" -version = "0.8.31" +version = "0.8.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" +checksum = "71ddd76bcebeed25db614f82bf31a9f4222d3fbba300e6fb6c00afa26cbd4d9d" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.31" +version = "0.8.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" +checksum = "d8187381b52e32220d50b255276aa16a084ec0a9017a0ca2152a1f55c539758d" dependencies = [ "proc-macro2", "quote", diff --git a/src/uu/sort/Cargo.toml b/src/uu/sort/Cargo.toml index e487a1bfe49..ec0496a6e80 100644 --- a/src/uu/sort/Cargo.toml +++ b/src/uu/sort/Cargo.toml @@ -42,6 +42,7 @@ uucore = { workspace = true, features = [ "version-cmp", "i18n-decimal", "i18n-collator", + "i18n-month", ] } fluent = { workspace = true } @@ -60,6 +61,7 @@ uucore = { workspace = true, features = [ "parser-size", "version-cmp", "i18n-collator", + "i18n-month", ] } [[bin]] diff --git a/src/uu/sort/src/sort.rs b/src/uu/sort/src/sort.rs index 01ddc63fbe0..8ede10227bc 100644 --- a/src/uu/sort/src/sort.rs +++ b/src/uu/sort/src/sort.rs @@ -50,6 +50,7 @@ use uucore::extendedbigdecimal::ExtendedBigDecimal; #[cfg(feature = "i18n-collator")] use uucore::i18n::collator::locale_cmp; use uucore::i18n::decimal::locale_decimal_separator; +use uucore::i18n::month::month_parse as locale_month_parse; use uucore::line_ending::LineEnding; use uucore::parser::num_parser::{ExtendedParser, ExtendedParserError}; use uucore::parser::parse_size::{ParseSizeError, Parser}; @@ -725,7 +726,7 @@ impl<'a> Line<'a> { .enumerate() .skip_while(|(_, c)| c.is_ascii_whitespace()); - let month = if month_parse(initial_selection) == Month::Unknown { + let month = if locale_month_parse(initial_selection) == 0 { // We failed to parse a month, which is equivalent to matching nothing. // Add the "no match for key" marker to the first non-whitespace character. let first_non_whitespace = month_chars.next(); @@ -2840,49 +2841,8 @@ fn random_shuffle(a: &[u8], b: &[u8], salt: &[u8]) -> Ordering { da.cmp(&db) } -#[derive(Eq, Ord, PartialEq, PartialOrd, Clone, Copy)] -enum Month { - Unknown, - January, - February, - March, - April, - May, - June, - July, - August, - September, - October, - November, - December, -} - -/// Parse the beginning string into a Month, returning [`Month::Unknown`] on errors. -fn month_parse(line: &[u8]) -> Month { - let line = line.trim_ascii_start(); - - match line.get(..3).map(|x| x.to_ascii_uppercase()).as_deref() { - Some(b"JAN") => Month::January, - Some(b"FEB") => Month::February, - Some(b"MAR") => Month::March, - Some(b"APR") => Month::April, - Some(b"MAY") => Month::May, - Some(b"JUN") => Month::June, - Some(b"JUL") => Month::July, - Some(b"AUG") => Month::August, - Some(b"SEP") => Month::September, - Some(b"OCT") => Month::October, - Some(b"NOV") => Month::November, - Some(b"DEC") => Month::December, - _ => Month::Unknown, - } -} - fn month_compare(a: &[u8], b: &[u8]) -> Ordering { - let ma = month_parse(a); - let mb = month_parse(b); - - ma.cmp(&mb) + locale_month_parse(a).cmp(&locale_month_parse(b)) } fn print_sorted<'a, T: Iterator>>( diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index 507f7740c6e..185f18a0b32 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -149,11 +149,12 @@ format = [ "quoting-style", "unit-prefix", ] -i18n-all = ["i18n-collator", "i18n-decimal", "i18n-datetime"] +i18n-all = ["i18n-collator", "i18n-decimal", "i18n-datetime", "i18n-month"] i18n-common = ["icu_locale"] i18n-collator = ["i18n-common", "icu_collator"] i18n-decimal = ["i18n-common", "icu_decimal", "icu_provider"] i18n-datetime = ["i18n-common", "icu_calendar", "icu_datetime"] +i18n-month = ["i18n-common", "icu_datetime", "icu_provider", "libc"] mode = ["libc"] perms = ["entries", "libc", "walkdir"] buf-copy = [] diff --git a/src/uucore/src/lib/features/i18n/mod.rs b/src/uucore/src/lib/features/i18n/mod.rs index e8e0f3f3c5d..5f0dfec0050 100644 --- a/src/uucore/src/lib/features/i18n/mod.rs +++ b/src/uucore/src/lib/features/i18n/mod.rs @@ -13,6 +13,8 @@ pub mod collator; pub mod datetime; #[cfg(feature = "i18n-decimal")] pub mod decimal; +#[cfg(feature = "i18n-month")] +pub mod month; /// The encoding specified by the locale, if specified /// Currently only supports ASCII and UTF-8 for the sake of simplicity. @@ -86,6 +88,13 @@ pub fn get_numeric_locale() -> &'static (Locale, UEncoding) { NUMERIC_LOCALE.get_or_init(|| get_locale_from_env("LC_NUMERIC")) } +/// Get the time locale from the environment (used for month names, etc.) +pub fn get_time_locale() -> &'static (Locale, UEncoding) { + static TIME_LOCALE: OnceLock<(Locale, UEncoding)> = OnceLock::new(); + + TIME_LOCALE.get_or_init(|| get_locale_from_env("LC_TIME")) +} + /// Return the encoding deduced from the locale environment variable. pub fn get_locale_encoding() -> UEncoding { get_collating_locale().1 diff --git a/src/uucore/src/lib/features/i18n/month.rs b/src/uucore/src/lib/features/i18n/month.rs new file mode 100644 index 00000000000..68471246409 --- /dev/null +++ b/src/uucore/src/lib/features/i18n/month.rs @@ -0,0 +1,78 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use std::sync::OnceLock; + +use icu_datetime::provider::neo::{DatetimeNamesMonthGregorianV1, MonthNames}; +use icu_locale::{Locale, locale}; +use icu_provider::prelude::*; + +use crate::i18n::get_time_locale; + +fn load_month_names(loc: &Locale) -> Option> { + let data_locale = DataLocale::from(loc.clone()); + let abbr_attr = DataMarkerAttributes::from_str_or_panic("3"); + let request = DataRequest { + id: DataIdentifierBorrowed::for_marker_attributes_and_locale(abbr_attr, &data_locale), + metadata: DataRequestMetadata::default(), + }; + + let response: DataResponse = + icu_datetime::provider::Baked.load(request).ok()?; + + if let MonthNames::Linear(names) = response.payload.get() { + let mut result = Vec::new(); + for (i, name) in names.iter().take(12).enumerate() { + let month = (i + 1) as u8; + let upper = name.to_uppercase(); + // Some locales use trailing periods in abbreviated months (e.g., "janv." in French). + // Store both with and without the period so we can match either format. + let stripped = upper.trim_end_matches('.'); + if stripped != upper { + result.push((stripped.to_string(), month)); + } + result.push((upper, month)); + } + return Some(result); + } + None +} + +fn get_month_names() -> &'static Vec<(String, u8)> { + static MONTH_NAMES: OnceLock> = OnceLock::new(); + MONTH_NAMES.get_or_init(|| { + let loc = get_time_locale().0.clone(); + load_month_names(&loc) + .or_else(|| load_month_names(&locale!("en"))) + .expect("ICU should always have English month data") + }) +} + +/// Parse a month name from the beginning of the input bytes. +/// Returns month number (1-12) or 0 if not recognized. +pub fn month_parse(input: &[u8]) -> u8 { + let input = input.trim_ascii_start(); + + // Convert bytes to string for comparison. For valid UTF-8, use it directly. + // For non-UTF-8 (e.g., Latin-1 locales), treat each byte as a Unicode codepoint. + // This handles legacy encodings like ISO-8859-1 where byte 0xE9 = 'é'. + let input_upper = std::str::from_utf8(input).map_or_else( + |_| { + input + .iter() + .map(|&b| b as char) + .collect::() + .to_uppercase() + }, + |s| s.to_uppercase(), + ); + + for (name, month) in get_month_names() { + if input_upper.starts_with(name) { + return *month; + } + } + 0 +} diff --git a/tests/by-util/test_sort.rs b/tests/by-util/test_sort.rs index f6842969ceb..53070a9cb97 100644 --- a/tests/by-util/test_sort.rs +++ b/tests/by-util/test_sort.rs @@ -2611,4 +2611,51 @@ fn test_locale_complex_utf8_sorting() { .stdout_is("apple\nApple\nbanana\nBanana\nzebra\nZebra\n"); } +#[test] +fn test_month_sort_english() { + new_ucmd!() + .arg("-M") + .pipe_in("Dec\nJan\nMar\nFeb\n") + .succeeds() + .stdout_only("Jan\nFeb\nMar\nDec\n"); +} + +#[test] +fn test_month_sort_case_insensitive() { + new_ucmd!() + .arg("-M") + .pipe_in("dec\nJAN\nmar\nFEB\n") + .succeeds() + .stdout_only("JAN\nFEB\nmar\ndec\n"); +} + +#[test] +fn test_month_sort_with_prefix() { + new_ucmd!() + .arg("-M") + .pipe_in("December 25\nJanuary 1\nMarch 15\n") + .succeeds() + .stdout_only("January 1\nMarch 15\nDecember 25\n"); +} + +#[test] +fn test_month_sort_unknown_sorted_first() { + // Unknown month names sort before known months + new_ucmd!() + .arg("-M") + .pipe_in("Jan\nxyz\nFeb\nabc\n") + .succeeds() + .stdout_only("abc\nxyz\nJan\nFeb\n"); +} + +#[test] +fn test_month_sort_french_locale() { + new_ucmd!() + .arg("-M") + .env("LC_ALL", "fr_FR.UTF-8") + .pipe_in("déc.\njanv.\nmars\nfévr.\n") + .succeeds() + .stdout_only("janv.\nfévr.\nmars\ndéc.\n"); +} + /* spell-checker: enable */ From ed18bcca1e04d27e7a371e17631221fa98890af8 Mon Sep 17 00:00:00 2001 From: Christopher Dryden Date: Fri, 19 Dec 2025 23:36:18 +0000 Subject: [PATCH 2/4] CI: add ja_JP.UTF-8 locale for sort-month test --- .github/workflows/GnuTests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/GnuTests.yml b/.github/workflows/GnuTests.yml index 0f8ed7fd167..180f9e179d8 100644 --- a/.github/workflows/GnuTests.yml +++ b/.github/workflows/GnuTests.yml @@ -92,6 +92,7 @@ jobs: sudo locale-gen --keep-existing am_ET.UTF-8 # Ethiopia sudo locale-gen --keep-existing th_TH.UTF-8 # Thailand sudo locale-gen --keep-existing zh_CN.GB18030 # China + sudo locale-gen --keep-existing ja_JP.UTF-8 # Japan sudo update-locale echo "After:" From 63aec7576b3ce5d9186f8d889fbd827fb36edcde Mon Sep 17 00:00:00 2001 From: Christopher Dryden Date: Wed, 14 Jan 2026 00:43:19 +0000 Subject: [PATCH 3/4] deny.toml: skip hashbrown 0.15.5 duplicate --- deny.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deny.toml b/deny.toml index 51bf577cfdf..79285afa14b 100644 --- a/deny.toml +++ b/deny.toml @@ -89,7 +89,7 @@ skip = [ { name = "itertools", version = "0.13.0" }, # ordered-multimap { name = "hashbrown", version = "0.14.5" }, - # lru (via num-prime) + # lru (via num-prime), icu4x { name = "hashbrown", version = "0.15.5" }, # cexpr (via bindgen) { name = "nom", version = "7.1.3" }, From c9007595ed8a4dd6746f47e6bbfd82865775b74a Mon Sep 17 00:00:00 2001 From: Christopher Dryden Date: Mon, 26 Jan 2026 17:15:54 +0000 Subject: [PATCH 4/4] sort: fix month parsing for C/POSIX locale --- src/uucore/src/lib/features/i18n/month.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/uucore/src/lib/features/i18n/month.rs b/src/uucore/src/lib/features/i18n/month.rs index 68471246409..87d75c6f0dd 100644 --- a/src/uucore/src/lib/features/i18n/month.rs +++ b/src/uucore/src/lib/features/i18n/month.rs @@ -44,7 +44,14 @@ fn get_month_names() -> &'static Vec<(String, u8)> { static MONTH_NAMES: OnceLock> = OnceLock::new(); MONTH_NAMES.get_or_init(|| { let loc = get_time_locale().0.clone(); - load_month_names(&loc) + // For undefined locale (C/POSIX), ICU returns generic month names like "M01", "M02" + // which aren't useful for matching. Skip directly to English fallback. + let result = if loc == locale!("und") { + None + } else { + load_month_names(&loc) + }; + result .or_else(|| load_month_names(&locale!("en"))) .expect("ICU should always have English month data") })