From d1120d8a9e54472972e56f2810de92c99a752a47 Mon Sep 17 00:00:00 2001 From: Frank Tang Date: Tue, 21 Nov 2023 17:47:43 -0800 Subject: [PATCH] ICU-22493 Implement First Day Override in Calendar --- icu4c/source/common/loclikely.cpp | 66 +++++++++------- icu4c/source/i18n/iso8601cal.cpp | 9 ++- icu4c/source/test/intltest/caltest.cpp | 76 +++++++++++++++++++ icu4c/source/test/intltest/caltest.h | 3 + .../main/java/com/ibm/icu/util/Calendar.java | 11 ++- .../main/java/com/ibm/icu/util/ULocale.java | 31 ++++++-- .../dev/test/calendar/IBMCalendarTest.java | 70 +++++++++++++++++ 7 files changed, 222 insertions(+), 44 deletions(-) diff --git a/icu4c/source/common/loclikely.cpp b/icu4c/source/common/loclikely.cpp index bd3a597df40a..2416bdc52d7e 100644 --- a/icu4c/source/common/loclikely.cpp +++ b/icu4c/source/common/loclikely.cpp @@ -789,55 +789,65 @@ U_NAMESPACE_END // The following must at least allow for rg key value (6) plus terminator (1). #define ULOC_RG_BUFLEN 8 -U_CAPI int32_t U_EXPORT2 -ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion, - char *region, int32_t regionCapacity, UErrorCode* status) { - if (U_FAILURE(*status)) { - return 0; - } - char rgBuf[ULOC_RG_BUFLEN]; - UErrorCode rgStatus = U_ZERO_ERROR; +namespace { +int GetRegionFromKey(const char *localeID, const char* key, char* buf) { + UErrorCode status = U_ZERO_ERROR; // First check for rg keyword value icu::CharString rg; { icu::CharStringByteSink sink(&rg); - ulocimp_getKeywordValue(localeID, "rg", sink, &rgStatus); + ulocimp_getKeywordValue(localeID, key, sink, &status); } - int32_t rgLen = rg.length(); - if (U_FAILURE(rgStatus) || rgLen < 3 || rgLen > 7) { - rgLen = 0; + int32_t len = rg.length(); + if (U_FAILURE(status) || len < 3 || len > 7) { + len = 0; } else { // chop off the subdivision code (which will generally be "zzzz" anyway) const char* const data = rg.data(); if (uprv_isASCIILetter(data[0])) { - rgLen = 2; - rgBuf[0] = uprv_toupper(data[0]); - rgBuf[1] = 
uprv_toupper(data[1]); + len = 2; + buf[0] = uprv_toupper(data[0]); + buf[1] = uprv_toupper(data[1]); } else { // assume three-digit region code - rgLen = 3; - uprv_memcpy(rgBuf, data, rgLen); + len = 3; + uprv_memcpy(buf, data, len); } } + return len; +} +} // namespace +U_CAPI int32_t U_EXPORT2 +ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion, + char *region, int32_t regionCapacity, UErrorCode* status) { + if (U_FAILURE(*status)) { + return 0; + } + char rgBuf[ULOC_RG_BUFLEN]; + int32_t rgLen = GetRegionFromKey(localeID, "rg", rgBuf); if (rgLen == 0) { // No valid rg keyword value, try for unicode_region_subtag rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status); if (U_FAILURE(*status)) { rgLen = 0; } else if (rgLen == 0 && inferRegion) { - // no unicode_region_subtag but inferRegion true, try likely subtags - rgStatus = U_ZERO_ERROR; - icu::CharString locBuf; - { - icu::CharStringByteSink sink(&locBuf); - ulocimp_addLikelySubtags(localeID, sink, &rgStatus); - } - if (U_SUCCESS(rgStatus)) { - rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status); - if (U_FAILURE(*status)) { - rgLen = 0; + // Second check for sd keyword value + rgLen = GetRegionFromKey(localeID, "sd", rgBuf); + if (rgLen == 0) { + // no unicode_region_subtag but inferRegion true, try likely subtags + UErrorCode rgStatus = U_ZERO_ERROR; + icu::CharString locBuf; + { + icu::CharStringByteSink sink(&locBuf); + ulocimp_addLikelySubtags(localeID, sink, &rgStatus); + } + if (U_SUCCESS(rgStatus)) { + rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status); + if (U_FAILURE(*status)) { + rgLen = 0; + } } } } diff --git a/icu4c/source/i18n/iso8601cal.cpp b/icu4c/source/i18n/iso8601cal.cpp index c3288bc6b5d6..6bb7579e72da 100644 --- a/icu4c/source/i18n/iso8601cal.cpp +++ b/icu4c/source/i18n/iso8601cal.cpp @@ -14,11 +14,12 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ISO8601Calendar) ISO8601Calendar::ISO8601Calendar(const Locale& aLocale, 
UErrorCode& success) : GregorianCalendar(aLocale, success) { - UErrorCode fwStatus = U_ZERO_ERROR; - int32_t fwLength = aLocale.getKeywordValue("fw", nullptr, 0, fwStatus); - // Do not set first day of week for iso8601 to Monday if we have fw keyword + UErrorCode tempStatus = U_ZERO_ERROR; + int32_t length = aLocale.getKeywordValue("fw", nullptr, 0, tempStatus) + + aLocale.getKeywordValue("rg", nullptr, 0, tempStatus); + // Do not set first day of week for iso8601 to Monday if we have fw or rg keywords // and let the value set by the Calendar constructor to take care of it. - if (U_SUCCESS(fwStatus) && fwLength == 0) { + if (U_SUCCESS(tempStatus) && length == 0) { setFirstDayOfWeek(UCAL_MONDAY); } setMinimalDaysInFirstWeek(4); diff --git a/icu4c/source/test/intltest/caltest.cpp b/icu4c/source/test/intltest/caltest.cpp index 73476293f7d0..6d5962c2e7a6 100644 --- a/icu4c/source/test/intltest/caltest.cpp +++ b/icu4c/source/test/intltest/caltest.cpp @@ -189,6 +189,7 @@ void CalendarTest::runIndexedTest( int32_t index, UBool exec, const char* &name, TESTCASE_AUTO(TestFWWithISO8601); TESTCASE_AUTO(TestDangiOverflowIsLeapMonthBetween22507); TESTCASE_AUTO(TestRollWeekOfYear); + TESTCASE_AUTO(TestFirstDayOfWeek); TESTCASE_AUTO_END; } @@ -5552,6 +5553,81 @@ void CalendarTest::TestRollWeekOfYear() { U_ASSERT(U_SUCCESS(status)); cal->roll(UCAL_WEEK_OF_YEAR, 1, status); } + +void CalendarTest::verifyFirstDayOfWeek(const char* locale, UCalendarDaysOfWeek expected) { + UErrorCode status = U_ZERO_ERROR; + Locale l = Locale::forLanguageTag(locale, status); + U_ASSERT(U_SUCCESS(status)); + LocalPointer cal(Calendar::createInstance(l, status), status); + U_ASSERT(U_SUCCESS(status)); + assertEquals(locale, + expected, cal->getFirstDayOfWeek(status)); + U_ASSERT(U_SUCCESS(status)); +} + +/** + * Test "First Day Overrides" behavior + * https://unicode.org/reports/tr35/tr35-dates.html#first-day-overrides + * And the data in + * 
https://github.com/unicode-org/cldr/blob/main/common/supplemental/supplementalData.xml + * + * Examples of region for First Day of a week + * Friday: MV + * Saturday: AE AF + * Sunday: US JP + * Monday: GB + */ +void CalendarTest::TestFirstDayOfWeek() { + // Test -u-fw- value + verifyFirstDayOfWeek("en-MV-u-ca-iso8601-fw-sun-rg-mvzzzz-sd-usca", UCAL_SUNDAY); + verifyFirstDayOfWeek("en-MV-u-ca-iso8601-fw-mon-rg-mvzzzz-sd-usca", UCAL_MONDAY); + verifyFirstDayOfWeek("en-MV-u-ca-iso8601-fw-tue-rg-mvzzzz-sd-usca", UCAL_TUESDAY); + verifyFirstDayOfWeek("en-MV-u-ca-iso8601-fw-wed-rg-mvzzzz-sd-usca", UCAL_WEDNESDAY); + verifyFirstDayOfWeek("en-MV-u-ca-iso8601-fw-thu-rg-mvzzzz-sd-usca", UCAL_THURSDAY); + verifyFirstDayOfWeek("en-AE-u-ca-iso8601-fw-fri-rg-aezzzz-sd-usca", UCAL_FRIDAY); + verifyFirstDayOfWeek("en-MV-u-ca-iso8601-fw-sat-rg-mvzzzz-sd-usca", UCAL_SATURDAY); + + // Test -u-rg- value + verifyFirstDayOfWeek("en-MV-u-ca-iso8601-rg-mvzzzz-sd-usca", UCAL_FRIDAY); + verifyFirstDayOfWeek("en-MV-u-ca-iso8601-rg-aezzzz-sd-usca", UCAL_SATURDAY); + verifyFirstDayOfWeek("en-MV-u-ca-iso8601-rg-uszzzz-sd-usca", UCAL_SUNDAY); + verifyFirstDayOfWeek("en-MV-u-ca-iso8601-rg-gbzzzz-sd-usca", UCAL_MONDAY); + + // Test -u-ca-iso8601 + verifyFirstDayOfWeek("en-MV-u-ca-iso8601-sd-mv00", UCAL_MONDAY); + verifyFirstDayOfWeek("en-AE-u-ca-iso8601-sd-aeaj", UCAL_MONDAY); + verifyFirstDayOfWeek("en-US-u-ca-iso8601-sd-usca", UCAL_MONDAY); + + // Test Region Tags only + verifyFirstDayOfWeek("en-MV", UCAL_FRIDAY); + verifyFirstDayOfWeek("en-AE", UCAL_SATURDAY); + verifyFirstDayOfWeek("en-US", UCAL_SUNDAY); + verifyFirstDayOfWeek("dv-GB", UCAL_MONDAY); + + // Test -u-sd- + verifyFirstDayOfWeek("en-u-sd-mv00", UCAL_FRIDAY); + verifyFirstDayOfWeek("en-u-sd-aeaj", UCAL_SATURDAY); + verifyFirstDayOfWeek("en-u-sd-usca", UCAL_SUNDAY); + verifyFirstDayOfWeek("dv-u-sd-gbsct", UCAL_MONDAY); + + // Test Add Likely Subtags algorithm produces a region + // dv => dv_Thaa_MV => Friday + 
verifyFirstDayOfWeek("dv", UCAL_FRIDAY); + // und_Thaa => dv_Thaa_MV => Friday + verifyFirstDayOfWeek("und-Thaa", UCAL_FRIDAY); + + // ssh => ssh_Arab_AE => Saturday + verifyFirstDayOfWeek("ssh", UCAL_SATURDAY); + // wbl_Arab => wbl_Arab_AF => Saturday + verifyFirstDayOfWeek("wbl-Arab", UCAL_SATURDAY); + + // en => en_Latn_US => Sunday + verifyFirstDayOfWeek("en", UCAL_SUNDAY); + // und_Hira => ja_Hira_JP => Sunday + verifyFirstDayOfWeek("und-Hira", UCAL_SUNDAY); + + verifyFirstDayOfWeek("zxx", UCAL_MONDAY); +} #endif /* #if !UCONFIG_NO_FORMATTING */ //eof diff --git a/icu4c/source/test/intltest/caltest.h b/icu4c/source/test/intltest/caltest.h index 23ef779408f4..9f4ba750f222 100644 --- a/icu4c/source/test/intltest/caltest.h +++ b/icu4c/source/test/intltest/caltest.h @@ -334,6 +334,9 @@ class CalendarTest: public CalendarTimeZoneTest { void TestFWWithISO8601(); void TestRollWeekOfYear(); + void verifyFirstDayOfWeek(const char* locale, UCalendarDaysOfWeek expected); + void TestFirstDayOfWeek(); + void RunChineseCalendarInTemporalLeapYearTest(Calendar* cal); void RunIslamicCalendarInTemporalLeapYearTest(Calendar* cal); void Run366DaysIsLeapYearCalendarInTemporalLeapYearTest(Calendar* cal); diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/util/Calendar.java b/icu4j/main/core/src/main/java/com/ibm/icu/util/Calendar.java index e915af13d6ce..69bcf1bb2e05 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/util/Calendar.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/util/Calendar.java @@ -1856,10 +1856,13 @@ private static Calendar createInstance(ULocale locale) { case ISO8601: // Only differs week numbering rule from Gregorian cal = new GregorianCalendar(zone, locale); - String type = locale.getUnicodeLocaleType("fw"); - // Only set fw to Monday for ISO8601 if there aer no fw keyword. - // If there is a fw keyword, the Calendar constructor already set it to the fw value. 
- if (locale.getKeywordValue("fw") == null) { + // Based on UTS35 "First Day Overrides" + // https://unicode.org/reports/tr35/tr35-dates.html#first-day-overrides + // Only set fw to Monday for ISO8601 if there is neither an fw nor an rg keyword. + // If there is an fw or rg keyword, the Calendar constructor has already set it + // to the fw value or to a value based on the rg region. + if (locale.getUnicodeLocaleType("fw") == null && + locale.getUnicodeLocaleType("rg") == null) { cal.setFirstDayOfWeek(MONDAY); } cal.setMinimalDaysInFirstWeek(4); diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/util/ULocale.java b/icu4j/main/core/src/main/java/com/ibm/icu/util/ULocale.java index d2fa84a8c94b..1c5bb17a5124 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/util/ULocale.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/util/ULocale.java @@ -958,6 +958,22 @@ public static String getCountry(String localeID) { return new LocaleIDParser(localeID).getCountry(); } + /** + * Get region code from a key in locale or null. + */ + private static String getRegionFromKey(ULocale locale, String key) { + String region = locale.getKeywordValue(key); + if (region != null && region.length() >= 3 && region.length() <= 7) { + if (Character.isLetter(region.charAt(0))) { + return AsciiUtil.toUpperString(region.substring(0, 2)); + } else { + // assume three-digit region code + return region.substring(0, 3); + } + } + return null; + } + /** * {@icu} Get the region to use for supplemental data lookup. 
* Uses @@ -981,17 +997,16 @@ public static String getCountry(String localeID) { @Deprecated public static String getRegionForSupplementalData( ULocale locale, boolean inferRegion) { - String region = locale.getKeywordValue("rg"); - if (region != null && region.length() >= 3 && region.length() <= 7) { - if (Character.isLetter(region.charAt(0))) { - return AsciiUtil.toUpperString(region.substring(0, 2)); - } else { - // assume three-digit region code - return region.substring(0, 3); - } + String region = getRegionFromKey(locale, "rg"); + if (region != null) { + return region; } region = locale.getCountry(); if (region.length() == 0 && inferRegion) { + region = getRegionFromKey(locale, "sd"); + if (region != null) { + return region; + } ULocale maximized = addLikelySubtags(locale); region = maximized.getCountry(); } diff --git a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/calendar/IBMCalendarTest.java b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/calendar/IBMCalendarTest.java index 0bc673aeabb3..5404088af20e 100644 --- a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/calendar/IBMCalendarTest.java +++ b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/calendar/IBMCalendarTest.java @@ -205,6 +205,76 @@ void quasiGregorianTest(Calendar cal, int[] data) { } } + private void verifyFirstDayOfWeek(String l, int weekday) { + assertEquals(l, weekday, + Calendar.getInstance(Locale.forLanguageTag(l)).getFirstDayOfWeek()); + } + /** + * Test "First Day Overrides" behavior + * https://unicode.org/reports/tr35/tr35-dates.html#first-day-overrides + * And the data in + * https://github.com/unicode-org/cldr/blob/main/common/supplemental/supplementalData.xml + * + * Examples of region for First Day of a week + * Friday: MV + * Saturday: AE AF + * Sunday: US JP + * Monday: GB + */ + @Test + public void TestFirstDayOfWeek() { + String l; + // Test -u-fw- value + verifyFirstDayOfWeek("en-MV-u-ca-iso8601-fw-sun-rg-mvzzzz-sd-usca", Calendar.SUNDAY); + 
verifyFirstDayOfWeek("en-MV-u-ca-iso8601-fw-mon-rg-mvzzzz-sd-usca", Calendar.MONDAY); + verifyFirstDayOfWeek("en-MV-u-ca-iso8601-fw-tue-rg-mvzzzz-sd-usca", Calendar.TUESDAY); + verifyFirstDayOfWeek("en-MV-u-ca-iso8601-fw-wed-rg-mvzzzz-sd-usca", Calendar.WEDNESDAY); + verifyFirstDayOfWeek("en-MV-u-ca-iso8601-fw-thu-rg-mvzzzz-sd-usca", Calendar.THURSDAY); + verifyFirstDayOfWeek("en-AE-u-ca-iso8601-fw-fri-rg-aezzzz-sd-usca", Calendar.FRIDAY); + verifyFirstDayOfWeek("en-MV-u-ca-iso8601-fw-sat-rg-mvzzzz-sd-usca", Calendar.SATURDAY); + + // Test -u-rg- value + verifyFirstDayOfWeek("en-MV-u-ca-iso8601-rg-mvzzzz-sd-usca", Calendar.FRIDAY); + verifyFirstDayOfWeek("en-MV-u-ca-iso8601-rg-aezzzz-sd-usca", Calendar.SATURDAY); + verifyFirstDayOfWeek("en-MV-u-ca-iso8601-rg-uszzzz-sd-usca", Calendar.SUNDAY); + verifyFirstDayOfWeek("en-MV-u-ca-iso8601-rg-gbzzzz-sd-usca", Calendar.MONDAY); + + // Test -u-ca-iso8601 + verifyFirstDayOfWeek("en-MV-u-ca-iso8601-sd-mv00", Calendar.MONDAY); + verifyFirstDayOfWeek("en-AE-u-ca-iso8601-sd-aeaj", Calendar.MONDAY); + verifyFirstDayOfWeek("en-US-u-ca-iso8601-sd-usca", Calendar.MONDAY); + + // Test Region Tags only + verifyFirstDayOfWeek("en-MV", Calendar.FRIDAY); + verifyFirstDayOfWeek("en-AE", Calendar.SATURDAY); + verifyFirstDayOfWeek("en-US", Calendar.SUNDAY); + verifyFirstDayOfWeek("dv-GB", Calendar.MONDAY); + + // Test -u-sd- + //verifyFirstDayOfWeek("en-u-sd-mv00", Calendar.FRIDAY); + // verifyFirstDayOfWeek("en-u-sd-aeaj", Calendar.SATURDAY); + // verifyFirstDayOfWeek("en-u-sd-usca", Calendar.SUNDAY); + // verifyFirstDayOfWeek("dv-u-sd-gbsct", Calendar.MONDAY); + + // Test Add Likely Subtags algorithm produces a region + // dv => dv_Thaa_MV => Friday + verifyFirstDayOfWeek("dv", Calendar.FRIDAY); + // und_Thaa => dv_Thaa_MV => Friday + verifyFirstDayOfWeek("und-Thaa", Calendar.FRIDAY); + + // ssh => ssh_Arab_AE => Saturday + verifyFirstDayOfWeek("ssh", Calendar.SATURDAY); + // wbl_Arab => wbl_Arab_AF => Saturday + 
verifyFirstDayOfWeek("wbl-Arab", Calendar.SATURDAY); + + // en => en_Latn_US => Sunday + verifyFirstDayOfWeek("en", Calendar.SUNDAY); + // und_Hira => ja_Hira_JP => Sunday + verifyFirstDayOfWeek("und-Hira", Calendar.SUNDAY); + + verifyFirstDayOfWeek("zxx", Calendar.MONDAY); + } + /** * Verify that BuddhistCalendar shifts years to Buddhist Era but otherwise * behaves like GregorianCalendar.