Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[release/7.0-staging] Fix creating cultures with extensions in the name #87152

Merged
merged 7 commits into from
Jun 10, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using Microsoft.DotNet.RemoteExecutor;
using System.Collections.Generic;
using Xunit;

Expand Down Expand Up @@ -440,13 +441,65 @@ public void TestCreationWithTemporaryLCID(int lcid)
Assert.NotEqual(lcid, new CultureInfo(lcid).LCID);
}

// Creates a culture from a name that may carry extensions (-u-, -t-) and/or a "-u-co-" collation
// extension, then checks both the resulting culture name and the resulting sort (CompareInfo) name.
//   cultureName         - the name passed to CultureInfo.GetCultureInfo.
//   expectedCultureName - the value expected from CultureInfo.Name.
//   expectedSortName    - the value expected from CultureInfo.CompareInfo.Name.
// Every data row must supply exactly three arguments to match the theory's parameter list.
[InlineData("zh-TW-u-co-zhuyin", "zh-TW", "zh-TW_zhuyin")]
[InlineData("de-DE-u-co-phonebk", "de-DE", "de-DE_phoneboo")]
[InlineData("de-DE-u-co-phonebk-u-xx", "de-DE-u-xx", "de-DE-u-xx_phoneboo")]
[InlineData("de-DE-u-xx-u-co-phonebk", "de-DE-u-xx-u-co-phonebk", "de-DE-u-xx-u-co-phonebk")]
[InlineData("de-DE-t-xx-u-co-phonebk", "de-DE-t-xx-u-co-phonebk", "de-DE-t-xx-u-co-phonebk_phoneboo")]
[InlineData("de-DE-u-co-phonebk-t-xx", "de-DE-t-xx", "de-DE-t-xx_phoneboo")]
[InlineData("de-DE-u-co-phonebk-t-xx-u-yy", "de-DE-t-xx-u-yy", "de-DE-t-xx-u-yy_phoneboo")]
[InlineData("de-DE", "de-DE", "de-DE")]
[ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsIcuGlobalization))]
public void TestCreationWithMangledSortName(string cultureName, string expectedCultureName, string expectedSortName)
{
    CultureInfo ci = CultureInfo.GetCultureInfo(cultureName);

    Assert.Equal(expectedCultureName, ci.Name);
    Assert.Equal(expectedSortName, ci.CompareInfo.Name);
}

// A neutral culture name (language only, no region) combined with a "-u-co-" collation extension
// is expected to be rejected with CultureNotFoundException when ICU globalization is in use.
[ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsIcuGlobalization))]
public void TestNeutralCultureWithCollationName()
{
    Assert.Throws<CultureNotFoundException>(() => CultureInfo.GetCultureInfo("zh-u-co-zhuyin"));
    Assert.Throws<CultureNotFoundException>(() => CultureInfo.GetCultureInfo("de-u-co-phonebk"));
}

// Checks the culture name reported for inputs whose casing or separators get normalized
// when the culture is created under ICU globalization.
[InlineData("xx-u-XX", "xx-u-xx")]
[InlineData("xx-u-XX-u-yy", "xx-u-xx-u-yy")]
[InlineData("xx-t-ja-JP", "xx-t-ja-jp")]
[InlineData("qps-plocm", "qps-PLOCM")] // ICU normalizes this name to "qps--plocm", which we then normalize back to "qps-plocm"
[ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsIcuGlobalization))]
public void TestCreationWithICUNormalizedNames(string cultureName, string expectedCultureName)
{
    string actualCultureName = CultureInfo.GetCultureInfo(cultureName).Name;

    Assert.Equal(expectedCultureName, actualCultureName);
}

// Condition used by ConditionalTheory: true only when RemoteExecutor reports it is supported
// and the platform is using ICU globalization.
private static bool SupportRemoteExecutionWithIcu
{
    get { return RemoteExecutor.IsSupported && PlatformDetection.IsIcuGlobalization; }
}

// Sets the UI culture to the given name through RemoteExecutor.Invoke and triggers a
// DivideByZeroException, so that the exception message lookup runs under that UI culture.
[InlineData("xx-u-XX")]
[InlineData("xx-u-XX-u-yy")]
[InlineData("xx-t-ja-JP")]
[InlineData("qps-plocm")]
[InlineData("zh-TW-u-co-zhuyin")]
[InlineData("de-DE-u-co-phonebk")]
[InlineData("de-DE-u-co-phonebk-u-xx")]
[InlineData("de-DE-u-xx-u-co-phonebk")]
[InlineData("de-DE-t-xx-u-co-phonebk")]
[InlineData("de-DE-u-co-phonebk-t-xx")]
[InlineData("de-DE-u-co-phonebk-t-xx-u-yy")]
[InlineData("de-DE")]
[ConditionalTheory(nameof(SupportRemoteExecutionWithIcu))]
public void TestWithResourceLookup(string cultureName)
{
    // The handle is disposed when this method's scope ends.
    using var remoteHandle = RemoteExecutor.Invoke(uiCultureName =>
    {
        CultureInfo.CurrentUICulture = CultureInfo.GetCultureInfo(uiCultureName);

        // Kept as a non-constant local so the division is evaluated at run time.
        int divisor = 0;

        // This should go through the resource manager to get the localized exception message using the current UI culture
        Assert.Throws<DivideByZeroException>(() => 1 / divisor);
    }, cultureName);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,88 @@ internal sealed partial class CultureData
// ICU constants
private const int ICU_ULOC_KEYWORD_AND_VALUES_CAPACITY = 100; // max size of keyword or value
private const int ICU_ULOC_FULLNAME_CAPACITY = 157; // max size of locale name
private const int WINDOWS_MAX_COLLATION_NAME_LENGTH = 8; // max collation name length in the culture name

/// <summary>
/// Converts a locale name in the form ICU returns it into the form .NET uses for culture names.
/// </summary>
/// <param name="name">The locale name as returned by ICU.</param>
/// <param name="extension">The extension portion of the original culture name; may be empty.</param>
/// <param name="collationStart">
/// On return, the index in the result where the collation suffix (the '_' separator) starts,
/// or -1 when no collation was written.
/// </param>
/// <returns>
/// The normalized name, or <paramref name="name"/> itself when nothing needed to change.
/// </returns>
/// <remarks>
/// BCP 47 permits extensions in a locale name (language-script-region-extensions-collation), separated
/// from the rest of the name by '-u-' or '-t-'. .NET only supports the collation extension, which is
/// introduced by '-u-co-'. For example, en-US-u-co-search becomes the ICU name en_US@collation=search,
/// which is translated to the .NET name en-US_search.
/// In ICU names all extensions start with '@'. When normalizing, the caller-supplied extension text is
/// kept in the name so that names with extensions stay distinct from names without them: en-US and
/// en-US-u-xx must not collapse to the same name even though .NET does not support the xx extension.
/// </remarks>
private static string NormalizeCultureName(string name, ReadOnlySpan<char> extension, out int collationStart)
{
    Debug.Assert(name is not null);
    Debug.Assert(name.Length <= ICU_ULOC_FULLNAME_CAPACITY);

    const string CollationKey = "collation=";

    collationStart = -1;

    Span<char> normalized = stackalloc char[ICU_ULOC_FULLNAME_CAPACITY];
    int written = 0;
    bool modified = false;

    int pos = 0;
    while (pos < name.Length && written < ICU_ULOC_FULLNAME_CAPACITY)
    {
        char current = name[pos];

        if (current == '@')
        {
            // Everything from '@' onward is ICU keyword data; it is replaced by the original
            // extension text (if any) followed by the optional "_collation" suffix.
            modified = true;

            if (!extension.IsEmpty && extension.TryCopyTo(normalized.Slice(written)))
            {
                written += extension.Length;
            }

            int valueStart = name.IndexOf(CollationKey, pos + 1, StringComparison.Ordinal);
            if (valueStart > 0)
            {
                valueStart += CollationKey.Length;

                // The locale properties look like @key=value;collation=collationName;key=value;key=value
                int valueEnd = name.IndexOf(';', valueStart);
                if (valueEnd < 0)
                {
                    valueEnd = name.Length;
                }

                // Windows doesn't allow collation names longer than 8 characters, so the value is truncated.
                int collationLength = Math.Min(WINDOWS_MAX_COLLATION_NAME_LENGTH, valueEnd - valueStart);

                // Only emit the suffix when the '_' separator and the collation name both fit.
                if (normalized.Length - written >= collationLength + 1)
                {
                    collationStart = written;
                    normalized[written++] = '_';
                    name.AsSpan(valueStart, collationLength).CopyTo(normalized.Slice(written));
                    written += collationLength;
                }
            }

            // Nothing after this point can be represented in a .NET culture name.
            break;
        }

        // ICU turns names like `qps_plocm` (one underscore) into `qps__plocm` (two underscores):
        // during canonicalization ulocimp_getCountry returns an empty string because the country
        // code is longer than 3 characters, yet a '_' is still appended as if a country code had
        // been written. By the time the name reaches here it reads `qps--plocm`, so a doubled
        // dash is collapsed back into a single one.
        bool isDoubledDash = current == '-' && pos + 1 < name.Length && name[pos + 1] == '-';

        normalized[written++] = current;

        if (isDoubledDash)
        {
            modified = true;
            pos++; // skip the second dash of the pair
        }

        pos++;
    }

    if (!modified)
    {
        return name;
    }

    return new string(normalized.Slice(0, written));
}

/// <summary>
/// This method uses the sRealName field (which is initialized by the constructor before this is called) to
Expand All @@ -26,16 +108,15 @@ private bool InitIcuCultureDataCore()
string realNameBuffer = _sRealName;

// Basic validation
if (!IsValidCultureName(realNameBuffer, out var index))
if (!IsValidCultureName(realNameBuffer, out var index, out int indexOfExtensions))
{
return false;
}

// Replace _ (alternate sort) with @collation= for ICU
ReadOnlySpan<char> alternateSortName = default;
if (index > 0)
{
alternateSortName = realNameBuffer.AsSpan(index + 1);
ReadOnlySpan<char> alternateSortName = realNameBuffer.AsSpan(index + 1);
realNameBuffer = string.Concat(realNameBuffer.AsSpan(0, index), ICU_COLLATION_KEYWORD, alternateSortName);
}

Expand All @@ -45,22 +126,9 @@ private bool InitIcuCultureDataCore()
return false; // fail
}

// Replace the ICU collation keyword with an _
Debug.Assert(_sWindowsName != null);
index = _sWindowsName.IndexOf(ICU_COLLATION_KEYWORD, StringComparison.Ordinal);
if (index >= 0)
{
// Use original culture name if alternateSortName is not set, which is possible even if the normalized
// culture name has "@collation=".
// "zh-TW-u-co-zhuyin" is a good example. The term "u-co-" means the following part will be the sort name
// and it will be treated in ICU as "zh-TW@collation=zhuyin".
_sName = alternateSortName.Length == 0 ? realNameBuffer : string.Concat(_sWindowsName.AsSpan(0, index), "_", alternateSortName);
}
else
{
_sName = _sWindowsName;
}
_sRealName = _sName;

_sRealName = NormalizeCultureName(_sWindowsName, indexOfExtensions > 0 ? _sRealName.AsSpan(indexOfExtensions) : ReadOnlySpan<char>.Empty, out int collationStart);

_iLanguage = LCID;
if (_iLanguage == 0)
Expand All @@ -69,11 +137,15 @@ private bool InitIcuCultureDataCore()
}
_bNeutral = TwoLetterISOCountryName.Length == 0;
_sSpecificCulture = _bNeutral ? IcuLocaleData.GetSpecificCultureName(_sRealName) : _sRealName;
// Remove the sort from sName unless custom culture
if (index > 0 && !_bNeutral && !IsCustomCultureId(_iLanguage))

if (_bNeutral && collationStart > 0)
{
_sName = _sWindowsName.Substring(0, index);
return false; // neutral cultures cannot have collation
}

// Remove the sort from sName unless custom culture
_sName = collationStart < 0 ? _sRealName : _sRealName.Substring(0, collationStart);

return true;
}

Expand Down Expand Up @@ -369,7 +441,7 @@ private static CultureInfo[] IcuEnumCultures(CultureTypes types)
}

bool enumNeutrals = (types & CultureTypes.NeutralCultures) != 0;
bool enumSpecificss = (types & CultureTypes.SpecificCultures) != 0;
bool enumSpecifics = (types & CultureTypes.SpecificCultures) != 0;

List<CultureInfo> list = new List<CultureInfo>();
if (enumNeutrals)
Expand All @@ -384,7 +456,7 @@ private static CultureInfo[] IcuEnumCultures(CultureTypes types)
if (index + length <= bufferLength)
{
CultureInfo ci = CultureInfo.GetCultureInfo(new string(chars, index, length));
if ((enumNeutrals && ci.IsNeutralCulture) || (enumSpecificss && !ci.IsNeutralCulture))
if ((enumNeutrals && ci.IsNeutralCulture) || (enumSpecifics && !ci.IsNeutralCulture))
{
list.Add(ci);
}
Expand Down Expand Up @@ -418,10 +490,14 @@ private static string IcuGetConsoleFallbackName(string cultureName)
/// * Disallow input that starts or ends with '-' or '_'.
/// * Disallow input that has any combination of consecutive '-' or '_'.
/// * Disallow input that has multiple '_'.
///
/// The IsValidCultureName method also identifies the presence of any extensions in the name (such as -u- or -t-) and returns the index of the extension.
/// This is necessary because we need to append the extensions to the name when normalizing it to the .NET format.
/// </remarks>
private static bool IsValidCultureName(string subject, out int indexOfUnderscore)
private static bool IsValidCultureName(string subject, out int indexOfUnderscore, out int indexOfExtensions)
{
indexOfUnderscore = -1;
indexOfExtensions = -1;

if (subject.Length == 0) return true; // Invariant Culture
if (subject.Length == 1 || subject.Length > LocaleNameMaxLength) return false;
Expand All @@ -446,6 +522,16 @@ private static bool IsValidCultureName(string subject, out int indexOfUnderscore
seenUnderscore = true;
indexOfUnderscore = i;
}
else
{
if (indexOfExtensions < 0 && i < subject.Length - 2 && (subject[i + 1] is 'u' or 't') && subject[i + 2] == '-') // we have -u- or -t- which is an extension
{
if (subject[i + 1] == 't' || i >= subject.Length - 6 || subject[i + 3] != 'c' || subject[i + 4] != 'o' || subject[i + 5] != '-' ) // not -u-co- collation extension
{
indexOfExtensions = i;
}
}
}
}
else
{
Expand Down