Skip to content

Commit

Permalink
Remove IsFastSort optimizations from String
Browse files Browse the repository at this point in the history
- Fix bugs in the Span-based globalization fast paths. Note that some of these bug fixes are going to affect performance of the globalization fast paths.
- Use Span-based globalization fast paths for strings
- Avoid static array allocation for HighCharTable

Fixes #26758
  • Loading branch information
EgorBo authored and jkotas committed Sep 20, 2019
1 parent 93c9dc5 commit f49f90c
Show file tree
Hide file tree
Showing 12 changed files with 140 additions and 520 deletions.

Large diffs are not rendered by default.

5 changes: 0 additions & 5 deletions src/System.Private.CoreLib/src/System/String.CoreCLR.cs
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,6 @@ public extern int Length
[MethodImplAttribute(MethodImplOptions.InternalCall)]
internal static extern string FastAllocateString(int length);

// Is this a string that can be compared quickly (that is it has only characters > 0x80
// and not a - or '
[MethodImplAttribute(MethodImplOptions.InternalCall)]
internal extern bool IsFastSort();

// Set extra byte for odd-sized strings that came from interop as BSTR.
[MethodImplAttribute(MethodImplOptions.InternalCall)]
internal extern void SetTrailByte(byte data);
Expand Down
23 changes: 0 additions & 23 deletions src/classlibnative/bcltype/stringnative.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,29 +30,6 @@
#pragma optimize("tgy", on)
#endif

/*===============================IsFastSort===============================
**Action: Call the helper to walk the string and see if we have any high chars.
**Returns: void. The appropriate bits are set on the String.
**Arguments: vThisRef - The string to be checked.
**Exceptions: None.
==============================================================================*/
FCIMPL1(FC_BOOL_RET, COMString::IsFastSort, StringObject* thisRef) {
FCALL_CONTRACT;

VALIDATEOBJECT(thisRef);
_ASSERTE(thisRef!=NULL);
DWORD state = thisRef->GetHighCharState();
if (IS_STRING_STATE_UNDETERMINED(state)) {
state = (STRINGREF(thisRef))->InternalCheckHighChars();
FC_GC_POLL_RET();
}
else {
FC_GC_POLL_NOT_NEEDED();
}
FC_RETURN_BOOL(IS_FAST_SORT(state)); //This can indicate either high chars or special sorting chars.
}
FCIMPLEND


/*==================================GETCHARAT===================================
**Returns the character at position index. Thows IndexOutOfRangeException as
Expand Down
3 changes: 0 additions & 3 deletions src/classlibnative/bcltype/stringnative.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,6 @@ class COMString {
//
// Search/Query Methods
//
static FCDECL1(FC_BOOL_RET, IsFastSort, StringObject* pThisRef);
static FCDECL1(FC_BOOL_RET, IsAscii, StringObject* pThisRef);

static FCDECL6(INT32, CompareOrdinalEx, StringObject* strA, INT32 indexA, INT32 countA, StringObject* strB, INT32 indexB, INT32 countB);

static FCDECL2(FC_CHAR_RET, GetCharAt, StringObject* pThisRef, INT32 index);
Expand Down
10 changes: 0 additions & 10 deletions src/inc/utilcode.h
Original file line number Diff line number Diff line change
Expand Up @@ -4873,16 +4873,6 @@ BOOL NoGuiOnAssert();
VOID TerminateOnAssert();
#endif // _DEBUG

class HighCharHelper {
public:
static inline BOOL IsHighChar(int c) {
return (BOOL)HighCharTable[c];
}

private:
static const BYTE HighCharTable[];
};


BOOL ThreadWillCreateGuardPage(SIZE_T sizeReservedStack, SIZE_T sizeCommitedStack);

Expand Down
23 changes: 3 additions & 20 deletions src/md/compiler/emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1524,11 +1524,8 @@ STDMETHODIMP RegMeta::DefineUserString( // S_OK or error.

UINT32 nIndex; // Index into the user string heap.
CQuickBytes qb; // For storing the string with the byte prefix.
ULONG i; // Loop counter.
BOOL bIs80Plus = false; // Is there an 80+ WCHAR.
ULONG ulMemSize; // Size of memory taken by the string passed in.
PBYTE pb; // Pointer into memory allocated by qb.
WCHAR c; // Temporary used during comparison;



Expand All @@ -1541,30 +1538,16 @@ STDMETHODIMP RegMeta::DefineUserString( // S_OK or error.

_ASSERTE(pstk && szString && cchString != ULONG_MAX);

//
// Walk the entire string looking for characters that would block us from doing
// a fast comparison of the string. These characters include anything greater than
// 0x80 or an apostrophe or a hyphen. Apostrophe and hyphen are excluded because
// they would prevent words like coop and co-op from sorting together in a culture-aware
// comparison. We also need to exclude some set of the control characters. This check
// is more restrictive
//
for (i=0; i<cchString; i++) {
c = szString[i];
if (c>=0x80 || HighCharHelper::IsHighChar((int)c)) {
bIs80Plus = true;
break;
}
}

// Copy over the string to memory.
ulMemSize = cchString * sizeof(WCHAR);
IfFailGo(qb.ReSizeNoThrow(ulMemSize + 1));
pb = reinterpret_cast<PBYTE>(qb.Ptr());
memcpy(pb, szString, ulMemSize);
SwapStringLength((WCHAR *) pb, cchString);
// Set the last byte of memory to indicate whether there is a 80+ character.
*(pb + ulMemSize) = bIs80Plus ? 1 : 0;
// Always set the last byte of memory to indicate that there may be a 80+ or special character.
// This byte is not used by the runtime.
*(pb + ulMemSize) = 1;

IfFailGo(m_pStgdb->m_MiniMd.PutUserString(
MetaData::DataBlob(pb, ulMemSize + 1),
Expand Down
178 changes: 0 additions & 178 deletions src/utilcode/util_nodependencies.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -813,181 +813,3 @@ BOOL ThreadWillCreateGuardPage(SIZE_T sizeReservedStack, SIZE_T sizeCommitedStac
return (sizeReservedStack > sizeCommitedStack + ((size_t)sysInfo.dwPageSize));
} // ThreadWillCreateGuardPage

//The following characters have special sorting weights when combined with other
//characters, which means we can't use our fast sorting algorithm on them.
//Most of these are pretty rare control characters, but apostrophe and hyphen
//are fairly common and force us down the slower path. This is because we want
//"word sorting", which means that "coop" and "co-op" sort together, instead of
//separately as they would if we were doing a string sort.
// 0x0001 6 3 2 2 0 ;Start Of Heading
// 0x0002 6 4 2 2 0 ;Start Of Text
// 0x0003 6 5 2 2 0 ;End Of Text
// 0x0004 6 6 2 2 0 ;End Of Transmission
// 0x0005 6 7 2 2 0 ;Enquiry
// 0x0006 6 8 2 2 0 ;Acknowledge
// 0x0007 6 9 2 2 0 ;Bell
// 0x0008 6 10 2 2 0 ;Backspace

// 0x000e 6 11 2 2 0 ;Shift Out
// 0x000f 6 12 2 2 0 ;Shift In
// 0x0010 6 13 2 2 0 ;Data Link Escape
// 0x0011 6 14 2 2 0 ;Device Control One
// 0x0012 6 15 2 2 0 ;Device Control Two
// 0x0013 6 16 2 2 0 ;Device Control Three
// 0x0014 6 17 2 2 0 ;Device Control Four
// 0x0015 6 18 2 2 0 ;Negative Acknowledge
// 0x0016 6 19 2 2 0 ;Synchronous Idle
// 0x0017 6 20 2 2 0 ;End Of Transmission Block
// 0x0018 6 21 2 2 0 ;Cancel
// 0x0019 6 22 2 2 0 ;End Of Medium
// 0x001a 6 23 2 2 0 ;Substitute
// 0x001b 6 24 2 2 0 ;Escape
// 0x001c 6 25 2 2 0 ;File Separator
// 0x001d 6 26 2 2 0 ;Group Separator
// 0x001e 6 27 2 2 0 ;Record Separator
// 0x001f 6 28 2 2 0 ;Unit Separator

// 0x0027 6 128 2 2 0 ;Apostrophe-Quote
// 0x002d 6 130 2 2 0 ;Hyphen-Minus

// 0x007f 6 29 2 2 0 ;Delete

const BYTE
HighCharHelper::HighCharTable[]= {
TRUE, /* 0x0, 0x0 */
TRUE, /* 0x1, .*/
TRUE, /* 0x2, .*/
TRUE, /* 0x3, .*/
TRUE, /* 0x4, .*/
TRUE, /* 0x5, .*/
TRUE, /* 0x6, .*/
TRUE, /* 0x7, .*/
TRUE, /* 0x8, .*/
FALSE, /* 0x9, */
#ifdef PLATFORM_UNIX
TRUE, /* 0xA, */
#else
FALSE, /* 0xA, */
#endif // PLATFORM_UNIX
FALSE, /* 0xB, .*/
FALSE, /* 0xC, .*/
#ifdef PLATFORM_UNIX
TRUE, /* 0xD, */
#else
FALSE, /* 0xD, */
#endif // PLATFORM_UNIX
TRUE, /* 0xE, .*/
TRUE, /* 0xF, .*/
TRUE, /* 0x10, .*/
TRUE, /* 0x11, .*/
TRUE, /* 0x12, .*/
TRUE, /* 0x13, .*/
TRUE, /* 0x14, .*/
TRUE, /* 0x15, .*/
TRUE, /* 0x16, .*/
TRUE, /* 0x17, .*/
TRUE, /* 0x18, .*/
TRUE, /* 0x19, .*/
TRUE, /* 0x1A, */
TRUE, /* 0x1B, .*/
TRUE, /* 0x1C, .*/
TRUE, /* 0x1D, .*/
TRUE, /* 0x1E, .*/
TRUE, /* 0x1F, .*/
FALSE, /*0x20, */
FALSE, /*0x21, !*/
FALSE, /*0x22, "*/
FALSE, /*0x23, #*/
FALSE, /*0x24, $*/
FALSE, /*0x25, %*/
FALSE, /*0x26, &*/
TRUE, /*0x27, '*/
FALSE, /*0x28, (*/
FALSE, /*0x29, )*/
FALSE, /*0x2A **/
FALSE, /*0x2B, +*/
FALSE, /*0x2C, ,*/
TRUE, /*0x2D, -*/
FALSE, /*0x2E, .*/
FALSE, /*0x2F, /*/
FALSE, /*0x30, 0*/
FALSE, /*0x31, 1*/
FALSE, /*0x32, 2*/
FALSE, /*0x33, 3*/
FALSE, /*0x34, 4*/
FALSE, /*0x35, 5*/
FALSE, /*0x36, 6*/
FALSE, /*0x37, 7*/
FALSE, /*0x38, 8*/
FALSE, /*0x39, 9*/
FALSE, /*0x3A, :*/
FALSE, /*0x3B, ;*/
FALSE, /*0x3C, <*/
FALSE, /*0x3D, =*/
FALSE, /*0x3E, >*/
FALSE, /*0x3F, ?*/
FALSE, /*0x40, @*/
FALSE, /*0x41, A*/
FALSE, /*0x42, B*/
FALSE, /*0x43, C*/
FALSE, /*0x44, D*/
FALSE, /*0x45, E*/
FALSE, /*0x46, F*/
FALSE, /*0x47, G*/
FALSE, /*0x48, H*/
FALSE, /*0x49, I*/
FALSE, /*0x4A, J*/
FALSE, /*0x4B, K*/
FALSE, /*0x4C, L*/
FALSE, /*0x4D, M*/
FALSE, /*0x4E, N*/
FALSE, /*0x4F, O*/
FALSE, /*0x50, P*/
FALSE, /*0x51, Q*/
FALSE, /*0x52, R*/
FALSE, /*0x53, S*/
FALSE, /*0x54, T*/
FALSE, /*0x55, U*/
FALSE, /*0x56, V*/
FALSE, /*0x57, W*/
FALSE, /*0x58, X*/
FALSE, /*0x59, Y*/
FALSE, /*0x5A, Z*/
FALSE, /*0x5B, [*/
FALSE, /*0x5C, \*/
FALSE, /*0x5D, ]*/
FALSE, /*0x5E, ^*/
FALSE, /*0x5F, _*/
FALSE, /*0x60, `*/
FALSE, /*0x61, a*/
FALSE, /*0x62, b*/
FALSE, /*0x63, c*/
FALSE, /*0x64, d*/
FALSE, /*0x65, e*/
FALSE, /*0x66, f*/
FALSE, /*0x67, g*/
FALSE, /*0x68, h*/
FALSE, /*0x69, i*/
FALSE, /*0x6A, j*/
FALSE, /*0x6B, k*/
FALSE, /*0x6C, l*/
FALSE, /*0x6D, m*/
FALSE, /*0x6E, n*/
FALSE, /*0x6F, o*/
FALSE, /*0x70, p*/
FALSE, /*0x71, q*/
FALSE, /*0x72, r*/
FALSE, /*0x73, s*/
FALSE, /*0x74, t*/
FALSE, /*0x75, u*/
FALSE, /*0x76, v*/
FALSE, /*0x77, w*/
FALSE, /*0x78, x*/
FALSE, /*0x79, y*/
FALSE, /*0x7A, z*/
FALSE, /*0x7B, {*/
FALSE, /*0x7C, |*/
FALSE, /*0x7D, }*/
FALSE, /*0x7E, ~*/
TRUE, /*0x7F, */
}; // HighCharHelper::HighCharTable
1 change: 0 additions & 1 deletion src/vm/ecalllist.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ FCFuncStart(gStringFuncs)
FCDynamicSig(COR_CTOR_METHOD_NAME, &gsig_IM_PtrSByt_RetVoid, CORINFO_INTRINSIC_Illegal, ECall::CtorSBytePtrManaged)
FCDynamicSig(COR_CTOR_METHOD_NAME, &gsig_IM_PtrSByt_Int_Int_RetVoid, CORINFO_INTRINSIC_Illegal, ECall::CtorSBytePtrStartLengthManaged)
FCDynamicSig(COR_CTOR_METHOD_NAME, &gsig_IM_PtrSByt_Int_Int_Encoding_RetVoid, CORINFO_INTRINSIC_Illegal, ECall::CtorSBytePtrStartLengthEncodingManaged)
FCFuncElement("IsFastSort", COMString::IsFastSort)
FCIntrinsic("get_Length", COMString::Length, CORINFO_INTRINSIC_StringLength)
FCIntrinsic("get_Chars", COMString::GetCharAt, CORINFO_INTRINSIC_StringGetChar)
FCFuncElement("SetTrailByte", COMString::FCSetTrailByte)
Expand Down
82 changes: 0 additions & 82 deletions src/vm/object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1017,88 +1017,6 @@ BOOL StringObject::CaseInsensitiveCompHelper(__in_ecount(aLength) WCHAR *strACha

}

/*=============================InternalHasHighChars=============================
**Action: Checks if the string can be sorted quickly. The requirements are that
** the string contain no character greater than 0x80 and that the string not
** contain an apostrophe or a hypen. Apostrophe and hyphen are excluded so that
** words like co-op and coop sort together.
**Returns: Void. The side effect is to set a bit on the string indicating whether or not
** the string contains high chars.
**Arguments: The String to be checked.
**Exceptions: None
==============================================================================*/
DWORD StringObject::InternalCheckHighChars() {
WRAPPER_NO_CONTRACT;

WCHAR *chars;
WCHAR c;
INT32 length;

RefInterpretGetStringValuesDangerousForGC((WCHAR **) &chars, &length);

DWORD stringState = STRING_STATE_FAST_OPS;

for (int i=0; i<length; i++) {
c = chars[i];
if (c>=0x80) {
SetHighCharState(STRING_STATE_HIGH_CHARS);
return STRING_STATE_HIGH_CHARS;
} else if (HighCharHelper::IsHighChar((int)c)) {
//This means that we have a character which forces special sorting,
//but doesn't necessarily force slower casing and indexing. We'll
//set a value to remember this, but we need to check the rest of
//the string because we may still find a charcter greater than 0x7f.
stringState = STRING_STATE_SPECIAL_SORT;
}
}

SetHighCharState(stringState);
return stringState;
}

#ifdef VERIFY_HEAP
/*=============================ValidateHighChars=============================
**Action: Validate if the HighChars bits is set correctly, no side effect
**Returns: BOOL for result of validation
**Arguments: The String to be checked.
**Exceptions: None
==============================================================================*/
BOOL StringObject::ValidateHighChars()
{
WRAPPER_NO_CONTRACT;
DWORD curStringState = GetHighCharState ();
// state could always be undetermined
if (curStringState == STRING_STATE_UNDETERMINED)
{
return TRUE;
}

WCHAR *chars;
INT32 length;
RefInterpretGetStringValuesDangerousForGC((WCHAR **) &chars, &length);

DWORD stringState = STRING_STATE_FAST_OPS;
for (int i=0; i<length; i++) {
WCHAR c = chars[i];
if (c>=0x80)
{
// if there is a high char in the string, the state has to be STRING_STATE_HIGH_CHARS
return curStringState == STRING_STATE_HIGH_CHARS;
}
else if (HighCharHelper::IsHighChar((int)c)) {
//This means that we have a character which forces special sorting,
//but doesn't necessarily force slower casing and indexing. We'll
//set a value to remember this, but we need to check the rest of
//the string because we may still find a charcter greater than 0x7f.
stringState = STRING_STATE_SPECIAL_SORT;
}
}

return stringState == curStringState;
}

#endif //VERIFY_HEAP

/*============================InternalTrailByteCheck============================
**Action: Many years ago, VB didn't have the concept of a byte array, so enterprising
** users created one by allocating a BSTR with an odd length and using it to
Expand Down
Loading

0 comments on commit f49f90c

Please sign in to comment.