From 64117a514c9de0ddcd8391ce42ba6a6c776785f6 Mon Sep 17 00:00:00 2001 From: Seth Brenith Date: Thu, 26 Jul 2018 14:45:26 -0700 Subject: [PATCH 1/2] Avoid copying strings in (de|en)codeURI(Component)? and trim * The last step of Encode and Decode was copying the buffer for no reason; skip that copy * Often the input to Encode or Decode doesn't require any transformation, so return the original string in that case * Likewise, avoid creating new substrings for trimmed strings if nothing needs to be trimmed Related to OS:17754314 --- lib/Runtime/Library/JavascriptString.cpp | 7 +++++ lib/Runtime/Library/UriHelper.cpp | 40 +++++++++++++++++++----- lib/Runtime/Library/UriHelper.h | 4 +-- 3 files changed, 41 insertions(+), 10 deletions(-) diff --git a/lib/Runtime/Library/JavascriptString.cpp b/lib/Runtime/Library/JavascriptString.cpp index e613d447a61..7d55a97ecee 100644 --- a/lib/Runtime/Library/JavascriptString.cpp +++ b/lib/Runtime/Library/JavascriptString.cpp @@ -2472,6 +2472,13 @@ namespace Js Assert(idxEnd >= 0); } } + + if (idxStart == 0 && idxEnd == len - 1) + { + AssertMsg(scriptContext == arg->GetScriptContext(), "Should have already marshaled the string in cross site thunk"); + return arg; + } + return SubstringCore(arg, idxStart, idxEnd - idxStart + 1, scriptContext); } diff --git a/lib/Runtime/Library/UriHelper.cpp b/lib/Runtime/Library/UriHelper.cpp index 9530bce6b40..31b87ec7914 100644 --- a/lib/Runtime/Library/UriHelper.cpp +++ b/lib/Runtime/Library/UriHelper.cpp @@ -29,7 +29,7 @@ namespace Js } } - return Encode(strURI->GetString(), strURI->GetLength(), flags, scriptContext); + return Encode(strURI, flags, scriptContext); } unsigned char UriHelper::s_uriProps[128] = @@ -126,10 +126,13 @@ namespace Js } // The Encode algorithm described in sec. 15.1.3 of the spec. The input string is - // 'input' and the Unescaped set is described by the flags 'unescapedFlags'. The + // 'strURI' and the Unescaped set is described by the flags 'unescapedFlags'. The // output is a string var. - Var UriHelper::Encode(__in_ecount(len) const char16* input, uint32 len, unsigned char unescapedFlags, ScriptContext* scriptContext ) + Var UriHelper::Encode(JavascriptString* strURI, unsigned char unescapedFlags, ScriptContext* scriptContext ) { + uint32 len = strURI->GetLength(); + __in_ecount(len) const char16* input = strURI->GetString(); + bool needsChanges = false; BYTE bUTF8[MaxUTF8Len]; // pass 1 calculate output length and error check @@ -144,6 +147,8 @@ namespace Js } else { + needsChanges = true; + if( c >= 0xDC00 && c <= 0xDFFF ) { JavascriptError::ThrowURIError(scriptContext, JSERR_URIEncodeError /* TODO-ERROR: _u("NEED MESSAGE") */); @@ -173,6 +178,13 @@ namespace Js } } + // If nothing needs encoding, then avoid extra work + if (!needsChanges) + { + AssertMsg(scriptContext == strURI->GetScriptContext(), "Should have already marshaled the string in cross site thunk"); + return strURI; + } + //pass 2 generate the encoded URI uint32 allocSize = UInt32Math::Add(outputLen, 1); @@ -238,7 +250,7 @@ namespace Js __analysis_assume(outputLen + 1 == allocSize); outURI[outputLen] = _u('\0'); - return JavascriptString::NewCopyBuffer(outURI, outputLen, scriptContext); + return JavascriptString::NewWithBuffer(outURI, outputLen, scriptContext); } Var UriHelper::DecodeCoreURI(ScriptContext* scriptContext, Arguments& args, unsigned char reservedFlags ) @@ -265,14 +277,17 @@ namespace Js } } - return Decode(strURI->GetString(), strURI->GetLength(), reservedFlags, scriptContext); + return Decode(strURI, reservedFlags, scriptContext); } // The Decode algorithm described in sec. 15.1.3 of the spec. The input string is - // 'input' and the Reserved set is described by the flags 'reservedFlags'. The + // 'strURI' and the Reserved set is described by the flags 'reservedFlags'. The // output is a string var. - Var UriHelper::Decode(__in_ecount(len) const char16* input, uint32 len, unsigned char reservedFlags, ScriptContext* scriptContext) + Var UriHelper::Decode(JavascriptString* strURI, unsigned char reservedFlags, ScriptContext* scriptContext) { + uint32 len = strURI->GetLength(); + __in_ecount(len) const char16* input = strURI->GetString(); + bool needsChanges = false; char16 c1; char16 c; // pass 1 calculate output length and error check @@ -283,6 +298,8 @@ namespace Js if( c == '%') { + needsChanges = true; + uint32 start = k; if( k + 2 >= len ) { @@ -383,6 +400,13 @@ namespace Js } } + // If nothing needs decoding, then avoid extra work + if (!needsChanges) + { + AssertMsg(scriptContext == strURI->GetScriptContext(), "Should have already marshaled the string in cross site thunk"); + return strURI; + } + //pass 2 generate the decoded URI uint32 allocSize = UInt32Math::Add(outputLen, 1); char16* outURI = RecyclerNewArrayLeaf(scriptContext->GetRecycler(), char16, allocSize); @@ -538,7 +562,7 @@ namespace Js __analysis_assume(outputLen + 1 == allocSize); outURI[outputLen] = _u('\0'); - return JavascriptString::NewCopyBuffer(outURI, outputLen, scriptContext); + return JavascriptString::NewWithBuffer(outURI, outputLen, scriptContext); } // Decodes a two-hexadecimal-digit wide character pair into the byte value it represents diff --git a/lib/Runtime/Library/UriHelper.h b/lib/Runtime/Library/UriHelper.h index 63bccda235e..8c27ef4d9be 100644 --- a/lib/Runtime/Library/UriHelper.h +++ b/lib/Runtime/Library/UriHelper.h @@ -18,7 +18,7 @@ namespace Js }; static Var EncodeCoreURI(ScriptContext* scriptContext, Arguments& args, unsigned char flags); - static Var Decode(__in_ecount(len) const char16* psz, uint32 len, unsigned char reservedFlags, ScriptContext* scriptContext); + static Var Decode(JavascriptString* strURI, unsigned char reservedFlags, ScriptContext* scriptContext); static Var DecodeCoreURI(ScriptContext* scriptContext, Arguments& args, unsigned char reservedFlags); static unsigned char s_uriProps[128]; @@ -82,7 +82,7 @@ namespace Js static const uint32 MaxUTF8Len = 4; static uint32 ToUTF8( uint32 uVal, BYTE bUTF8[MaxUTF8Len]); static uint32 FromUTF8( BYTE bUTF8[MaxUTF8Len], uint32 uLen ); - static Var Encode(__in_ecount(len) const char16* psz, uint32 len, unsigned char unescapedFlags, ScriptContext* scriptContext ); + static Var Encode(JavascriptString* strURI, unsigned char unescapedFlags, ScriptContext* scriptContext ); private: static bool DecodeByteFromHex(const char16 digit1, const char16 digit2, unsigned char &value); From 67c4844d542a4963b9a7300175f644120ca2fc37 Mon Sep 17 00:00:00 2001 From: Seth Brenith Date: Thu, 26 Jul 2018 14:58:04 -0700 Subject: [PATCH 2/2] Use charcount_t --- lib/Runtime/Library/UriHelper.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Runtime/Library/UriHelper.cpp b/lib/Runtime/Library/UriHelper.cpp index 31b87ec7914..26de3321344 100644 --- a/lib/Runtime/Library/UriHelper.cpp +++ b/lib/Runtime/Library/UriHelper.cpp @@ -130,7 +130,7 @@ namespace Js // output is a string var. Var UriHelper::Encode(JavascriptString* strURI, unsigned char unescapedFlags, ScriptContext* scriptContext ) { - uint32 len = strURI->GetLength(); + charcount_t len = strURI->GetLength(); __in_ecount(len) const char16* input = strURI->GetString(); bool needsChanges = false; BYTE bUTF8[MaxUTF8Len]; @@ -285,7 +285,7 @@ namespace Js // output is a string var. Var UriHelper::Decode(JavascriptString* strURI, unsigned char reservedFlags, ScriptContext* scriptContext) { - uint32 len = strURI->GetLength(); + charcount_t len = strURI->GetLength(); __in_ecount(len) const char16* input = strURI->GetString(); bool needsChanges = false; char16 c1;