diff --git a/src/libraries/System.Private.Uri/src/System/Uri.cs b/src/libraries/System.Private.Uri/src/System/Uri.cs index aa84ae41e2148b..435b58fc6a022a 100644 --- a/src/libraries/System.Private.Uri/src/System/Uri.cs +++ b/src/libraries/System.Private.Uri/src/System/Uri.cs @@ -132,7 +132,12 @@ internal enum Flags : ulong /// /// Used for asserting that certain methods are only called from the constructor to validate thread-safety assumptions /// - Debug_LeftConstructor = 1UL << 57 + Debug_LeftConstructor = 1UL << 57, + + /// + /// Indicates that there is an authority, but the scheme does not use "//" to delimit it (e.g. "http:\\host"). + /// + SchemeNotCanonical_NoTrailingSlashes = 1UL << 58, } [Conditional("DEBUG")] @@ -1898,7 +1903,6 @@ public Uri MakeRelativeUri(Uri uri) return uri; } - // // http://www.ietf.org/rfc/rfc3986.txt // // 3.3. Path @@ -1914,7 +1918,6 @@ public Uri MakeRelativeUri(Uri uri) // http:(relativeUri) may be considered a valid relative Uri. // // Returns true if a colon is found in the first path segment, false otherwise - // private static readonly SearchValues s_segmentSeparatorChars = SearchValues.Create(@":\/?#"); @@ -2442,7 +2445,7 @@ private unsafe void CreateUriInfo(Flags cF) // This will create a Host string. 
The validity has been already checked // // Assuming: UriInfo member is already set at this point - private unsafe void CreateHostString() + private void CreateHostString() { if (!_syntax.IsSimple) { @@ -2470,12 +2473,7 @@ private unsafe void CreateHostString() // An Authority may need escaping except when it's an inet server address if (HostType == Flags.BasicHostType) { - int idx = 0; - Check result; - fixed (char* pHost = host) - { - result = CheckCanonical(pHost, ref idx, host.Length, c_DummyChar); - } + Check result = CheckCanonical(host, c_DummyChar, out _); if ((result & Check.DisplayCanonical) == 0) { @@ -3206,7 +3204,7 @@ private static void GetLengthWithoutTrailingSpaces(string str, ref int length, i // - Sets the Canonicalization flags if applied // - Will NOT create MoreInfo members // - private unsafe void ParseRemaining() + private void ParseRemaining() { // ensure we parsed up to the path EnsureUriInfo(); @@ -3222,76 +3220,74 @@ private unsafe void ParseRemaining() int origIdx; // stores index to switched original string int idx = _info.Offset.Scheme; int length = _string.Length; - Check result = Check.None; + Check result; UriSyntaxFlags syntaxFlags = _syntax.Flags; // _info.Offset values may be parsed twice but we lock only on _flags update. - fixed (char* str = _string) + GetLengthWithoutTrailingSpaces(_string, ref length, idx); + + if (IsImplicitFile) { - GetLengthWithoutTrailingSpaces(_string, ref length, idx); + cF |= Flags.SchemeNotCanonical; + } + else + { + ReadOnlySpan str = _string.AsSpan(idx); - if (IsImplicitFile) - { - cF |= Flags.SchemeNotCanonical; - } - else + Debug.Assert(str.StartsWith(_syntax.SchemeName, StringComparison.OrdinalIgnoreCase)); + Debug.Assert(str[_syntax.SchemeName.Length] == ':'); + + int schemeLength = _syntax.SchemeName.Length; + + // Check that the scheme casing matches the canonical one (lowercase). + // Using a manual loop instead of ContainsAnyInRange because the expected length is small. 
+ foreach (char c in str.Slice(0, schemeLength)) { - int i; - string schemeName = _syntax.SchemeName; - for (i = 0; i < schemeName.Length; ++i) - { - if (schemeName[i] != str[idx + i]) - cF |= Flags.SchemeNotCanonical; - } - // For an authority Uri only // after the scheme would be canonical - // (for compatibility with: http:\\host) - if (((_flags & Flags.AuthorityFound) != 0) && (idx + i + 3 >= length || str[idx + i + 1] != '/' || - str[idx + i + 2] != '/')) + if (char.IsAsciiLetterUpper(c)) { cF |= Flags.SchemeNotCanonical; + break; } } - - //Check the form of the user info - if ((_flags & Flags.HasUserInfo) != 0) + // For an authority Uri only // after the scheme would be canonical + // (for compatibility with: http:\\host) + if ((_flags & Flags.AuthorityFound) != 0 && !str.Slice(schemeLength).StartsWith("://", StringComparison.Ordinal)) { - idx = _info.Offset.User; - result = CheckCanonical(str, ref idx, _info.Offset.Host, '@'); - if ((result & Check.DisplayCanonical) == 0) - { - cF |= Flags.UserNotCanonical; - } - if ((result & (Check.EscapedCanonical | Check.BackslashInPath)) != Check.EscapedCanonical) - { - cF |= Flags.E_UserNotCanonical; - } - if (IriParsing && ((result & (Check.DisplayCanonical | Check.EscapedCanonical | Check.BackslashInPath - | Check.FoundNonAscii | Check.NotIriCanonical)) - == (Check.DisplayCanonical | Check.FoundNonAscii))) - { - cF |= Flags.UserIriCanonical; - } + cF |= Flags.SchemeNotCanonical | Flags.SchemeNotCanonical_NoTrailingSlashes; } } - // - // Delay canonical Host checking to avoid creation of a host string - // Will do that on demand. 
- // + // Check the form of the user info + if ((_flags & Flags.HasUserInfo) != 0) + { + idx = _info.Offset.User; + result = CheckCanonical(_string.AsSpan(idx, _info.Offset.Host - idx), '@', out _); - // - //We have already checked on the port in EnsureUriInfo() that calls CreateUriInfo - // + if ((result & Check.DisplayCanonical) == 0) + { + cF |= Flags.UserNotCanonical; + } + if ((result & (Check.EscapedCanonical | Check.BackslashInPath)) != Check.EscapedCanonical) + { + cF |= Flags.E_UserNotCanonical; + } + if (IriParsing && ((result & (Check.DisplayCanonical | Check.EscapedCanonical | Check.BackslashInPath + | Check.FoundNonAscii | Check.NotIriCanonical)) + == (Check.DisplayCanonical | Check.FoundNonAscii))) + { + cF |= Flags.UserIriCanonical; + } + } - // - // Parsing the Path if any - // + // Delay canonical Host checking to avoid creation of a host string. + // We will do that on demand. + // We have already checked on the port in EnsureUriInfo() that calls CreateUriInfo. - // For iri parsing if we found unicode the idx has offset into _originalUnicodeString.. + // Parsing the Path if any. + // For iri parsing if we found unicode the idx has offset into _originalUnicodeString, // so restart parsing from there and make _info.Offset.Path as _string.Length - idx = _info.Offset.Path; origIdx = _info.Offset.Path; @@ -3396,34 +3392,31 @@ private unsafe void ParseRemaining() } } - fixed (char* str = _string) { - if (IsImplicitFile || ((syntaxFlags & (UriSyntaxFlags.MayHaveQuery | UriSyntaxFlags.MayHaveFragment)) == 0)) - { - result = CheckCanonical(str, ref idx, length, c_DummyChar); - } - else - { - result = CheckCanonical(str, ref idx, length, (((syntaxFlags & UriSyntaxFlags.MayHaveQuery) != 0) - ? '?' : _syntax.InFact(UriSyntaxFlags.MayHaveFragment) ? '#' : c_EOL)); - } + char delim = + IsImplicitFile || (syntaxFlags & (UriSyntaxFlags.MayHaveQuery | UriSyntaxFlags.MayHaveFragment)) == 0 ? c_DummyChar : + ((syntaxFlags & UriSyntaxFlags.MayHaveQuery) != 0) ? '?' 
: '#'; - // ATTN: - // This may render problems for unknown schemes, but in general for an authority based Uri - // (that has slashes) a path should start with "/" - // This becomes more interesting knowing how a file uri is used in "file://c:/path" - // It will be converted to file:///c:/path - // - // However, even more interesting is that vsmacros://c:\path will not add the third slash in the _canoical_ case - // - // We use special syntax flag to check if the path is rooted, i.e. has a first slash - // - if (((_flags & Flags.AuthorityFound) != 0) && ((syntaxFlags & UriSyntaxFlags.PathIsRooted) != 0) - && (_info.Offset.Path == length || (str[_info.Offset.Path] != '/' && str[_info.Offset.Path] != '\\'))) - { - cF |= Flags.FirstSlashAbsent; - } + result = CheckCanonical(_string.AsSpan(idx, length - idx), delim, out int checkedChars); + idx += checkedChars; } + + // ATTN: + // This may render problems for unknown schemes, but in general for an authority based Uri + // (that has slashes) a path should start with "/" + // This becomes more interesting knowing how a file uri is used in "file://c:/path" + // It will be converted to file:///c:/path + // + // However, even more interesting is that vsmacros://c:\path will not add the third slash in the _canonical_ case + // + // We use a special syntax flag to check if the path is rooted, i.e. has a first slash + // + if (((_flags & Flags.AuthorityFound) != 0) && ((syntaxFlags & UriSyntaxFlags.PathIsRooted) != 0) + && (_info.Offset.Path == length || _string[_info.Offset.Path] is not ('/' or '\\'))) + { + cF |= Flags.FirstSlashAbsent; + } + // Check the need for compression or backslashes conversion // we included IsDosPath since it may come with other than FILE uri, for ex. 
scheme://C:\path // (This is very unfortunate that the original design has included that feature) @@ -3529,31 +3522,30 @@ private unsafe void ParseRemaining() _info.Offset.Query = idx; - fixed (char* str = _string) + if (idx < length && _string[idx] == '?') { - if (idx < length && str[idx] == '?') + idx++; // This is to exclude first '?' character from checking + result = CheckCanonical(_string.AsSpan(idx, length - idx), (syntaxFlags & (UriSyntaxFlags.MayHaveFragment)) != 0 ? '#' : c_EOL, out int charsChecked); + idx += charsChecked; + + if ((result & Check.DisplayCanonical) == 0) { - ++idx; // This is to exclude first '?' character from checking - result = CheckCanonical(str, ref idx, length, ((syntaxFlags & (UriSyntaxFlags.MayHaveFragment)) != 0) - ? '#' : c_EOL); - if ((result & Check.DisplayCanonical) == 0) - { - cF |= Flags.QueryNotCanonical; - } + cF |= Flags.QueryNotCanonical; + } - if ((result & (Check.EscapedCanonical | Check.BackslashInPath)) != Check.EscapedCanonical) - { - cF |= Flags.E_QueryNotCanonical; - } + if ((result & (Check.EscapedCanonical | Check.BackslashInPath)) != Check.EscapedCanonical) + { + cF |= Flags.E_QueryNotCanonical; + } - if (IriParsing && ((result & (Check.DisplayCanonical | Check.EscapedCanonical | Check.BackslashInPath - | Check.FoundNonAscii | Check.NotIriCanonical)) - == (Check.DisplayCanonical | Check.FoundNonAscii))) - { - cF |= Flags.QueryIriCanonical; - } + if (IriParsing && ((result & (Check.DisplayCanonical | Check.EscapedCanonical | Check.BackslashInPath + | Check.FoundNonAscii | Check.NotIriCanonical)) + == (Check.DisplayCanonical | Check.FoundNonAscii))) + { + cF |= Flags.QueryIriCanonical; } } + // //Now we've got to parse the Fragment if any. 
Note that Fragment requires the presence of '#' // @@ -3577,29 +3569,28 @@ private unsafe void ParseRemaining() _info.Offset.Fragment = idx; - fixed (char* str = _string) + if (idx < length && _string[idx] == '#') { - if (idx < length && str[idx] == '#') + idx++; // This is to exclude the first '#' character from checking + // We are not using c_DummyChar here since we want to allow '?' and '#' as unescaped + result = CheckCanonical(_string.AsSpan(idx, length - idx), c_EOL, out int checkedChars); + idx += checkedChars; + + if ((result & Check.DisplayCanonical) == 0) { - ++idx; // This is to exclude first '#' character from checking - //We don't using c_DummyChar since want to allow '?' and '#' as unescaped - result = CheckCanonical(str, ref idx, length, c_EOL); - if ((result & Check.DisplayCanonical) == 0) - { - cF |= Flags.FragmentNotCanonical; - } + cF |= Flags.FragmentNotCanonical; + } - if ((result & (Check.EscapedCanonical | Check.BackslashInPath)) != Check.EscapedCanonical) - { - cF |= Flags.E_FragmentNotCanonical; - } + if ((result & (Check.EscapedCanonical | Check.BackslashInPath)) != Check.EscapedCanonical) + { + cF |= Flags.E_FragmentNotCanonical; + } - if (IriParsing && ((result & (Check.DisplayCanonical | Check.EscapedCanonical | Check.BackslashInPath - | Check.FoundNonAscii | Check.NotIriCanonical)) - == (Check.DisplayCanonical | Check.FoundNonAscii))) - { - cF |= Flags.FragmentIriCanonical; - } + if (IriParsing && ((result & (Check.DisplayCanonical | Check.EscapedCanonical | Check.BackslashInPath + | Check.FoundNonAscii | Check.NotIriCanonical)) + == (Check.DisplayCanonical | Check.FoundNonAscii))) + { + cF |= Flags.FragmentIriCanonical; } } _info.Offset.End = idx; @@ -4141,24 +4132,28 @@ private enum Check FoundNonAscii = 0x8 } - // // Used by ParseRemaining as well by InternalIsWellFormedOriginalString - // - private unsafe Check CheckCanonical(char* str, ref int idx, int end, char delim) + private Check CheckCanonical(ReadOnlySpan span, char delim, out int end) 
{ + Debug.Assert(delim is '@' or '/' or '?' or '#' or c_DummyChar or c_EOL); + Check res = Check.None; bool needsEscaping = false; bool foundEscaping = false; bool iriParsing = IriParsing; - char c; - int i = idx; - for (; i < end; ++i) + int i; + for (i = 0; (uint)i < (uint)span.Length; i++) { - c = str[i]; - // Control chars usually should be escaped in any case - if (c <= '\x1F' || (c >= '\x7F' && c <= '\x9F')) + char c = span[i]; + + if (char.IsAsciiLetterOrDigit(c)) { + // The most common case - unreserved chars. + } + else if (c <= '\x1F' || (c >= '\x7F' && c <= '\x9F')) + { + // Control chars usually should be escaped in any case needsEscaping = true; foundEscaping = true; res |= Check.ReservedFound; @@ -4172,19 +4167,21 @@ private unsafe Check CheckCanonical(char* str, ref int idx, int end, char delim) if (char.IsHighSurrogate(c)) { - if ((i + 1) < end) + if ((uint)(i + 1) < (uint)span.Length) { - valid = IriHelper.CheckIriUnicodeRange(c, str[i + 1], out _, true); + valid = IriHelper.CheckIriUnicodeRange(c, span[i + 1], out _, true); + i++; } } else { valid = IriHelper.CheckIriUnicodeRange(c, true); } + if (!valid) res |= Check.NotIriCanonical; } - if (!needsEscaping) needsEscaping = true; + needsEscaping = true; } else if (c == delim) { @@ -4197,8 +4194,7 @@ private unsafe Check CheckCanonical(char* str, ref int idx, int end, char delim) } else if (c == '?') { - if (IsImplicitFile || (_syntax != null && !_syntax.InFact(UriSyntaxFlags.MayHaveQuery) - && delim != c_EOL)) + if (IsImplicitFile || (_syntax != null && !_syntax.InFact(UriSyntaxFlags.MayHaveQuery) && delim != c_EOL)) { // If found as reserved this char is not suitable for safe unescaped display // Will need to escape it when both escaping and unescaping the string @@ -4210,6 +4206,7 @@ private unsafe Check CheckCanonical(char* str, ref int idx, int end, char delim) else if (c == '#') { needsEscaping = true; + if (IsImplicitFile || (_syntax != null && 
!_syntax.InFact(UriSyntaxFlags.MayHaveFragment))) { // If found as reserved this char is not suitable for safe unescaped display @@ -4220,19 +4217,19 @@ private unsafe Check CheckCanonical(char* str, ref int idx, int end, char delim) } else if (c == '/' || c == '\\') { - if ((res & Check.BackslashInPath) == 0 && c == '\\') + if (c == '\\') { res |= Check.BackslashInPath; } - if ((res & Check.DotSlashAttn) == 0 && i + 1 != end && (str[i + 1] == '/' || str[i + 1] == '\\')) + + if ((uint)(i + 1) < (uint)span.Length && (span[i + 1] == '/' || span[i + 1] == '\\')) { res |= Check.DotSlashAttn; } } else if (c == '.') { - if ((res & Check.DotSlashAttn) == 0 && i + 1 == end || str[i + 1] == '.' || str[i + 1] == '/' - || str[i + 1] == '\\' || str[i + 1] == '?' || str[i + 1] == '#') + if (((uint)(i + 1) >= (uint)span.Length || span[i + 1] == '.' || span[i + 1] == '/' || span[i + 1] == '\\' || span[i + 1] == '?' || span[i + 1] == '#')) { res |= Check.DotSlashAttn; } @@ -4240,7 +4237,7 @@ private unsafe Check CheckCanonical(char* str, ref int idx, int end, char delim) else if (((c <= '"' && c != '!') || (c >= '[' && c <= '^') || c == '>' || c == '<' || c == '`')) { - if (!needsEscaping) needsEscaping = true; + needsEscaping = true; // The check above validates only that we have valid IRI characters, which is not enough to // conclude that we have a valid canonical IRI. @@ -4257,41 +4254,36 @@ private unsafe Check CheckCanonical(char* str, ref int idx, int end, char delim) } else if (c == '%') { - if (!foundEscaping) foundEscaping = true; - //try unescape a byte hex escaping - if (i + 2 < end && (c = UriHelper.DecodeHexChars(str[i + 1], str[i + 2])) != c_DummyChar) + foundEscaping = true; + + // try unescape a byte hex escaping + if ((uint)(i + 2) < (uint)span.Length && (c = UriHelper.DecodeHexChars(span[i + 1], span[i + 2])) != c_DummyChar) { if (c == '.' 
|| c == '/' || c == '\\') { res |= Check.DotSlashEscaped; } + i += 2; continue; } + // otherwise we follow to non escaped case - if (!needsEscaping) - { - needsEscaping = true; - } + needsEscaping = true; } } - if (foundEscaping) + if (!needsEscaping) { - if (!needsEscaping) - { - res |= Check.EscapedCanonical; - } + res |= Check.EscapedCanonical; } - else + + if (!foundEscaping) { res |= Check.DisplayCanonical; - if (!needsEscaping) - { - res |= Check.EscapedCanonical; - } } - idx = i; + + end = i; return res; } diff --git a/src/libraries/System.Private.Uri/src/System/UriExt.cs b/src/libraries/System.Private.Uri/src/System/UriExt.cs index 4558cd997cc6aa..d08883602b2142 100644 --- a/src/libraries/System.Private.Uri/src/System/UriExt.cs +++ b/src/libraries/System.Private.Uri/src/System/UriExt.cs @@ -423,147 +423,122 @@ public static bool IsWellFormedUriString([NotNullWhen(true), StringSyntax(String return result.IsWellFormedOriginalString(); } - // - // Internal stuff - // - // Returns false if OriginalString value // (1) is not correctly escaped as per URI spec excluding intl UNC name case // (2) or is an absolute Uri that represents implicit file Uri "c:\dir\file" // (3) or is an absolute Uri that misses a slash before path "file://c:/dir/file" // (4) or contains unescaped backslashes even if they will be treated // as forward slashes like http:\\host/path\file or file:\\\c:\path - // - internal unsafe bool InternalIsWellFormedOriginalString() + internal bool InternalIsWellFormedOriginalString() { if (UserDrivenParsing) throw new InvalidOperationException(SR.Format(SR.net_uri_UserDrivenParsing, this.GetType())); - fixed (char* str = _string) + string str = _string; + + // For a relative Uri we only care about escaping and backslashes + if (!IsAbsoluteUri) { - int idx = 0; - // - // For a relative Uri we only care about escaping and backslashes - // - if (!IsAbsoluteUri) + // my:scheme/path?query is not well formed because the colon is ambiguous + if 
(CheckForColonInFirstPathSegment(str)) { - // my:scheme/path?query is not well formed because the colon is ambiguous - if (CheckForColonInFirstPathSegment(_string)) - { - return false; - } - return (CheckCanonical(str, ref idx, _string.Length, c_EOL) - & (Check.BackslashInPath | Check.EscapedCanonical)) == Check.EscapedCanonical; + return false; } - // - // (2) or is an absolute Uri that represents implicit file Uri "c:\dir\file" - // - if (IsImplicitFile) - return false; + return (CheckCanonical(str, c_EOL, out _) & (Check.BackslashInPath | Check.EscapedCanonical)) == Check.EscapedCanonical; + } + + // (2) or is an absolute Uri that represents implicit file Uri "c:\dir\file" + if (IsImplicitFile) + return false; - //This will get all the offsets, a Host name will be checked separately below - EnsureParseRemaining(); + // This will get all the offsets, a Host name will be checked separately below + EnsureParseRemaining(); - Flags nonCanonical = (_flags & (Flags.E_CannotDisplayCanonical | Flags.IriCanonical)); + Flags nonCanonical = (_flags & (Flags.E_CannotDisplayCanonical | Flags.IriCanonical)); - // Cleanup canonical IRI from nonCanonical - if ((nonCanonical & (Flags.UserIriCanonical | Flags.PathIriCanonical | Flags.QueryIriCanonical | Flags.FragmentIriCanonical)) != 0) + // Cleanup canonical IRI from nonCanonical + if ((nonCanonical & (Flags.UserIriCanonical | Flags.PathIriCanonical | Flags.QueryIriCanonical | Flags.FragmentIriCanonical)) != 0) + { + if ((nonCanonical & (Flags.E_UserNotCanonical | Flags.UserIriCanonical)) == (Flags.E_UserNotCanonical | Flags.UserIriCanonical)) { - if ((nonCanonical & (Flags.E_UserNotCanonical | Flags.UserIriCanonical)) == (Flags.E_UserNotCanonical | Flags.UserIriCanonical)) - { - nonCanonical &= ~(Flags.E_UserNotCanonical | Flags.UserIriCanonical); - } + nonCanonical &= ~(Flags.E_UserNotCanonical | Flags.UserIriCanonical); + } - if ((nonCanonical & (Flags.E_PathNotCanonical | Flags.PathIriCanonical)) == (Flags.E_PathNotCanonical 
| Flags.PathIriCanonical)) - { - nonCanonical &= ~(Flags.E_PathNotCanonical | Flags.PathIriCanonical); - } + if ((nonCanonical & (Flags.E_PathNotCanonical | Flags.PathIriCanonical)) == (Flags.E_PathNotCanonical | Flags.PathIriCanonical)) + { + nonCanonical &= ~(Flags.E_PathNotCanonical | Flags.PathIriCanonical); + } - if ((nonCanonical & (Flags.E_QueryNotCanonical | Flags.QueryIriCanonical)) == (Flags.E_QueryNotCanonical | Flags.QueryIriCanonical)) - { - nonCanonical &= ~(Flags.E_QueryNotCanonical | Flags.QueryIriCanonical); - } + if ((nonCanonical & (Flags.E_QueryNotCanonical | Flags.QueryIriCanonical)) == (Flags.E_QueryNotCanonical | Flags.QueryIriCanonical)) + { + nonCanonical &= ~(Flags.E_QueryNotCanonical | Flags.QueryIriCanonical); + } - if ((nonCanonical & (Flags.E_FragmentNotCanonical | Flags.FragmentIriCanonical)) == (Flags.E_FragmentNotCanonical | Flags.FragmentIriCanonical)) - { - nonCanonical &= ~(Flags.E_FragmentNotCanonical | Flags.FragmentIriCanonical); - } + if ((nonCanonical & (Flags.E_FragmentNotCanonical | Flags.FragmentIriCanonical)) == (Flags.E_FragmentNotCanonical | Flags.FragmentIriCanonical)) + { + nonCanonical &= ~(Flags.E_FragmentNotCanonical | Flags.FragmentIriCanonical); } + } + + // User, Path, Query or Fragment may have some non escaped characters + if (((nonCanonical & Flags.E_CannotDisplayCanonical & (Flags.E_UserNotCanonical | Flags.E_PathNotCanonical | + Flags.E_QueryNotCanonical | Flags.E_FragmentNotCanonical)) != Flags.Zero)) + { + return false; + } - // User, Path, Query or Fragment may have some non escaped characters - if (((nonCanonical & Flags.E_CannotDisplayCanonical & (Flags.E_UserNotCanonical | Flags.E_PathNotCanonical | - Flags.E_QueryNotCanonical | Flags.E_FragmentNotCanonical)) != Flags.Zero)) + // checking on scheme:\\ or file://// + if (InFact(Flags.AuthorityFound)) + { + if (InFact(Flags.SchemeNotCanonical_NoTrailingSlashes)) { return false; } - // checking on scheme:\\ or file://// - if 
(InFact(Flags.AuthorityFound)) + if (InFact(Flags.UncPath | Flags.DosPath)) { - idx = _info.Offset.Scheme + _syntax.SchemeName.Length + 2; - if (idx >= _info.Offset.User || _string[idx - 1] == '\\' || _string[idx] == '\\') - return false; + int idx = _info.Offset.Scheme + _syntax.SchemeName.Length + 3; - if (InFact(Flags.UncPath | Flags.DosPath)) + if (idx < _info.Offset.User && str[idx] is '/' or '\\') { - while (++idx < _info.Offset.User && (_string[idx] == '/' || _string[idx] == '\\')) - return false; + return false; } } + } + // (3) or is an absolute Uri that misses a slash before path "file://c:/dir/file" + // Note that for this check to be more general we assert that if Path is non empty and if it requires a first slash + // (which looks absent) then the method has to fail. + // Today it's only possible for a Dos like path, i.e. file://c:/bla would fail below check. + if (InFact(Flags.FirstSlashAbsent) && _info.Offset.Query > _info.Offset.Path) + return false; - // (3) or is an absolute Uri that misses a slash before path "file://c:/dir/file" - // Note that for this check to be more general we assert that if Path is non empty and if it requires a first slash - // (which looks absent) then the method has to fail. - // Today it's only possible for a Dos like path, i.e. file://c:/bla would fail below check. - if (InFact(Flags.FirstSlashAbsent) && _info.Offset.Query > _info.Offset.Path) - return false; - - // (4) or contains unescaped backslashes even if they will be treated - // as forward slashes like http:\\host/path\file or file:\\\c:\path - // Note we do not check for Flags.ShouldBeCompressed i.e. allow // /./ and alike as valid - if (InFact(Flags.BackslashInPath)) - return false; + // (4) or contains unescaped backslashes even if they will be treated + // as forward slashes like http:\\host/path\file or file:\\\c:\path + // Note we do not check for Flags.ShouldBeCompressed i.e. 
allow // /./ and alike as valid + if (InFact(Flags.BackslashInPath)) + return false; - // Capturing a rare case like file:///c|/dir - if (IsDosPath && _string[_info.Offset.Path + SecuredPathIndex - 1] == '|') - return false; + // Capturing a rare case like file:///c|/dir + if (IsDosPath && str[_info.Offset.Path + SecuredPathIndex - 1] == '|') + return false; - // - // May need some real CPU processing to answer the request - // - // - // Check escaping for authority - // - // IPv6 hosts cannot be properly validated by CheckCanonical - if ((_flags & Flags.CanonicalDnsHost) == 0 && HostType != Flags.IPv6HostType) - { - idx = _info.Offset.User; - Check result = CheckCanonical(str, ref idx, _info.Offset.Path, '/'); - if (((result & (Check.ReservedFound | Check.BackslashInPath | Check.EscapedCanonical)) - != Check.EscapedCanonical) - && (!IriParsing || (result & (Check.DisplayCanonical | Check.FoundNonAscii | Check.NotIriCanonical)) - != (Check.DisplayCanonical | Check.FoundNonAscii))) - { - return false; - } - } + // Check escaping for authority + // IPv6 hosts cannot be properly validated by CheckCanonical + if ((_flags & Flags.CanonicalDnsHost) == 0 && HostType != Flags.IPv6HostType) + { + int idx = _info.Offset.User; + Check result = CheckCanonical(str.AsSpan(idx, _info.Offset.Path - idx), '/', out _); - // Want to ensure there are slashes after the scheme - if ((_flags & (Flags.SchemeNotCanonical | Flags.AuthorityFound)) - == (Flags.SchemeNotCanonical | Flags.AuthorityFound)) + if ((result & (Check.ReservedFound | Check.BackslashInPath | Check.EscapedCanonical)) != Check.EscapedCanonical + && (!IriParsing || (result & (Check.DisplayCanonical | Check.FoundNonAscii | Check.NotIriCanonical)) != (Check.DisplayCanonical | Check.FoundNonAscii))) { - idx = _syntax.SchemeName.Length; - while (str[idx++] != ':'); - if (idx + 1 >= _string.Length || str[idx] != '/' || str[idx + 1] != '/') - return false; + return false; } } - // - // May be scheme, host, port or path need 
some canonicalization but still the uri string is found to be a - // "well formed" one - // + + // The scheme, host, port or path may need some canonicalization, but the uri string is found to be a "well formed" one. return true; } diff --git a/src/libraries/System.Private.Uri/tests/FunctionalTests/UriIsWellFormedUriStringTest.cs b/src/libraries/System.Private.Uri/tests/FunctionalTests/UriIsWellFormedUriStringTest.cs index 8b4e1043fb6c84..67aecaf0dc6081 100644 --- a/src/libraries/System.Private.Uri/tests/FunctionalTests/UriIsWellFormedUriStringTest.cs +++ b/src/libraries/System.Private.Uri/tests/FunctionalTests/UriIsWellFormedUriStringTest.cs @@ -287,6 +287,13 @@ public void UriIsWellFormed_IPv6HostIriOn_True() new object[] { "file://c:/directory/filename", false }, new object[] { "\\\\?\\UNC\\Server01\\user\\docs\\Letter.txt", false }, + // Test Scheme + new object[] { @"http:/host/path", false }, + new object[] { @"http:\host/path", false }, + new object[] { @"http:/\host/path", false }, + new object[] { @"http:\/host/path", false }, + new object[] { @"http:\\host/path", false }, + // Test Host new object[] { "http://www.contoso.com", true }, new object[] { "http://\u00E4.contos.com", true }, @@ -306,7 +313,6 @@ public void UriIsWellFormed_IPv6HostIriOn_True() // Test Path new object[] { "http://www.contoso.com/path???/file name", false }, new object[] { "http://www.contoso.com/\u00E4???/file name", false }, - new object[] { "http:\\host/path/file", false }, new object[] { "http://www.contoso.com/a/sek http://test.com", false }, new object[] { "http://www.contoso.com/\u00E4/sek http://test.com", false }, @@ -437,14 +443,15 @@ public void UriIsWellFormed_IPv6HostIriOn_True() new object[] { "http://www.contoso.com/path?a# a ", false }, new object[] { "http://www.contoso.com/path?\u00E4# \u00E4 ", false }, - new object[] { "http://www.contoso.com/path?a#a?a", true }, new object[] { "http://www.contoso.com/\u00E4?\u00E4#u00E4?\u00E4", true }, - // Sample in "private 
unsafe Check CheckCanonical(char* str, ref ushort idx, ushort end, char delim)" code comments new object[] { "http://www.contoso.com/\u00E4/ path2/ param=val", false }, new object[] { "http://www.contoso.com/\u00E4? param=val", false }, new object[] { "http://www.contoso.com/\u00E4?param=val# fragment", false }, + + // Surrogate pairs + new object[] { "http://www.contoso.com/path/\uD83C\uDF49?query\uD83C\uDF49=\uD83C\uDF49#\uD83C\uDF49", true }, }; [Theory]