Skip to content

Commit

Permalink
Merge master to feature/string-interp (#9580)
Browse files Browse the repository at this point in the history
* Update dependencies from https://github.com/dotnet/arcade build 20200626.2 (#9577)

Microsoft.DotNet.Arcade.Sdk
 From Version 1.0.0-beta.20302.3 -> To Version 1.0.0-beta.20326.2

Co-authored-by: dotnet-maestro[bot] <dotnet-maestro[bot]@users.noreply.github.com>

* Improve perf for String.filter up to 3x (#9509)

* Improve perf for String.filter 2-2.5x

* Cleanup: remove "foo" etc in tests

* Add tests for new execution path for LOH in String.filter

* Change test string

* String map performance improvement (#9470)

* Simplify and improve perf of String.length

* Improve performance of String.map

* Revert "Simplify and improve perf of String.length"

* Resolves #9470 (comment)

* Lingering space

* Change `String` to use `new` to clarify use of ctor

* Add some better tests for String.map, add side-effect test

* Add tests to ensure the mapping function is called a deterministically amount of times

* Fix typo

* Remove "foo" from String.map tests

* Perf: String.replicate from O(n) to O(log(n)), up to 12x speed improvement (#9512)

* Turn String.replicate from O(n) into O(log(n))

* Cleanup String.replicate tests by removing usages of "foo"

* String.replicate: add tests for missing cases, and for the new O(log(n)) cut-off points

* Improve String.replicate algorithm further

* Add tests for String.replicate covering all lines/branches of algo

* Fix accidental comment

Co-authored-by: dotnet-maestro[bot] <42748379+dotnet-maestro[bot]@users.noreply.github.com>
Co-authored-by: dotnet-maestro[bot] <dotnet-maestro[bot]@users.noreply.github.com>
Co-authored-by: Abel Braaksma <abel.online@xs4all.nl>
  • Loading branch information
4 people authored Jun 27, 2020
1 parent 098ae6c commit e04ac6d
Show file tree
Hide file tree
Showing 6 changed files with 137 additions and 28 deletions.
4 changes: 2 additions & 2 deletions eng/Version.Details.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
<ProductDependencies>
</ProductDependencies>
<ToolsetDependencies>
<Dependency Name="Microsoft.DotNet.Arcade.Sdk" Version="1.0.0-beta.20302.3">
<Dependency Name="Microsoft.DotNet.Arcade.Sdk" Version="1.0.0-beta.20326.2">
<Uri>https://github.com/dotnet/arcade</Uri>
<Sha>9b71be0663493cd0e111b55536a2e1eeb272f54c</Sha>
<Sha>ed69753a3ffbdaa08365252c710d57a64d17f859</Sha>
</Dependency>
</ToolsetDependencies>
</Dependencies>
2 changes: 1 addition & 1 deletion eng/common/sdl/packages.config
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="Microsoft.Guardian.Cli" version="0.7.2"/>
<package id="Microsoft.Guardian.Cli.win10-x64" version="0.20.1"/>
</packages>
2 changes: 1 addition & 1 deletion eng/common/templates/job/execute-sdl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ jobs:
continueOnError: ${{ parameters.sdlContinueOnError }}
- ${{ if eq(parameters.overrideParameters, '') }}:
- powershell: eng/common/sdl/execute-all-sdl-tools.ps1
-GuardianPackageName Microsoft.Guardian.Cli.0.7.2
-GuardianPackageName Microsoft.Guardian.Cli.win10-x64.0.20.1
-NugetPackageDirectory $(Build.SourcesDirectory)\.packages
-AzureDevOpsAccessToken $(dn-bot-dotnet-build-rw-code-rw)
${{ parameters.additionalParameters }}
Expand Down
2 changes: 1 addition & 1 deletion global.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
}
},
"msbuild-sdks": {
"Microsoft.DotNet.Arcade.Sdk": "1.0.0-beta.20302.3",
"Microsoft.DotNet.Arcade.Sdk": "1.0.0-beta.20326.2",
"Microsoft.DotNet.Helix.Sdk": "2.0.0-beta.19069.2"
}
}
73 changes: 62 additions & 11 deletions src/fsharp/FSharp.Core/string.fs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ namespace Microsoft.FSharp.Core
[<CompilationRepresentation(CompilationRepresentationFlags.ModuleSuffix)>]
[<RequireQualifiedAccess>]
module String =
[<Literal>]
/// LOH threshold is calculated from FSharp.Compiler.AbstractIL.Internal.Library.LOH_SIZE_THRESHOLD_BYTES,
/// and is equal to 80_000 / sizeof<char>
let LOH_CHAR_THRESHOLD = 40_000

[<CompiledName("Length")>]
let length (str:string) = if isNull str then 0 else str.Length

Expand All @@ -37,9 +42,13 @@ namespace Microsoft.FSharp.Core
if String.IsNullOrEmpty str then
String.Empty
else
let res = StringBuilder str.Length
str |> iter (fun c -> res.Append(mapping c) |> ignore)
res.ToString()
let result = str.ToCharArray()
let mutable i = 0
for c in result do
result.[i] <- mapping c
i <- i + 1

new String(result)

[<CompiledName("MapIndexed")>]
let mapi (mapping: int -> char -> char) (str:string) =
Expand All @@ -53,13 +62,30 @@ namespace Microsoft.FSharp.Core

[<CompiledName("Filter")>]
let filter (predicate: char -> bool) (str:string) =
if String.IsNullOrEmpty str then
let len = length str

if len = 0 then
String.Empty
else
let res = StringBuilder str.Length

elif len > LOH_CHAR_THRESHOLD then
// By using SB here, which is twice slower than the optimized path, we prevent LOH allocations
// and 'stop the world' collections if the filtering results in smaller strings.
// We also don't pre-allocate SB here, to allow for less mem pressure when filter result is small.
let res = StringBuilder()
str |> iter (fun c -> if predicate c then res.Append c |> ignore)
res.ToString()

else
// Must do it this way, since array.fs is not yet in scope, but this is safe
let target = Microsoft.FSharp.Primitives.Basics.Array.zeroCreateUnchecked len
let mutable i = 0
for c in str do
if predicate c then
target.[i] <- c
i <- i + 1

String(target, 0, i)

[<CompiledName("Collect")>]
let collect (mapping: char -> string) (str:string) =
if String.IsNullOrEmpty str then
Expand All @@ -81,13 +107,38 @@ namespace Microsoft.FSharp.Core
let replicate (count:int) (str:string) =
if count < 0 then invalidArgInputMustBeNonNegative "count" count

if String.IsNullOrEmpty str then
let len = length str
if len = 0 || count = 0 then
String.Empty

elif len = 1 then
new String(str.[0], count)

elif count <= 4 then
match count with
| 1 -> str
| 2 -> String.Concat(str, str)
| 3 -> String.Concat(str, str, str)
| _ -> String.Concat(str, str, str, str)

else
let res = StringBuilder(count * str.Length)
for i = 0 to count - 1 do
res.Append str |> ignore
res.ToString()
// Using the primitive, because array.fs is not yet in scope. It's safe: both len and count are positive.
let target = Microsoft.FSharp.Primitives.Basics.Array.zeroCreateUnchecked (len * count)
let source = str.ToCharArray()

// O(log(n)) performance loop:
// Copy first string, then keep copying what we already copied
// (i.e., doubling it) until we reach or pass the halfway point
Array.Copy(source, 0, target, 0, len)
let mutable i = len
while i * 2 < target.Length do
Array.Copy(target, 0, target, i, i)
i <- i * 2

// finally, copy the remain half, or less-then half
Array.Copy(target, 0, target, i, target.Length - i)
new String(target)


[<CompiledName("ForAll")>]
let forall predicate (str:string) =
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) Microsoft Corporation. All Rights Reserved. See License.txt in the project root for license information.
// Copyright (c) Microsoft Corporation. All Rights Reserved. See License.txt in the project root for license information.

namespace FSharp.Core.UnitTests.Collections

Expand Down Expand Up @@ -71,11 +71,35 @@ type StringModule() =

[<Test>]
member this.Map() =
let e1 = String.map (fun c -> c) "foo"
Assert.AreEqual("foo", e1)
let e1 = String.map id "xyz"
Assert.AreEqual("xyz", e1)

let e2 = String.map (fun c -> c) null
Assert.AreEqual("", e2)
let e2 = String.map (fun c -> c + char 1) "abcde"
Assert.AreEqual("bcdef", e2)

let e3 = String.map (fun c -> c) null
Assert.AreEqual("", e3)

let e4 = String.map (fun c -> c) String.Empty
Assert.AreEqual("", e4)

let e5 = String.map (fun _ -> 'B') "A"
Assert.AreEqual("B", e5)

let e6 = String.map (fun _ -> failwith "should not raise") null
Assert.AreEqual("", e6)

// this tests makes sure mapping function is not called too many times
let mutable x = 0
let e7 = String.map (fun _ -> if x > 2 then failwith "should not raise" else x <- x + 1; 'x') "abc"
Assert.AreEqual(x, 3)
Assert.AreEqual(e7, "xxx")

// side-effect and "order of operation" test
let mutable x = 0
let e8 = String.map (fun c -> x <- x + 1; c + char x) "abcde"
Assert.AreEqual(x, 5)
Assert.AreEqual(e8, "bdfhj")

[<Test>]
member this.MapI() =
Expand All @@ -87,18 +111,25 @@ type StringModule() =

[<Test>]
member this.Filter() =
let e1 = String.filter (fun c -> true) "foo"
Assert.AreEqual("foo", e1)
let e1 = String.filter (fun c -> true) "Taradiddle"
Assert.AreEqual("Taradiddle", e1)

let e2 = String.filter (fun c -> true) null
Assert.AreEqual("", e2)

let e3 = String.filter (fun c -> c <> 'o') "foo bar"
Assert.AreEqual("f bar", e3)
let e3 = String.filter Char.IsUpper "How Vexingly Quick Daft Zebras Jump!"
Assert.AreEqual("HVQDZJ", e3)

let e4 = String.filter (fun c -> c <> 'o') ""
Assert.AreEqual("", e4)

let e5 = String.filter (fun c -> c > 'B' ) "ABRACADABRA"
Assert.AreEqual("RCDR", e5)

// LOH test with 55k string, which is 110k bytes
let e5 = String.filter (fun c -> c > 'B' ) (String.replicate 5_000 "ABRACADABRA")
Assert.AreEqual(String.replicate 5_000 "RCDR", e5)

[<Test>]
member this.Collect() =
let e1 = String.collect (fun c -> "a"+string c) "foo"
Expand All @@ -125,15 +156,42 @@ type StringModule() =

[<Test>]
member this.Replicate() =
let e1 = String.replicate 0 "foo"
let e1 = String.replicate 0 "Snickersnee"
Assert.AreEqual("", e1)

let e2 = String.replicate 2 "foo"
Assert.AreEqual("foofoo", e2)
let e2 = String.replicate 2 "Collywobbles, "
Assert.AreEqual("Collywobbles, Collywobbles, ", e2)

let e3 = String.replicate 2 null
Assert.AreEqual("", e3)

let e4 = String.replicate 300_000 ""
Assert.AreEqual("", e4)

let e5 = String.replicate 23 "天地玄黃,宇宙洪荒。"
Assert.AreEqual(230 , e5.Length)
Assert.AreEqual("天地玄黃,宇宙洪荒。天地玄黃,宇宙洪荒。", e5.Substring(0, 20))

// This tests the cut-off point for the O(log(n)) algorithm with a prime number
let e6 = String.replicate 84673 "!!!"
Assert.AreEqual(84673 * 3, e6.Length)

// This tests the cut-off point for the O(log(n)) algorithm with a 2^x number
let e7 = String.replicate 1024 "!!!"
Assert.AreEqual(1024 * 3, e7.Length)

let e8 = String.replicate 1 "What a wonderful world"
Assert.AreEqual("What a wonderful world", e8)

let e9 = String.replicate 3 "أضعت طريقي! أضعت طريقي" // means: I'm lost
Assert.AreEqual("أضعت طريقي! أضعت طريقيأضعت طريقي! أضعت طريقيأضعت طريقي! أضعت طريقي", e9)

let e10 = String.replicate 4 "㏖ ㏗ ℵ "
Assert.AreEqual("㏖ ㏗ ℵ ㏖ ㏗ ℵ ㏖ ㏗ ℵ ㏖ ㏗ ℵ ", e10)

let e11 = String.replicate 5 "5"
Assert.AreEqual("55555", e11)

CheckThrowsArgumentException(fun () -> String.replicate -1 "foo" |> ignore)

[<Test>]
Expand Down

0 comments on commit e04ac6d

Please sign in to comment.