Skip to content

Commit

Permalink
feat(stdlib): Add split function to Regex module (#1469)
Browse files Browse the repository at this point in the history
feat(stdlib): Add `splitAll` function to Regex module
  • Loading branch information
spotandjake authored Nov 28, 2022
1 parent 5222648 commit 0c1eb73
Show file tree
Hide file tree
Showing 3 changed files with 180 additions and 1 deletion.
39 changes: 39 additions & 0 deletions compiler/test/stdlib/regex.test.gr
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import {
findAllRange,
replace,
replaceAll,
split,
splitAll,
MatchResult,
} from "regex"

Expand Down Expand Up @@ -765,3 +767,40 @@ assert replaceAll(unwrapResult(make("b(ar)")), "bazbarfoo", "$`") == "bazbazfoo"
assert replaceAll(unwrapResult(make("b(ar)")), "bazbarfoo", "$'") == "bazfoofoo"
// https://github.com/grain-lang/grain/issues/1431
assert replaceAll(unwrapResult(make("^.")), "asdf", "-") == "-sdf"

// Regex.split
// Only the first match splits the string; the remainder is returned unsplit.
assert split(unwrapResult(make(",")), "a,b,c,d") == ["a", "b,c,d"]
// For this pattern the whole input comes back as a single segment.
assert split(unwrapResult(make("|")), "a,b,c,d") == ["a,b,c,d"]
// With alternation, the split still happens only once, at the first "\n".
assert split(unwrapResult(make("\n|\r")), "a\nb\nc\rd\n\re") ==
["a", "b\nc\rd\n\re"]
// A match at the very start of the input yields a leading empty segment.
assert split(unwrapResult(make(".")), "abcd") == ["", "bcd"]
// A match at the very end of the input yields a trailing empty segment.
assert split(unwrapResult(make("d$")), "abcd") == ["abc", ""]
// Capture group contents appear in the output between the text before the
// match and the text after it.
assert split(unwrapResult(make("b(ar)")), "foo bar") == ["foo ", "ar", ""]
// Later occurrences of the pattern are left untouched in the final segment.
assert split(unwrapResult(make("b(ar)")), "foo bar bar") ==
["foo ", "ar", " bar"]
// Multiple groups are emitted in group order.
assert split(unwrapResult(make("b(ar)b(az)")), "foo barbaz bar") ==
["foo ", "ar", "az", " bar"]
// Nested groups each contribute their own entry, outermost first.
assert split(unwrapResult(make("b((a)r)")), "bar") == ["", "ar", "a", ""]
assert split(unwrapResult(make("b(((((a))))r)")), "bar") ==
["", "ar", "a", "a", "a", "a", ""]
assert split(unwrapResult(make("b(((((a))))r)")), "bar bar") ==
["", "ar", "a", "a", "a", "a", " bar"]
// Regex.splitAll
// Every match splits the string.
assert splitAll(unwrapResult(make(",")), "a,b,c,d") == ["a", "b", "c", "d"]
// For this pattern the whole input comes back as a single segment.
assert splitAll(unwrapResult(make("|")), "a,b,c,d") == ["a,b,c,d"]
// Two adjacent matches ("\n" then "\r") produce an empty segment between them.
assert splitAll(unwrapResult(make("\n|\r")), "a\nb\nc\rd\n\re") ==
["a", "b", "c", "d", "", "e"]
// Every character is a match, so all five surrounding segments are empty.
assert splitAll(unwrapResult(make(".")), "abcd") == ["", "", "", "", ""]
// A match at the very end of the input yields a trailing empty segment.
assert splitAll(unwrapResult(make("d$")), "abcd") == ["abc", ""]
// Capture group contents are emitted, in group order, at each match.
assert splitAll(unwrapResult(make("b(ar)(ar)")), "foo barar test") ==
["foo ", "ar", "ar", " test"]
// The groups are repeated for every match in the input.
assert splitAll(
unwrapResult(make("b(ar)(ar)")),
"foo barar test barar test2"
) ==
["foo ", "ar", "ar", " test ", "ar", "ar", " test2"]
// Nested groups each contribute their own entry, outermost first.
assert splitAll(unwrapResult(make("b((a)r)")), "bar") == ["", "ar", "a", ""]
assert splitAll(unwrapResult(make("b(((((a))))r)")), "bar") ==
["", "ar", "a", "a", "a", "a", ""]
assert splitAll(unwrapResult(make("b(((((a))))r)")), "bar bar") ==
["", "ar", "a", "a", "a", "a", " ", "ar", "a", "a", "a", "a", ""]
72 changes: 71 additions & 1 deletion stdlib/regex.gr
Original file line number Diff line number Diff line change
Expand Up @@ -3973,7 +3973,6 @@ let regexReplaceHelp =
all: Bool,
) => {
let buf = makeMatchBuffer(toSearch)
let mut out = []
let rec loop = searchPos => {
let state = Array.make(rx.reNumGroups, None)
let inStart = max(0, searchPos - rx.reMaxLookbehind)
Expand Down Expand Up @@ -4067,3 +4066,74 @@ export let replaceAll =
) => {
regexReplaceHelp(rx, toSearch, replacement, true)
}

// Shared implementation of `split` and `splitAll`.
//
// Splits `str` around the matches of `rx`. When `all` is false only the first
// match (if any) is used; otherwise every match returned by `findAll` is used.
// For each match, the text between the previous position and the start of the
// match is emitted, followed by the text of every capture group that has a
// position. The text after the last match is always emitted last.
let regexSplitHelp = (rx: RegularExpression, str: String, all: Bool) => {
  // Gather the matches that act as separators
  let separators = if (!all) {
    match (find(rx, str)) {
      Some(firstMatch) => [firstMatch],
      None => [],
    }
  } else {
    findAll(rx, str)
  }
  // Segments are accumulated in reverse order and flipped at the end
  let mut segments = []
  // Offset in `str` up to which text has already been consumed
  let mut cursor = 0
  List.forEach(separator => {
    Array.forEachi((groupPos, idx) => {
      match (groupPos) {
        // Groups without a position contribute nothing
        None => void,
        Some((groupStart, groupEnd)) => {
          let piece = if (idx == 0) {
            // Index 0 is the whole match: emit the text preceding it
            String.slice(cursor, groupStart, str)
          } else {
            // Any other index is a capture group: emit its content
            String.slice(groupStart, groupEnd, str)
          }
          segments = [piece, ...segments]
          if (groupEnd > cursor) {
            cursor = groupEnd
          }
        },
      }
    }, separator.allGroupPositions())
  }, separators)
  // Whatever follows the last consumed position forms the final segment
  let tail = String.slice(cursor, String.length(str), str)
  List.reverse([tail, ...segments])
}

/**
* Splits the given string at the first match for the given regular expression.
*
* If the regex pattern contains capture groups, the content of the groups
* will be included in the output list.
*
* @param rx: The regular expression to match
* @param str: The string to split
* @returns A list of the split segments
*
* @example assert Regex.split(Result.unwrap(Regex.make(",")), "a,b,c") == [ "a", "b,c" ]
*
* @since v0.5.5
*/
export let split = (rx: RegularExpression, str: String) =>
  regexSplitHelp(rx, str, false)

/**
* Splits the given string at every match for the given regular expression.
*
* If the regex pattern contains capture groups, the content of the groups
* will be included in the output list.
*
* @param rx: The regular expression to match
* @param str: The string to split
* @returns A list of the split segments
*
* @example assert Regex.splitAll(Result.unwrap(Regex.make(",")), "a,b,c") == [ "a", "b", "c" ]
*
* @since v0.5.5
*/
export let splitAll = (rx: RegularExpression, str: String) =>
  regexSplitHelp(rx, str, true)
70 changes: 70 additions & 0 deletions stdlib/regex.md
Original file line number Diff line number Diff line change
Expand Up @@ -447,3 +447,73 @@ Examples:
assert Regex.replaceAll(Result.unwrap(Regex.make("o")), "skoot", "r") == "skrrt"
```

### Regex.**split**

<details disabled>
<summary tabindex="-1">Added in <code>next</code></summary>
No other changes yet.
</details>

```grain
split : (RegularExpression, String) -> List<String>
```

Splits the given string at the first match for the given regular expression.

If the regex pattern contains capture groups, the content of the groups
will be included in the output list.

Parameters:

|param|type|description|
|-----|----|-----------|
|`rx`|`RegularExpression`|The regular expression to match|
|`str`|`String`|The string to split|

Returns:

|type|description|
|----|-----------|
|`List<String>`|A list of the split segments|

Examples:

```grain
assert Regex.split(Result.unwrap(Regex.make(",")), "a,b,c") == [ "a", "b,c" ]
```

### Regex.**splitAll**

<details disabled>
<summary tabindex="-1">Added in <code>next</code></summary>
No other changes yet.
</details>

```grain
splitAll : (RegularExpression, String) -> List<String>
```

Splits the given string at every match for the given regular expression.

If the regex pattern contains capture groups, the content of the groups
will be included in the output list.

Parameters:

|param|type|description|
|-----|----|-----------|
|`rx`|`RegularExpression`|The regular expression to match|
|`str`|`String`|The string to split|

Returns:

|type|description|
|----|-----------|
|`List<String>`|A list of the split segments|

Examples:

```grain
assert Regex.splitAll(Result.unwrap(Regex.make(",")), "a,b,c") == [ "a", "b", "c" ]
```

0 comments on commit 0c1eb73

Please sign in to comment.