-
Notifications
You must be signed in to change notification settings - Fork 326
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Cookbook regexp2\n\nRecreate PR #2125 from https://github.com/ocaml/o…
- Loading branch information
Cuihtlauac ALVARADO
committed
May 13, 2024
1 parent
2f496c7
commit e053572
Showing
3 changed files
with
70 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
--- | ||
packages: [] | ||
discussion: | | ||
- **Understanding `Str`:** `str` is a library that ships with OCaml. It contains many functions that deal with regular expressions. The documentation of the `Str` module is in the [API reference](https://v2.ocaml.org/api/Str.html). | ||
- **Alternative Libraries:** The `re` package provides regular expression functions and supports multiple regexp syntaxes (Perl, Posix, Emacs and Glob). Its functions are also purely functional (by contrast, `Str.matched_group` and `Str.matched_string` use a global state that prevents the concurrent use of two regexp matching sequences). Other packages provide regexp functions: `mikmatch`, `ocamlregexkit`, `ppx_regexp`, `pcre`/`pcre2` (which are compatible with Perl regexp), `re2`, `re_parser`, `tyre` (which comes with a PPX preprocessor `ppx_tyre`), `human-re`. The `tyre` combo defines a `function%tyre` keyword which works like native OCaml pattern matching, but on regular expressions. `ppx_regexp` works in the same way but complements `re`. | ||
--- | ||
|
||
(* Compiling a regular expression. Note that `{regexp|...|regexp}` is an
   ordinary string literal written with the quoted-string syntax, which
   avoids having to escape every `\\`. The `regexp` identifier is optional;
   it only tells the reader that the string holds a regular expression. *)
let regexp =
  let date_pattern = {regexp|\([0-9]+\)-\([0-9]+\)-\([0-9]+\)|regexp} in
  Str.regexp date_pattern
|
||
(* Testing whether a string matches the regexp. The index (0) is the
   position in the string at which matching starts. `string_match` only
   tries to match the regular expression at the given index, while
   `search_forward` tries the given index and then each following index: *)
let () =
  let verdict =
    if Str.string_match regexp "1971-01-23" 0 then "The string match\n"
    else "The string doesn't match\n"
  in
  print_string verdict
(* Look for the date anywhere in the subject, then report the index where
   the match starts together with the matched text. *)
let () =
  let subject = "Date: 1971-01-23" in
  let position = Str.search_forward regexp subject 0 in
  let found = Str.matched_string subject in
  Printf.printf "Date found at index %d (%s)\n" position found
|
||
(* Getting group substrings. Each `\\(` / `\\)` pair captures the substring
   matched by the enclosed regexp. By convention, group 0 is the whole
   substring matching the regexp, and the first explicit group is 1: *)
let () =
  let subject = "Date: 1971-01-23" in
  (* The search is run for its effect on the match state; the index itself
     is not needed here. *)
  let (_ : int) = Str.search_forward regexp subject 0 in
  let group n = Str.matched_group n subject in
  Printf.printf "year=%s, month=%s, day=%s\n" (group 1) (group 2) (group 3)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
--- | ||
packages: | ||
- name: "re" | ||
tested_version: "1.10.4" | ||
used_libraries: | ||
- re | ||
- name: "ppx_regexp" | ||
tested_version: "0.5.1" | ||
used_libraries: | ||
- ppx_regexp | ||
discussion: | | ||
- **Understanding `re`:** The `re` library offers several advantages over the `Str` library, which is shipped with OCaml. It supports multiple syntaxes, and its absence of global state permits concurrent pattern matching. It is complemented by `ppx_regexp`, which makes using this library easier. However, with `ppx_regexp`, only the PCRE syntax is supported. | ||
- **Reference:** `ppx_regexp` is described on [its page](https://github.com/paurkedal/ppx_regexp). It can be complemented by the [PCRE syntax](https://www.pcre.org/original/doc/html/pcresyntax.html) reference or any [PCRE cheat sheet](https://www.debuggex.com/cheatsheet/regex/pcre). | ||
--- | ||
|
||
(* In order to match a string with a regular expression, we use the `match%pcre` keyword in a way similar to the OCaml `match`: *) | ||
|
||
(* Each `(?<name>...)` group binds the substring it matched to the variable
   `name` in the branch. The outer `date` group must be opened and closed
   with parentheses like any other group; it wraps the three inner groups
   so that `date` holds the whole date. *)
let () =
  match%pcre "Date: 1972-01-23 " with
  | {re|(?<date>(?<year>\d{4})-(?<month>\d\d)-(?<day>\d\d))|re} ->
    Printf.printf "Date found: (%s)\n" date;
    Printf.printf "Year: (%s)\n" year;
    Printf.printf "Month: (%s)\n" month;
    Printf.printf "Day: (%s)\n" day
  | _ -> print_string "Date not found\n"
|
||
(* In a similar way, `function%pcre` performs the same task in the style of the OCaml `function` keyword: *) | ||
|
||
(* [all_digits s] is [true] when [s] matches the anchored pattern
   `^\d*$`, and [false] otherwise. Because of the `*` quantifier, the
   pattern also accepts a run of zero digits. *)
let all_digits candidate =
  match%pcre candidate with
  | {re|^\d*$|re} -> true
  | _ -> false
|
||
(* Sanity checks: a digits-only string is accepted, and a string containing
   a letter is rejected. *)
let () =
  assert (all_digits "1234");
  assert (not (all_digits "12x34"))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters