-
Notifications
You must be signed in to change notification settings - Fork 128
/
Copy pathstring.mli
427 lines (329 loc) · 18.3 KB
/
string.mli
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
(** An extension of the standard StringLabels. If you [open Base], you'll get these in
the String module. *)
open! Import
(** The string type.

    The [include sig ... end] that follows, up to the closing end marker, is the inlined
    expansion of the [hash] and [sexp] derivers requested by the [deriving_inline]
    attribute: s-expression converters and hash functions for [t]. Warning 32 (unused
    value) is switched off inside that generated signature. *)
type t = string [@@deriving_inline hash, sexp]
include
sig
[@@@ocaml.warning "-32"]
val t_of_sexp : Sexplib.Sexp.t -> t
val sexp_of_t : t -> Sexplib.Sexp.t
val hash_fold_t :
Ppx_hash_lib.Std.Hash.state -> t -> Ppx_hash_lib.Std.Hash.state
val hash : t -> Ppx_hash_lib.Std.Hash.hash_value
end
[@@@end]
(** [Caseless] compares and hashes strings ignoring case, so that for example
    [Caseless.equal "OCaml" "ocaml"] and [Caseless.("apple" < "Banana")] are [true], and
    [Caseless.Map] and [Caseless.Table] lookups and [Caseless.Set] membership are
    case-insensitive. [Caseless] also provides case-insensitive [is_suffix] and
    [is_prefix] functions, so that for example
    [Caseless.is_suffix "OCaml" ~suffix:"AmL"] and
    [Caseless.is_prefix "OCaml" ~prefix:"oc"] are [true]. *)
module Caseless : sig
(** The same type as the enclosing [t]. The [include sig ... end] below is the inlined
    expansion of the [hash] and [sexp] derivers. *)
type nonrec t = t [@@deriving_inline hash, sexp]
include
sig
[@@@ocaml.warning "-32"]
val t_of_sexp : Sexplib.Sexp.t -> t
val sexp_of_t : t -> Sexplib.Sexp.t
val hash_fold_t :
Ppx_hash_lib.Std.Hash.state -> t -> Ppx_hash_lib.Std.Hash.state
val hash : t -> Ppx_hash_lib.Std.Hash.hash_value
end
[@@@end]
(** Comparison interface for [t]; see [Comparable.S]. Per the module description above,
    comparisons ignore case. *)
include Comparable.S with type t := t
(** [is_suffix s ~suffix] is [true] if [s] ends with [suffix], ignoring case. *)
val is_suffix : t -> suffix:t -> bool
(** [is_prefix s ~prefix] is [true] if [s] starts with [prefix], ignoring case. *)
val is_prefix : t -> prefix:t -> bool
end
(** Blit operations specialised to [t]; see [Blit.S] for the exact set of functions. *)
include Blit.S with type t := t
(** Container operations that treat a string as a container whose elements are its
    characters ([elt = char]); see [Container.S0]. *)
include Container.S0 with type t := t with type elt = char
(** Operations specified by [Identifiable.S], specialised to [t]. NOTE(review): the
    contents of [Identifiable.S] are not visible from this file; consult its definition. *)
include Identifiable.S with type t := t
(** Maximum length of a string. *)
val max_length : int
(** [length t] is the length of [t]. Bound directly to the compiler primitive
    [%string_length]. *)
external length : t -> int = "%string_length"
(** [get t i] is the character of [t] at index [i]. Bound to the bounds-checked compiler
    primitive [%string_safe_get]; an out-of-range [i] is expected to raise
    [Invalid_argument] (TODO confirm). *)
external get : t -> int -> char = "%string_safe_get"
(** [create n] presumably returns a string of length [n] with unspecified contents, as
    the stdlib function of the same name does -- TODO confirm against the
    implementation. *)
val create : int -> t
(** [make n c] presumably returns a string of length [n] in which every character is
    [c] -- TODO confirm. *)
val make : int -> char -> t
(** [copy t] presumably returns a fresh string with the same contents as [t] -- TODO
    confirm. *)
val copy : t -> t
(** [init n ~f] presumably returns the string of length [n] whose character at index
    [i] is [f i] -- TODO confirm. *)
val init : int -> f:(int -> char) -> t
(** [fill t ~pos ~len c] returns [unit], so it acts by side effect on [t]; presumably it
    overwrites the [len] characters starting at [pos] with [c] -- TODO confirm. *)
val fill : t -> pos:int -> len:int -> char -> unit
(** String append. Also available unqualified, but re-exported here for documentation
    purposes.

    Note that [a ^ b] must copy both [a] and [b] into a newly-allocated result string,
    so [a ^ b ^ c ^ ... ^ z] is quadratic in the number of strings. [String.concat] does
    not have this problem -- it allocates the result buffer only once. The [Rope] module
    provides a data structure which uses a similar trick to achieve fast concatenation
    at either end of a string. *)
val ( ^ ) : t -> t -> t
(** [concat ?sep l] concatenates all strings in the list [l] using separator [sep]
    (the default [sep] is the empty string [""]). *)
val concat : ?sep:t -> t list -> t
(** [escaped t] represents special characters by escape sequences, following the
    lexical conventions of OCaml.

    Warning: only returns a copy if changes are necessary; otherwise the result may be
    the very string that was passed in. *)
val escaped : t -> t
(** [contains ?pos ?len t c] presumably tests whether [c] occurs in the substring of
    [t] selected by [pos] and [len]. NOTE(review): the defaults of [pos] and [len] are
    not visible from this interface -- confirm against the implementation. *)
val contains : ?pos:int -> ?len:int -> t -> char -> bool
(** [uppercase t] presumably returns [t] with its letters converted to upper case --
    TODO confirm which character set is handled. *)
val uppercase : t -> t
(** [lowercase t] presumably returns [t] with its letters converted to lower case --
    TODO confirm which character set is handled. *)
val lowercase : t -> t
(** [capitalize t] presumably returns [t] with its first character converted to upper
    case -- TODO confirm. *)
val capitalize : t -> t
(** [uncapitalize t] presumably returns [t] with its first character converted to lower
    case -- TODO confirm. *)
val uncapitalize : t -> t
(** Character search. NOTE(review): the behaviour described for this group is inferred
    from the names and types (and the stdlib functions of the same names); confirm
    against the implementation. *)

(** [index t c] presumably returns [Some i] for the first index [i] at which [c] occurs
    in [t], or [None] if [c] does not occur. *)
val index : t -> char -> int option
(** Like [index], but returns the index directly and raises (presumably [Not_found]) if
    the character does not occur. *)
val index_exn : t -> char -> int
(** [rindex t c] presumably returns [Some i] for the last index [i] at which [c] occurs
    in [t], or [None] if [c] does not occur. *)
val rindex : t -> char -> int option
(** Like [rindex], but returns the index directly and raises (presumably [Not_found])
    if the character does not occur. *)
val rindex_exn : t -> char -> int
(** [index_from t pos c] is presumably like [index t c], with the search starting at
    position [pos] and proceeding towards the end of [t]. *)
val index_from : t -> int -> char -> int option
(** Raising counterpart of [index_from]. *)
val index_from_exn : t -> int -> char -> int
(** [rindex_from t pos c] is presumably like [rindex t c], with the search starting at
    position [pos] and proceeding towards the start of [t]. *)
val rindex_from : t -> int -> char -> int option
(** Raising counterpart of [rindex_from]. *)
val rindex_from_exn : t -> int -> char -> int
(** Substring search and replace functions. They use the Knuth-Morris-Pratt algorithm
    (KMP) under the hood.

    The functions in the [Search_pattern] module allow the program to preprocess the
    searched pattern once and then use it many times without further allocations. *)
module Search_pattern : sig
(** A preprocessed search pattern. The type is abstract; only a conversion to an
    s-expression ([sexp_of_t]) is derived for it. *)
type t [@@deriving_inline sexp_of]
include sig [@@@ocaml.warning "-32"] val sexp_of_t : t -> Sexplib.Sexp.t end
[@@@end]
(** [create pattern] preprocesses [pattern] as per KMP, building an [int array] of
    length [length pattern]. All inputs are valid. *)
val create : string -> t
(** [index ?pos t ~in_] searches [in_] for the pattern [t] and returns the position of
    a match, if any. [pos < 0] or [pos >= length in_] result in no match (hence [index]
    returns [None] and [index_exn] raises). NOTE(review): the default value of [pos] is
    not visible from this interface -- presumably [0]. *)
val index : ?pos:int -> t -> in_:string -> int option
(** Like [index], but returns the position directly and raises when there is no
    match. *)
val index_exn : ?pos:int -> t -> in_:string -> int
(** [index_all t ~may_overlap ~in_] returns the positions of the matches of [t] in
    [in_].

    [may_overlap] determines whether after a successful match, [index_all] should start
    looking for another one at the very next position ([~may_overlap:true]), or jump to
    the end of that match and continue from there ([~may_overlap:false]), e.g.:
    - [index_all (create "aaa") ~may_overlap:false ~in_:"aaaaBaaaaaa" = [0; 5; 8]]
    - [index_all (create "aaa") ~may_overlap:true ~in_:"aaaaBaaaaaa" = [0; 1; 5; 6; 7; 8]]

    E.g. [replace_all] internally calls [index_all ~may_overlap:false]. *)
val index_all : t -> may_overlap:bool -> in_:string -> int list
(** [replace_first ?pos t ~in_ ~with_] presumably replaces the first match of [t] in
    [in_] (searching as [index ?pos] does) by [with_], and returns [in_] unchanged when
    there is no match -- TODO confirm against the implementation. *)
val replace_first : ?pos:int -> t -> in_:string -> with_:string -> string
(** [replace_all t ~in_ ~with_] replaces the non-overlapping matches of [t] in [in_]
    (as found by [index_all ~may_overlap:false]) by [with_].

    Note that the result of [replace_all pattern ~in_:text ~with_:r] may still contain
    [pattern], e.g.
    {[
      replace_all (create "bc") ~in_:"aabbcc" ~with_:"cb" = "aabcbc"
    ]} *)
val replace_all : t -> in_:string -> with_:string -> string
end
(** Substring search and replace convenience functions. They call
    [Search_pattern.create] and then forget the preprocessed pattern when the search is
    complete. [pos < 0] or [pos >= length t] result in no match (hence [substr_index]
    returns [None] and [substr_index_exn] raises). [may_overlap] indicates whether to
    report overlapping matches, see [Search_pattern.index_all]. *)
val substr_index : ?pos:int -> t -> pattern:t -> int option
(** Raising counterpart of [substr_index]: raises when [pattern] is not found. *)
val substr_index_exn : ?pos:int -> t -> pattern:t -> int
(** [substr_index_all t ~may_overlap ~pattern] returns the positions of the matches of
    [pattern] in [t]; see [Search_pattern.index_all] for the meaning of
    [may_overlap]. *)
val substr_index_all : t -> may_overlap:bool -> pattern:t -> int list
(** Convenience counterpart of [Search_pattern.replace_first], taking the pattern as a
    plain string. *)
val substr_replace_first : ?pos:int -> t -> pattern:t -> with_:t -> t
(** As with [Search_pattern.replace_all], the result may still contain [pattern]. *)
val substr_replace_all : t -> pattern:t -> with_:t -> t
(** [is_substring ~substring:"bar" "foo bar baz"] is [true]. *)
val is_substring : t -> substring:t -> bool
(** [slice s start stop] gets a slice of [s] between [start] and [stop]. [start] and
    [stop] will be normalized before the access (viz. [Array.normalize]).
    NOTE(review): the normalization rule itself (e.g. the treatment of negative
    indices) is defined elsewhere and is not visible from this file. *)
val slice : t -> int -> int -> t
(** [to_list_rev t] returns the list of the characters contained in the string [t], in
    reverse order. *)
val to_list_rev : t -> char list
(** [rev t] returns [t] in reverse order, i.e. a string made of the characters of [t]
    from last to first. *)
val rev : t -> t
(** [nget s i] gets the char at normalized position [i] in [s]. NOTE(review):
    presumably the same normalization as used by [slice] -- confirm. *)
val nget : t -> int -> char
(** [nset s i c] sets the char at normalized position [i] in [s] to [c]. The result is
    [unit]: [s] is modified in place. *)
val nset : t -> int -> char -> unit
(** [is_suffix s ~suffix] returns [true] if [s] ends with [suffix]. The comparison is
    case-sensitive; see [Caseless.is_suffix] for the case-insensitive variant. *)
val is_suffix : t -> suffix:t -> bool
(** [is_prefix s ~prefix] returns [true] if [s] starts with [prefix]. The comparison is
    case-sensitive; see [Caseless.is_prefix] for the case-insensitive variant. *)
val is_prefix : t -> prefix:t -> bool
(** If the string [s] contains the character [on], then [lsplit2_exn s ~on] returns a
    pair containing [s] split around the first appearance of [on] (from the left).
    @raise Not_found When [on] cannot be found in [s] *)
val lsplit2_exn : t -> on:char -> t * t
(** If the string [s] contains the character [on], then [rsplit2_exn s ~on] returns a
    pair containing [s] split around the first appearance of [on] (from the right).
    @raise Not_found When [on] cannot be found in [s] *)
val rsplit2_exn : t -> on:char -> t * t
(** [lsplit2 line ~on] optionally returns [line] split into two strings around the
    first appearance of [on] from the left. *)
val lsplit2 : t -> on:char -> (t * t) option
(** [rsplit2 line ~on] optionally returns [line] split into two strings around the
    first appearance of [on] from the right. *)
val rsplit2 : t -> on:char -> (t * t) option
(** [split s ~on] returns a list of substrings of [s] that are separated by [on].
    Consecutive [on] characters will cause multiple empty strings in the result.
    Splitting the empty string returns a list of the empty string, not the empty
    list. *)
val split : t -> on:char -> t list
(** [split_on_chars s ~on] returns a list of all substrings of [s] that are separated
    by one of the chars from [on]. Separators are not grouped: a run of consecutive
    separator characters in the source string will produce multiple empty strings in
    the result. *)
val split_on_chars : t -> on:char list -> t list
(** [split_lines t] returns the list of lines that comprise [t]. The lines do not
    include the trailing ["\n"] or ["\r\n"]. *)
val split_lines : t -> t list
(** [lfindi ?pos t ~f] returns the smallest [i >= pos] such that [f i t.[i]], if there
    is such an [i]; otherwise [None]. By default, [pos = 0]. *)
val lfindi : ?pos : int -> t -> f:(int -> char -> bool) -> int option
(** [rfindi ?pos t ~f] returns the largest [i <= pos] such that [f i t.[i]], if there
    is such an [i]; otherwise [None]. By default, [pos = length t - 1]. *)
val rfindi : ?pos : int -> t -> f:(int -> char -> bool) -> int option
(** [lstrip ?drop s] returns a string with consecutive chars satisfying [drop] (by
    default white space, e.g. tabs, spaces, newlines, and carriage returns) stripped
    from the beginning of [s].

    Warning: like the other strip functions, this may return the same string that was
    passed in. *)
val lstrip : ?drop:(char -> bool) -> t -> t
(** [rstrip ?drop s] returns a string with consecutive chars satisfying [drop] (by
    default white space, e.g. tabs, spaces, newlines, and carriage returns) stripped
    from the end of [s].

    Warning: like the other strip functions, this may return the same string that was
    passed in. *)
val rstrip : ?drop:(char -> bool) -> t -> t
(** [strip ?drop s] returns a string with consecutive chars satisfying [drop] (by
    default white space, e.g. tabs, spaces, newlines, and carriage returns) stripped
    from the beginning and end of [s].

    Warning: like the other strip functions, this may return the same string that was
    passed in. *)
val strip : ?drop:(char -> bool) -> t -> t
(** [map s ~f] applies [f] to each character in [s], and returns the resulting
    string. *)
val map : t -> f : (char -> char) -> t
(** [mapi s ~f] applies [f] to each character in [s] together with its index (the index
    is the first argument of [f]), and returns the resulting string. *)
val mapi : t -> f : (int -> char -> char) -> t
(** [foldi s ~init ~f] works similarly to [fold], but also passes the index of each
    character to [f], as its first argument. *)
val foldi : t -> init : 'a -> f : (int -> 'a -> char -> 'a) -> 'a
(** [concat_map ?sep s ~f] is like [map], but allows replacement of a single character
    with zero or two or more characters: each character is mapped to a string by [f].
    NOTE(review): presumably the resulting strings are joined with [sep], as in
    [concat] -- confirm against the implementation. *)
val concat_map : ?sep:t -> t -> f : (char -> t) -> t
(** [filter s ~f:predicate] discards characters not satisfying [predicate]. *)
val filter : t -> f : (char -> bool) -> t
(** [tr ~target ~replacement s] replaces every instance of [target] in [s] with
    [replacement]. *)
val tr : target : char -> replacement : char -> t -> t
(** [tr_inplace ~target ~replacement s] destructively modifies [s] (in place!),
    replacing every instance of [target] in [s] with [replacement]. *)
val tr_inplace : target : char -> replacement : char -> t -> unit
(** [chop_suffix_exn s ~suffix] returns a copy of [s] without the trailing [suffix].
    @raise Invalid_argument if [suffix] is not a suffix of [s] *)
val chop_suffix_exn : t -> suffix:t -> t
(** [chop_prefix_exn s ~prefix] returns a copy of [s] without the leading [prefix].
    @raise Invalid_argument if [prefix] is not a prefix of [s] *)
val chop_prefix_exn : t -> prefix:t -> t
(** Option-returning counterpart of [chop_suffix_exn]; presumably [None] when [suffix]
    is not a suffix of [s] -- TODO confirm. *)
val chop_suffix : t -> suffix:t -> t option
(** Option-returning counterpart of [chop_prefix_exn]; presumably [None] when [prefix]
    is not a prefix of [s] -- TODO confirm. *)
val chop_prefix : t -> prefix:t -> t option
(** [suffix s n] returns the longest suffix of [s] of length less than or equal to
    [n]. *)
val suffix : t -> int -> t
(** [prefix s n] returns the longest prefix of [s] of length less than or equal to
    [n]. *)
val prefix : t -> int -> t
(** [drop_suffix s n] drops the longest suffix of [s] of length less than or equal to
    [n], returning what remains of [s]. *)
val drop_suffix : t -> int -> t
(** [drop_prefix s n] drops the longest prefix of [s] of length less than or equal to
    [n], returning what remains of [s]. *)
val drop_prefix : t -> int -> t
(** [concat_array ?sep ar] is like {!String.concat}, but operates on arrays. *)
val concat_array : ?sep : t -> t array -> t
(** Slightly faster hash function on strings. Implemented by the external C function
    [Base_hash_string]; the [noalloc] attribute declares that the stub does not allocate
    on the OCaml heap. *)
external hash : t -> int = "Base_hash_string" [@@noalloc]
(** Fast equality function on strings; does not use [compare_val]. *)
val equal : t -> t -> bool
(** [is_empty s] returns [true] iff [s] is empty (i.e. its length is 0). *)
val is_empty : t -> bool
(** [of_char c] presumably returns the one-character string consisting of [c] -- TODO
    confirm. *)
val of_char : char -> t
(** [of_char_list l] presumably returns the string made of the characters of [l], in
    order -- TODO confirm. *)
val of_char_list : char list -> t
(** Operations for escaping and unescaping strings, with parameterized escape and
    escapeworthy characters. Escaping/unescaping using this module is more efficient
    than using Pcre. Benchmark code can be found in
    core/benchmarks/string_escaping.ml. *)
module Escaping : sig
(** [escape_gen_exn ~escapeworthy_map ~escape_char] returns a function that will escape
    a string [s] as follows: if [(c1,c2)] is in [escapeworthy_map], then all
    occurrences of [c1] are replaced by [escape_char] concatenated to [c2].

    Raises an exception if [escapeworthy_map] is not one-to-one. If [escape_char] is
    not in [escapeworthy_map], then it will be escaped to itself. *)
val escape_gen_exn
: escapeworthy_map:(char * char) list
-> escape_char:char
-> (string -> string) Staged.t
(** Like [escape_gen_exn], but the escaping function is returned inside an
    [Or_error.t] (and is not staged); presumably an invalid [escapeworthy_map] yields
    an error value here rather than an exception -- TODO confirm. *)
val escape_gen
: escapeworthy_map:(char * char) list
-> escape_char:char
-> (string -> string) Or_error.t
(** [escape ~escapeworthy ~escape_char s] is
    {[
      escape_gen_exn ~escapeworthy_map:(List.zip_exn escapeworthy escapeworthy)
        ~escape_char
    ]}
    Duplicates and [escape_char] will be removed from [escapeworthy], so no exception
    will be raised. *)
val escape : escapeworthy:char list -> escape_char:char -> (string -> string) Staged.t
(** [unescape_gen_exn] is the inverse operation of [escape_gen_exn]. That is,
    {[
      let escape = Staged.unstage (escape_gen_exn ~escapeworthy_map ~escape_char) in
      let unescape = Staged.unstage (unescape_gen_exn ~escapeworthy_map ~escape_char) in
      assert (s = unescape (escape s))
    ]}
    always succeeds when [escapeworthy_map] does not cause an exception. *)
val unescape_gen_exn
: escapeworthy_map:(char * char) list
-> escape_char:char
-> (string -> string) Staged.t
(** Like [unescape_gen_exn], but the unescaping function is returned inside an
    [Or_error.t] (and is not staged); presumably an invalid [escapeworthy_map] yields
    an error value here rather than an exception -- TODO confirm. *)
val unescape_gen
: escapeworthy_map:(char * char) list
-> escape_char:char
-> (string -> string) Or_error.t
(** [unescape ~escape_char] is defined as
    [unescape_gen_exn ~escapeworthy_map:\[\] ~escape_char]. *)
val unescape : escape_char:char -> (string -> string) Staged.t
(** Any char in an escaped string is either escaping, escaped or literal. For example,
    for escaped string "0_a0__0" with escape_char as '_', pos 1 and 4 are escaping, 2
    and 5 are escaped, and the rest are literal.

    [is_char_escaping s ~escape_char pos] returns [true] if the char at [pos] is
    escaping, [false] otherwise. *)
val is_char_escaping : string -> escape_char:char -> int -> bool
(** [is_char_escaped s ~escape_char pos] returns [true] if the char at [pos] is
    escaped, [false] otherwise. *)
val is_char_escaped : string -> escape_char:char -> int -> bool
(** [is_char_literal s ~escape_char pos] returns [true] if the char at [pos] is neither
    escaped nor escaping, [false] otherwise. *)
val is_char_literal : string -> escape_char:char -> int -> bool
(** [index s ~escape_char char] finds the first literal (not escaped) instance of
    [char] in [s] starting from 0. *)
val index : string -> escape_char:char -> char -> int option
(** Raising counterpart of [index]: returns the position directly and raises when there
    is no literal instance of the char. *)
val index_exn : string -> escape_char:char -> char -> int
(** [rindex s ~escape_char char] finds the first literal (not escaped) instance of
    [char] in [s] starting from the end of [s] and proceeding towards 0. *)
val rindex : string -> escape_char:char -> char -> int option
(** Raising counterpart of [rindex]. *)
val rindex_exn : string -> escape_char:char -> char -> int
(** [index_from s ~escape_char pos char] finds the first literal (not escaped) instance
    of [char] in [s] starting from [pos] and proceeding towards the end of [s]. *)
val index_from : string -> escape_char:char -> int -> char -> int option
(** Raising counterpart of [index_from]. *)
val index_from_exn : string -> escape_char:char -> int -> char -> int
(** [rindex_from s ~escape_char pos char] finds the first literal (not escaped)
    instance of [char] in [s] starting from [pos] and proceeding towards 0. *)
val rindex_from : string -> escape_char:char -> int -> char -> int option
(** Raising counterpart of [rindex_from]. *)
val rindex_from_exn : string -> escape_char:char -> int -> char -> int
(** [split s ~on ~escape_char] returns a list of substrings of [s] that are separated
    by literal versions of [on]. Consecutive [on] characters will cause multiple empty
    strings in the result. Splitting the empty string returns a list of the empty
    string, not the empty list.

    E.g.
    {[
      split ~escape_char:'_' ~on:',' "foo,bar_,baz" = ["foo"; "bar_,baz"]
    ]} *)
val split : string -> on:char -> escape_char:char -> string list
(** [split_on_chars s ~on ~escape_char] returns a list of all substrings of [s] that
    are separated by one of the literal chars from [on]. Separators are not grouped: a
    run of consecutive separator characters in the source string will produce multiple
    empty strings in the result.

    E.g.
    {[
      split_on_chars ~escape_char:'_' ~on:[',';'|'] "foo_|bar,baz|0"
      = ["foo_|bar"; "baz"; "0"]
    ]} *)
val split_on_chars : string -> on:char list -> escape_char:char -> string list
(** [lsplit2 s ~on ~escape_char] splits [s] into a pair on the first literal instance
    of [on] (meaning the first unescaped instance) starting from the left. *)
val lsplit2 : string -> on:char -> escape_char:char -> (string * string) option
(** Raising counterpart of [lsplit2]: returns the pair directly and raises when [s]
    contains no literal instance of [on]. *)
val lsplit2_exn : string -> on:char -> escape_char:char -> (string * string)
(** [rsplit2 s ~on ~escape_char] splits [s] into a pair on the first literal instance
    of [on] (meaning the first unescaped instance) starting from the right. *)
val rsplit2 : string -> on:char -> escape_char:char -> (string * string) option
(** Raising counterpart of [rsplit2]: returns the pair directly and raises when [s]
    contains no literal instance of [on]. *)
val rsplit2_exn : string -> on:char -> escape_char:char -> (string * string)
(** [lstrip_literal], [rstrip_literal] and [strip_literal] are the same as [lstrip],
    [rstrip], and [strip] for generic strings, except that they only drop literal
    characters -- they do not drop characters that are escaping or escaped. This makes
    sense if you are trying to get rid of junk whitespace (for example), because
    escaped whitespace seems more likely to be deliberate and not junk. *)
val lstrip_literal : ?drop:(char -> bool) -> t -> escape_char:char -> t
(** See [lstrip_literal]; strips from the end of the string. *)
val rstrip_literal : ?drop:(char -> bool) -> t -> escape_char:char -> t
(** See [lstrip_literal]; strips from both the beginning and the end of the string. *)
val strip_literal : ?drop:(char -> bool) -> t -> escape_char:char -> t
end
(** [unsafe_get t i] is the character of [t] at index [i], obtained through the compiler
    primitive [%string_unsafe_get]. Unlike [get], this primitive performs no bounds
    check, so the caller must guarantee that [i] is a valid index of [t]. *)
external unsafe_get : string -> int -> char = "%string_unsafe_get"
(** [String_set_primitives] is generated by a rule in the jbuild to work around the
    fact that the %string_*_set primitives are renamed to %bytes_*_set in OCaml 4.04,
    and then deleted in OCaml 4.05. Its contents are re-exported here unchanged. *)
include module type of struct include String_set_primitives end