Skip to content

Commit

Permalink
Add :utf8 option to string/1 and add chardata/0 (#187)
Browse files Browse the repository at this point in the history
  • Loading branch information
sabiwara authored Jul 9, 2023
1 parent 1b7332a commit a055fc6
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 8 deletions.
62 changes: 54 additions & 8 deletions lib/stream_data.ex
Original file line number Diff line number Diff line change
Expand Up @@ -1818,6 +1818,10 @@ defmodule StreamData do
end

# Codepoints from space (?\s) through tilde (?~).
@ascii_chars ?\s..?~

# Every codepoint from 0 to 0x10FFFF except the surrogate range 0xD800..0xDFFF.
# RFC 3629: "The definition of UTF-8 prohibits encoding character numbers
# between U+D800 and U+DFFF".
@utf8_chars [0..0xD7FF, 0xE000..0x10FFFF]

# ASCII letters (both cases) and decimal digits.
@alphanumeric_chars [?a..?z, ?A..?Z, ?0..?9]
@printable_chars [
?\n,
Expand Down Expand Up @@ -1850,6 +1854,9 @@ defmodule StreamData do
* `:printable` - printable strings (`String.printable?/1` returns `true`)
are generated. Such strings shrink towards lower codepoints.
* `:utf8` - valid UTF-8 strings (`String.valid?/1` returns `true`)
are generated. Such strings shrink towards lower codepoints.
* a range - strings with characters from the range are generated. Such
strings shrink towards characters that appear earlier in the range.
Expand All @@ -1874,7 +1881,14 @@ defmodule StreamData do
Shrinks towards smaller strings; the characters themselves shrink as described
for each of the possible values of `kind_or_codepoints` above.
"""
@spec string(
        :ascii
        | :alphanumeric
        | :printable
        | :utf8
        | Range.t()
        | [Range.t() | pos_integer()]
      ) :: t(String.t())
# Bodiless head: declares the default for `options` once for every clause below.
def string(kind_or_codepoints, options \\ [])

Expand All @@ -1890,6 +1904,10 @@ defmodule StreamData do
string(@printable_chars, options)
end

# `:utf8` is shorthand for the list of codepoint ranges stored in @utf8_chars.
def string(:utf8, options), do: string(@utf8_chars, options)

# A single range: draw each codepoint as an integer from that range.
def string(%Range{} = codepoints_range, options) do
  codepoints_range
  |> integer()
  |> string_from_codepoint_data(options)
end
Expand Down Expand Up @@ -2011,13 +2029,7 @@ defmodule StreamData do
"""
@spec iolist() :: t(iolist())
def iolist() do
  # Integer leaves are bytes and binary leaves are arbitrary binaries. The
  # nesting and the slower scaling of the binaries are handled by the helper
  # that chardata/0 also uses.
  iolist_or_chardata_tree(byte(), binary())
end

@doc """
Expand All @@ -2042,6 +2054,40 @@ defmodule StreamData do
])
end

@doc """
Generates chardata.

Chardata are values of the `t:IO.chardata/0` type. A generated value is either
a plain UTF-8 string or a nested list whose leaves are UTF-8 strings and
codepoints taken from the valid UTF-8 ranges.

## Examples

    Enum.take(StreamData.chardata(), 3)
    #=> ["", [""], [12174]]

## Shrinking

Shrinks towards less nested chardata and ultimately towards smaller binaries.
"""
@spec chardata() :: t(IO.chardata())
def chardata() do
  utf8_string = string(:utf8)

  # Weight each range by its size when picking which range a codepoint comes from.
  weighted_ranges =
    for range <- @utf8_chars do
      {Enum.count(range), integer(range)}
    end

  nested = iolist_or_chardata_tree(frequency(weighted_ranges), utf8_string)

  frequency([{3, utf8_string}, {2, nested}])
end

# Builds a generator of nested, possibly improper lists whose leaves come from
# `int_type` or from a slower-scaling version of `binary_type`. The result is
# passed through List.wrap/1.
defp iolist_or_chardata_tree(int_type, binary_type) do
  # Binaries are scaled down because big binaries at many nesting levels would
  # make the generated data grow too large.
  slow_growing_binary = scale_with_exponent(binary_type, 0.6)

  leaf = one_of([int_type, slow_growing_binary])
  tail = one_of([slow_growing_binary, constant([])])

  leaf
  |> tree(fn child -> maybe_improper_list_of(child, tail) end)
  |> map(&List.wrap/1)
end

@doc """
Generates any term.
Expand Down
12 changes: 12 additions & 0 deletions test/stream_data_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,12 @@ defmodule StreamDataTest do
end
end

# Every string produced by the :utf8 kind must be valid UTF-8.
property "with :utf8" do
  check all generated <- string(:utf8) do
    assert String.valid?(generated)
  end
end

property "with a fixed length" do
check all string <- string(:alphanumeric, length: 3) do
assert String.length(string) == 3
Expand Down Expand Up @@ -683,6 +689,12 @@ defmodule StreamDataTest do
end
end

# Generated chardata must convert to a valid UTF-8 string.
property "chardata/0" do
  check all generated <- chardata(), max_runs: 50 do
    flattened = IO.chardata_to_string(generated)
    assert String.valid?(flattened)
  end
end

property "term/0" do
check all term <- term(), max_runs: 25 do
assert is_boolean(term) or is_integer(term) or is_float(term) or is_binary(term) or
Expand Down

0 comments on commit a055fc6

Please sign in to comment.