-
-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add parseFloatThousandSep #15421
Add parseFloatThousandSep #15421
Changes from all commits
1ecf6ed
817df69
0b79ec4
53c3e18
b3ada57
ec57030
d63112c
590702a
2163a46
5b2b630
af6821a
d1c768e
237aeef
af078ea
923663d
32d68e9
d2bb63f
6547469
bf88002
2d2f558
a02f7e1
3901c56
4c58bdd
e19a15c
b8310c3
48828f5
a98907b
e1e43fc
ce8c591
842532d
0d2ac4d
c754556
c3f56ed
c151058
a18df8a
64a306f
f5314a1
fce794c
765b6da
7c6c941
d37375e
f8886ba
744355f
a14b96d
4ec5d07
7411586
c8f210d
2ab6f83
38b26f1
6a38d57
eca6572
82a49c3
5e55e92
412c652
2a95434
e181640
b272708
cd9e3b6
41876f2
bbfe1b8
bf6cf49
d11359a
3766512
4bad51a
18fd3c6
4619329
32ae2e5
68a8228
4f3b1ee
6dd41c9
e90786f
87ca6ae
bdfc4f5
7ef7138
2114087
5558fac
cc24825
15d832b
4769a90
12854c1
d747b18
093b826
c61993e
8040f52
92b2ae6
fcc5c35
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,7 @@ | |
## used in comparison to `strutils <strutils.html>`_. | ||
|
||
import strutils | ||
import std/private/since | ||
|
||
proc expandTabs*(s: string, tabSize: int = 8): string {.noSideEffect.} = | ||
## Expand tab characters in `s` replacing them by spaces. | ||
|
@@ -84,3 +85,134 @@ proc rpartition*(s: string, sep: string): (string, string, string) | |
doAssert rpartition("foofoobar", "bar") == ("foofoo", "bar", "") | ||
|
||
return partition(s, sep, right = true) | ||
|
||
|
||
since (1, 5): | ||
type ParseFloatOptions* = enum ## Options for `parseFloatThousandSep`. | ||
pfLeadingDot, ## Allow leading dot, like ".9" and similar. | ||
pfTrailingDot, ## Allow trailing dot, like "9." and similar. | ||
pfSepAnywhere, ## Allow separator anywhere in between, like "9,9", "9,99". | ||
pfDotOptional ## Allow "9", "-0", integer literals, etc. | ||
pfNanInf ## Allow "NaN", "Inf", "-Inf", etc. | ||
|
||
func parseFloatThousandSep*(str: openArray[char]; options: set[ParseFloatOptions] = {}; | ||
sep = ','; decimalDot = '.'): float = | ||
## Convenience func for `parseFloat` which allows for thousand separators, | ||
## this is designed to parse floats as found in the wild formatted for humans. | ||
## | ||
## Fine grained flexibility and strictness is up to the user, | ||
## you can set the `options` using `ParseFloatOptions` enum. | ||
## | ||
## `parseFloatThousandSep` "prepares" `str` and then calls `parseFloat`, | ||
## consequently `parseFloatThousandSep` by design is slower than `parseFloat`. | ||
## | ||
## The following assumptions and requirements must be met: | ||
## - `str` must not be empty string. | ||
## - `str` must be stripped of trailing and leading whitespaces. | ||
## - `sep` and `decimalDot` must be different. | ||
## - `sep` must not be in `{'-', '+', 'e', 'i', 'n', 'f', 'a', '\n'}`. | ||
## - `decimalDot` must not be in `{'-', '+', 'e', 'i', 'n', 'f', 'a', ' ', '\t', '\v', '\c', '\n', '\f'}`. | ||
## | ||
## See also: | ||
## * `parseFloat <strutils.html#parseFloat,string>`_ | ||
runnableExamples: | ||
doAssert parseFloatThousandSep("10,000,000.000") == 10000000.0 | ||
doAssert parseFloatThousandSep("1,222.0001") == 1222.0001 | ||
doAssert parseFloatThousandSep("10.000,0", {}, '.', ',') == 10000.0 | ||
doAssert parseFloatThousandSep("1'000'000,000", {}, '\'', ',') == 1000000.0 | ||
doAssert parseFloatThousandSep("1000000", {pfDotOptional}) == 1000000.0 | ||
doAssert parseFloatThousandSep("-1,000", {pfDotOptional}) == -1000.0 | ||
## You can omit `sep`, but then all subsequent `sep` to the left must also be omitted: | ||
doAssert parseFloatThousandSep("1000,000", {pfDotOptional}) == 1000000.0 | ||
## Examples using different ParseFloatOptions: | ||
doAssert parseFloatThousandSep(".1", {pfLeadingDot}) == 0.1 | ||
doAssert parseFloatThousandSep("1", {pfDotOptional}) == 1.0 | ||
doAssert parseFloatThousandSep("1.", {pfTrailingDot}) == 1.0 | ||
juancarlospaco marked this conversation as resolved.
Show resolved
Hide resolved
|
||
doAssert parseFloatThousandSep("10,0.0,0,0", {pfSepAnywhere}) == 100.0 | ||
doAssert parseFloatThousandSep("01.00") == 1.0 | ||
doAssert parseFloatThousandSep("1,000.000e-9") == 1e-06 | ||
|
||
assert decimalDot notin {'-', '+', 'e', 'i', 'n', 'f', 'a', ' ', '\t', '\v', '\c', '\n', '\f'} | ||
assert sep notin {'-', '+', 'e', 'i', 'n', 'f', 'a', '\n'} | ||
assert sep != decimalDot | ||
|
||
proc parseFloatThousandSepRaise(i: int; c: char; s: openArray[char]) {.noinline, noreturn.} = | ||
raise newException(ValueError, | ||
"Invalid float containing thousand separators, invalid char $1 at index $2 for input $3" % | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. raise newException(ValueError,
"Invalid float containing thousand separators, invalid char $1 at index $2 for input '$3'" % [$c, $i, s.join]) so that it shows as
instead of
|
||
[$c, $i, $s]) | ||
|
||
# Fail fast, before looping. | ||
let strLen = str.len | ||
if strLen == 0: # Empty string. | ||
parseFloatThousandSepRaise(0, ' ', "empty string") | ||
if str[0] == sep: # ",1" | ||
parseFloatThousandSepRaise(0, sep, str) | ||
if pfLeadingDot notin options and str[0] == decimalDot: # ".1" | ||
parseFloatThousandSepRaise(0, decimalDot, str) | ||
if str[^1] == sep: # "1," | ||
parseFloatThousandSepRaise(strLen, sep, str) | ||
if pfTrailingDot notin options and str[^1] == decimalDot: # "1." | ||
parseFloatThousandSepRaise(strLen, decimalDot, str) | ||
if pfSepAnywhere notin options and (str.len <= 4 and sep in str): | ||
parseFloatThousandSepRaise(0, sep, str) # "1,1" | ||
|
||
if (strLen == 3 or strLen == 4) and ( | ||
(str[0] in {'i', 'I'} and str[1] in {'n', 'N'} and str[2] in {'f', 'F'}) or | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you simplify this logic? It's buggy for 2 reasons:
|
||
(str[0] in {'n', 'N'} and str[1] in {'a', 'A'} and str[2] in {'n', 'N'}) or | ||
(str[0] in {'+', '-'} and str[1] in {'i', 'I'} and str[2] in {'n', 'N'} and str[3] in {'f', 'F'}) or | ||
(str[0] in {'+', '-'} and str[1] in {'n', 'N'} and str[2] in {'a', 'A'} and str[3] in {'n', 'N'})): | ||
if pfNanInf notin options: | ||
parseFloatThousandSepRaise(0, sep, str) | ||
else: | ||
return parseFloat(str.join) # Allow NaN, Inf, -Inf, +Inf | ||
|
||
var | ||
s = newStringOfCap(strLen) | ||
successive: int | ||
afterDot, lastWasDot, lastWasSep, hasAnySep, isNegative, hasAnyDot, isScientific: bool | ||
|
||
for idx, c in str: | ||
if c in '0' .. '9': # Digits | ||
if hasAnySep and not afterDot and successive > 2: | ||
parseFloatThousandSepRaise(idx, c, str) | ||
else: | ||
s.add c | ||
lastWasSep = false | ||
lastWasDot = false | ||
inc successive | ||
elif c == sep: # Thousands separator, this is NOT the dot | ||
if pfSepAnywhere notin options and (lastWasSep or afterDot) or | ||
(isNegative and idx == 1 or idx == 0) or isScientific: | ||
parseFloatThousandSepRaise(idx, c, str) | ||
else: | ||
lastWasSep = true # Do NOT add the Thousands separator here. | ||
hasAnySep = true | ||
successive = 0 | ||
elif c == decimalDot: # This is the dot | ||
if (not afterDot and not hasAnyDot and not lastWasDot) and | ||
(pfLeadingDot notin options and (isNegative and idx == 1 or idx == 0)) or | ||
(hasAnySep and pfSepAnywhere notin options and successive != 3): # Disallow .1 | ||
parseFloatThousandSepRaise(idx, c, str) | ||
else: | ||
s.add '.' # Replace decimalDot to '.' so parseFloat can take it. | ||
successive = 0 | ||
lastWasDot = true | ||
afterDot = true | ||
hasAnyDot = true | ||
elif c == '-': # Allow negative float | ||
if isNegative or idx != 0 and not isScientific: # Disallow ---1.0 | ||
parseFloatThousandSepRaise(idx, c, str) # Allow 1.0e-9 | ||
else: | ||
s.add '-' | ||
if idx == 0: # Allow 1.0e-9 | ||
isNegative = true | ||
elif c in {'e', 'E'}: # Allow scientific notation | ||
if isScientific: | ||
parseFloatThousandSepRaise(idx, c, str) | ||
else: | ||
s.add 'e' | ||
isScientific = true | ||
|
||
if pfDotOptional notin options and not hasAnyDot: | ||
parseFloatThousandSepRaise(0, sep, str) | ||
result = parseFloat(s) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
import strmisc, math | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please move the content of |
||
|
||
|
||
func main() = | ||
doAssert parseFloatThousandSep("0.0") == 0.0 | ||
juancarlospaco marked this conversation as resolved.
Show resolved
Hide resolved
|
||
doAssert parseFloatThousandSep("1.0") == 1.0 | ||
doAssert parseFloatThousandSep("-0.0") == -0.0 | ||
doAssert parseFloatThousandSep("-1.0") == -1.0 | ||
doAssert parseFloatThousandSep("1.000") == 1.0 | ||
doAssert parseFloatThousandSep("1.000") == 1.0 | ||
doAssert parseFloatThousandSep("-1.000") == -1.0 | ||
doAssert parseFloatThousandSep("-1,222.0001") == -1222.0001 | ||
doAssert parseFloatThousandSep("3.141592653589793") == 3.141592653589793 | ||
doAssert parseFloatThousandSep("6.283185307179586") == 6.283185307179586 | ||
doAssert parseFloatThousandSep("2.718281828459045") == 2.718281828459045 | ||
|
||
doAssertRaises(ValueError): discard parseFloatThousandSep(" ", {pfDotOptional}) | ||
doAssertRaises(ValueError): discard parseFloatThousandSep(".1.", {pfLeadingDot,pfTrailingDot}) | ||
doAssertRaises(ValueError): discard parseFloatThousandSep("1ee9", {pfDotOptional}) | ||
doAssertRaises(ValueError): discard parseFloatThousandSep("aNa", {pfNanInf}) | ||
doAssertRaises(ValueError): discard parseFloatThousandSep("fnI", {pfNanInf}) | ||
doAssertRaises(ValueError): discard parseFloatThousandSep("1,000.000,000,E,+,9,0", {pfSepAnywhere}) | ||
for s in ["1,11", "1,1", "1,0000.000", "--", "..", "1,,000", "1..000", | ||
"1,000000", ",1", "1,", "1.", ".1", "10,00.0", "1,000.000ee9", "1e02.2", | ||
"1.0e--9", "Inf", "-Inf", "+Inf", "NaN"]: | ||
doAssertRaises(ValueError): discard parseFloatThousandSep(s) | ||
|
||
doAssert parseFloatThousandSep("10,00.0", {pfSepAnywhere}) == 1000.0 | ||
doAssert parseFloatThousandSep("0", {pfDotOptional}) == 0.0 | ||
doAssert parseFloatThousandSep("-0", {pfDotOptional}) == -0.0 | ||
doAssert parseFloatThousandSep("1,111", {pfDotOptional}) == 1111.0 | ||
doAssert parseFloatThousandSep(".1", {pfLeadingDot}) == 0.1 | ||
doAssert parseFloatThousandSep("1.", {pfTrailingDot}) == 1.0 | ||
doAssert parseFloatThousandSep(".1", {pfLeadingDot,pfTrailingDot}) == 0.1 | ||
doAssert parseFloatThousandSep("1.", {pfLeadingDot,pfTrailingDot}) == 1.0 | ||
doAssert parseFloatThousandSep("1", {pfDotOptional}) == 1.0 | ||
doAssert parseFloatThousandSep("1.0,0,0", {pfSepAnywhere}) == 1.0 | ||
doAssert parseFloatThousandSep(".10", {pfLeadingDot}) == 0.1 | ||
doAssert parseFloatThousandSep("10.", {pfTrailingDot}) == 10.0 | ||
doAssert parseFloatThousandSep("10", {pfDotOptional, pfSepAnywhere}) == 10.0 | ||
doAssert parseFloatThousandSep("1.0,0,0,0,0,0,0,0", {pfSepAnywhere}) == 1.0 | ||
doAssert parseFloatThousandSep("0,0,0,0,0,0,0,0.1", {pfSepAnywhere}) == 0.1 | ||
doAssert parseFloatThousandSep("1.0e9") == 1000000000.0 | ||
doAssert parseFloatThousandSep("1.0e-9") == 1e-09 | ||
doAssert parseFloatThousandSep("1,000.000e9") == 1000000000000.0 | ||
doAssert parseFloatThousandSep("1e9", {pfDotOptional}) == 1000000000.0 | ||
doAssert parseFloatThousandSep("1.0E9") == 1000000000.0 | ||
doAssert parseFloatThousandSep("1.0E-9") == 1e-09 | ||
doAssert parseFloatThousandSep("Inf", {pfNanInf}) == Inf | ||
doAssert parseFloatThousandSep("-Inf", {pfNanInf}) == -Inf | ||
doAssert parseFloatThousandSep("+Inf", {pfNanInf}) == +Inf | ||
juancarlospaco marked this conversation as resolved.
Show resolved
Hide resolved
|
||
doAssert parseFloatThousandSep("1000.000000E+90") == 1e93 | ||
doAssert parseFloatThousandSep("-10 000 000 000.0001", sep=' ') == -10000000000.0001 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
doAssert parseFloatThousandSep("-10 000 000 000,0001", sep=' ', decimalDot = ',') == -10000000000.0001 | ||
doAssert classify(parseFloatThousandSep("NaN", {pfNanInf})) == fcNan | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bug: this shouldn't be accepted: nim> echo parseFloatThousandSep("inf.0", {pfNanInf})
0.0 |
||
|
||
juancarlospaco marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
juancarlospaco marked this conversation as resolved.
Show resolved
Hide resolved
|
||
main() | ||
static: main() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This can be discussed in future work, but I'm just curious whether there's a reverse proc for this.
I just found out about `insertSep`; maybe they should cross-reference each other? I'm not sure how robust `insertSep` is, though.
There are also `formatFloat` and `formatEng`; maybe they should be extended to support formatting with thousand separators.