Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement fast_float for String#to_f #15195

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
27 changes: 27 additions & 0 deletions spec/manual/string_to_f32_spec.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
require "spec"

# Exhaustively checks that for all 4294967296 possible `Float32` values,
# `to_s.to_f32` returns the original number. Splits the floats into 4096 bins
# for better progress tracking. Also useful as a sort of benchmark.
#
# This was originally added when `String#to_f` moved from `LibC.strtod` to
# `fast_float`, but is applicable to any other implementation as well.
describe "x.to_s.to_f32 == x" do
  # One example per bin; a bin covers every bit pattern sharing the same upper
  # 12 bits, and the lower 20 bits are enumerated inside the example.
  0x1000_u32.times do |bin|
    it "%03x00000..%03xfffff" % {bin, bin} do
      base = bin << 20

      0x100000.times do |offset|
        original = (base | offset).unsafe_as(Float32)
        parsed = original.to_s.to_f32?.should_not be_nil

        if original.nan?
          # NaN never compares equal to itself, so only check for NaN-ness
          parsed.nan?.should be_true
          next
        end

        parsed.should eq(original)
        # compare the signs separately on top of the value comparison
        Math.copysign(1, parsed).should eq(Math.copysign(1, original))
      end
    end
  end
end
103 changes: 103 additions & 0 deletions spec/manual/string_to_f_supplemental_spec.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# Runs the fast_float supplemental test suite:
# https://github.com/fastfloat/supplemental_test_files
#
# Supplemental data files for testing floating parsing (credit: Nigel Tao for
# the data)
#
# LICENSE file (Apache 2): https://github.com/nigeltao/parse-number-fxx-test-data/blob/main/LICENSE
#
# Due to the sheer volume of the test cases (5.2+ million test cases across
# 270+ MB of text) these specs are not vendored into the Crystal repository.

require "spec"
require "http/client"
require "../support/number"
require "wait_group"

# these specs permit underflow and overflow to return 0 and infinity
# respectively (when `ret.ec == Errno::ERANGE`), so we have to use
# `Float::FastFloat` directly
# Parses the whole of *str* as a `Float32` through `Float::FastFloat`, using
# the general format.
#
# The parse must finish with either `Errno::NONE` or `Errno::ERANGE`; anything
# else fails the running example. Returns whatever the parser stored.
def fast_float_to_f32(str)
  first = str.to_unsafe
  last = first + str.bytesize
  opts = Float::FastFloat::ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general)
  result = uninitialized Float32

  outcome = Float::FastFloat::BinaryFormat_Float32.new.from_chars_advanced(first, last, pointerof(result), opts)
  {Errno::NONE, Errno::ERANGE}.should contain(outcome.ec)

  result
end

# Parses the whole of *str* as a `Float64` through `Float::FastFloat`, using
# the general format.
#
# The parse must finish with either `Errno::NONE` or `Errno::ERANGE`; anything
# else fails the running example. Returns whatever the parser stored.
def fast_float_to_f64(str)
  first = str.to_unsafe
  last = first + str.bytesize
  opts = Float::FastFloat::ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general)
  result = uninitialized Float64

  outcome = Float::FastFloat::BinaryFormat_Float64.new.from_chars_advanced(first, last, pointerof(result), opts)
  {Errno::NONE, Errno::ERANGE}.should contain(outcome.ec)

  result
end

# Base URL of the raw data files. The path pins one specific commit of
# fastfloat/supplemental_test_files so that the downloaded data cannot change
# underneath these specs.
RAW_BASE_URL = "https://raw.githubusercontent.com/fastfloat/supplemental_test_files/7cc512a7c60361ebe1baf54991d7905efdc62aa0/data/" # @1.0.0

# File names of the test suites to download; each one is appended to
# `RAW_BASE_URL` to form the request URL.
TEST_SUITES = %w(
freetype-2-7.txt
google-double-conversion.txt
google-wuffs.txt
ibm-fpgen.txt
lemire-fast-double-parser.txt
lemire-fast-float.txt
more-test-cases.txt
remyoudompheng-fptest-0.txt
remyoudompheng-fptest-1.txt
remyoudompheng-fptest-2.txt
remyoudompheng-fptest-3.txt
tencent-rapidjson.txt
ulfjack-ryu.txt
)

# Maps each suite's file name to its parsed test cases, each test case being
# `{f32_bits, f64_bits, str}`.
test_suite_cache = {} of String => Array({UInt32, UInt64, String})
puts "Fetching #{TEST_SUITES.size} test suites"
WaitGroup.wait do |wg|
  TEST_SUITES.each do |suite|
    # every suite is fetched in its own spawned fiber
    wg.spawn do
      url = RAW_BASE_URL + suite

      cache = HTTP::Client.get(url) do |res|
        # a failed request must not be mistaken for a malformed or empty suite
        raise "Failed to fetch #{url}: HTTP #{res.status_code}" unless res.success?

        res.body_io.each_line.map do |line|
          # a line consists of 4 space-separated fields; the first three are
          # parsed as hexadecimal bit patterns and the last is the input string
          args = line.split(' ')
          raise "BUG: should have 4 args" unless args.size == 4

          # f16_bits = args[0].to_u16(16)
          f32_bits = args[1].to_u32(16)
          f64_bits = args[2].to_u64(16)
          str = args[3]

          {f32_bits, f64_bits, str}
        end.to_a
      end

      puts "#{cache.size} test cases cached from #{url}"
      test_suite_cache[suite] = cache
    end
  end
end

# An exception raised inside a spawned fiber does not abort the program, so
# make sure that no suite has been silently dropped before defining the specs.
missing_suites = TEST_SUITES - test_suite_cache.keys
raise "Failed to fetch test suites: #{missing_suites.join(", ")}" unless missing_suites.empty?

puts "There are a total of #{test_suite_cache.sum(&.last.size)} test cases"

# Checks every cached test case against both the `Float32` and the `Float64`
# parser, once per hardware rounding mode.
describe String do
  describe "#to_f" do
    test_suite_cache.each do |suite, cache|
      describe suite do
        each_hardware_rounding_mode do |mode, mode_name|
          it mode_name do
            # `it` merely registers the example; its body runs later, by which
            # time `each_hardware_rounding_mode` has already restored the
            # previous rounding mode. The mode therefore has to be set again
            # around the actual assertions.
            with_hardware_rounding_mode(mode) do
              cache.each do |f32_bits, f64_bits, str|
                fast_float_to_f32(str).unsafe_as(UInt32).should eq(f32_bits)
                fast_float_to_f64(str).unsafe_as(UInt64).should eq(f64_bits)
              end
            end
          end
        end
      end
    end
  end
end
4 changes: 4 additions & 0 deletions spec/std/string_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,7 @@ describe "String" do
it { "1Y2P0IJ32E8E7".to_i64(36).should eq(9223372036854775807) }
end

# more specs are available in `spec/manual/string_to_f_supplemental_spec.cr`
it "does to_f" do
expect_raises(ArgumentError) { "".to_f }
"".to_f?.should be_nil
Expand All @@ -503,6 +504,7 @@ describe "String" do
" 1234.56 ".to_f?(whitespace: false).should be_nil
expect_raises(ArgumentError) { " 1234.56foo".to_f }
" 1234.56foo".to_f?.should be_nil
"\u{A0}\u{2028}\u{2029}1234.56\u{A0}\u{2028}\u{2029}".to_f.should eq(1234.56_f64)
"123.45 x".to_f64(strict: false).should eq(123.45_f64)
expect_raises(ArgumentError) { "x1.2".to_f64 }
"x1.2".to_f64?.should be_nil
Expand Down Expand Up @@ -547,6 +549,7 @@ describe "String" do
" 1234.56 ".to_f32?(whitespace: false).should be_nil
expect_raises(ArgumentError) { " 1234.56foo".to_f32 }
" 1234.56foo".to_f32?.should be_nil
"\u{A0}\u{2028}\u{2029}1234.56\u{A0}\u{2028}\u{2029}".to_f32.should eq(1234.56_f32)
"123.45 x".to_f32(strict: false).should eq(123.45_f32)
expect_raises(ArgumentError) { "x1.2".to_f32 }
"x1.2".to_f32?.should be_nil
Expand Down Expand Up @@ -590,6 +593,7 @@ describe "String" do
" 1234.56 ".to_f64?(whitespace: false).should be_nil
expect_raises(ArgumentError) { " 1234.56foo".to_f64 }
" 1234.56foo".to_f64?.should be_nil
"\u{A0}\u{2028}\u{2029}1234.56\u{A0}\u{2028}\u{2029}".to_f64.should eq(1234.56_f64)
"123.45 x".to_f64(strict: false).should eq(123.45_f64)
expect_raises(ArgumentError) { "x1.2".to_f64 }
"x1.2".to_f64?.should be_nil
Expand Down
32 changes: 32 additions & 0 deletions spec/support/number.cr
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,35 @@ macro hexfloat(str)
::Float64.parse_hexfloat({{ str }})
{% end %}
end

# See also: https://github.com/crystal-lang/crystal/issues/15192
lib LibC
  # Rounding mode constants from `<fenv.h>`. Their values depend on the C
  # runtime and on the CPU architecture, so they cannot be shared.
  {% if flag?(:win32) %}
    FE_TONEAREST  = 0x00000000
    FE_DOWNWARD   = 0x00000100
    FE_UPWARD     = 0x00000200
    FE_TOWARDZERO = 0x00000300
  {% elsif flag?(:aarch64) || flag?(:arm) %}
    # the rounding mode occupies bits 22-23 of the floating-point control
    # register on these architectures
    FE_TONEAREST  = 0x00000000
    FE_UPWARD     = 0x00400000
    FE_DOWNWARD   = 0x00800000
    FE_TOWARDZERO = 0x00C00000
  {% else %}
    # x86 and x86-64 values
    # NOTE(review): other architectures are not covered here; confirm against
    # the target's `<fenv.h>` before relying on these
    FE_TONEAREST  = 0x00000000
    FE_DOWNWARD   = 0x00000400
    FE_UPWARD     = 0x00000800
    FE_TOWARDZERO = 0x00000C00
  {% end %}

  fun fegetround : Int
  fun fesetround(round : Int) : Int
end

# Switches the hardware rounding mode to *mode* for the duration of the block.
# The mode that was active on entry is put back afterwards, even if the block
# raises.
def with_hardware_rounding_mode(mode, &)
  previous_mode = LibC.fegetround
  LibC.fesetround(mode)

  begin
    yield
  ensure
    LibC.fesetround(previous_mode)
  end
end

# Yields each of the four hardware rounding modes in turn, together with the
# name of its `LibC` constant. The respective mode is active while the block
# runs.
def each_hardware_rounding_mode(&)
  with_hardware_rounding_mode(LibC::FE_TONEAREST) do
    yield LibC::FE_TONEAREST, "FE_TONEAREST"
  end

  with_hardware_rounding_mode(LibC::FE_DOWNWARD) do
    yield LibC::FE_DOWNWARD, "FE_DOWNWARD"
  end

  with_hardware_rounding_mode(LibC::FE_UPWARD) do
    yield LibC::FE_UPWARD, "FE_UPWARD"
  end

  with_hardware_rounding_mode(LibC::FE_TOWARDZERO) do
    yield LibC::FE_TOWARDZERO, "FE_TOWARDZERO"
  end
end
75 changes: 75 additions & 0 deletions src/float/fast_float.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
struct Float
  # :nodoc:
  # Source port of the floating-point part of fast_float for C++:
  # https://github.com/fastfloat/fast_float
  #
  # fast_float implements the C++17 `std::from_chars`, which accepts a subset of
  # the C `strtod` / `strtof`'s string format:
  #
  # - a leading plus sign is disallowed, but both fast_float and this port
  #   accept it;
  # - the exponent may be required or disallowed, depending on the format
  #   argument (this port always allows both);
  # - hexfloats are not enabled by default, and fast_float doesn't implement it;
  #   (https://github.com/fastfloat/fast_float/issues/124)
  # - hexfloats cannot start with `0x` or `0X`.
  #
  # The following is their license:
  #
  #   Licensed under either of Apache License, Version 2.0 or MIT license or
  #   BOOST license.
  #
  #   Unless you explicitly state otherwise, any contribution intentionally
  #   submitted for inclusion in this repository by you, as defined in the
  #   Apache-2.0 license, shall be triple licensed as above, without any
  #   additional terms or conditions.
  #
  # Main differences from the original fast_float:
  #
  # - Only `UC == UInt8` is implemented and tested, not the other wide chars;
  # - No explicit SIMD (the original mainly uses this for wide char strings).
  #
  # The following compile-time configuration is assumed:
  #
  # - #define FASTFLOAT_ALLOWS_LEADING_PLUS
  # - #define FLT_EVAL_METHOD 0
  module FastFloat
    # Current revision: https://github.com/fastfloat/fast_float/tree/v6.1.6

    # Parses *str* as a `Float64` using the general format.
    #
    # If *whitespace* is true, the bytes counted by `String#calc_excess_left`
    # and `String#calc_excess_right` are excluded from both ends of the input
    # first. If *strict* is true, the parser must stop exactly at the end of
    # the (possibly trimmed) input; otherwise any trailing bytes are ignored.
    #
    # Returns `nil` if the parser reports anything other than `Errno::NONE`,
    # or if *strict* is true and not all of the input was consumed.
    def self.to_f64?(str : String, whitespace : Bool, strict : Bool) : Float64?
      value = uninitialized Float64
      start = str.to_unsafe
      finish = start + str.bytesize
      options = ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general)

      if whitespace
        start += str.calc_excess_left
        finish -= str.calc_excess_right
      end

      # NOTE(review): for a string made up solely of whitespace the two excess
      # counts presumably each span the whole string, leaving `start` past
      # `finish`. Never hand an empty or inverted range to the parser.
      return nil if start >= finish

      ret = BinaryFormat_Float64.new.from_chars_advanced(start, finish, pointerof(value), options)
      if ret.ec == Errno::NONE && (!strict || ret.ptr == finish)
        value
      end
    end

    # Parses *str* as a `Float32` using the general format.
    #
    # If *whitespace* is true, the bytes counted by `String#calc_excess_left`
    # and `String#calc_excess_right` are excluded from both ends of the input
    # first. If *strict* is true, the parser must stop exactly at the end of
    # the (possibly trimmed) input; otherwise any trailing bytes are ignored.
    #
    # Returns `nil` if the parser reports anything other than `Errno::NONE`,
    # or if *strict* is true and not all of the input was consumed.
    def self.to_f32?(str : String, whitespace : Bool, strict : Bool) : Float32?
      value = uninitialized Float32
      start = str.to_unsafe
      finish = start + str.bytesize
      options = ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general)

      if whitespace
        start += str.calc_excess_left
        finish -= str.calc_excess_right
      end

      # NOTE(review): for a string made up solely of whitespace the two excess
      # counts presumably each span the whole string, leaving `start` past
      # `finish`. Never hand an empty or inverted range to the parser.
      return nil if start >= finish

      ret = BinaryFormat_Float32.new.from_chars_advanced(start, finish, pointerof(value), options)
      if ret.ec == Errno::NONE && (!strict || ret.ptr == finish)
        value
      end
    end
  end
end

require "./fast_float/parse_number"
Loading
Loading