Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add strict_integer option to parse numbers with commas as strings #537

Merged
merged 1 commit into from
Jan 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions lib/psych.rb
Original file line number Diff line number Diff line change
Expand Up @@ -268,10 +268,10 @@ module Psych
# YAML documents that are supplied via user input. Instead, please use the
# load method or the safe_load method.
#
def self.unsafe_load yaml, filename: nil, fallback: false, symbolize_names: false, freeze: false
def self.unsafe_load yaml, filename: nil, fallback: false, symbolize_names: false, freeze: false, strict_integer: false
result = parse(yaml, filename: filename)
return fallback unless result
result.to_ruby(symbolize_names: symbolize_names, freeze: freeze)
result.to_ruby(symbolize_names: symbolize_names, freeze: freeze, strict_integer: strict_integer)
end
class << self; alias :load :unsafe_load; end

Expand Down Expand Up @@ -320,13 +320,13 @@ class << self; alias :load :unsafe_load; end
# Psych.safe_load("---\n foo: bar") # => {"foo"=>"bar"}
# Psych.safe_load("---\n foo: bar", symbolize_names: true) # => {:foo=>"bar"}
#
def self.safe_load yaml, permitted_classes: [], permitted_symbols: [], aliases: false, filename: nil, fallback: nil, symbolize_names: false, freeze: false
def self.safe_load yaml, permitted_classes: [], permitted_symbols: [], aliases: false, filename: nil, fallback: nil, symbolize_names: false, freeze: false, strict_integer: false
result = parse(yaml, filename: filename)
return fallback unless result

class_loader = ClassLoader::Restricted.new(permitted_classes.map(&:to_s),
permitted_symbols.map(&:to_s))
scanner = ScalarScanner.new class_loader
scanner = ScalarScanner.new class_loader, strict_integer: strict_integer
visitor = if aliases
Visitors::ToRuby.new scanner, class_loader, symbolize_names: symbolize_names, freeze: freeze
else
Expand Down Expand Up @@ -366,14 +366,15 @@ def self.safe_load yaml, permitted_classes: [], permitted_symbols: [], aliases:
# Raises a TypeError when `yaml` parameter is NilClass. This method is
# similar to `safe_load` except that `Symbol` objects are allowed by default.
#
def self.load yaml, permitted_classes: [Symbol], permitted_symbols: [], aliases: false, filename: nil, fallback: nil, symbolize_names: false, freeze: false
def self.load yaml, permitted_classes: [Symbol], permitted_symbols: [], aliases: false, filename: nil, fallback: nil, symbolize_names: false, freeze: false, strict_integer: false
safe_load yaml, permitted_classes: permitted_classes,
permitted_symbols: permitted_symbols,
aliases: aliases,
filename: filename,
fallback: fallback,
symbolize_names: symbolize_names,
freeze: freeze
freeze: freeze,
strict_integer: strict_integer
end

###
Expand Down
4 changes: 2 additions & 2 deletions lib/psych/nodes/node.rb
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ def each &block
# Convert this node to Ruby.
#
# See also Psych::Visitors::ToRuby
def to_ruby(symbolize_names: false, freeze: false)
Visitors::ToRuby.create(symbolize_names: symbolize_names, freeze: freeze).accept(self)
def to_ruby(symbolize_names: false, freeze: false, strict_integer: false)
Visitors::ToRuby.create(symbolize_names: symbolize_names, freeze: freeze, strict_integer: strict_integer).accept(self)
end
alias :transform :to_ruby

Expand Down
22 changes: 15 additions & 7 deletions lib/psych/scalar_scanner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,32 @@ class ScalarScanner
FLOAT = /^(?:[-+]?([0-9][0-9_,]*)?\.[0-9]*([eE][-+][0-9]+)?(?# base 10))$/x

# Taken from http://yaml.org/type/int.html
INTEGER = /^(?:[-+]?0b[0-1_,]+ (?# base 2)
|[-+]?0[0-7_,]+ (?# base 8)
|[-+]?(?:0|[1-9](?:[0-9]|,[0-9]|_[0-9])*) (?# base 10)
|[-+]?0x[0-9a-fA-F_,]+ (?# base 16))$/x
# Integer forms recognised when the scanner is created with
# strict_integer: true. An optional sign is followed by binary (0b),
# octal (leading 0), decimal, or hexadecimal (0x) digits; the underscore
# is the only separator accepted, so any scalar containing a comma fails
# to match. Every group is non-capturing because the pattern is only
# used for a yes/no match.
INTEGER_STRICT = /^(?:[-+]?0b[0-1_]+            (?# base 2)
                     |[-+]?0[0-7_]+             (?# base 8)
                     |[-+]?(?:0|[1-9][0-9_]*)   (?# base 10)
                     |[-+]?0x[0-9a-fA-F_]+      (?# base 16))$/x

# Legacy integer pattern: the same four bases as INTEGER_STRICT, but commas
# are accepted as digit separators in addition to underscores.
# In the binary, octal and hexadecimal forms, commas and underscores may be
# freely mixed with the digits; in the base 10 form, every comma or
# underscore must be immediately followed by a digit.
# This does not conform to the YAML spec, but is kept for backwards
# compatibility. It is the pattern used when strict_integer is not enabled.
INTEGER_LEGACY = /^(?:[-+]?0b[0-1_,]+ (?# base 2)
|[-+]?0[0-7_,]+ (?# base 8)
|[-+]?(?:0|[1-9](?:[0-9]|,[0-9]|_[0-9])*) (?# base 10)
|[-+]?0x[0-9a-fA-F_,]+ (?# base 16))$/x

attr_reader :class_loader

# Create a new scanner
def initialize class_loader
def initialize class_loader, strict_integer: false
@symbol_cache = {}
@class_loader = class_loader
@strict_integer = strict_integer
end

# Tokenize +string+ returning the Ruby object
def tokenize string
return nil if string.empty?
return @symbol_cache[string] if @symbol_cache.key?(string)

integer_regex = @strict_integer ? INTEGER_STRICT : INTEGER_LEGACY
# Check for a String type, being careful not to get caught by hash keys, hex values, and
# special floats (e.g., -.inf).
if string.match?(%r{^[^\d.:-]?[[:alpha:]_\s!@#$%\^&*(){}<>|/\\~;=]+}) || string.match?(/\n/)
Expand Down Expand Up @@ -89,7 +97,7 @@ def tokenize string
else
Float(string.gsub(/[,_]|\.([Ee]|$)/, '\1'))
end
elsif string.match?(INTEGER)
elsif string.match?(integer_regex)
parse_int string
else
string
Expand Down
4 changes: 2 additions & 2 deletions lib/psych/visitors/to_ruby.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ module Visitors
###
# This class walks a YAML AST, converting each node to Ruby
class ToRuby < Psych::Visitors::Visitor
def self.create(symbolize_names: false, freeze: false)
def self.create(symbolize_names: false, freeze: false, strict_integer: false)
class_loader = ClassLoader.new
scanner = ScalarScanner.new class_loader
scanner = ScalarScanner.new class_loader, strict_integer: strict_integer
new(scanner, class_loader, symbolize_names: symbolize_names, freeze: freeze)
end

Expand Down
11 changes: 11 additions & 0 deletions test/psych/test_numeric.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,16 @@ def test_does_not_attempt_numeric
str = Psych.load('--- 1.1.1')
assert_equal '1.1.1', str
end

# By default a scalar made of comma-separated digit groups is loaded as an
# Integer. This does not conform to the YAML spec, but is kept for
# backwards compatibility.
def test_string_with_commas
  assert_equal 123456, Psych.load('--- 12,34,56')
end

# With strict_integer enabled, the same comma-separated scalar is left
# untouched and comes back as a String.
def test_string_with_commas_with_strict_integer
  assert_equal '12,34,56', Psych.load('--- 12,34,56', strict_integer: true)
end
end
end
25 changes: 25 additions & 0 deletions test/psych/test_scalar_scanner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,31 @@ def test_scan_int_commas_and_underscores
assert_equal 0x123456789abcdef, ss.tokenize('0x12_,34,_56,_789abcdef__')
end

def test_scan_strict_int_commas_and_underscores
  # Checks adherence to the YAML spec when the 'strict_integer' option is
  # on: underscore-separated numbers are parsed as Integers, while any
  # scalar containing a comma is returned unchanged as a String.
  strict = Psych::ScalarScanner.new ClassLoader.new, strict_integer: true

  # Each entry is [expected token, scalar handed to the scanner].
  [
    [123_456_789, '123_456_789'],
    ['123,456,789', '123,456,789'],
    ['1_2,3,4_5,6_789', '1_2,3,4_5,6_789'],

    [1, '1'],
    [1, '+1'],
    [-1, '-1'],

    [0b010101010, '0b010101010'],
    [0b010101010, '0b01_01_01_010'],
    ['0b0,1_0,1_,0,1_01,0', '0b0,1_0,1_,0,1_01,0'],

    [01234567, '01234567'],
    ['0_,,,1_2,_34567', '0_,,,1_2,_34567'],

    [0x123456789abcdef, '0x123456789abcdef'],
    [0x123456789abcdef, '0x12_34_56_789abcdef'],
    ['0x12_,34,_56,_789abcdef', '0x12_,34,_56,_789abcdef'],
    ['0x_12_,34,_56,_789abcdef', '0x_12_,34,_56,_789abcdef'],
    ['0x12_,34,_56,_789abcdef__', '0x12_,34,_56,_789abcdef__']
  ].each do |expected, scalar|
    assert_equal expected, strict.tokenize(scalar)
  end
end

def test_scan_dot
assert_equal '.', ss.tokenize('.')
end
Expand Down