Add String#bytesplice (Ruby 3.2) to TruffleRuby.

Contents of this patch:
  * CHANGELOG.md: changelog entry.
  * spec/ruby/core/string/bytesplice_spec.rb: new specs for ASCII and
    multibyte receivers (Integer index/length form and Range form).
  * spec/tags/truffle/methods_tags.txt, spec/truffleruby.next-specs:
    spec bookkeeping for the new method.
  * StringNodes.java: new primitive string_is_character_head?.
  * core/string.rb: String#bytesplice itself.
  * core/truffle/polyglot_methods.rb: bytesplice delegate via to_s.

Review notes:
  * String#bytesplice now reports the Integer obtained from
    Primitive.rb_to_int in the "index ... out of string" message instead of
    the unconverted argument. Messages for Integer arguments are unchanged.
  * Comments were added to the new Ruby method and the Java primitive; the
    hunk line counts of those two files were updated accordingly. Their
    "index" blob ids are carried over unchanged and no longer describe the
    post-image.
  * Indentation and blank context lines were reconstructed from the hunk
    line counts; compare context against the target tree before applying.

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 865e733142c2..7fa31f48fb5c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,7 @@ Compatibility:
 * Fix `Range#size` and return `nil` for beginningless Range when end isn't Numeric (#3039, @rwstauner).
 * Alias `String#-@` to `String#dedup` (#3039, @itarato).
 * Fix `Pathname#relative_path_from` to convert string arguments to Pathname objects (@rwstauner).
+* Add `String#bytesplice` (#3039, @itarato).
 
 Performance:
 
diff --git a/spec/ruby/core/string/bytesplice_spec.rb b/spec/ruby/core/string/bytesplice_spec.rb
new file mode 100644
index 000000000000..f13024a79b45
--- /dev/null
+++ b/spec/ruby/core/string/bytesplice_spec.rb
@@ -0,0 +1,133 @@
+# -*- encoding: utf-8 -*-
+require_relative '../../spec_helper'
+
+describe "String#bytesplice" do
+  ruby_version_is "3.2" do
+    it "raises IndexError when index is less than -bytesize" do
+      -> { "hello".bytesplice(-6, 0, "xxx") }.should raise_error(IndexError, "index -6 out of string")
+    end
+
+    it "raises IndexError when index is greater than bytesize" do
+      -> { "hello".bytesplice(6, 0, "xxx") }.should raise_error(IndexError, "index 6 out of string")
+    end
+
+    it "raises IndexError for negative length" do
+      -> { "abc".bytesplice(0, -2, "") }.should raise_error(IndexError, "negative length -2")
+    end
+
+    it "replaces with integer indices" do
+      "hello".bytesplice(-5, 0, "xxx").should == "xxxhello"
+      "hello".bytesplice(0, 0, "xxx").should == "xxxhello"
+      "hello".bytesplice(0, 1, "xxx").should == "xxxello"
+      "hello".bytesplice(0, 5, "xxx").should == "xxx"
+      "hello".bytesplice(0, 6, "xxx").should == "xxx"
+    end
+
+    it "raises RangeError when range left boundary is less than -bytesize" do
+      -> { "hello".bytesplice(-6...-6, "xxx") }.should raise_error(RangeError, "-6...-6 out of range")
+    end
+
+    it "replaces with ranges" do
+      "hello".bytesplice(-5...-5, "xxx").should == "xxxhello"
+      "hello".bytesplice(0...0, "xxx").should == "xxxhello"
+      "hello".bytesplice(0..0, "xxx").should == "xxxello"
+      "hello".bytesplice(0...1, "xxx").should == "xxxello"
+      "hello".bytesplice(0..1, "xxx").should == "xxxllo"
+      "hello".bytesplice(0..-1, "xxx").should == "xxx"
+      "hello".bytesplice(0...5, "xxx").should == "xxx"
+      "hello".bytesplice(0...6, "xxx").should == "xxx"
+    end
+
+    it "raises TypeError when integer index is provided without length argument" do
+      -> { "hello".bytesplice(0, "xxx") }.should raise_error(TypeError, "wrong argument type Integer (expected Range)")
+    end
+
+    it "replaces on an empty string" do
+      "".bytesplice(0, 0, "").should == ""
+      "".bytesplice(0, 0, "xxx").should == "xxx"
+    end
+
+    it "mutates self" do
+      s = "hello"
+      s.bytesplice(2, 1, "xxx").should.equal?(s)
+    end
+
+    it "raises when string is frozen" do
+      s = "hello".freeze
+      -> { s.bytesplice(2, 1, "xxx") }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"")
+    end
+  end
+end
+
+describe "String#bytesplice with multibyte characters" do
+  ruby_version_is "3.2" do
+    it "raises IndexError when index is out of byte size boundary" do
+      -> { "こんにちは".bytesplice(-16, 0, "xxx") }.should raise_error(IndexError, "index -16 out of string")
+    end
+
+    it "raises IndexError when index is not on a codepoint boundary" do
+      -> { "こんにちは".bytesplice(1, 0, "xxx") }.should raise_error(IndexError, "offset 1 does not land on character boundary")
+    end
+
+    it "raises IndexError when length is not matching the codepoint boundary" do
+      -> { "こんにちは".bytesplice(0, 1, "xxx") }.should raise_error(IndexError, "offset 1 does not land on character boundary")
+      -> { "こんにちは".bytesplice(0, 2, "xxx") }.should raise_error(IndexError, "offset 2 does not land on character boundary")
+    end
+
+    it "replaces with integer indices" do
+      "こんにちは".bytesplice(-15, 0, "xxx").should == "xxxこんにちは"
+      "こんにちは".bytesplice(0, 0, "xxx").should == "xxxこんにちは"
+      "こんにちは".bytesplice(0, 3, "xxx").should == "xxxんにちは"
+      "こんにちは".bytesplice(3, 3, "はは").should == "こははにちは"
+      "こんにちは".bytesplice(15, 0, "xxx").should == "こんにちはxxx"
+    end
+
+    it "replaces with range" do
+      "こんにちは".bytesplice(-15...-16, "xxx").should == "xxxこんにちは"
+      "こんにちは".bytesplice(0...0, "xxx").should == "xxxこんにちは"
+      "こんにちは".bytesplice(0..2, "xxx").should == "xxxんにちは"
+      "こんにちは".bytesplice(0...3, "xxx").should == "xxxんにちは"
+      "こんにちは".bytesplice(0..5, "xxx").should == "xxxにちは"
+      "こんにちは".bytesplice(0..-1, "xxx").should == "xxx"
+      "こんにちは".bytesplice(0...15, "xxx").should == "xxx"
+      "こんにちは".bytesplice(0...18, "xxx").should == "xxx"
+    end
+
+    it "treats negative length for range as 0" do
+      "こんにちは".bytesplice(0...-100, "xxx").should == "xxxこんにちは"
+      "こんにちは".bytesplice(3...-100, "xxx").should == "こxxxんにちは"
+      "こんにちは".bytesplice(-15...-100, "xxx").should == "xxxこんにちは"
+    end
+
+    it "raises when ranges not match codepoint boundaries" do
+      -> { "こんにちは".bytesplice(0..0, "x") }.should raise_error(IndexError, "offset 1 does not land on character boundary")
+      -> { "こんにちは".bytesplice(0..1, "x") }.should raise_error(IndexError, "offset 2 does not land on character boundary")
+      # Begin is incorrect
+      -> { "こんにちは".bytesplice(-4..-1, "x") }.should raise_error(IndexError, "offset 11 does not land on character boundary")
+      -> { "こんにちは".bytesplice(-5..-1, "x") }.should raise_error(IndexError, "offset 10 does not land on character boundary")
+      # End is incorrect
+      -> { "こんにちは".bytesplice(-3..-2, "x") }.should raise_error(IndexError, "offset 14 does not land on character boundary")
+      -> { "こんにちは".bytesplice(-3..-3, "x") }.should raise_error(IndexError, "offset 13 does not land on character boundary")
+    end
+
+    it "deals with a different encoded argument" do
+      s = "こんにちは"
+      s.encoding.should == Encoding::UTF_8
+      sub = "xxxxxx"
+      sub.force_encoding(Encoding::US_ASCII)
+
+      result = s.bytesplice(0, 3, sub)
+      result.should == "xxxxxxんにちは"
+      result.encoding.should == Encoding::UTF_8
+
+      s = "xxxxxx"
+      s.force_encoding(Encoding::US_ASCII)
+      sub = "こんにちは"
+      sub.encoding.should == Encoding::UTF_8
+
+      result = s.bytesplice(0, 3, sub)
+      result.should == "こんにちはxxx"
+      result.encoding.should == Encoding::UTF_8
+    end
+  end
+end
diff --git a/spec/tags/truffle/methods_tags.txt b/spec/tags/truffle/methods_tags.txt
index 05ccb1871f12..533595a0a69d 100644
--- a/spec/tags/truffle/methods_tags.txt
+++ b/spec/tags/truffle/methods_tags.txt
@@ -113,3 +113,4 @@ fails:Public methods on Thread should include native_thread_id
 fails:Public methods on UnboundMethod should include private?
 fails:Public methods on UnboundMethod should include protected?
 fails:Public methods on UnboundMethod should include public?
+fails:Public methods on String should not include bytesplice
diff --git a/spec/truffleruby.next-specs b/spec/truffleruby.next-specs
index 39d167983f08..a6376293a102 100644
--- a/spec/truffleruby.next-specs
+++ b/spec/truffleruby.next-specs
@@ -16,3 +16,4 @@ spec/ruby/core/hash/shift_spec.rb
 spec/ruby/core/range/size_spec.rb
 
 spec/ruby/core/string/dedup_spec.rb
+spec/ruby/core/string/bytesplice_spec.rb
diff --git a/src/main/java/org/truffleruby/core/string/StringNodes.java b/src/main/java/org/truffleruby/core/string/StringNodes.java
index 25312e31483f..4423541791fb 100644
--- a/src/main/java/org/truffleruby/core/string/StringNodes.java
+++ b/src/main/java/org/truffleruby/core/string/StringNodes.java
@@ -4459,4 +4459,17 @@ private String formatTooLongError(int count, RubyString string) {
 
     }
 
+    /** Primitive wrapper around {@link IsCharacterHeadNode}: (encoding, string, byte offset) -> boolean. */
+    @Primitive(name = "string_is_character_head?", lowerFixnum = 2)
+    public abstract static class IsCharacterHeadPrimitiveNode extends PrimitiveArrayArgumentsNode {
+
+        @Specialization
+        protected boolean isCharacterHead(RubyEncoding enc, Object string, int byteOffset,
+                @Cached RubyStringLibrary libString,
+                @Cached IsCharacterHeadNode isCharacterHeadNode) {
+            var tstring = libString.getTString(string);
+            return isCharacterHeadNode.execute(enc, tstring, byteOffset);
+        }
+    }
+
 }
diff --git a/src/main/ruby/truffleruby/core/string.rb b/src/main/ruby/truffleruby/core/string.rb
index 6ab80670b0a8..aec7551b7dbc 100644
--- a/src/main/ruby/truffleruby/core/string.rb
+++ b/src/main/ruby/truffleruby/core/string.rb
@@ -70,6 +70,66 @@ def byteslice(index_or_range, length = undefined)
     byteslice index, length
   end
 
+  # Replaces a span of bytes in the receiver with +str+.
+  #
+  # Two-argument form: +index_or_range+ must be a Range of byte offsets; a
+  # negative computed length selects zero bytes. Three-argument form:
+  # +index_or_range+ and +length+ are converted to Integers, and a negative
+  # index counts back from the end. A length running past the end of the
+  # receiver is clamped.
+  #
+  # Raises TypeError when +length+ is omitted and the first argument is not a
+  # Range, IndexError for a negative length, an out-of-bounds Integer index, or
+  # a span edge that is not on a character boundary, and RangeError for an
+  # out-of-bounds Range start.
+  def bytesplice(index_or_range, length = undefined, str)
+    is_range = Primitive.is_a?(index_or_range, Range)
+
+    if Primitive.undefined?(length)
+      raise TypeError, "wrong argument type #{Primitive.class(index_or_range)} (expected Range)" unless is_range
+
+      start, len = Primitive.range_normalized_start_length(index_or_range, bytesize)
+      len = 0 if len < 0
+    else
+      # Keep the converted Integer so the IndexError below reports the value
+      # actually used rather than the unconverted argument.
+      index = Primitive.rb_to_int(index_or_range)
+      start = index < 0 ? index + bytesize : index
+      len = Primitive.rb_to_int(length)
+    end
+
+    str = StringValue(str)
+
+    if len < 0
+      raise IndexError, "negative length #{len}"
+    end
+
+    if bytesize < start || start < 0
+      if is_range
+        raise RangeError, "#{index_or_range} out of range"
+      else
+        raise IndexError, "index #{index} out of string"
+      end
+    end
+
+    len = bytesize - start if len > bytesize - start
+    finish = start + len
+
+    # An offset equal to bytesize is the end of the string, so only offsets
+    # inside the string are checked against character boundaries.
+    if start < bytesize && !Primitive.string_is_character_head?(encoding, self, start)
+      raise IndexError, "offset #{start} does not land on character boundary"
+    end
+    if finish < bytesize && !Primitive.string_is_character_head?(encoding, self, finish)
+      raise IndexError, "offset #{finish} does not land on character boundary"
+    end
+
+    # Arguments are validated before the receiver's mutability is checked.
+    Primitive.check_mutable_string(self)
+    enc = Primitive.encoding_ensure_compatible_str(self, str)
+    Primitive.string_splice(self, str, start, len, enc)
+  end
+
   def self.try_convert(obj)
     Truffle::Type.try_convert obj, String, :to_str
   end
diff --git a/src/main/ruby/truffleruby/core/truffle/polyglot_methods.rb b/src/main/ruby/truffleruby/core/truffle/polyglot_methods.rb
index cb988edd8006..1361446a1a63 100644
--- a/src/main/ruby/truffleruby/core/truffle/polyglot_methods.rb
+++ b/src/main/ruby/truffleruby/core/truffle/polyglot_methods.rb
@@ -80,6 +80,10 @@ def byteslice(...)
       to_s.byteslice(...)
     end
 
+    def bytesplice(...)
+      to_s.bytesplice(...)
+    end
+
     def capitalize(...)
       to_s.capitalize(...)
     end