From 6df6b3d8a304c4f33909f2ade737adfe6b8171cd Mon Sep 17 00:00:00 2001 From: Zeljko Predjeskovic <44953551+Zeljko-Predjeskovic@users.noreply.github.com> Date: Sat, 7 Dec 2024 19:56:18 +0100 Subject: [PATCH] Add `String#byte_index(Regex)` (#15248) --- spec/std/string_spec.cr | 21 +++++++++++++++++++++ src/string.cr | 21 +++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/spec/std/string_spec.cr b/spec/std/string_spec.cr index 0a57ee9034a9..2bbc63f7e18e 100644 --- a/spec/std/string_spec.cr +++ b/spec/std/string_spec.cr @@ -1367,6 +1367,27 @@ describe "String" do "foo foo".byte_index("oo", 2).should eq(5) "こんにちは世界".byte_index("ちは").should eq(9) end + + it "gets byte index of regex" do + str = "0123x" + pattern = /x/ + + str.byte_index(pattern).should eq(4) + str.byte_index(pattern, offset: 4).should eq(4) + str.byte_index(pattern, offset: 5).should be_nil + str.byte_index(pattern, offset: -1).should eq(4) + str.byte_index(/y/).should be_nil + + str = "012abc678" + pattern = /[abc]/ + + str.byte_index(pattern).should eq(3) + str.byte_index(pattern, offset: 2).should eq(3) + str.byte_index(pattern, offset: 5).should eq(5) + str.byte_index(pattern, offset: -4).should eq(5) + str.byte_index(pattern, offset: -1).should be_nil + str.byte_index(/y/).should be_nil + end end describe "includes?" do diff --git a/src/string.cr b/src/string.cr index 4b52d08c7426..d47e87638976 100644 --- a/src/string.cr +++ b/src/string.cr @@ -3886,6 +3886,27 @@ class String nil end + # Returns the byte index of the first match of the regex *pattern* in the string, or `nil` if there is no match. + # The search starts at byte position *offset*; *options* is passed on to the regex match. + # + # A negative *offset* counts from the end of the string; `nil` is returned if it reaches before the start. 
+ # + # ``` + # "hello world".byte_index(/o/) # => 4 + # "hello world".byte_index(/o/, offset: 4) # => 4 + # "hello world".byte_index(/o/, offset: 5) # => 7 + # "hello world".byte_index(/o/, offset: -1) # => nil + # "hello world".byte_index(/y/) # => nil + # ``` + def byte_index(pattern : Regex, offset = 0, options : Regex::MatchOptions = Regex::MatchOptions::None) : Int32? + offset += bytesize if offset < 0 + return if offset < 0 + + if match = pattern.match_at_byte_index(self, offset, options: options) + match.byte_begin + end + end + # Returns the byte index of a char index, or `nil` if out of bounds. # # It is valid to pass `#size` to *index*, and in this case the answer