Skip to content

Commit

Permalink
builtin: add string.is_pure_ascii() (#22748)
Browse files Browse the repository at this point in the history
  • Loading branch information
yuyi98 authored Nov 3, 2024
1 parent 462f186 commit ea19923
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 5 deletions.
22 changes: 19 additions & 3 deletions vlib/builtin/string.v
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,22 @@ pub fn (s string) len_utf8() int {
return l
}

// is_pure_ascii reports whether every byte of the string is an ASCII character,
// i.e. has a value in the range 0x00..0x7F. An empty string counts as pure ASCII.
// In UTF-8, only ASCII characters are encoded as a single byte with the high bit
// cleared; every byte of a multi-byte sequence has its high bit set:
//   1 byte:  0xxxxxxx
//   2 bytes: 110xxxxx 10xxxxxx
//   3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
//   4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
@[direct_array_access]
pub fn (s string) is_pure_ascii() bool {
	for idx := 0; idx < s.len; idx++ {
		// Any byte above 0x7F has its high bit set, so it cannot be ASCII.
		if s[idx] > 0x7f {
			return false
		}
	}
	return true
}

// clone_static returns an independent copy of a given array.
// It should be used only in -autofree generated code.
@[inline]
Expand Down Expand Up @@ -1709,7 +1725,7 @@ pub fn (s string) trim(cutset string) string {
if s == '' || cutset == '' {
return s.clone()
}
if cutset.len_utf8() == cutset.len {
if cutset.is_pure_ascii() {
return s.trim_chars(cutset, .trim_both)
} else {
return s.trim_runes(cutset, .trim_both)
Expand Down Expand Up @@ -1825,7 +1841,7 @@ pub fn (s string) trim_left(cutset string) string {
if s == '' || cutset == '' {
return s.clone()
}
if cutset.len_utf8() == cutset.len {
if cutset.is_pure_ascii() {
return s.trim_chars(cutset, .trim_left)
} else {
return s.trim_runes(cutset, .trim_left)
Expand All @@ -1839,7 +1855,7 @@ pub fn (s string) trim_right(cutset string) string {
if s.len < 1 || cutset.len < 1 {
return s.clone()
}
if cutset.len_utf8() == cutset.len {
if cutset.is_pure_ascii() {
return s.trim_chars(cutset, .trim_right)
} else {
return s.trim_runes(cutset, .trim_right)
Expand Down
8 changes: 8 additions & 0 deletions vlib/builtin/string_test.v
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,14 @@ fn test_len_utf8() {
assert 'Λέξη'.len_utf8() == 4
}

// test_is_pure_ascii checks string.is_pure_ascii() on ASCII-only input
// (including the empty string and the 0x7F boundary byte) and on input that
// contains bytes >= 0x80, i.e. multi-byte UTF-8 characters.
fn test_is_pure_ascii() {
	// ASCII-only input: the empty string has no non-ASCII byte, so it is pure ASCII.
	assert ''.is_pure_ascii()
	assert 'Vlang'.is_pure_ascii()
	assert 'tab\tnewline\n ~'.is_pure_ascii()
	// Boundary bytes: 0x7F is the highest ASCII value, 0x80 the first non-ASCII one.
	assert [u8(0x7f)].bytestr().is_pure_ascii()
	assert ![u8(0x80)].bytestr().is_pure_ascii()
	// A single non-ASCII character among ASCII ones is enough to fail the check.
	assert !'María'.is_pure_ascii()
	// Strings made entirely of multi-byte characters.
	assert !'姓名'.is_pure_ascii()
	assert !'Слово'.is_pure_ascii()
	assert !'Λέξη'.is_pure_ascii()
}

fn test_ends_with() {
a := 'browser.v'
assert a.ends_with('.v')
Expand Down
2 changes: 1 addition & 1 deletion vlib/v/gen/c/cgen.v
Original file line number Diff line number Diff line change
Expand Up @@ -3795,7 +3795,7 @@ fn (mut g Gen) char_literal(node ast.CharLiteral) {
return
}
// TODO: optimize use L-char instead of u32 when possible
if node.val.len_utf8() < node.val.len {
if !node.val.is_pure_ascii() {
g.write('((rune)0x${node.val.utf32_code().hex()} /* `${node.val}` */)')
return
}
Expand Down
2 changes: 1 addition & 1 deletion vlib/v/gen/js/js.v
Original file line number Diff line number Diff line change
Expand Up @@ -899,7 +899,7 @@ fn (mut g JsGen) expr(node_ ast.Expr) {
// TODO
}
ast.CharLiteral {
if node.val.len_utf8() < node.val.len {
if !node.val.is_pure_ascii() {
g.write("new rune('${node.val}'.charCodeAt())")
} else {
g.write("new u8('${node.val}')")
Expand Down

0 comments on commit ea19923

Please sign in to comment.