Skip to content

Commit

Permalink
unicode/utf16: add AppendRune
Browse files Browse the repository at this point in the history
AppendRune appends the UTF-16 encoding of a rune to a []uint16.

    BenchmarkEncodeValidASCII-12                24.61ns 16B 1allocs
    BenchmarkEncodeValidJapaneseChars-12        18.79ns 8B  1allocs
    BenchmarkAppendRuneValidASCII-12            6.826ns 0B  0allocs
    BenchmarkAppendRuneValidJapaneseChars-12    3.547ns 0B  0allocs

The ASCII case is written to be inlineable.

Fixes #51896

Change-Id: I593b1029f603297ef6e80e036f2fee2a0938d38d
Reviewed-on: https://go-review.googlesource.com/c/go/+/409054
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@google.com>
Run-TryBot: Ian Lance Taylor <iant@google.com>
Auto-Submit: Ian Lance Taylor <iant@google.com>
Reviewed-by: Joedian Reid <joedian@golang.org>
  • Loading branch information
qmuntal authored and gopherbot committed Aug 19, 2022
1 parent f65ded5 commit 48297f1
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 0 deletions.
1 change: 1 addition & 0 deletions api/next/51896.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pkg unicode/utf16, func AppendRune([]uint16, int32) []uint16 #51896
17 changes: 17 additions & 0 deletions src/unicode/utf16/utf16.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,23 @@ func Encode(s []rune) []uint16 {
return a[:n]
}

// AppendRune appends the UTF-16 encoding of the Unicode code point r
// to the end of a and returns the extended buffer. If the rune is not
// a valid Unicode code point, it appends the encoding of U+FFFD.
func AppendRune(a []uint16, r rune) []uint16 {
// This function is inlineable for fast handling of ASCII.
switch {
case 0 <= r && r < surr1, surr3 <= r && r < surrSelf:
// normal rune: lies outside [surr1, surr3) and below surrSelf,
// so it is appended as a single uint16.
return append(a, uint16(r))
case surrSelf <= r && r <= maxRune:
// needs surrogate sequence: EncodeRune yields the pair, which is
// appended in order.
r1, r2 := EncodeRune(r)
return append(a, uint16(r1), uint16(r2))
}
// Anything else (negative, within [surr1, surr3), or above maxRune)
// is replaced by replacementChar.
return append(a, replacementChar)
}

// Decode returns the Unicode code point sequence represented
// by the UTF-16 encoding s.
func Decode(s []uint16) []rune {
Expand Down
34 changes: 34 additions & 0 deletions src/unicode/utf16/utf16_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,18 @@ func TestEncode(t *testing.T) {
}
}

// TestAppendRune feeds every input of encodeTests through AppendRune one
// rune at a time, starting from a nil slice, and reports any case where the
// accumulated UTF-16 output differs from the expected encoding.
func TestAppendRune(t *testing.T) {
	for _, tc := range encodeTests {
		var got []uint16
		for i := range tc.in {
			got = AppendRune(got, tc.in[i])
		}
		if reflect.DeepEqual(got, tc.out) {
			continue
		}
		t.Errorf("AppendRune(%x) = %x; want %x", tc.in, got, tc.out)
	}
}

func TestEncodeRune(t *testing.T) {
for i, tt := range encodeTests {
j := 0
Expand Down Expand Up @@ -193,6 +205,28 @@ func BenchmarkEncodeValidJapaneseChars(b *testing.B) {
}
}

// BenchmarkAppendRuneValidASCII measures AppendRune on a short run of ASCII
// runes, each of which takes a single uint16. The destination buffer is
// allocated once up front and truncated between iterations so that the
// timed loop never has to grow it.
func BenchmarkAppendRuneValidASCII(b *testing.B) {
	runes := []rune{'h', 'e', 'l', 'l', 'o'}
	buf := make([]uint16, 0, 2*len(runes))
	for n := 0; n < b.N; n++ {
		// Reuse the backing array: drop the previous iteration's output.
		buf = buf[:0]
		for _, r := range runes {
			buf = AppendRune(buf, r)
		}
	}
}

// BenchmarkAppendRuneValidJapaneseChars measures AppendRune on a few
// Japanese characters. The destination buffer is allocated once up front
// and truncated between iterations so that the timed loop never has to
// grow it.
func BenchmarkAppendRuneValidJapaneseChars(b *testing.B) {
	runes := []rune{'日', '本', '語'}
	buf := make([]uint16, 0, 2*len(runes))
	for n := 0; n < b.N; n++ {
		// Reuse the backing array: drop the previous iteration's output.
		buf = buf[:0]
		for _, r := range runes {
			buf = AppendRune(buf, r)
		}
	}
}

func BenchmarkEncodeRune(b *testing.B) {
for i := 0; i < b.N; i++ {
for _, u := range []rune{'𝓐', '𝓑', '𝓒', '𝓓', '𝓔'} {
Expand Down

0 comments on commit 48297f1

Please sign in to comment.