Skip to content

Commit

Permalink
ppc64x: decode #18
Browse files Browse the repository at this point in the history
  • Loading branch information
emmansun authored Oct 16, 2024
1 parent 498f95a commit 72efebc
Show file tree
Hide file tree
Showing 3 changed files with 329 additions and 0 deletions.
30 changes: 30 additions & 0 deletions base64_ppc64x.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,17 @@

package base64

import "fmt"

// encodeAsm is the vector encoder implemented in assembly (it has no Go body).
// lut selects the alphabet. NOTE(review): the return value is presumably the
// number of bytes written to dst - confirm against the assembly.
//
//go:noescape
func encodeAsm(dst, src []byte, lut *[16]byte) int

// decodeStdAsm decodes standard-alphabet base64 from src into dst in 16-byte
// input chunks (12 output bytes each) and returns the number of src bytes it
// did NOT consume. It stops at the first chunk containing a byte outside the
// alphabet, or once fewer than 24 input bytes remain.
//
// The first chunk is processed before any length check, so callers must pass
// len(src) >= 24. Every chunk is stored as a full 16-byte vector, so dst needs
// 4 bytes of room beyond the last decoded byte.
//
//go:noescape
func decodeStdAsm(dst, src []byte) int

// decodeUrlAsm is the URL-safe alphabet counterpart of decodeStdAsm, with the
// same return value and the same requirements on src and dst.
//
//go:noescape
func decodeUrlAsm(dst, src []byte) int

func encode(enc *Encoding, dst, src []byte) {
if len(src) >= 16 && enc.lut != nil {
encoded := encodeAsm(dst, src, enc.lut)
Expand All @@ -21,5 +29,27 @@ func encode(enc *Encoding, dst, src []byte) {
}

// decode decodes src into dst using enc. It returns the number of bytes
// written to dst and the error, if any, reported by decodeGeneric.
//
// Inputs of at least 24 bytes that use the standard or URL-safe alphabet are
// first handed to the assembly routine, which consumes whole 16-byte chunks of
// valid input and reports how many source bytes it left untouched. Whatever it
// leaves (the tail, padding, or the first chunk containing an invalid byte) is
// decoded by decodeGeneric, so validation and error reporting always come from
// the generic path.
func decode(enc *Encoding, dst, src []byte) (int, error) {
	srcLen := len(src)
	if srcLen < 24 {
		// Too short for the assembly routine, which processes its first
		// chunk before checking the remaining length.
		return decodeGeneric(enc, dst, src)
	}

	// remain is the number of source bytes the assembly did not consume.
	remain := srcLen
	switch enc.lut {
	case &encodeStdLut:
		remain = decodeStdAsm(dst, src)
	case &encodeURLLut:
		remain = decodeUrlAsm(dst, src)
	}
	if remain >= srcLen {
		// Unknown alphabet, or the very first chunk was rejected.
		return decodeGeneric(enc, dst, src)
	}

	consumed := srcLen - remain  // always a multiple of 16
	written := (consumed / 4) * 3 // 4 input bytes decode to 3 output bytes
	n, err := decodeGeneric(enc, dst[written:], src[consumed:])
	if cerr, ok := err.(CorruptInputError); ok {
		// decodeGeneric saw only the tail; rebase the offset onto the
		// caller's original input.
		return n + written, CorruptInputError(int(cerr) + consumed)
	}
	return n + written, err
}
215 changes: 215 additions & 0 deletions base64_ppc64x.s
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,32 @@ DATA base64_const<>+0x80(SB)/8, $0x1919191919191919 // range 0 end
DATA base64_const<>+0x88(SB)/8, $0x1919191919191919 // range 0 end
GLOBL base64_const<>(SB), (NOPTR+RODATA), $144

// decode_const is the 192-byte read-only constant table used by decodeStdAsm
// and decodeUrlAsm. Each entry is one 16-byte vector, addressed by offset:
//   0x00-0x3F  standard alphabet: lut hi, lut lo, compare byte, roll lut
//   0x40-0x7F  URL alphabet:      lut hi, lut lo, compare byte, roll lut
//   0x80-0x9F  multiplier constants for the two multiply-add steps
//   0xA0/0xB0  final byte-permute mask for ppc64le / ppc64
DATA decode_const<>+0x00(SB)/8, $0x1010010204080408 // standard decode lut hi
DATA decode_const<>+0x08(SB)/8, $0x1010101010101010
DATA decode_const<>+0x10(SB)/8, $0x1511111111111111 // standard decode lut lo
DATA decode_const<>+0x18(SB)/8, $0x1111131A1B1B1B1A
// 0x2F splatted; decodeStdAsm compares input bytes for equality with it.
DATA decode_const<>+0x20(SB)/8, $0x2F2F2F2F2F2F2F2F // standard decode mask
DATA decode_const<>+0x28(SB)/8, $0x2F2F2F2F2F2F2F2F
DATA decode_const<>+0x30(SB)/8, $0x00101304BFBFB9B9 // standard decode lut roll
DATA decode_const<>+0x38(SB)/8, $0x0000000000000000
DATA decode_const<>+0x40(SB)/8, $0x1010010204080428 // url decode lut hi
DATA decode_const<>+0x48(SB)/8, $0x1010101010101010
DATA decode_const<>+0x50(SB)/8, $0x1511111111111111 // url decode lut lo
DATA decode_const<>+0x58(SB)/8, $0x1111131B1B1A1B33
// 0x5E splatted; decodeUrlAsm tests input bytes for being greater than it.
DATA decode_const<>+0x60(SB)/8, $0x5E5E5E5E5E5E5E5E // url decode mask
DATA decode_const<>+0x68(SB)/8, $0x5E5E5E5E5E5E5E5E
DATA decode_const<>+0x70(SB)/8, $0x00001104BFBFE0B9 // url decode lut roll
DATA decode_const<>+0x78(SB)/8, $0xB900000000000000
DATA decode_const<>+0x80(SB)/8, $0x4001400140014001 // decode reshuffling constant 0 (byte multipliers)
DATA decode_const<>+0x88(SB)/8, $0x4001400140014001
DATA decode_const<>+0x90(SB)/8, $0x1000000110000001 // decode reshuffling constant 1 (halfword multipliers)
DATA decode_const<>+0x98(SB)/8, $0x1000000110000001
DATA decode_const<>+0xA0(SB)/8, $0x0A09070605030201 // decode reshuffling mask for ppc64le
DATA decode_const<>+0xA8(SB)/8, $0x000000000F0E0D0B
DATA decode_const<>+0xB0(SB)/8, $0x010203050607090A // decode reshuffling mask for ppc64
DATA decode_const<>+0xB8(SB)/8, $0x0B0D0E0F00000000
GLOBL decode_const<>(SB), (NOPTR+RODATA), $192

#define REV_BYTES V0
#define RESHUFFLE_MASK V1
#define SHIFT_RIGHT_MASK V2
Expand Down Expand Up @@ -105,3 +131,192 @@ loop:
done:
MOVD R7, ret+56(FP)
RET

// Drop the register aliases of the preceding routine so the names can be
// reassigned for the decoders. REV_BYTES is deliberately not undefined: the
// decoders still use it on ppc64le.
#undef RESHUFFLE_MASK
#undef SHIFT_RIGHT_MASK
#undef MULHI_MASK
#undef SHIFT_LEFT_MASK
#undef MULLO_MASK
#undef RANGE1_END
#undef RANGE0_END
#undef LUT
#undef X0
#undef X1
#undef X2

// Constant registers shared by decodeStdAsm and decodeUrlAsm.
// NIBBLE_MASK: 0x0F in every byte.
#define NIBBLE_MASK V1
// LUT_HI / LUT_LO: validation tables indexed by the high / low nibble.
#define LUT_HI V2
#define LUT_LO V3
// DECODE_END: per-alphabet compare byte (0x2F for standard, 0x5E for URL).
#define DECODE_END V4
// LUT_ROLL: per-range offset added to each input byte to get its 6-bit value.
#define LUT_ROLL V5
// RESHUFFLE_CONST0 / RESHUFFLE_CONST1: multipliers for the two multiply-add steps.
#define RESHUFFLE_CONST0 V6
#define RESHUFFLE_CONST1 V7
// RESHUFFLE_MASK: endian-specific permute applied just before the store.
#define RESHUFFLE_MASK V8
// FOUR: 4 in every byte, used as the shift count.
#define FOUR V9

// Scratch registers, plus an all-zero vector for the validity compare.
#define X0 V10
#define X1 V11
#define X2 V12
#define X3 V13
#define ZERO V14

//func decodeStdAsm(dst, src []byte) int
//
// Decodes standard-alphabet base64 in 16-byte input chunks, writing 12 decoded
// bytes per chunk, and returns the number of src bytes NOT consumed.
//
// Register use:
//   R4 = dst base, R5 = src base, R6 = src bytes remaining (the return value)
//   R7 = src offset, R8 = constant-table base during setup, then dst offset
//   R9 = scratch offset into decode_const
//
// The first chunk is processed before any length check, so the caller must
// pass at least 24 bytes of src. Each chunk is stored as a full 16-byte vector
// while the dst offset advances by only 12, so dst needs 4 bytes of slack
// after the last decoded byte.
// NOTE(review): VSRW(...) and VAND(...) are written as macro invocations; their
// definitions are not visible in this part of the file.
TEXT ·decodeStdAsm(SB),NOSPLIT,$0
MOVD dst_base+0(FP), R4
MOVD src_base+24(FP), R5
MOVD src_len+32(FP), R6

// Load constants
#ifdef GOARCH_ppc64le
// Byte-reversal permute, taken from the first 16 bytes of base64_const.
MOVD $base64_const<>(SB), R8
LXVD2X (R8), REV_BYTES
#endif
VSPLTISB $0, ZERO
VSPLTISB $0x4, FOUR
VSPLTISB $0x0F, NIBBLE_MASK
// The standard-alphabet tables sit at offsets 0x00-0x30 of decode_const.
MOVD $decode_const<>(SB), R8
LXVD2X (R8), LUT_HI
MOVD $0x10, R9
LXVD2X (R8)(R9), LUT_LO
MOVD $0x20, R9
LXVD2X (R8)(R9), DECODE_END
MOVD $0x30, R9
LXVD2X (R8)(R9), LUT_ROLL
MOVD $0x80, R9
LXVD2X (R8)(R9), RESHUFFLE_CONST0
MOVD $0x90, R9
LXVD2X (R8)(R9), RESHUFFLE_CONST1
// Pick the final permute mask that matches the target byte order.
#ifdef GOARCH_ppc64le
MOVD $0xA0, R9
#else
MOVD $0xB0, R9
#endif
LXVD2X (R8)(R9), RESHUFFLE_MASK

// R7 = src offset, R8 = dst offset, both starting at 0.
MOVD $0, R7
MOVD R7, R8
loop:
// load data
LXVD2X (R5)(R7), X0
#ifdef GOARCH_ppc64le
VPERM X0, X0, REV_BYTES, X0
#endif
// validate input
// X1 = high nibble and X2 = low nibble of every input byte. Each indexes its
// table, and the AND of the two results must be zero for a valid byte.
VSRW(X0, FOUR, X1)
VAND(X1, NIBBLE_MASK, X1) // high nibble
VAND(X0, NIBBLE_MASK, X2)
VPERM LUT_HI, LUT_HI, X1, X3
VPERM LUT_LO, LUT_LO, X2, X2
VAND(X3, X2, X2)
// Leave the loop unless every lane compared equal to zero. R6 still counts
// this chunk, so the caller re-decodes it on the generic path.
VCMPEQUBCC X2, ZERO, X3
BGE CR6, done

// translate
// Bytes equal to DECODE_END (0x2F) yield 0xFF here, which lowers their roll
// index by one relative to the plain high nibble.
VCMPEQUB X0, DECODE_END, X2
VADDUBM X1, X2, X1

// Look up the per-byte offset and add it to the input byte.
VPERM LUT_ROLL, LUT_ROLL, X1, X1
VADDUBM X0, X1, X0

// PMADDUBSW
// (x86 mnemonic kept as a label: multiply even and odd bytes by
// RESHUFFLE_CONST0 and add the halfword products.)
VMULEUB X0, RESHUFFLE_CONST0, X1
VMULOUB X0, RESHUFFLE_CONST0, X2
VADDUHM X1, X2, X0
// PMADDWD
// (same step on halfwords with RESHUFFLE_CONST1, giving word sums.)
VMULEUH X0, RESHUFFLE_CONST1, X1
VMULOUH X0, RESHUFFLE_CONST1, X2
VADDUWM X1, X2, X0

// Permute the bytes into output order and store the whole 16-byte vector;
// only 12 of those bytes are accounted for by the dst offset below.
VPERM X0, X0, RESHUFFLE_MASK, X0
STXVD2X X0, (R4)(R8)

// 16 input bytes consumed, 12 output bytes produced.
ADD $-16, R6
ADD $16, R7
ADD $12, R8
// Keep going only while at least 24 input bytes remain.
CMP R6, $24
BGE loop

done:
// Return the number of unconsumed src bytes.
MOVD R6, ret+48(FP)
RET

//func decodeUrlAsm(dst, src []byte) int
//
// URL-safe alphabet decoder. Decodes 16-byte input chunks, writing 12 decoded
// bytes per chunk, and returns the number of src bytes NOT consumed.
//
// Register use:
//   R4 = dst base, R5 = src base, R6 = src bytes remaining (the return value)
//   R7 = src offset, R8 = constant-table base during setup, then dst offset
//   R9 = scratch offset into decode_const
//
// The first chunk is processed before any length check, so the caller must
// pass at least 24 bytes of src. Each chunk is stored as a full 16-byte vector
// while the dst offset advances by only 12, so dst needs 4 bytes of slack
// after the last decoded byte.
// NOTE(review): VSRW(...) and VAND(...) are written as macro invocations; their
// definitions are not visible in this part of the file.
TEXT ·decodeUrlAsm(SB),NOSPLIT,$0
MOVD dst_base+0(FP), R4
MOVD src_base+24(FP), R5
MOVD src_len+32(FP), R6

// Load constants
#ifdef GOARCH_ppc64le
// Byte-reversal permute, taken from the first 16 bytes of base64_const.
MOVD $base64_const<>(SB), R8
LXVD2X (R8), REV_BYTES
#endif
VSPLTISB $0, ZERO
VSPLTISB $0x4, FOUR
VSPLTISB $0x0F, NIBBLE_MASK
// The URL-alphabet tables sit at offsets 0x40-0x70 of decode_const.
MOVD $decode_const<>(SB), R8
MOVD $0x40, R9
LXVD2X (R8)(R9), LUT_HI
MOVD $0x50, R9
LXVD2X (R8)(R9), LUT_LO
MOVD $0x60, R9
LXVD2X (R8)(R9), DECODE_END
MOVD $0x70, R9
LXVD2X (R8)(R9), LUT_ROLL
MOVD $0x80, R9
LXVD2X (R8)(R9), RESHUFFLE_CONST0
MOVD $0x90, R9
LXVD2X (R8)(R9), RESHUFFLE_CONST1
// Pick the final permute mask that matches the target byte order.
#ifdef GOARCH_ppc64le
MOVD $0xA0, R9
#else
MOVD $0xB0, R9
#endif
LXVD2X (R8)(R9), RESHUFFLE_MASK

// R7 = src offset, R8 = dst offset, both starting at 0.
MOVD $0, R7
MOVD R7, R8
loop:
// load data
LXVD2X (R5)(R7), X0
#ifdef GOARCH_ppc64le
VPERM X0, X0, REV_BYTES, X0
#endif
// validate input
// X1 = high nibble and X2 = low nibble of every input byte. Each indexes its
// table, and the AND of the two results must be zero for a valid byte.
VSRW(X0, FOUR, X1)
VAND(X1, NIBBLE_MASK, X1) // high nibble
VAND(X0, NIBBLE_MASK, X2)
VPERM LUT_HI, LUT_HI, X1, X3
VPERM LUT_LO, LUT_LO, X2, X2
VAND(X3, X2, X2)
// Leave the loop unless every lane compared equal to zero. R6 still counts
// this chunk, so the caller re-decodes it on the generic path.
VCMPEQUBCC X2, ZERO, X3
BGE CR6, done

// translate
// Bytes greater than DECODE_END (0x5E) yield 0xFF here; subtracting that
// raises their roll index by one relative to the plain high nibble.
VCMPGTUB X0, DECODE_END, X2
VSUBUBM X1, X2, X1

// Look up the per-byte offset and add it to the input byte.
VPERM LUT_ROLL, LUT_ROLL, X1, X1
VADDUBM X0, X1, X0

// PMADDUBSW
// (x86 mnemonic kept as a label: multiply even and odd bytes by
// RESHUFFLE_CONST0 and add the halfword products.)
VMULEUB X0, RESHUFFLE_CONST0, X1
VMULOUB X0, RESHUFFLE_CONST0, X2
VADDUHM X1, X2, X0
// PMADDWD
// (same step on halfwords with RESHUFFLE_CONST1, giving word sums.)
VMULEUH X0, RESHUFFLE_CONST1, X1
VMULOUH X0, RESHUFFLE_CONST1, X2
VADDUWM X1, X2, X0

// Permute the bytes into output order and store the whole 16-byte vector;
// only 12 of those bytes are accounted for by the dst offset below.
VPERM X0, X0, RESHUFFLE_MASK, X0
STXVD2X X0, (R4)(R8)

// 16 input bytes consumed, 12 output bytes produced.
ADD $-16, R6
ADD $16, R7
ADD $12, R8
// Keep going only while at least 24 input bytes remain.
CMP R6, $24
BGE loop

done:
// Return the number of unconsumed src bytes.
MOVD R6, ret+48(FP)
RET
84 changes: 84 additions & 0 deletions base64_ppc64x_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,87 @@ func TestStdEncodeAsm(t *testing.T) {

}
}

// TestStdDecodeSIMD calls decodeStdAsm directly on standard-alphabet input.
//
// The routine stops once fewer than 24 input bytes remain, so each pair lists
// only the bytes it is expected to decode, not the decoding of the whole
// encoded string.
func TestStdDecodeSIMD(t *testing.T) {
	// The assembly stores a full 16-byte vector for every 12 decoded bytes,
	// so the destination needs this much room past the last decoded byte.
	const storeSlack = 4

	pairs := []testpair{
		{"abcdefghijkl", "YWJjZGVmZ2hpamtsYWJjZGVmZ2hpamts"},
		{"\x2b\xf7\xcc\x27\x01\xfe\x43\x97\xb4\x9e\xbe\xed", "K/fMJwH+Q5e0nr7tK/fMJwH+Q5e0nr7t"},
		{"abcdefghijklabcdefghijklabcdefghijkl", "YWJjZGVmZ2hpamtsYWJjZGVmZ2hpamtsYWJjZGVmZ2hpamtsYWJjZGVmZ2hpamts"},
	}
	for _, p := range pairs {
		expected := []byte(p.decoded)
		src := []byte(p.encoded)
		dst := make([]byte, len(expected)+storeSlack)

		// The return value is the number of source bytes left undecoded.
		ret := decodeStdAsm(dst, src)
		if ret == len(src) {
			t.Errorf("decodeStdAsm(%q) consumed no input (returned %d)", p.encoded, ret)
		}
		if got := dst[:len(expected)]; !bytes.Equal(got, expected) {
			t.Errorf("got %x, expected %x", got, expected)
		}
	}
}

// TestURLEncodeSIMD calls encodeAsm directly with the URL-safe lookup table
// and checks both the returned count and the encoded bytes.
func TestURLEncodeSIMD(t *testing.T) {
	cases := []testpair{
		{"!?$*&()'-=@~0000", "IT8kKiYoKSctPUB-"},
		{"\x2b\xf7\xcc\x27\x01\xfe\x43\x97\xb4\x9e\xbe\xed\x5a\xcc\x70\x90", "K_fMJwH-Q5e0nr7t"},
		{"!?$*&()'-=@~!?$*&()'-=@~0000", "IT8kKiYoKSctPUB-IT8kKiYoKSctPUB-"},
	}
	for i := range cases {
		want := []byte(cases[i].encoded)
		out := make([]byte, len(want))

		n := encodeAsm(out, []byte(cases[i].decoded), &encodeURLLut)
		if n != len(want) {
			t.Errorf("should return %v", len(want))
		}
		if !bytes.Equal(out, want) {
			t.Errorf("got %v", string(out))
		}
	}
}

// TestUrlDecodeSIMD calls decodeUrlAsm directly on URL-safe alphabet input.
//
// The routine stops once fewer than 24 input bytes remain, so each pair lists
// only the bytes it is expected to decode, not the decoding of the whole
// encoded string.
func TestUrlDecodeSIMD(t *testing.T) {
	// The assembly stores a full 16-byte vector for every 12 decoded bytes,
	// so the destination needs this much room past the last decoded byte.
	const storeSlack = 4

	pairs := []testpair{
		{"!?$*&()'-=@~", "IT8kKiYoKSctPUB-IT8kKiYoKSctPUB-"},
		{"\x2b\xf7\xcc\x27\x01\xfe\x43\x97\xb4\x9e\xbe\xed", "K_fMJwH-Q5e0nr7tK_fMJwH-Q5e0nr7t"},
		{"!?$*&()'-=@~!?$*&()'-=@~!?$*&()'-=@~", "IT8kKiYoKSctPUB-IT8kKiYoKSctPUB-IT8kKiYoKSctPUB-IT8kKiYoKSctPUB-"},
	}
	for _, p := range pairs {
		expected := []byte(p.decoded)
		src := []byte(p.encoded)
		dst := make([]byte, len(expected)+storeSlack)

		// The return value is the number of source bytes left undecoded.
		ret := decodeUrlAsm(dst, src)
		if ret == len(src) {
			t.Errorf("decodeUrlAsm(%q) consumed no input (returned %d)", p.encoded, ret)
		}
		if got := dst[:len(expected)]; !bytes.Equal(got, expected) {
			t.Errorf("got %x, expected %x", got, expected)
		}
	}
}


// BenchmarkEncode measures StdEncoding.Encode on an 8 KiB zero-filled input.
func BenchmarkEncode(b *testing.B) {
	const inputSize = 8192

	src := make([]byte, inputSize)
	out := make([]byte, StdEncoding.EncodedLen(inputSize))
	b.SetBytes(int64(len(src)))
	for remaining := b.N; remaining > 0; remaining-- {
		StdEncoding.Encode(out, src)
	}
}

// BenchmarkDecode measures StdEncoding.Decode on the encoding of an 8 KiB
// zero-filled buffer. A decode error aborts the benchmark, so a broken decoder
// cannot be timed as if it had succeeded.
func BenchmarkDecode(b *testing.B) {
	data := []byte(StdEncoding.EncodeToString(make([]byte, 8192)))
	dbuf := make([]byte, StdEncoding.DecodedLen(len(data)))
	b.SetBytes(int64(len(data)))
	b.ResetTimer() // keep the input preparation out of the measurement
	for i := 0; i < b.N; i++ {
		if _, err := StdEncoding.Decode(dbuf, data); err != nil {
			b.Fatal(err)
		}
	}
}

0 comments on commit 72efebc

Please sign in to comment.