Skip to content

Commit

Permalink
feat: json string column
Browse files Browse the repository at this point in the history
  • Loading branch information
SpencerTorres committed Jan 29, 2025
1 parent e0f5a9a commit 1353ef1
Show file tree
Hide file tree
Showing 11 changed files with 330 additions and 0 deletions.
3 changes: 3 additions & 0 deletions compress/_golden/data_compressed_lz4hc.hex
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
00000000 34 39 ad b3 8d 96 d2 87 bb 3b aa 1e 3f 4b 64 f5 |49.......;..?Kd.|
00000010 82 1d 00 00 00 af 00 00 00 8f 48 65 6c 6c 6f 21 |..........Hello!|
00000020 0a 48 07 00 8d 70 48 65 6c 6c 6f 21 0a |.H...pHello!.|
Binary file added compress/_golden/data_compressed_lz4hc.raw
Binary file not shown.
6 changes: 6 additions & 0 deletions proto/_golden/col_json_str.hex
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
00000000 01 00 00 00 00 00 00 00 11 7b 22 78 22 3a 20 35 |.........{"x": 5|
00000010 2c 20 22 79 22 3a 20 31 30 7d 1b 7b 22 61 22 3a |, "y": 10}.{"a":|
00000020 20 22 74 65 73 74 22 2c 20 22 62 22 3a 20 22 74 | "test", "b": "t|
00000030 65 73 74 32 22 7d 21 7b 22 61 22 3a 20 22 6f 62 |est2"}!{"a": "ob|
00000040 6a 20 74 65 73 74 22 2c 20 22 62 22 3a 20 7b 22 |j test", "b": {"|
00000050 63 22 3a 20 32 30 7d 7d |c": 20}}|
Binary file added proto/_golden/col_json_str.raw
Binary file not shown.
6 changes: 6 additions & 0 deletions proto/_golden/col_json_str_bytes.hex
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
00000000 01 00 00 00 00 00 00 00 11 7b 22 78 22 3a 20 35 |.........{"x": 5|
00000010 2c 20 22 79 22 3a 20 31 30 7d 1b 7b 22 61 22 3a |, "y": 10}.{"a":|
00000020 20 22 74 65 73 74 22 2c 20 22 62 22 3a 20 22 74 | "test", "b": "t|
00000030 65 73 74 32 22 7d 21 7b 22 61 22 3a 20 22 6f 62 |est2"}!{"a": "ob|
00000040 6a 20 74 65 73 74 22 2c 20 22 62 22 3a 20 7b 22 |j test", "b": {"|
00000050 63 22 3a 20 32 30 7d 7d |c": 20}}|
Binary file added proto/_golden/col_json_str_bytes.raw
Binary file not shown.
6 changes: 6 additions & 0 deletions proto/_golden/column_of_json_bytes.hex
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
00000000 01 00 00 00 00 00 00 00 11 7b 22 78 22 3a 20 35 |.........{"x": 5|
00000010 2c 20 22 79 22 3a 20 31 30 7d 1b 7b 22 61 22 3a |, "y": 10}.{"a":|
00000020 20 22 74 65 73 74 22 2c 20 22 62 22 3a 20 22 74 | "test", "b": "t|
00000030 65 73 74 32 22 7d 21 7b 22 61 22 3a 20 22 6f 62 |est2"}!{"a": "ob|
00000040 6a 20 74 65 73 74 22 2c 20 22 62 22 3a 20 7b 22 |j test", "b": {"|
00000050 63 22 3a 20 32 30 7d 7d |c": 20}}|
Binary file added proto/_golden/column_of_json_bytes.raw
Binary file not shown.
158 changes: 158 additions & 0 deletions proto/col_json_str.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
package proto

import (
"github.com/go-faster/errors"
)

// JSONStringSerializationVersion is the serialization version written as a
// UInt64 in front of the string data by ColJSONStr and required to match
// exactly when decoding.
const JSONStringSerializationVersion uint64 = 1

// ColJSONStr represents a JSON column whose rows are exchanged as strings.
//
// Row storage is delegated to an embedded-by-field ColStr; the column itself
// reports ColumnTypeJSON and prefixes its encoded data with
// JSONStringSerializationVersion.
//
// Use ColJSONBytes for the []byte ColumnOf implementation.
type ColJSONStr struct {
	// str holds the rows in their string form.
	str ColStr
}

// Append adds v as a new row by forwarding it to the underlying string column.
func (c *ColJSONStr) Append(v string) {
	rows := &c.str
	rows.Append(v)
}

// AppendBytes adds the byte slice v as a new row by forwarding it to the
// underlying string column.
func (c *ColJSONStr) AppendBytes(v []byte) {
	rows := &c.str
	rows.AppendBytes(v)
}

// AppendArr adds every element of v to the column by forwarding the slice to
// the underlying string column.
func (c *ColJSONStr) AppendArr(v []string) {
	rows := &c.str
	rows.AppendArr(v)
}

// Compile-time assertions for ColJSONStr.
//
// ColInput is asserted on the value type (the encoding methods use value
// receivers); the remaining interfaces are asserted on the pointer type because
// the mutating methods use pointer receivers.
var (
	_ ColInput          = ColJSONStr{}
	_ ColResult         = (*ColJSONStr)(nil)
	_ Column            = (*ColJSONStr)(nil)
	_ ColumnOf[string]  = (*ColJSONStr)(nil)
	_ Arrayable[string] = (*ColJSONStr)(nil)
)

// Type returns the column type of this column, which is always ColumnTypeJSON
// regardless of the rows it holds.
func (ColJSONStr) Type() ColumnType {
	return ColumnTypeJSON
}

// Rows reports how many rows the underlying string column holds.
func (c ColJSONStr) Rows() int {
	n := c.str.Rows()
	return n
}

// Reset clears the column by resetting the underlying string column
// (presumably keeping its allocated capacity; see ColStr.Reset).
func (c *ColJSONStr) Reset() {
	rows := &c.str
	rows.Reset()
}

// EncodeColumn encodes the JSON rows to b: first the serialization version as
// a UInt64, then the rows as encoded by the underlying string column.
func (c ColJSONStr) EncodeColumn(b *Buffer) {
	const version = JSONStringSerializationVersion

	// The version header must precede the row data.
	b.PutUInt64(version)
	c.str.EncodeColumn(b)
}

// WriteColumn writes the JSON rows to w: the serialization version is chained
// in as a buffer write, after which the underlying string column writes its
// rows.
func (c ColJSONStr) WriteColumn(w *Writer) {
	putVersion := func(b *Buffer) {
		b.PutUInt64(JSONStringSerializationVersion)
	}

	w.ChainBuffer(putVersion)
	c.str.WriteColumn(w)
}

// ForEach passes f to the underlying string column's ForEach and returns the
// error it reports, if any.
func (c ColJSONStr) ForEach(f func(i int, s string) error) error {
	if err := c.str.ForEach(f); err != nil {
		return err
	}
	return nil
}

// First returns the first row as reported by the underlying string column.
func (c ColJSONStr) First() string {
	head := c.str.First()
	return head
}

// Row returns the row at index i as a string.
func (c ColJSONStr) Row(i int) string {
	row := c.str.Row(i)
	return row
}

// RowBytes returns the row at index i as a byte slice, as produced by the
// underlying string column.
func (c ColJSONStr) RowBytes(i int) []byte {
	raw := c.str.RowBytes(i)
	return raw
}

// ForEachBytes passes f to the underlying string column's ForEachBytes and
// returns the error it reports, if any.
func (c ColJSONStr) ForEachBytes(f func(i int, b []byte) error) error {
	if err := c.str.ForEachBytes(f); err != nil {
		return err
	}
	return nil
}

// DecodeColumn decodes rows JSON rows from r.
//
// It first reads the UInt64 serialization version and rejects anything other
// than JSONStringSerializationVersion; only then are the rows decoded by the
// underlying string column. A read failure on the version is returned wrapped.
func (c *ColJSONStr) DecodeColumn(r *Reader, rows int) error {
	version, err := r.UInt64()
	switch {
	case err != nil:
		return errors.Wrap(err, "failed to read json serialization version")
	case version != JSONStringSerializationVersion:
		// Any other version means the data is not in the string form.
		return errors.Errorf("received invalid JSON string serialization version %d. Setting \"output_format_native_write_json_as_string\" must be enabled.", version)
	}

	return c.str.DecodeColumn(r, rows)
}

// LowCardinality builds a low-cardinality column by delegating to the
// underlying string column.
//
// NOTE(review): the result is created from the inner ColStr rather than from c,
// so it presumably reports and encodes as LowCardinality(String), not
// LowCardinality(JSON) — confirm whether that is intended.
func (c *ColJSONStr) LowCardinality() *ColLowCardinality[string] {
	rows := &c.str
	return rows.LowCardinality()
}

// Array is a helper that creates Array(JSON).
//
// The array wraps c itself rather than its inner string column, so the element
// column is the JSON column (Type ColumnTypeJSON) and element encoding and
// decoding go through ColJSONStr. Delegating to the inner ColStr would instead
// produce an array of plain strings. This mirrors ColJSONBytes.Array.
//
// NOTE(review): confirm that the position of the serialization-version prefix
// inside an Array matches what the server expects.
func (c *ColJSONStr) Array() *ColArr[string] {
	return &ColArr[string]{
		Data: c,
	}
}

// Nullable is a helper that creates Nullable(JSON).
//
// The nullable column wraps c itself rather than its inner string column, so
// the value column is the JSON column (Type ColumnTypeJSON) and value encoding
// and decoding go through ColJSONStr. Delegating to the inner ColStr would
// instead produce a nullable column of plain strings. This mirrors
// ColJSONBytes.Nullable.
//
// NOTE(review): confirm that the position of the serialization-version prefix
// inside a Nullable matches what the server expects.
func (c *ColJSONStr) Nullable() *ColNullable[string] {
	return &ColNullable[string]{
		Values: c,
	}
}

// ColJSONBytes is a ColJSONStr wrapper that acts as ColumnOf for []byte.
//
// It embeds ColJSONStr and overrides the row-oriented methods (Row, Append,
// AppendArr, Array, Nullable) with []byte variants; everything else is
// promoted from the embedded column.
type ColJSONBytes struct {
	ColJSONStr
}

// Row returns the row at index i as a byte slice, using the embedded column's
// RowBytes.
func (c ColJSONBytes) Row(i int) []byte {
	raw := c.ColJSONStr.RowBytes(i)
	return raw
}

// Append adds the byte slice v as a new row via the embedded column's
// AppendBytes.
func (c *ColJSONBytes) Append(v []byte) {
	c.ColJSONStr.AppendBytes(v)
}

// AppendArr appends each byte slice in v, in order, as its own row.
func (c *ColJSONBytes) AppendArr(v [][]byte) {
	for i := range v {
		c.Append(v[i])
	}
}

// Array is a helper that creates Array(JSON) whose element column is c.
func (c *ColJSONBytes) Array() *ColArr[[]byte] {
	arr := new(ColArr[[]byte])
	arr.Data = c
	return arr
}

// Nullable is a helper that creates Nullable(JSON) whose value column is c.
func (c *ColJSONBytes) Nullable() *ColNullable[[]byte] {
	nullable := new(ColNullable[[]byte])
	nullable.Values = c
	return nullable
}
150 changes: 150 additions & 0 deletions proto/col_json_str_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
package proto

import (
"bytes"
"io"
"testing"

"github.com/stretchr/testify/require"

"github.com/ClickHouse/ch-go/internal/gold"
)

// testJSONValues are the JSON documents shared by the ColJSONStr and
// ColJSONBytes tests; the golden files are generated from these exact bytes.
var testJSONValues = []string{
	`{"x": 5, "y": 10}`,
	`{"a": "test", "b": "test2"}`,
	`{"a": "obj test", "b": {"c": 20}}`,
}

// TestColJSONBytes runs the shared testColumn suite against ColJSONBytes using
// the fixture documents converted to byte slices.
func TestColJSONBytes(t *testing.T) {
	values := [][]byte{
		[]byte(testJSONValues[0]),
		[]byte(testJSONValues[1]),
		[]byte(testJSONValues[2]),
	}
	newColumn := func() ColumnOf[[]byte] {
		return new(ColJSONBytes)
	}

	testColumn(t, "json_bytes", newColumn, values...)
}

// TestColJSONStr_AppendBytes fills a column through AppendBytes, checks the
// encoded form against the golden file, and verifies that decoding it yields
// an equal column with the original rows.
func TestColJSONStr_AppendBytes(t *testing.T) {
	var data ColJSONStr
	for _, v := range testJSONValues[:3] {
		data.AppendBytes([]byte(v))
	}

	var buf Buffer
	data.EncodeColumn(&buf)

	t.Run("Golden", func(t *testing.T) {
		gold.Bytes(t, buf.Buf, "col_json_str_bytes")
	})
	t.Run("Ok", func(t *testing.T) {
		var dec ColJSONStr
		r := NewReader(bytes.NewReader(buf.Buf))

		require.NoError(t, dec.DecodeColumn(r, 3))
		require.Equal(t, data, dec)

		t.Run("ForEach", func(t *testing.T) {
			var got []string
			err := dec.ForEach(func(_ int, s string) error {
				got = append(got, s)
				return nil
			})
			require.NoError(t, err)
			require.Equal(t, testJSONValues, got)
		})
	})
}

func TestColJSONStr_EncodeColumn(t *testing.T) {
var data ColJSONStr

input := testJSONValues
rows := len(input)
for _, s := range input {
data.Append(s)
}

var buf Buffer
data.EncodeColumn(&buf)

t.Run("Golden", func(t *testing.T) {
gold.Bytes(t, buf.Buf, "col_json_str")
})
t.Run("Ok", func(t *testing.T) {
br := bytes.NewReader(buf.Buf)
r := NewReader(br)

var dec ColJSONStr
require.NoError(t, dec.DecodeColumn(r, rows))
require.Equal(t, data, dec)

t.Run("ForEach", func(t *testing.T) {
var output []string
f := func(i int, s string) error {
output = append(output, s)
return nil
}
require.NoError(t, dec.ForEach(f))
require.Equal(t, input, output)
})
})
t.Run("EOF", func(t *testing.T) {
r := NewReader(bytes.NewReader(nil))

var dec ColJSONStr
require.ErrorIs(t, dec.DecodeColumn(r, rows), io.EOF)
})
}

// BenchmarkColJSONStr_DecodeColumn measures decoding of a 1000-row column
// from a pre-encoded buffer.
func BenchmarkColJSONStr_DecodeColumn(b *testing.B) {
	const rows = 1_000
	var data ColJSONStr
	for i := 0; i < rows; i++ {
		data.Append("{\"x\": 5}")
	}

	var buf Buffer
	data.EncodeColumn(&buf)

	br := bytes.NewReader(buf.Buf)
	r := NewReader(br)

	// Decode once before timing starts; this also fails fast if the encoded
	// data cannot be decoded at all.
	var dec ColJSONStr
	if err := dec.DecodeColumn(r, rows); err != nil {
		b.Fatal(err)
	}

	b.SetBytes(int64(len(buf.Buf)))
	b.ResetTimer()
	b.ReportAllocs()

	for i := 0; i < b.N; i++ {
		// Rewind the input and clear the destination so every iteration
		// decodes the same bytes into the same column.
		br.Reset(buf.Buf)
		r.raw.Reset(br)
		dec.Reset()

		if err := dec.DecodeColumn(r, rows); err != nil {
			b.Fatal(err)
		}
	}
}

// BenchmarkColJSONStr_EncodeColumn measures encoding of a 1000-row column
// into a reused buffer.
func BenchmarkColJSONStr_EncodeColumn(b *testing.B) {
	const rows = 1_000
	var data ColJSONStr
	for i := 0; i < rows; i++ {
		data.Append("{\"x\": 5}")
	}

	// Encode once before timing to learn the encoded size for SetBytes.
	var buf Buffer
	data.EncodeColumn(&buf)

	b.SetBytes(int64(len(buf.Buf)))
	b.ResetTimer()
	b.ReportAllocs()

	for i := 0; i < b.N; i++ {
		// Reset the buffer so each iteration encodes from an empty state.
		buf.Reset()
		data.EncodeColumn(&buf)
	}
}
1 change: 1 addition & 0 deletions proto/column.go
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ const (
ColumnTypePoint ColumnType = "Point"
ColumnTypeInterval ColumnType = "Interval"
ColumnTypeNothing ColumnType = "Nothing"
ColumnTypeJSON ColumnType = "JSON"
)

// colWrap wraps Column with type t.
Expand Down

0 comments on commit 1353ef1

Please sign in to comment.