diff --git a/cmd/ksuid/main.go b/cmd/ksuid/main.go
index f965cf1..833d54d 100644
--- a/cmd/ksuid/main.go
+++ b/cmd/ksuid/main.go
@@ -11,7 +11,7 @@ import (
 	"text/template"
 	"time"
 
-	"github.com/segmentio/ksuid"
+	"github.com/signoz/ksuid"
 )
 
 var (
@@ -128,7 +128,7 @@ func printTemplate(id ksuid.KSUID) {
 		String    string
 		Raw       string
 		Time      time.Time
-		Timestamp uint32
+		Timestamp uint64
 		Payload   string
 	}{
 		String:    id.String(),
diff --git a/go.mod b/go.mod
index 269c1aa..f853f32 100644
--- a/go.mod
+++ b/go.mod
@@ -1,3 +1,3 @@
-module github.com/segmentio/ksuid
+module github.com/signoz/ksuid
 
 go 1.12
diff --git a/ksuid.go b/ksuid.go
index 79bbe56..85ea330 100644
--- a/ksuid.go
+++ b/ksuid.go
@@ -15,14 +15,14 @@ import (
 const (
-	// KSUID's epoch starts more recently so that the 32-bit number space gives a
-	// significantly higher useful lifetime of around 136 years from March 2017.
-	// This number (14e8) was picked to be easy to remember.
-	epochStamp int64 = 1400000000
+	// KSUID's epoch starts more recently than the Unix epoch. Timestamps are
+	// stored as 64-bit nanosecond counts relative to it.
+	// This number (14e8 seconds, expressed in nanoseconds) was picked to be easy to remember.
+	epochStamp int64 = 1400000000000000000
 
-	// Timestamp is a uint32
-	timestampLengthInBytes = 4
+	// Timestamp is a uint64
+	timestampLengthInBytes = 8
 
-	// Payload is 16-bytes
-	payloadLengthInBytes = 16
+	// Payload is 12-bytes
+	payloadLengthInBytes = 12
 
 	// KSUIDs are 20 bytes when binary encoded
 	byteLength = timestampLengthInBytes + payloadLengthInBytes
@@ -38,8 +38,9 @@ const (
 )
 
 // KSUIDs are 20 bytes:
-//  00-03 byte: uint32 BE UTC timestamp with custom epoch
-//  04-19 byte: random "payload"
+//
+//	00-07 byte: uint64 BE UTC timestamp, nanoseconds since the custom epoch
+//	08-19 byte: random "payload"
 type KSUID [byteLength]byte
 
 var (
@@ -71,8 +72,8 @@ func (i KSUID) Time() time.Time {
 
 // The timestamp portion of the ID as a bare integer which is uncorrected
 // for KSUID's special epoch.
-func (i KSUID) Timestamp() uint32 {
-	return binary.BigEndian.Uint32(i[:timestampLengthInBytes])
+func (i KSUID) Timestamp() uint64 {
+	return binary.BigEndian.Uint64(i[:timestampLengthInBytes])
 }
 
-// The 16-byte random payload without the timestamp
+// The 12-byte random payload without the timestamp
@@ -201,12 +202,12 @@ func ParseOrNil(s string) KSUID {
 	return ksuid
 }
 
-func timeToCorrectedUTCTimestamp(t time.Time) uint32 {
-	return uint32(t.Unix() - epochStamp)
+func timeToCorrectedUTCTimestamp(t time.Time) uint64 {
+	return uint64(t.UnixNano() - epochStamp)
 }
 
-func correctedUTCTimestampToTime(ts uint32) time.Time {
-	return time.Unix(int64(ts)+epochStamp, 0)
+func correctedUTCTimestampToTime(ts uint64) time.Time {
+	return time.Unix(0, int64(ts)+epochStamp)
 }
 
 // Generates a new KSUID. In the strange case that random bytes
@@ -241,7 +242,7 @@ func NewRandomWithTime(t time.Time) (ksuid KSUID, err error) {
 	}
 
 	ts := timeToCorrectedUTCTimestamp(t)
-	binary.BigEndian.PutUint32(ksuid[:timestampLengthInBytes], ts)
+	binary.BigEndian.PutUint64(ksuid[:timestampLengthInBytes], ts)
 	return
 }
 
@@ -254,7 +255,7 @@ func FromParts(t time.Time, payload []byte) (KSUID, error) {
 
 	var ksuid KSUID
 	ts := timeToCorrectedUTCTimestamp(t)
-	binary.BigEndian.PutUint32(ksuid[:timestampLengthInBytes], ts)
+	binary.BigEndian.PutUint64(ksuid[:timestampLengthInBytes], ts)
 
 	copy(ksuid[timestampLengthInBytes:], payload)
 
@@ -353,11 +354,11 @@ func quickSort(a []KSUID, lo int, hi int) {
 
 // Next returns the next KSUID after id.
 func (id KSUID) Next() KSUID {
-	zero := makeUint128(0, 0)
+	zero := makeUint96(0, 0)
 
 	t := id.Timestamp()
-	u := uint128Payload(id)
-	v := add128(u, makeUint128(0, 1))
+	u := uint96Payload(id)
+	v := add96(u, makeUint96(0, 1))
 
 	if v == zero { // overflow
 		t++
@@ -368,11 +369,11 @@ func (id KSUID) Next() KSUID {
 
-// Prev returns the previoud KSUID before id.
+// Prev returns the previous KSUID before id.
 func (id KSUID) Prev() KSUID {
-	max := makeUint128(math.MaxUint64, math.MaxUint64)
+	max := makeUint96(math.MaxUint32, math.MaxUint64)
 
 	t := id.Timestamp()
-	u := uint128Payload(id)
-	v := sub128(u, makeUint128(0, 1))
+	u := uint96Payload(id)
+	v := sub96(u, makeUint96(0, 1))
 
 	if v == max { // overflow
 		t--
diff --git a/ksuid_test.go b/ksuid_test.go
index 3620187..e38fdcc 100644
--- a/ksuid_test.go
+++ b/ksuid_test.go
@@ -7,6 +7,7 @@ import (
 	"fmt"
 	"sort"
 	"strings"
+	"sync"
 	"testing"
 	"time"
 )
@@ -307,9 +308,9 @@ func TestPrevNext(t *testing.T) {
 func TestGetTimestamp(t *testing.T) {
 	nowTime := time.Now()
 	x, _ := NewRandomWithTime(nowTime)
 	xTime := int64(x.Timestamp())
-	unix := nowTime.Unix()
-	if xTime != unix - epochStamp {
+	unix := nowTime.UnixNano()
+	if xTime != unix-epochStamp {
 		t.Fatal(xTime, "!=", unix)
 	}
 }
@@ -387,3 +388,73 @@ func BenchmarkNew(b *testing.B) {
 		}
 	})
 }
+
+// TestTimeMonotonicity verifies timestamps are monotonically increasing
+func TestTimeMonotonicity(t *testing.T) {
+	count := 10000
+	ids := make([]KSUID, count)
+
+	for i := 0; i < count; i++ {
+		ids[i] = New()
+	}
+
+	// Verify timestamps are monotonic
+	for i := 1; i < count; i++ {
+		if ids[i].Time().Before(ids[i-1].Time()) {
+			t.Errorf("Time monotonicity violated at index %d", i)
+		}
+	}
+}
+
+// TestConcurrentUniqueness verifies no collisions in concurrent generation
+func TestConcurrentUniqueness(t *testing.T) {
+	count := 100000
+	ids := make([]KSUID, count)
+	var wg sync.WaitGroup
+
+	for i := 0; i < count; i++ {
+		wg.Add(1)
+		go func(index int) {
+			defer wg.Done()
+			ids[index] = New()
+		}(i)
+	}
+	wg.Wait()
+
+	// Check for duplicates
+	seen := make(map[KSUID]bool)
+	for _, id := range ids {
+		if seen[id] {
+			t.Error("Duplicate KSUID found")
+		}
+		seen[id] = true
+	}
+}
+
+// BenchmarkCollisionProbability generates many IDs in same nanosecond
+func BenchmarkCollisionProbability(b *testing.B) {
+	b.StopTimer()
+	ids := make([]KSUID, b.N)
+
+	// Force same timestamp for all IDs
+	timestamp := time.Now()
+	b.StartTimer()
+
+	for i := 0; i < b.N; i++ {
+		ids[i], _ = NewRandomWithTime(timestamp)
+	}
+	b.StopTimer() // keep the collision bookkeeping below out of the measurement
+
+	// Check for collisions
+	seen := make(map[KSUID]bool)
+	collisions := 0
+
+	for _, id := range ids {
+		if seen[id] {
+			collisions++
+		}
+		seen[id] = true
+	}
+
+	b.ReportMetric(float64(collisions), "collisions")
+}
diff --git a/rand.go b/rand.go
index 66edbd4..28dc831 100644
--- a/rand.go
+++ b/rand.go
@@ -49,7 +49,8 @@ type randSourceReader struct {
 
 func (r *randSourceReader) Read(b []byte) (int, error) {
-	// optimized for generating 16 bytes payloads
-	binary.LittleEndian.PutUint64(b[:8], r.source.Uint64())
-	binary.LittleEndian.PutUint64(b[8:], r.source.Uint64())
-	return 16, nil
+	// optimized for generating 12 bytes payloads (4 + 8 bytes are written)
+	val := r.source.Uint64()
+	binary.LittleEndian.PutUint32(b[:4], uint32(val))       // Use lower 32 bits
+	binary.LittleEndian.PutUint64(b[4:], r.source.Uint64()) // Generate new 64 bits
+	return 12, nil
 }
diff --git a/set.go b/set.go
index a6b0e65..ef7ac11 100644
--- a/set.go
+++ b/set.go
@@ -69,7 +69,7 @@ func AppendCompressed(set []byte, ids ...KSUID) CompressedSet {
 		if !IsSorted(ids) {
 			Sort(ids)
 		}
-		one := makeUint128(0, 1)
+		one := makeUint96(0, 1)
 
 		// The first KSUID is always written to the set, this is the starting
 		// point for all deltas.
@@ -78,7 +78,7 @@ func AppendCompressed(set []byte, ids ...KSUID) CompressedSet {
 
 		timestamp := ids[0].Timestamp()
 		lastKSUID := ids[0]
-		lastValue := uint128Payload(ids[0])
+		lastValue := uint96Payload(ids[0])
 
 		for i := 1; i != len(ids); i++ {
 			id := ids[i]
@@ -88,25 +88,25 @@ func AppendCompressed(set []byte, ids ...KSUID) CompressedSet {
 			}
 
 			t := id.Timestamp()
-			v := uint128Payload(id)
+			v := uint96Payload(id)
 
 			if t != timestamp {
 				d := t - timestamp
-				n := varintLength32(d)
+				n := varintLength64(d)
 
 				set = append(set, timeDelta|byte(n))
-				set = appendVarint32(set, d, n)
+				set = appendVarint64(set, d, n)
 				set = append(set, id[timestampLengthInBytes:]...)
 
 				timestamp = t
 			} else {
-				d := sub128(v, lastValue)
+				d := sub96(v, lastValue)
 
 				if d != one {
-					n := varintLength128(d)
+					n := varintLength96(d)
 
 					set = append(set, payloadDelta|byte(n))
-					set = appendVarint128(set, d, n)
+					set = appendVarint96(set, d, n)
 				} else {
 					l, c := rangeLength(ids[i+1:], t, id, v)
 					m := uint64(l + 1)
@@ -117,7 +117,7 @@ func AppendCompressed(set []byte, ids ...KSUID) CompressedSet {
 
 					i += c
 					id = ids[i]
-					v = uint128Payload(id)
+					v = uint96Payload(id)
 				}
 			}
 
@@ -128,8 +128,8 @@ func AppendCompressed(set []byte, ids ...KSUID) CompressedSet {
 	return CompressedSet(set)
 }
 
-func rangeLength(ids []KSUID, timestamp uint32, lastKSUID KSUID, lastValue uint128) (length int, count int) {
-	one := makeUint128(0, 1)
+func rangeLength(ids []KSUID, timestamp uint64, lastKSUID KSUID, lastValue uint96) (length int, count int) {
+	one := makeUint96(0, 1)
 
 	for i := range ids {
 		id := ids[i]
@@ -143,9 +143,9 @@ func rangeLength(ids []KSUID, timestamp uint32, lastKSUID KSUID, lastValue uint1
 			return
 		}
 
-		v := uint128Payload(id)
+		v := uint96Payload(id)
 
-		if sub128(v, lastValue) != one {
+		if sub96(v, lastValue) != one {
 			count = i
 			return
 		}
@@ -164,6 +164,11 @@ func appendVarint128(b []byte, v uint128, n int) []byte {
 	return append(b, c[len(c)-n:]...)
 }
 
+func appendVarint96(b []byte, v uint96, n int) []byte {
+	c := v.bytes()
+	return append(b, c[len(c)-n:]...)
+}
+
 func appendVarint64(b []byte, v uint64, n int) []byte {
 	c := [8]byte{}
 	binary.BigEndian.PutUint64(c[:], v)
@@ -182,6 +187,12 @@ func varint128(b []byte) uint128 {
 	return makeUint128FromPayload(a[:])
 }
 
+func varint96(b []byte) uint96 {
+	a := [12]byte{}
+	copy(a[12-len(b):], b)
+	return makeUint96FromPayload(a[:])
+}
+
 func varint64(b []byte) uint64 {
 	a := [8]byte{}
 	copy(a[8-len(b):], b)
@@ -201,6 +212,16 @@ func varintLength128(v uint128) int {
 	return varintLength64(v[0])
 }
 
+func varintLength96(v uint96) int {
+	if v[2] != 0 {
+		return 8 + varintLength32(v[2])
+	}
+	if v[1] != 0 {
+		return 4 + varintLength32(v[1])
+	}
+	return varintLength32(v[0])
+}
+
 func varintLength64(v uint64) int {
 	switch {
 	case (v & 0xFFFFFFFFFFFFFF00) == 0:
@@ -263,15 +284,15 @@ type CompressedSetIter struct {
 	offset  int
 
 	seqlength uint64
-	timestamp uint32
-	lastValue uint128
+	timestamp uint64
+	lastValue uint96
 }
 
-// Next moves the iterator forward, returning true if there a KSUID was found,
-// or false if the iterator as reached the end of the set it was created from.
+// Next moves the iterator forward, returning true if a KSUID was found,
+// or false if the iterator has reached the end of the set it was created from.
 func (it *CompressedSetIter) Next() bool {
 	if it.seqlength != 0 {
-		value := incr128(it.lastValue)
+		value := incr96(it.lastValue)
 		it.KSUID = value.ksuid(it.timestamp)
 		it.seqlength--
 		it.lastValue = value
@@ -298,27 +319,27 @@ func (it *CompressedSetIter) Next() bool {
 
 		it.offset = off1
 		it.timestamp = it.KSUID.Timestamp()
-		it.lastValue = uint128Payload(it.KSUID)
+		it.lastValue = uint96Payload(it.KSUID)
 
 	case timeDelta:
 		off0 := it.offset
 		off1 := off0 + cnt
 		off2 := off1 + payloadLengthInBytes
 
-		it.timestamp += varint32(it.content[off0:off1])
+		it.timestamp += varint64(it.content[off0:off1])
 
-		binary.BigEndian.PutUint32(it.KSUID[:timestampLengthInBytes], it.timestamp)
+		binary.BigEndian.PutUint64(it.KSUID[:timestampLengthInBytes], it.timestamp)
 		copy(it.KSUID[timestampLengthInBytes:], it.content[off1:off2])
 
 		it.offset = off2
-		it.lastValue = uint128Payload(it.KSUID)
+		it.lastValue = uint96Payload(it.KSUID)
 
 	case payloadDelta:
 		off0 := it.offset
 		off1 := off0 + cnt
 
-		delta := varint128(it.content[off0:off1])
-		value := add128(it.lastValue, delta)
+		delta := varint96(it.content[off0:off1])
+		value := add96(it.lastValue, delta)
 
 		it.KSUID = value.ksuid(it.timestamp)
 		it.offset = off1
@@ -328,7 +349,7 @@ func (it *CompressedSetIter) Next() bool {
 		off0 := it.offset
 		off1 := off0 + cnt
 
-		value := incr128(it.lastValue)
+		value := incr96(it.lastValue)
 		it.KSUID = value.ksuid(it.timestamp)
 		it.seqlength = varint64(it.content[off0:off1])
 		it.offset = off1
diff --git a/uint96.go b/uint96.go
new file mode 100644
index 0000000..9b3e05d
--- /dev/null
+++ b/uint96.go
@@ -0,0 +1,103 @@
+package ksuid
+
+import (
+	"encoding/binary"
+	"fmt"
+	"math/bits"
+)
+
+// uint96 represents an unsigned 96 bits little endian integer.
+//
+// There are two different endian considerations here:
+// the internal array structure is little-endian (lowest bits in lowest index),
+// while the external byte serialization is big-endian (highest bits in lowest
+// byte address).
+type uint96 [3]uint32 // [0] holds low 32 bits, [1] holds middle 32 bits, [2] holds high 32 bits
+
+func uint96Payload(ksuid KSUID) uint96 {
+	return makeUint96FromPayload(ksuid[timestampLengthInBytes:])
+}
+
+// makeUint96 assembles a uint96 from a high 32-bit word and a low 64-bit word:
+// uint32(low) gives the lowest 32 bits and uint32(low >> 32) the middle 32 bits.
+func makeUint96(high uint32, low uint64) uint96 {
+	return uint96{
+		uint32(low),       // lowest 32 bits
+		uint32(low >> 32), // middle 32 bits
+		high,              // highest 32 bits
+	}
+}
+
+func makeUint96FromPayload(payload []byte) uint96 {
+	return uint96{
+		binary.BigEndian.Uint32(payload[8:]),  // low (4 bytes)
+		binary.BigEndian.Uint32(payload[4:8]), // middle (4 bytes)
+		binary.BigEndian.Uint32(payload[:4]),  // high (4 bytes)
+	}
+}
+
+func (v uint96) ksuid(timestamp uint64) (out KSUID) {
+	binary.BigEndian.PutUint64(out[:8], timestamp) // time (8 bytes)
+	binary.BigEndian.PutUint32(out[8:12], v[2])    // high (4 bytes)
+	binary.BigEndian.PutUint32(out[12:16], v[1])   // middle (4 bytes)
+	binary.BigEndian.PutUint32(out[16:], v[0])     // low (4 bytes)
+	return
+}
+
+// bytes returns v in its external big-endian form (highest bits in lowest byte address).
+func (v uint96) bytes() (out [12]byte) {
+	binary.BigEndian.PutUint32(out[:4], v[2])
+	binary.BigEndian.PutUint32(out[4:8], v[1])
+	binary.BigEndian.PutUint32(out[8:], v[0])
+	return
+}
+
+func (v uint96) String() string {
+	return fmt.Sprintf("0x%08X%08X%08X", v[2], v[1], v[0])
+}
+
+func cmp96(x, y uint96) int {
+	if x[2] < y[2] {
+		return -1
+	}
+	if x[2] > y[2] {
+		return 1
+	}
+	if x[1] < y[1] {
+		return -1
+	}
+	if x[1] > y[1] {
+		return 1
+	}
+	if x[0] < y[0] {
+		return -1
+	}
+	if x[0] > y[0] {
+		return 1
+	}
+	return 0
+}
+
+func add96(x, y uint96) (z uint96) {
+	var c uint32
+	z[0], c = bits.Add32(x[0], y[0], 0)
+	z[1], c = bits.Add32(x[1], y[1], c)
+	z[2], _ = bits.Add32(x[2], y[2], c)
+	return
+}
+
+func sub96(x, y uint96) (z uint96) {
+	var b uint32
+	z[0], b = bits.Sub32(x[0], y[0], 0)
+	z[1], b = bits.Sub32(x[1], y[1], b)
+	z[2], _ = bits.Sub32(x[2], y[2], b)
+	return
+}
+
+func incr96(x uint96) (z uint96) {
+	var c uint32
+	z[0], c = bits.Add32(x[0], 1, 0)
+	z[1], c = bits.Add32(x[1], c, 0)
+	z[2], _ = bits.Add32(x[2], c, 0)
+	return
+}
diff --git a/uint96_test.go b/uint96_test.go
new file mode 100644
index 0000000..d2b6113
--- /dev/null
+++ b/uint96_test.go
@@ -0,0 +1,286 @@
+package ksuid
+
+import (
+	"fmt"
+	"testing"
+)
+
+func TestMakeUint96(t *testing.T) {
+	tests := []struct {
+		name     string
+		high     uint32
+		low      uint64
+		expected uint96
+	}{
+		{
+			name:     "zero values",
+			high:     0,
+			low:      0,
+			expected: uint96{0, 0, 0},
+		},
+		{
+			name:     "only low bits set",
+			high:     0,
+			low:      0x123456789A,
+			expected: uint96{0x3456789A, 0x12, 0x0},
+		},
+		{
+			name:     "only high bits set",
+			high:     0xABCDEF12,
+			low:      0,
+			expected: uint96{0, 0, 0xABCDEF12},
+		},
+		{
+			name:     "all parts set",
+			high:     0x12345678,
+			low:      0xABCDEF0123456789,
+			expected: uint96{0x23456789, 0xABCDEF01, 0x12345678},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			result := makeUint96(test.high, test.low)
+			if result != test.expected {
+				t.Errorf("makeUint96(%#x, %#x) = %v, want %v",
+					test.high, test.low, result, test.expected)
+			}
+		})
+	}
+}
+
+func TestCmp96(t *testing.T) {
+	tests := []struct {
+		x      uint96
+		y      uint96
+		result int
+	}{
+		{
+			x:      makeUint96(0, 0),
+			y:      makeUint96(0, 0),
+			result: 0,
+		},
+		{
+			x:      makeUint96(0, 1),
+			y:      makeUint96(0, 0),
+			result: +1,
+		},
+		{
+			x:      makeUint96(0, 0),
+			y:      makeUint96(0, 1),
+			result: -1,
+		},
+		{
+			x:      makeUint96(1, 0),
+			y:      makeUint96(0, 1),
+			result: +1,
+		},
+		{
+			x:      makeUint96(0, 1),
+			y:      makeUint96(1, 0),
+			result: -1,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(fmt.Sprintf("cmp96(%s,%s)", test.x, test.y), func(t *testing.T) {
+			if result := cmp96(test.x, test.y); result != test.result {
+				t.Error(result, "!=", test.result)
+			}
+		})
+	}
+}
+
+func TestAdd96(t *testing.T) {
+	tests := []struct {
+		name   string
+		x      uint96
+		y      uint96
+		result uint96
+	}{
+		{
+			name:   "zero plus zero equals zero",
+			x:      makeUint96(0, 0),
+			y:      makeUint96(0, 0),
+			result: makeUint96(0, 0),
+		},
+		{
+			name:   "one plus zero equals one",
+			x:      makeUint96(0, 1),
+			y:      makeUint96(0, 0),
+			result: makeUint96(0, 1),
+		},
+		{
+			name:   "zero plus one equals one",
+			x:      makeUint96(0, 0),
+			y:      makeUint96(0, 1),
+			result: makeUint96(0, 1),
+		},
+		{
+			name:   "high one plus low one",
+			x:      makeUint96(1, 0),
+			y:      makeUint96(0, 1),
+			result: makeUint96(1, 1),
+		},
+		{
+			name:   "low one plus high one",
+			x:      makeUint96(0, 1),
+			y:      makeUint96(1, 0),
+			result: makeUint96(1, 1),
+		},
+		{
+			// x:   0x00000000_00000000_FFFFFFFF
+			// y:   0x00000000_00000000_00000001
+			// ----------------------------------------
+			// sum: 0x00000000_00000001_00000000
+			name:   "carry from low to middle word",
+			x:      uint96{0xFFFFFFFF, 0, 0},
+			y:      uint96{1, 0, 0},
+			result: uint96{0, 1, 0},
+		},
+		{
+			name:   "overflow wraps to zero",
+			x:      uint96{0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
+			y:      uint96{1, 0, 0},
+			result: uint96{0, 0, 0},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			if result := add96(test.x, test.y); result != test.result {
+				t.Error(result, "!=", test.result)
+			}
+		})
+	}
+}
+
+func TestSub96(t *testing.T) {
+	tests := []struct {
+		name   string
+		x      uint96
+		y      uint96
+		result uint96
+	}{
+		{
+			name:   "zero minus zero equals zero",
+			x:      makeUint96(0, 0),
+			y:      makeUint96(0, 0),
+			result: makeUint96(0, 0),
+		},
+		{
+			name:   "one minus zero equals one",
+			x:      makeUint96(0, 1),
+			y:      makeUint96(0, 0),
+			result: makeUint96(0, 1),
+		},
+		{
+			name:   "zero minus one equals max value",
+			x:      makeUint96(0, 0),
+			y:      makeUint96(0, 1),
+			result: uint96{0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
+		},
+		{
+			// x:    0x00000000_00000001_00000000
+			// y:    0x00000000_00000000_00000001
+			// diff: 0x00000000_00000000_FFFFFFFF
+			name:   "borrow from middle word",
+			x:      uint96{0, 1, 0},
+			y:      uint96{1, 0, 0},
+			result: uint96{0xFFFFFFFF, 0, 0},
+		},
+		{
+			name:   "borrow from high word",
+			x:      uint96{0, 0, 1},
+			y:      uint96{1, 0, 0},
+			result: uint96{0xFFFFFFFF, 0xFFFFFFFF, 0},
+		},
+		{
+			name:   "chain of borrows",
+			x:      uint96{0, 0, 1},
+			y:      uint96{1, 1, 0},
+			result: uint96{0xFFFFFFFF, 0xFFFFFFFE, 0},
+		},
+		{
+			name:   "borrow across all words",
+			x:      uint96{0, 0, 1},
+			y:      uint96{1, 1, 1},
+			result: uint96{0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			if result := sub96(test.x, test.y); result != test.result {
+				t.Errorf("%s: got %v, want %v", test.name, result, test.result)
+			}
+		})
+	}
+}
+
+func TestIncr96(t *testing.T) {
+	tests := []struct {
+		name   string
+		x      uint96
+		result uint96
+	}{
+		{
+			name:   "zero plus one equals one",
+			x:      makeUint96(0, 0),
+			result: makeUint96(0, 1),
+		},
+		{
+			name:   "carry from low to middle word",
+			x:      uint96{0xFFFFFFFF, 0, 0},
+			result: uint96{0, 1, 0},
+		},
+		{
+			name:   "overflow wraps to zero",
+			x:      uint96{0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
+			result: makeUint96(0, 0),
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(fmt.Sprintf("incr96(%s)", test.x), func(t *testing.T) {
+			if result := incr96(test.x); result != test.result {
+				t.Error(result, "!=", test.result)
+			}
+		})
+	}
+}
+
+func BenchmarkCmp96(b *testing.B) {
+	x := makeUint96(0, 0)
+	y := makeUint96(0, 0)
+
+	for i := 0; i != b.N; i++ {
+		cmp96(x, y)
+	}
+}
+
+func BenchmarkAdd96(b *testing.B) {
+	x := makeUint96(0, 0)
+	y := makeUint96(0, 0)
+
+	for i := 0; i != b.N; i++ {
+		add96(x, y)
+	}
+}
+
+func BenchmarkSub96(b *testing.B) {
+	x := makeUint96(0, 0)
+	y := makeUint96(0, 0)
+
+	for i := 0; i != b.N; i++ {
+		sub96(x, y)
+	}
+}
+
+func BenchmarkIncr96(b *testing.B) {
+	x := makeUint96(0, 0)
+
+	for i := 0; i != b.N; i++ {
+		incr96(x)
+	}
+}