Skip to content

Commit

Permalink
Remove priority queue from RHTPQMap (#462)
Browse files Browse the repository at this point in the history
Co-authored-by: Youngteac Hong <susukang98@gmail.com>
  • Loading branch information
blurfx and hackerwins authored Feb 16, 2023
1 parent 61e6953 commit 3d9fe9d
Show file tree
Hide file tree
Showing 17 changed files with 265 additions and 560 deletions.
6 changes: 3 additions & 3 deletions api/converter/from_bytes.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import (
// BytesToObject creates an Object from the given byte array.
func BytesToObject(snapshot []byte) (*crdt.Object, error) {
if snapshot == nil {
return crdt.NewObject(crdt.NewRHTPriorityQueueMap(), time.InitialTicket), nil
return crdt.NewObject(crdt.NewElementRHT(), time.InitialTicket), nil
}

pbElem := &api.JSONElement{}
Expand Down Expand Up @@ -63,13 +63,13 @@ func fromJSONElement(pbElem *api.JSONElement) (crdt.Element, error) {
}

func fromJSONObject(pbObj *api.JSONElement_JSONObject) (*crdt.Object, error) {
members := crdt.NewRHTPriorityQueueMap()
members := crdt.NewElementRHT()
for _, pbNode := range pbObj.Nodes {
elem, err := fromJSONElement(pbNode.Element)
if err != nil {
return nil, err
}
members.SetInternal(pbNode.Key, elem)
members.Set(pbNode.Key, elem)
}

createdAt, err := fromTimeTicket(pbObj.CreatedAt)
Expand Down
2 changes: 1 addition & 1 deletion api/converter/from_pb.go
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,7 @@ func fromElement(pbElement *api.JSONElementSimple) (crdt.Element, error) {
return nil, err
}
return crdt.NewObject(
crdt.NewRHTPriorityQueueMap(),
crdt.NewElementRHT(),
createdAt,
), nil
case api.ValueType_VALUE_TYPE_JSON_ARRAY:
Expand Down
2 changes: 1 addition & 1 deletion api/converter/to_bytes.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ func toCounter(counter *crdt.Counter) (*api.JSONElement, error) {
}, nil
}

func toRHTNodes(rhtNodes []*crdt.RHTPQMapNode) ([]*api.RHTNode, error) {
func toRHTNodes(rhtNodes []*crdt.ElementRHTNode) ([]*api.RHTNode, error) {
var pbRHTNodes []*api.RHTNode
for _, rhtNode := range rhtNodes {
pbElem, err := toJSONElement(rhtNode.Element())
Expand Down
9 changes: 4 additions & 5 deletions design/data-structure.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
title: data-structure
target-version: 0.3.0
target-version: 0.3.1
---

# Data Structures
Expand Down Expand Up @@ -69,18 +69,17 @@ The code above uses `Primitive`, `Object` and `Array` in JSON-like group.
CRDT data structures are used by JSON-like group to resolve conflicts in concurrent editing.

- `RHT`(Replicated Hash Table): similar to hash table, but resolves concurrent-editing conflicts.
- `RHTPQMap`: extended `RHT` with a [priority queue](https://en.wikipedia.org/wiki/Priority_queue) to resolve conflicts for the same key. Logically added later will have higher priority([LWW, Last Writer Win](https://crdt.tech/glossary)).
- `ElementRHT`: similar to `RHT`, but has elements as values.
- `RGATreeList`: extended `RGA(Replicated Growable Array)` with an additional index tree. The index tree manages the indices of elements and provides faster access to elements at the int-based index.
- `RGATreeSplit`: extended `RGATreeList` allowing characters to be represented as blocks rather than each single character.

### Common Group

Common data structures can be used for general purposes.

- `Heap`: A priority queue. We use [max heap](https://en.wikipedia.org/wiki/Heap_(data_structure)); the last added value has the highest priority(LWW).
- `SplayTree`: A tree that moves nodes to the root by [splaying](https://en.wikipedia.org/wiki/Splay_tree#Splaying). This is effective when user frequently access the same location, such as text editing. We use `SplayTree` as an index tree to give each node a weight, and to quickly access the node based on the index.
- [`SplayTree`](https://en.wikipedia.org/wiki/Splay_tree): A tree that moves nodes to the root by splaying. This is effective when users frequently access the same location, such as in text editing. We use `SplayTree` as an index tree to give each node a weight, and to quickly access the node based on the index.
- [`LLRBTree`](https://en.wikipedia.org/wiki/Left-leaning_red%E2%80%93black_tree): A tree simpler than Red-Black Tree. Newly added `floor` method finds the node of the largest key less than or equal to the given key.
- `Trie`: A data structure that can quickly search for prefixes of sequence data such as strings. We use `Trie` to remove nested events when the contents of the `Document`' are modified at once.
- [`Trie`](https://en.wikipedia.org/wiki/Trie): A data structure that can quickly search for prefixes of sequence data such as strings. We use `Trie` to remove nested events when the contents of the `Document` are modified at once.

### Risks and Mitigation

Expand Down
Binary file modified design/media/data-structure.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
208 changes: 208 additions & 0 deletions pkg/document/crdt/element_rht.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
/*
* Copyright 2023 The Yorkie Authors. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package crdt

import (
"fmt"
"sort"
"strings"

"github.com/yorkie-team/yorkie/pkg/document/time"
)

// ElementRHTNode is a node of ElementRHT. It pairs a key with the element
// stored under that key.
type ElementRHTNode struct {
// key is the key this node was constructed with.
key string
// elem is the element held by this node. Its RemovedAt value is what
// isRemoved consults to decide whether the node is a tombstone.
elem Element
}

// newElementRHTNode builds a node that binds the given key to the given element.
func newElementRHTNode(key string, elem Element) *ElementRHTNode {
	node := new(ElementRHTNode)
	node.key = key
	node.elem = elem
	return node
}

// Remove marks the element of this node as removed at the given ticket and
// reports whether the removal was applied. Nothing is physically deleted; the
// element only receives a removal time (tombstone).
//
// The removal is skipped, and false is returned, when removedAt is nil, when
// removedAt is not after the element's creation time, or when the element
// already has a removal time that removedAt is not after.
func (n *ElementRHTNode) Remove(removedAt *time.Ticket) bool {
	if removedAt == nil || !removedAt.After(n.elem.CreatedAt()) {
		return false
	}

	// An existing tombstone wins unless the incoming ticket is later.
	if n.elem.RemovedAt() != nil && !removedAt.After(n.elem.RemovedAt()) {
		return false
	}

	return n.elem.Remove(removedAt)
}

// isRemoved reports whether the element of this node has a removal time set.
func (n *ElementRHTNode) isRemoved() bool {
	removedAt := n.elem.RemovedAt()
	return removedAt != nil
}

// Key returns the key of this node, i.e. the key the node was constructed
// with.
func (n *ElementRHTNode) Key() string {
return n.key
}

// Element returns the element of this node. The element is returned as is,
// without checking whether it has been removed.
func (n *ElementRHTNode) Element() Element {
return n.elem
}

// ElementRHT is a hashtable with logical clock(Replicated hashtable).
// It keeps two indexes over its nodes: one by key and one by the creation
// time of each node's element.
type ElementRHT struct {
// nodeMapByKey is a map with values of nodes by key. It holds at most one
// node per key; Set replaces the entry only when the key is new or the
// incoming element was created after the element currently stored.
nodeMapByKey map[string]*ElementRHTNode
// nodeMapByCreatedAt is a map with values of nodes by creation time; the
// map key is the Key() string of the element's creation ticket.
// Even if an element is removed by `set` or `delete`, it remains in
// nodeMapByCreatedAt and will be deleted physically by GC.
nodeMapByCreatedAt map[string]*ElementRHTNode
}

// NewElementRHT creates an empty ElementRHT whose two indexes are allocated
// and ready for writes.
func NewElementRHT() *ElementRHT {
	byKey := map[string]*ElementRHTNode{}
	byCreatedAt := map[string]*ElementRHTNode{}

	return &ElementRHT{
		nodeMapByKey:       byKey,
		nodeMapByCreatedAt: byCreatedAt,
	}
}

// Get returns the element stored under the given key. It returns nil when the
// key is unknown or when the element stored under it has been removed.
func (rht *ElementRHT) Get(key string) Element {
	node, found := rht.nodeMapByKey[key]
	if !found || node.isRemoved() {
		return nil
	}

	return node.elem
}

// Has reports whether a live (not removed) element exists for the given key.
func (rht *ElementRHT) Has(key string) bool {
	node, found := rht.nodeMapByKey[key]
	if !found || node == nil {
		return false
	}

	return !node.isRemoved()
}

// Set stores the given element under the given key and returns the element it
// displaced, or nil when nothing was displaced.
//
// The node currently stored under the key, if any, is asked to remove itself
// at the creation time of v; when that removal is applied, its element is the
// return value. The new node is always registered by creation time, but it
// only takes over the key when the key was unused or v was created after the
// element currently stored under the key.
func (rht *ElementRHT) Set(k string, v Element) Element {
	var removed Element

	prev, exists := rht.nodeMapByKey[k]
	if exists {
		if prev.Remove(v.CreatedAt()) {
			removed = prev.elem
		}
	}

	created := newElementRHTNode(k, v)
	rht.nodeMapByCreatedAt[v.CreatedAt().Key()] = created

	takesOver := !exists || v.CreatedAt().After(prev.elem.CreatedAt())
	if takesOver {
		rht.nodeMapByKey[k] = created
	}

	return removed
}

// Delete marks the element stored under the given key as removed at deletedAt
// and returns that element. It returns nil when the key is unknown or when
// the node declines the removal.
func (rht *ElementRHT) Delete(k string, deletedAt *time.Ticket) Element {
	if node, ok := rht.nodeMapByKey[k]; ok && node.Remove(deletedAt) {
		return node.elem
	}

	return nil
}

// DeleteByCreatedAt marks the element that was created at createdAt as removed
// at deletedAt and returns that element. It returns nil when no node is
// registered for that creation time or when the node declines the removal.
func (rht *ElementRHT) DeleteByCreatedAt(createdAt *time.Ticket, deletedAt *time.Ticket) Element {
	target, found := rht.nodeMapByCreatedAt[createdAt.Key()]
	if !found || !target.Remove(deletedAt) {
		return nil
	}

	return target.elem
}

// Elements returns the live (not removed) elements keyed by their key. A map
// is returned because it is easy to use in a for loop.
// TODO: If we encounter performance issues, we need to replace this with other solution.
func (rht *ElementRHT) Elements() map[string]Element {
	live := make(map[string]Element)

	for _, n := range rht.nodeMapByKey {
		if n.isRemoved() {
			continue
		}
		live[n.key] = n.elem
	}

	return live
}

// Nodes returns a slice of every node indexed by key, including nodes whose
// element has been removed. The order of the slice is unspecified because it
// follows map iteration order. An empty table yields a nil slice.
// TODO: If we encounter performance issues, we need to replace this with other solution.
func (rht *ElementRHT) Nodes() []*ElementRHTNode {
	if len(rht.nodeMapByKey) == 0 {
		return nil
	}

	// The final length is known, so allocate once instead of growing on append.
	nodes := make([]*ElementRHTNode, 0, len(rht.nodeMapByKey))
	for _, node := range rht.nodeMapByKey {
		nodes = append(nodes, node)
	}

	return nodes
}

// purge physically removes the node of the given element from the table. The
// node is always dropped from the creation-time index; it is dropped from the
// key index only when it is the node currently stored under its key.
// It panics when no node is registered for the element's creation time.
func (rht *ElementRHT) purge(elem Element) {
	createdKey := elem.CreatedAt().Key()

	target, found := rht.nodeMapByCreatedAt[createdKey]
	if !found {
		panic("fail to find: " + createdKey)
	}
	delete(rht.nodeMapByCreatedAt, target.elem.CreatedAt().Key())

	// A newer node may own the key by now; leave that one in place.
	if current, exists := rht.nodeMapByKey[target.key]; exists && current == target {
		delete(rht.nodeMapByKey, current.key)
	}
}

// Marshal returns the JSON encoding of this map. Only live (not removed)
// elements are included, and members are emitted in ascending key order so
// that the output is deterministic.
func (rht *ElementRHT) Marshal() string {
	members := rht.Elements()

	// Extract and sort the keys; map iteration order is random.
	keys := make([]string, 0, len(members))
	for k := range members {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	var sb strings.Builder
	sb.WriteString("{")
	for idx, k := range keys {
		if idx > 0 {
			sb.WriteString(",")
		}
		// Format straight into the builder instead of building a temporary
		// string first. Writes to a strings.Builder never fail, so the
		// returned error is not checked.
		fmt.Fprintf(&sb, `"%s":%s`, EscapeString(k), members[k].Marshal())
	}
	sb.WriteString("}")

	return sb.String()
}
12 changes: 6 additions & 6 deletions pkg/document/crdt/object.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@ import (
// Object represents a JSON object, but unlike regular JSON, it has time
// tickets which is created by logical clock.
type Object struct {
memberNodes *RHTPriorityQueueMap
memberNodes *ElementRHT
createdAt *time.Ticket
movedAt *time.Ticket
removedAt *time.Ticket
}

// NewObject creates a new instance of Object.
func NewObject(memberNodes *RHTPriorityQueueMap, createdAt *time.Ticket) *Object {
func NewObject(memberNodes *ElementRHT, createdAt *time.Ticket) *Object {
return &Object{
memberNodes: memberNodes,
createdAt: createdAt,
Expand Down Expand Up @@ -95,10 +95,10 @@ func (o *Object) Marshal() string {

// DeepCopy copies itself deeply.
func (o *Object) DeepCopy() Element {
members := NewRHTPriorityQueueMap()
members := NewElementRHT()

for _, node := range o.memberNodes.Nodes() {
members.SetInternal(node.key, node.elem.DeepCopy())
members.Set(node.key, node.elem.DeepCopy())
}

obj := NewObject(members, o.createdAt)
Expand Down Expand Up @@ -141,7 +141,7 @@ func (o *Object) Remove(removedAt *time.Ticket) bool {
return false
}

// RHTNodes returns the RHTPriorityQueueMap nodes.
func (o *Object) RHTNodes() []*RHTPQMapNode {
// RHTNodes returns the ElementRHT nodes.
func (o *Object) RHTNodes() []*ElementRHTNode {
return o.memberNodes.Nodes()
}
2 changes: 1 addition & 1 deletion pkg/document/crdt/object_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ func TestObject(t *testing.T) {
root := helper.TestRoot()
ctx := helper.TextChangeContext(root)

obj := crdt.NewObject(crdt.NewRHTPriorityQueueMap(), ctx.IssueTimeTicket())
obj := crdt.NewObject(crdt.NewElementRHT(), ctx.IssueTimeTicket())

obj.Set("k1", crdt.NewPrimitive("v1", ctx.IssueTimeTicket()))
assert.Equal(t, `{"k1":"v1"}`, obj.Marshal())
Expand Down
Loading

0 comments on commit 3d9fe9d

Please sign in to comment.