From 16fffbb2549e1b6f7e368fb2a38f77f5e1b3b083 Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Tue, 25 Jul 2023 14:46:09 +0100 Subject: [PATCH 01/27] Don't store channel history for non-leaf revisions --- db/revtree.go | 37 +++++++----- db/revtree_test.go | 90 ++++++++++++++++++++++------- rest/adminapitest/admin_api_test.go | 10 +++- 3 files changed, 96 insertions(+), 41 deletions(-) diff --git a/db/revtree.go b/db/revtree.go index 08c2dc11bf..7abe076fd5 100644 --- a/db/revtree.go +++ b/db/revtree.go @@ -44,22 +44,21 @@ type RevTree map[string]*RevInfo // rev IDs, with a parallel array of parent indexes. Ordering in the arrays doesn't matter. // So the parent of Revs[i] is Revs[Parents[i]] (unless Parents[i] == -1, which denotes a root.) type revTreeList struct { - Revs []string `json:"revs"` // The revision IDs - Parents []int `json:"parents"` // Index of parent of each revision (-1 if root) - Deleted []int `json:"deleted,omitempty"` // Indexes of revisions that are deletions - Bodies_Old []string `json:"bodies,omitempty"` // JSON of each revision (legacy) - BodyMap map[string]string `json:"bodymap,omitempty"` // JSON of each revision - BodyKeyMap map[string]string `json:"bodyKeyMap,omitempty"` // Keys of revision bodies stored in external documents - Channels []base.Set `json:"channels"` - HasAttachments []int `json:"hasAttachments,omitempty"` // Indexes of revisions that has attachments + Revs []string `json:"revs"` // The revision IDs + Parents []int `json:"parents"` // Index of parent of each revision (-1 if root) + Deleted []int `json:"deleted,omitempty"` // Indexes of revisions that are deletions + Bodies_Old []string `json:"bodies,omitempty"` // JSON of each revision (legacy) + BodyMap map[string]string `json:"bodymap,omitempty"` // JSON of each revision + BodyKeyMap map[string]string `json:"bodyKeyMap,omitempty"` // Keys of revision bodies stored in external documents + ChannelsMap map[string]base.Set `json:"channelsMap,omitempty"` // Channels for non-winning leaf revisions (current channels stored outside of history, and don't have a use-case for storing non-leaf channels) + HasAttachments []int `json:"hasAttachments,omitempty"` // Indexes of revisions that has attachments } func (tree RevTree) MarshalJSON() ([]byte, error) { n := len(tree) rep := revTreeList{ - Revs: make([]string, n), - Parents: make([]int, n), - Channels: make([]base.Set, n), + Revs: make([]string, n), + Parents: make([]int, n), } revIndexes := map[string]int{"": -1} @@ -81,7 +80,13 @@ func (tree RevTree) MarshalJSON() ([]byte, error) { rep.BodyKeyMap[strconv.FormatInt(int64(i), 10)] = info.BodyKey } } - rep.Channels[i] = info.Channels + // TODO: Check if leaf revision before storing + if len(info.Channels) > 0 { + if rep.ChannelsMap == nil { + rep.ChannelsMap = make(map[string]base.Set, 1) + } + rep.ChannelsMap[strconv.FormatInt(int64(i), 10)] = info.Channels + } if info.Deleted { if rep.Deleted == nil { rep.Deleted = make([]int, 0, 1) @@ -126,8 +131,8 @@ func (tree *RevTree) UnmarshalJSON(inputjson []byte) (err error) { return } - // validate revTreeList revs, parents and channels lists are of equal length - if !(len(rep.Revs) == len(rep.Parents) && len(rep.Revs) == len(rep.Channels)) { + // validate revTreeList revs and parents lists are of equal length + if !(len(rep.Revs) == len(rep.Parents)) { return errors.New("revtreelist data is invalid, revs/parents/channels counts are inconsistent") } @@ -149,8 +154,8 @@ func (tree *RevTree) UnmarshalJSON(inputjson []byte) (err error) { info.BodyKey = bodyKey } } - if rep.Channels != nil { - info.Channels = rep.Channels[i] + if c, ok := rep.ChannelsMap[stringIndex]; ok && c != nil { + info.Channels = c } parentIndex := rep.Parents[i] if parentIndex >= 0 { diff --git a/db/revtree_test.go b/db/revtree_test.go index 0b34f42e52..1675f94d92 100644 --- a/db/revtree_test.go +++ b/db/revtree_test.go @@ -23,22 +23,43 @@ import ( "github.com/stretchr/testify/require" ) -// 1-one -- 2-two -- 3-three -var testmap = RevTree{"3-three": {ID: "3-three", Parent: "2-two", Body: []byte("{}")}, - "2-two": {ID: "2-two", Parent: "1-one", Channels: base.SetOf("ABC", "CBS")}, - "1-one": {ID: "1-one", Channels: base.SetOf("ABC")}} - -// / 3-three +// testmap is a revtree with a single branch // -// 1-one -- 2-two +// ┌────────┐ ┌────────┐ ╔════════╗ +// │ 1-one │──│ 2-two │──║3-three ║ +// └────────┘ └────────┘ ╚════════╝ +var testmap = RevTree{ + "3-three": {ID: "3-three", Parent: "2-two", Body: []byte("{}")}, + "2-two": {ID: "2-two", Parent: "1-one"}, + "1-one": {ID: "1-one"}, +} + +// branchymap is a branched revtree with two revisions in conflict // -// \ 3-drei -var branchymap = RevTree{"3-three": {ID: "3-three", Parent: "2-two"}, - "2-two": {ID: "2-two", Parent: "1-one"}, - "1-one": {ID: "1-one"}, - "3-drei": {ID: "3-drei", Parent: "2-two"}} +// ┌────────┐ +// ┌─│3-three │ +// ┌────────┐ ┌────────┐ │ └────────┘ +// │ 1-one │──│ 2-two │─┤ +// └────────┘ └────────┘ │ ╔════════╗ +// └─║ 3-drei ║ +// ╚════════╝ +var branchymap = RevTree{ + "3-three": {ID: "3-three", Parent: "2-two", Channels: base.SetOf("EN")}, + "2-two": {ID: "2-two", Parent: "1-one"}, + "1-one": {ID: "1-one"}, + "3-drei": {ID: "3-drei", Parent: "2-two", Channels: base.SetOf("DE")}, // winner because lower ASCII value (d vs. t) +} -var multiroot = RevTree{"3-a": {ID: "3-a", Parent: "2-a"}, +// multiroot is a revtree with multiple roots (disconnected branches) +// +// ┌────────┐ ┌────────┐ ╔════════╗ +// │ 1-a │─│ 2-a │─║ 3-a ║ +// └────────┘ └────────┘ ╚════════╝ +// ┌────────┐ ┌────────┐ +// │ 6-b │─│ 7-b │ +// └────────┘ └────────┘ +var multiroot = RevTree{ + "3-a": {ID: "3-a", Parent: "2-a"}, "2-a": {ID: "2-a", Parent: "1-a"}, "1-a": {ID: "1-a"}, "7-b": {ID: "7-b", Parent: "6-b"}, @@ -51,11 +72,20 @@ type BranchSpec struct { Digest string } -// / 3-a -- 4-a -- 5-a ...... etc (winning branch) -// 1-a -- 2-a -// \ 3-b -- 4-b ... etc (losing branch #1) -// \ 3-c -- 4-c ... etc (losing branch #2) -// \ 3-d -- 4-d ... etc (losing branch #n) +// getMultiBranchTestRevtree1 returns a rev tree with the specified number of revisions/branches +// +// ┌────────┐ ┌────────┐ ┌────────┐ ┌────────┐ ╔════════╗ +// │ 1-a │─│ 2-a │─┬──│ 3-a │─│ 4-a │─║ 5-a ║ +// └────────┘ └────────┘ │ └────────┘ └────────┘ ╚════════╝ +// │ ┌────────┐ ┌────────┐ +// ├──│ 3-b │─│ 4-b │ +// │ └────────┘ └────────┘ +// │ ┌────────┐ ┌────────┐ +// ├──│ 3-c │─│ 4-c │ +// │ └────────┘ └────────┘ +// │ ┌────────┐ ┌────────┐ +// └──│ 3-d │─│ 4-d │ +// └────────┘ └────────┘ // // NOTE: the 1-a -- 2-a unconflicted branch can be longer, depending on value of unconflictedBranchNumRevs func getMultiBranchTestRevtree1(ctx context.Context, unconflictedBranchNumRevs, winningBranchNumRevs int, losingBranches []BranchSpec) RevTree { @@ -188,19 +218,35 @@ func TestGetMultiBranchTestRevtree(t *testing.T) { } func TestRevTreeUnmarshalOldFormat(t *testing.T) { - const testJSON = `{"revs": ["3-three", "2-two", "1-one"], "parents": [1, 2, -1], "bodies": ["{}", "", ""], "channels": [null, ["ABC", "CBS"], ["ABC"]]}` + const testJSON = `{ + "revs": ["3-three", "2-two", "1-one"], + "parents": [1, 2, -1], + "bodies": ["{}", "", ""], + "channels": [null, ["ABC", "CBS"], ["ABC"]] +}` gotmap := testUnmarshal(t, testJSON) fmt.Printf("Unmarshaled to %v\n", gotmap) } func TestRevTreeUnmarshal(t *testing.T) { - const testJSON = `{"revs": ["3-three", "2-two", "1-one"], "parents": [1, 2, -1], "bodymap": {"0":"{}"}, "channels": [null, ["ABC", "CBS"], ["ABC"]]}` + // 'channels' is an old revtree property that stored channel history for previous revisions. + // we moved non-winning leaf revision channel information into a 'channelMap' property instead to handle the case where documents are in conflict. + const testJSON = `{ + "revs": ["3-three", "2-two", "1-one"], + "parents": [1, 2, -1], + "bodymap": {"0":"{}"}, + "channels": [null, ["ABC", "CBS"], ["ABC"]] +}` gotmap := testUnmarshal(t, testJSON) fmt.Printf("Unmarshaled to %v\n", gotmap) } -func TestRevTreeUnmarshalRevChannelCountMismatch(t *testing.T) { - const testJSON = `{"revs": ["3-three", "2-two", "1-one"], "parents": [1, 2, -1], "bodymap": {"0":"{}"}, "channels": [null, ["ABC", "CBS"]]}` +func TestRevTreeUnmarshalRevParentCountMismatch(t *testing.T) { + const testJSON = `{ + "revs": ["3-three", "2-two", "1-one"], + "parents": [1, -1], + "bodymap": {"0":"{}"} +}` gotmap := RevTree{} err := base.JSONUnmarshal([]byte(testJSON), &gotmap) assert.Errorf(t, err, "revtreelist data is invalid, revs/parents/channels counts are inconsistent") diff --git a/rest/adminapitest/admin_api_test.go b/rest/adminapitest/admin_api_test.go index 601e8a3012..39f6ee0566 100644 --- a/rest/adminapitest/admin_api_test.go +++ b/rest/adminapitest/admin_api_test.go @@ -2096,12 +2096,15 @@ func TestRawRedaction(t *testing.T) { // Test redact being disabled by default res = rt.SendAdminRequest("GET", "/{{.keyspace}}/_raw/testdoc", ``) + bodyBytes := res.Body.Bytes() + t.Logf("body: %s", string(bodyBytes)) var body map[string]interface{} - err := base.JSONUnmarshal(res.Body.Bytes(), &body) + err := base.JSONUnmarshal(bodyBytes, &body) assert.NoError(t, err) syncData := body[base.SyncPropertyName] assert.Equal(t, map[string]interface{}{"achannel": nil}, syncData.(map[string]interface{})["channels"]) - assert.Equal(t, []interface{}{[]interface{}{"achannel"}}, syncData.(map[string]interface{})["history"].(map[string]interface{})["channels"]) + // active revision for a doc that isn't in conflict, so don't expect a channelsMap + assert.Nil(t, syncData.(map[string]interface{})["channelsMap"]) // Test redacted body = map[string]interface{}{} @@ -2111,7 +2114,8 @@ func TestRawRedaction(t *testing.T) { syncData = body[base.SyncPropertyName] require.NotNil(t, syncData) assert.NotEqual(t, map[string]interface{}{"achannel": nil}, syncData.(map[string]interface{})["channels"]) - assert.NotEqual(t, []interface{}{[]interface{}{"achannel"}}, syncData.(map[string]interface{})["history"].(map[string]interface{})["channels"]) + // active revision for a doc that isn't in conflict, so don't expect a channelsMap + assert.Nil(t, syncData.(map[string]interface{})["channelsMap"]) // Test include doc false doesn't return doc body = map[string]interface{}{} From 10507a728fc3c5f70cac8e1df2d81b5bcf2aacd2 Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Wed, 26 Jul 2023 12:41:20 +0100 Subject: [PATCH 02/27] Extend RevTree unmarshal test for more cases (conflicting revs with channels in particular) --- db/revtree.go | 4 ++-- db/revtree_test.go | 35 +++++++++++++++++++++-------------- go.mod | 4 ++-- go.sum | 7 ++----- 4 files changed, 27 insertions(+), 23 deletions(-) diff --git a/db/revtree.go b/db/revtree.go index 7abe076fd5..65f4ee3904 100644 --- a/db/revtree.go +++ b/db/revtree.go @@ -28,8 +28,8 @@ type RevInfo struct { BodyKey string // Used when revision body stored externally (doc key used for external storage) Deleted bool depth uint32 - Body []byte // Used when revision body stored inline (stores bodies) - Channels base.Set + Body []byte // Used when revision body stored inline (stores bodies) + Channels base.Set // Set only if the revision is a leaf revision (we don't store channel history per-revision) HasAttachments bool } diff --git a/db/revtree_test.go b/db/revtree_test.go index 1675f94d92..9739fc7cb5 100644 --- a/db/revtree_test.go +++ b/db/revtree_test.go @@ -11,6 +11,7 @@ package db import ( "context" "fmt" + "github.com/davecgh/go-spew/spew" "log" "os" "sort" @@ -187,11 +188,11 @@ func getMultiBranchTestRevtree1(ctx context.Context, unconflictedBranchNumRevs, } -func testUnmarshal(t *testing.T, jsonString string) RevTree { - gotmap := RevTree{} - assert.NoError(t, base.JSONUnmarshal([]byte(jsonString), &gotmap), "Couldn't parse RevTree from JSON") - assert.Equal(t, testmap, gotmap) - return gotmap +func assertRevTreeUnmarshal(t *testing.T, jsonString string, expectedRevtree RevTree) bool { + var gotmap RevTree + require.NoError(t, base.JSONUnmarshal([]byte(jsonString), &gotmap)) + t.Logf("Unmarshalled %s %v", jsonString, spew.Sprint(gotmap)) + return assert.Equal(t, expectedRevtree, gotmap) } // Make sure that the getMultiBranchTestRevtree1() helper works as expected @@ -224,8 +225,7 @@ func TestRevTreeUnmarshalOldFormat(t *testing.T) { "bodies": ["{}", "", ""], "channels": [null, ["ABC", "CBS"], ["ABC"]] }` - gotmap := testUnmarshal(t, testJSON) - fmt.Printf("Unmarshaled to %v\n", gotmap) + assertRevTreeUnmarshal(t, testJSON, testmap) } func TestRevTreeUnmarshal(t *testing.T) { @@ -237,8 +237,7 @@ func TestRevTreeUnmarshal(t *testing.T) { "bodymap": {"0":"{}"}, "channels": [null, ["ABC", "CBS"], ["ABC"]] }` - gotmap := testUnmarshal(t, testJSON) - fmt.Printf("Unmarshaled to %v\n", gotmap) + assertRevTreeUnmarshal(t, testJSON, testmap) } func TestRevTreeUnmarshalRevParentCountMismatch(t *testing.T) { @@ -252,11 +251,19 @@ func TestRevTreeUnmarshalRevParentCountMismatch(t *testing.T) { assert.Errorf(t, err, "revtreelist data is invalid, revs/parents/channels counts are inconsistent") } -func TestRevTreeMarshal(t *testing.T) { - bytes, err := base.JSONMarshal(testmap) - assert.NoError(t, err, "Couldn't write RevTree to JSON") - fmt.Printf("Marshaled RevTree as %s\n", string(bytes)) - testUnmarshal(t, string(bytes)) +func TestRevTreeMarshalUnmarshalRoundtrip(t *testing.T) { + tests := map[string]RevTree{ + "testmap": testmap, + "branchymap": branchymap, + "multiroot": multiroot, + } + for name, test := range tests { + t.Run(name, func(t *testing.T) { + bytes, err := base.JSONMarshal(test) + require.NoError(t, err) + assertRevTreeUnmarshal(t, string(bytes), test) + }) + } } func TestRevTreeAccess(t *testing.T) { diff --git a/go.mod b/go.mod index 46921ec62f..4abb2ca950 100644 --- a/go.mod +++ b/go.mod @@ -17,6 +17,7 @@ require ( github.com/couchbaselabs/go-fleecedelta v0.0.0-20220909152808-6d09efa7a338 github.com/couchbaselabs/gocbconnstr v1.0.5 github.com/couchbaselabs/rosmar v0.0.0-20240404180245-795e6df684f3 + github.com/davecgh/go-spew v1.1.1 github.com/elastic/gosigar v0.14.3 github.com/felixge/fgprof v0.9.3 github.com/google/uuid v1.6.0 @@ -58,8 +59,7 @@ require ( github.com/couchbase/tools-common/types v1.0.0 // indirect github.com/couchbase/tools-common/utils v1.0.0 // indirect github.com/couchbaselabs/gocbconnstr/v2 v2.0.0-20230515165046-68b522a21131 // indirect - github.com/davecgh/go-spew v1.1.1 // indirect - github.com/go-ole/go-ole v1.2.6 // indirect + github.com/go-ole/go-ole v1.3.0 // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/golang/snappy v0.0.4 // indirect github.com/google/pprof v0.0.0-20211214055906-6f57359322fd // indirect diff --git a/go.sum b/go.sum index c59f8ba9ad..05ed403e99 100644 --- a/go.sum +++ b/go.sum @@ -51,8 +51,6 @@ github.com/couchbase/goprotostellar v1.0.1 h1:mtDVYTgnnDSQ3t7mQRG6jl/tOXKOuuFM9P github.com/couchbase/goprotostellar v1.0.1/go.mod h1:gs1eioLVOHETTFWxDY4v7Q/kRPMgqmX6t/TPcI429ls= github.com/couchbase/goutils v0.1.2 h1:gWr8B6XNWPIhfalHNog3qQKfGiYyh4K4VhO3P2o9BCs= github.com/couchbase/goutils v0.1.2/go.mod h1:h89Ek/tiOxxqjz30nPPlwZdQbdB8BwgnuBxeoUe/ViE= -github.com/couchbase/sg-bucket v0.0.0-20240326230241-0b197e169b27 h1:FGNvJsAQk6JZzuVXvoLXcoSQzOnQxWkywzYJFQqzXEg= -github.com/couchbase/sg-bucket v0.0.0-20240326230241-0b197e169b27/go.mod h1:5me3TJLTPfR0s3aMJZcPoTu5FT8oelaINz5l7Q3cApE= github.com/couchbase/sg-bucket v0.0.0-20240402154301-12625d8851a8 h1:kfWMYvUgSg2yIZJx+t63Ucl+zorvFqlYayXPkiXFtSE= github.com/couchbase/sg-bucket v0.0.0-20240402154301-12625d8851a8/go.mod h1:5me3TJLTPfR0s3aMJZcPoTu5FT8oelaINz5l7Q3cApE= github.com/couchbase/tools-common/cloud v1.0.0 h1:SQZIccXoedbrThehc/r9BJbpi/JhwJ8X00PDjZ2gEBE= @@ -74,8 +72,6 @@ github.com/couchbaselabs/gocbconnstr v1.0.5 h1:e0JokB5qbcz7rfnxEhNRTKz8q1svoRvDo github.com/couchbaselabs/gocbconnstr v1.0.5/go.mod h1:KV3fnIKMi8/AzX0O9zOrO9rofEqrRF1d2rG7qqjxC7o= github.com/couchbaselabs/gocbconnstr/v2 v2.0.0-20230515165046-68b522a21131 h1:2EAfFswAfgYn3a05DVcegiw6DgMgn1Mv5eGz6IHt1Cw= github.com/couchbaselabs/gocbconnstr/v2 v2.0.0-20230515165046-68b522a21131/go.mod h1:o7T431UOfFVHDNvMBUmUxpHnhivwv7BziUao/nMl81E= -github.com/couchbaselabs/rosmar v0.0.0-20240326232309-04dfb3337b60 h1:w9E8CEvQia8BPA+2Ai6dJh64wYTmxNUrXNPkKhtPpGw= -github.com/couchbaselabs/rosmar v0.0.0-20240326232309-04dfb3337b60/go.mod h1:MnlZ8BXE9Z7rUQEyb069P/6E9+YVkUxcqW5cmN23h0I= github.com/couchbaselabs/rosmar v0.0.0-20240404180245-795e6df684f3 h1:AUvojYsPc2WgiO9xRalRQLyXzooRRWemdEkiGl+PZno= github.com/couchbaselabs/rosmar v0.0.0-20240404180245-795e6df684f3/go.mod h1:SM0w4YHwXFMIyfqUbkpXZNWwAQKLwsUH91fsKUooMqw= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -91,8 +87,9 @@ github.com/felixge/fgprof v0.9.3 h1:VvyZxILNuCiUCSXtPtYmmtGvb65nqXh2QFWc0Wpf2/g= github.com/felixge/fgprof v0.9.3/go.mod h1:RdbpDgzqYVh/T9fPELJyV7EYJuHB55UTEULNun8eiPw= github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= -github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= +github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE= +github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= From 44b8fac015bff3f0950925ffba27d5676afb45b2 Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Wed, 26 Jul 2023 12:58:10 +0100 Subject: [PATCH 03/27] Add failing test for non-leaf revisions containing channels --- db/revtree_test.go | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/db/revtree_test.go b/db/revtree_test.go index 9739fc7cb5..c9e7aeea0b 100644 --- a/db/revtree_test.go +++ b/db/revtree_test.go @@ -344,6 +344,37 @@ func TestRevTreeCompareRevIDs(t *testing.T) { assert.Equal(t, 1, compareRevIDs(ctx, "5-bbb", "1-zzz")) } +// TestRevTreeChannelMapLeafOnly ensures that only leaf revisions populate channel information from/to channelMap. +func TestRevTreeChannelMapLeafOnly(t *testing.T) { + tree := branchymap.copy() + err := tree.addRevision(t.Name(), RevInfo{ + ID: "4-four", + Parent: "3-three", + Channels: base.SetOf("EN-us", "EN-gb"), + }) + require.NoError(t, err) + bytes, err := base.JSONMarshal(tree) + require.NoError(t, err) + var gotmap RevTree + require.NoError(t, base.JSONUnmarshal(bytes, &gotmap)) + assert.Equal(t, tree, gotmap) + t.Logf("bytes: %s", string(bytes)) + t.Logf("tree: %s", spew.Sprint(tree)) + + ri, err := gotmap.getInfo("3-three") + require.NoError(t, err) + assert.Nil(t, ri.Channels) // no channels on remarshalled RevTree for non-leaf revision + + ri, err = gotmap.getInfo("4-four") + require.NoError(t, err) + assert.Equal(t, base.SetOf("EN-us", "EN-gb"), ri.Channels) + + ri, err = gotmap.getInfo("3-drei") + require.NoError(t, err) + assert.Equal(t, base.SetOf("DE"), ri.Channels) + +} + func TestRevTreeIsLeaf(t *testing.T) { assert.True(t, branchymap.isLeaf("3-three"), "isLeaf failed on 3-three") assert.True(t, branchymap.isLeaf("3-drei"), "isLeaf failed on 3-drei") From dc69d8ba57920d2c444c8b3c9f270c153fa80bf3 Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Wed, 26 Jul 2023 13:18:48 +0100 Subject: [PATCH 04/27] Fix godoc describing wrong revtree winner due to inverse ASCII comparison --- db/revtree_test.go | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/db/revtree_test.go b/db/revtree_test.go index c9e7aeea0b..c23915690f 100644 --- a/db/revtree_test.go +++ b/db/revtree_test.go @@ -37,18 +37,18 @@ var testmap = RevTree{ // branchymap is a branched revtree with two revisions in conflict // -// ┌────────┐ -// ┌─│3-three │ -// ┌────────┐ ┌────────┐ │ └────────┘ +// ╔════════╗ +// ┌─║3-three ║ +// ┌────────┐ ┌────────┐ │ ╚════════╝ // │ 1-one │──│ 2-two │─┤ -// └────────┘ └────────┘ │ ╔════════╗ -// └─║ 3-drei ║ -// ╚════════╝ +// └────────┘ └────────┘ │ ┌────────┐ +// └─│ 3-drei │ +// └────────┘ var branchymap = RevTree{ - "3-three": {ID: "3-three", Parent: "2-two", Channels: base.SetOf("EN")}, + "3-three": {ID: "3-three", Parent: "2-two", Channels: base.SetOf("EN")}, // winner because higher ASCII value (d vs. t) "2-two": {ID: "2-two", Parent: "1-one"}, "1-one": {ID: "1-one"}, - "3-drei": {ID: "3-drei", Parent: "2-two", Channels: base.SetOf("DE")}, // winner because lower ASCII value (d vs. t) + "3-drei": {ID: "3-drei", Parent: "2-two", Channels: base.SetOf("DE")}, } // multiroot is a revtree with multiple roots (disconnected branches) @@ -346,32 +346,48 @@ func TestRevTreeCompareRevIDs(t *testing.T) { // TestRevTreeChannelMapLeafOnly ensures that only leaf revisions populate channel information from/to channelMap. func TestRevTreeChannelMapLeafOnly(t *testing.T) { + + // Add a 4th revision with channels to supersede rev 3-three + // ┌─────────────┐ ╔═══════════════════════╗ + // ┌─│3-three (EN) │──║ 4-four (EN-us, EN-gb) ║ + // ┌────────┐ ┌────────┐ │ └─────────────┘ ╚═══════════════════════╝ + // │ 1-one │──│ 2-two │─┤ + // └────────┘ └────────┘ │ ┌─────────────┐ + // └─│ 3-drei (DE) │ + // └─────────────┘ + rev4Channels := base.SetOf("EN-us", "EN-gb") tree := branchymap.copy() err := tree.addRevision(t.Name(), RevInfo{ ID: "4-four", Parent: "3-three", - Channels: base.SetOf("EN-us", "EN-gb"), + Channels: rev4Channels, }) require.NoError(t, err) + bytes, err := base.JSONMarshal(tree) require.NoError(t, err) var gotmap RevTree require.NoError(t, base.JSONUnmarshal(bytes, &gotmap)) - assert.Equal(t, tree, gotmap) + t.Logf("bytes: %s", string(bytes)) t.Logf("tree: %s", spew.Sprint(tree)) + // unmarshal again to assert on stored format + var storedMap revTreeList + require.NoError(t, base.JSONUnmarshal(bytes, &storedMap)) + assert.Len(t, storedMap.ChannelsMap, 2, "expected only two channelsMap entries (for the leaf revisions)") + ri, err := gotmap.getInfo("3-three") require.NoError(t, err) assert.Nil(t, ri.Channels) // no channels on remarshalled RevTree for non-leaf revision ri, err = gotmap.getInfo("4-four") require.NoError(t, err) - assert.Equal(t, base.SetOf("EN-us", "EN-gb"), ri.Channels) + assert.Equal(t, rev4Channels, ri.Channels) // leaf revision channels ri, err = gotmap.getInfo("3-drei") require.NoError(t, err) - assert.Equal(t, base.SetOf("DE"), ri.Channels) + assert.Equal(t, base.SetOf("DE"), ri.Channels) // leaf revision channels } From 15ef84896ff820bd18fcd448bbfcb8781e83f14e Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Wed, 26 Jul 2023 14:34:42 +0100 Subject: [PATCH 05/27] Only populate channels for leaf revisions on marshal/unmarshal --- db/revtree.go | 74 +++++++++++++++++++++++++++++++++++----------- db/revtree_test.go | 30 ++++++++++++------- 2 files changed, 76 insertions(+), 28 deletions(-) diff --git a/db/revtree.go b/db/revtree.go index 65f4ee3904..90e98ef391 100644 --- a/db/revtree.go +++ b/db/revtree.go @@ -15,6 +15,7 @@ import ( "fmt" "math" "strconv" + "sync" "github.com/couchbase/sync_gateway/base" ) @@ -62,10 +63,15 @@ func (tree RevTree) MarshalJSON() ([]byte, error) { } revIndexes := map[string]int{"": -1} + // there's no way to lookup leaves by key without doing a full tree iteration, so we'll just do it once the first time we need it + var leaves map[string]*RevInfo + leavesOnce := sync.Once{} + i := 0 for _, info := range tree { revIndexes[info.ID] = i rep.Revs[i] = info.ID + if info.Body != nil || info.BodyKey != "" { // Marshal either the BodyKey or the Body, depending on whether a BodyKey is specified if info.BodyKey == "" { @@ -80,25 +86,37 @@ func (tree RevTree) MarshalJSON() ([]byte, error) { rep.BodyKeyMap[strconv.FormatInt(int64(i), 10)] = info.BodyKey } } - // TODO: Check if leaf revision before storing + + // for leaf revisions we'll store channel information + // there's some duplication here for the current rev + // (active channels are stored outside the revtree) + // but special casing this doesn't seem worthwhile if len(info.Channels) > 0 { - if rep.ChannelsMap == nil { - rep.ChannelsMap = make(map[string]base.Set, 1) + leavesOnce.Do(func() { + leaves = tree.Leaves() + }) + if _, isLeaf := leaves[info.ID]; isLeaf { + if rep.ChannelsMap == nil { + rep.ChannelsMap = make(map[string]base.Set, 1) + } + rep.ChannelsMap[strconv.FormatInt(int64(i), 10)] = info.Channels } - rep.ChannelsMap[strconv.FormatInt(int64(i), 10)] = info.Channels } + if info.Deleted { if rep.Deleted == nil { rep.Deleted = make([]int, 0, 1) } rep.Deleted = append(rep.Deleted, i) } + if info.HasAttachments { if rep.HasAttachments == nil { rep.HasAttachments = make([]int, 0, 1) } rep.HasAttachments = append(rep.HasAttachments, i) } + i++ } @@ -154,29 +172,40 @@ func (tree *RevTree) UnmarshalJSON(inputjson []byte) (err error) { info.BodyKey = bodyKey } } - if c, ok := rep.ChannelsMap[stringIndex]; ok && c != nil { - info.Channels = c - } parentIndex := rep.Parents[i] if parentIndex >= 0 { info.Parent = rep.Revs[parentIndex] } (*tree)[revid] = &info } - if rep.Deleted != nil { - for _, i := range rep.Deleted { - info := (*tree)[rep.Revs[i]] - info.Deleted = true // because tree[rep.Revs[i]].Deleted=true is a compile error - (*tree)[rep.Revs[i]] = info - } + + for _, i := range rep.Deleted { + info := (*tree)[rep.Revs[i]] + info.Deleted = true // because tree[rep.Revs[i]].Deleted=true is a compile error + (*tree)[rep.Revs[i]] = info } - if rep.HasAttachments != nil { - for _, i := range rep.HasAttachments { - info := (*tree)[rep.Revs[i]] - info.HasAttachments = true - (*tree)[rep.Revs[i]] = info + + for _, i := range rep.HasAttachments { + info := (*tree)[rep.Revs[i]] + info.HasAttachments = true + (*tree)[rep.Revs[i]] = info + } + + var leaves map[string]*RevInfo + if len(rep.ChannelsMap) > 0 { + leaves = tree.Leaves() + } + for iStr, channels := range rep.ChannelsMap { + i, err := strconv.ParseInt(iStr, 10, 64) + if err != nil { + return err + } + info := (*tree)[rep.Revs[i]] + if _, isLeaf := leaves[info.ID]; isLeaf { + info.Channels = channels } } + return } @@ -291,6 +320,15 @@ func (tree RevTree) GetLeavesFiltered(filter func(revId string) bool) []string { } +// Leaves returns a map of revisions that are leaves in the tree. +func (tree RevTree) Leaves() map[string]*RevInfo { + m := make(map[string]*RevInfo) + tree.forEachLeaf(func(info *RevInfo) { + m[info.ID] = info + }) + return m +} + func (tree RevTree) forEachLeaf(callback func(*RevInfo)) { isParent := map[string]bool{} for _, info := range tree { diff --git a/db/revtree_test.go b/db/revtree_test.go index c23915690f..e44ba156be 100644 --- a/db/revtree_test.go +++ b/db/revtree_test.go @@ -30,7 +30,7 @@ import ( // │ 1-one │──│ 2-two │──║3-three ║ // └────────┘ └────────┘ ╚════════╝ var testmap = RevTree{ - "3-three": {ID: "3-three", Parent: "2-two", Body: []byte("{}")}, + "3-three": {ID: "3-three", Parent: "2-two", Body: []byte("{}"), Channels: base.SetOf("ABC", "CBS")}, "2-two": {ID: "2-two", Parent: "1-one"}, "1-one": {ID: "1-one"}, } @@ -219,6 +219,10 @@ func TestGetMultiBranchTestRevtree(t *testing.T) { } func TestRevTreeUnmarshalOldFormat(t *testing.T) { + // 'channels' is an old revtree property that stored channel history for previous revisions. + // we moved non-winning leaf revision channel information into a 'channelMap' property instead to handle the case where documents are in conflict. + // bodies is an old revtree property that stored full bodies for previous revisions. + // we moved this into bodyMap for inline bodies and bodyKeyMap for externally stored bodies. const testJSON = `{ "revs": ["3-three", "2-two", "1-one"], "parents": [1, 2, -1], @@ -234,8 +238,8 @@ func TestRevTreeUnmarshal(t *testing.T) { const testJSON = `{ "revs": ["3-three", "2-two", "1-one"], "parents": [1, 2, -1], - "bodymap": {"0":"{}"}, - "channels": [null, ["ABC", "CBS"], ["ABC"]] + "bodymap": {"0":"{\"foo\":\"bar\"}"}, + "channelsMap": {"0": ["ABC", "CBS"] }` assertRevTreeUnmarshal(t, testJSON, testmap) } @@ -364,19 +368,25 @@ func TestRevTreeChannelMapLeafOnly(t *testing.T) { }) require.NoError(t, err) + // Intentionally keep a non-leaf channelsMap entry to ensure that unmarshalling strips it + treeJSON := []byte(`{ + "revs":["3-three","2-two","1-one","3-drei","4-four"], + "parents":[1,2,-1,1,0], + "channelsMap":{"0":["EN"],"3":["DE"],"4":["EN-gb","EN-us"]} +}`) + + // marshal RevTree into storage format bytes, err := base.JSONMarshal(tree) require.NoError(t, err) - var gotmap RevTree - require.NoError(t, base.JSONUnmarshal(bytes, &gotmap)) - - t.Logf("bytes: %s", string(bytes)) - t.Logf("tree: %s", spew.Sprint(tree)) - - // unmarshal again to assert on stored format + // unmarshal back into revTreeList to ensure non-leaf channels are stripped on marshal for stored format var storedMap revTreeList require.NoError(t, base.JSONUnmarshal(bytes, &storedMap)) assert.Len(t, storedMap.ChannelsMap, 2, "expected only two channelsMap entries (for the leaf revisions)") + // unmarshal treeJSON into RevTree to ensure non-leaf channels are stripped on unmarshal + var gotmap RevTree + require.NoError(t, base.JSONUnmarshal(treeJSON, &gotmap)) + ri, err := gotmap.getInfo("3-three") require.NoError(t, err) assert.Nil(t, ri.Channels) // no channels on remarshalled RevTree for non-leaf revision From 058ce82f02444f2e65d305531d62f664d2946155 Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Thu, 27 Jul 2023 15:19:39 +0100 Subject: [PATCH 06/27] Keep Channels_Old for backwards compatibility (reading existing marshalled revtrees) --- db/database_test.go | 1 + db/revtree.go | 36 ++++++++++++++++++++++++------------ db/revtree_test.go | 8 ++++---- 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/db/database_test.go b/db/database_test.go index 97c9330678..3e87cf3fd4 100644 --- a/db/database_test.go +++ b/db/database_test.go @@ -544,6 +544,7 @@ func TestGetRemovalMultiChannel(t *testing.T) { // Try with a user who has access to this revision. collection.user = userAlice body, err = collection.Get1xRevBody(ctx, "doc1", rev2ID, true, nil) + // FIXME: Returns no error and returns document! assertHTTPError(t, err, 404) require.Nil(t, body) diff --git a/db/revtree.go b/db/revtree.go index 90e98ef391..1ef7c9bdc9 100644 --- a/db/revtree.go +++ b/db/revtree.go @@ -48,9 +48,10 @@ type revTreeList struct { Revs []string `json:"revs"` // The revision IDs Parents []int `json:"parents"` // Index of parent of each revision (-1 if root) Deleted []int `json:"deleted,omitempty"` // Indexes of revisions that are deletions - Bodies_Old []string `json:"bodies,omitempty"` // JSON of each revision (legacy) + Bodies_Old []string `json:"bodies,omitempty"` // Deprecated: JSON of each revision (see bodymap) BodyMap map[string]string `json:"bodymap,omitempty"` // JSON of each revision BodyKeyMap map[string]string `json:"bodyKeyMap,omitempty"` // Keys of revision bodies stored in external documents + Channels_Old []base.Set `json:"channels,omitempty"` // Deprecated: Channels for revisions (see channelsMap) ChannelsMap map[string]base.Set `json:"channelsMap,omitempty"` // Channels for non-winning leaf revisions (current channels stored outside of history, and don't have a use-case for storing non-leaf channels) HasAttachments []int `json:"hasAttachments,omitempty"` // Indexes of revisions that has attachments } @@ -191,18 +192,29 @@ func (tree *RevTree) UnmarshalJSON(inputjson []byte) (err error) { (*tree)[rep.Revs[i]] = info } - var leaves map[string]*RevInfo - if len(rep.ChannelsMap) > 0 { - leaves = tree.Leaves() - } - for iStr, channels := range rep.ChannelsMap { - i, err := strconv.ParseInt(iStr, 10, 64) - if err != nil { - return err + if len(rep.ChannelsMap) > 0 || len(rep.Channels_Old) > 0 { + leaves := tree.Leaves() + for iStr, channels := range rep.ChannelsMap { + i, err := strconv.ParseInt(iStr, 10, 64) + if err != nil { + return err + } + info := (*tree)[rep.Revs[i]] + if _, isLeaf := leaves[info.ID]; isLeaf { + info.Channels = channels + } } - info := (*tree)[rep.Revs[i]] - if _, isLeaf := leaves[info.ID]; isLeaf { - info.Channels = channels + + // we shouldn't be in a situation where we have both channels and channelsMap populated, but we still need to handle the old format. + for i, channels := range rep.Channels_Old { + info := (*tree)[rep.Revs[i]] + if _, isLeaf := leaves[info.ID]; isLeaf { + // set only if we've not already populated from ChannelsMap (we shouldn't ever have both) + if info.Channels != nil { + return fmt.Errorf("RevTree leaf %q had channels set already (from channelsMap)", info.ID) + } + info.Channels = channels + } } } diff --git a/db/revtree_test.go b/db/revtree_test.go index e44ba156be..2a534e3415 100644 --- a/db/revtree_test.go +++ b/db/revtree_test.go @@ -30,7 +30,7 @@ import ( // │ 1-one │──│ 2-two │──║3-three ║ // └────────┘ └────────┘ ╚════════╝ var testmap = RevTree{ - "3-three": {ID: "3-three", Parent: "2-two", Body: []byte("{}"), Channels: base.SetOf("ABC", "CBS")}, + "3-three": {ID: "3-three", Parent: "2-two", Body: []byte(`{"foo":"bar"}`), Channels: base.SetOf("ABC", "CBS")}, "2-two": {ID: "2-two", Parent: "1-one"}, "1-one": {ID: "1-one"}, } @@ -226,8 +226,8 @@ func TestRevTreeUnmarshalOldFormat(t *testing.T) { const testJSON = `{ "revs": ["3-three", "2-two", "1-one"], "parents": [1, 2, -1], - "bodies": ["{}", "", ""], - "channels": [null, ["ABC", "CBS"], ["ABC"]] + "bodies": ["{\"foo\":\"bar\"}", "", ""], + "channels": [["ABC", "CBS"], null, ["ABC"]] }` assertRevTreeUnmarshal(t, testJSON, testmap) } @@ -239,7 +239,7 @@ func TestRevTreeUnmarshal(t *testing.T) { "revs": ["3-three", "2-two", "1-one"], "parents": [1, 2, -1], "bodymap": {"0":"{\"foo\":\"bar\"}"}, - "channelsMap": {"0": ["ABC", "CBS"] + "channelsMap": {"0": ["ABC", "CBS"]} }` assertRevTreeUnmarshal(t, testJSON, testmap) } From 8b23e1ed2a11a759577fc392838dc29ed467a8f7 Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Tue, 8 Aug 2023 11:03:49 +0100 Subject: [PATCH 07/27] wip --- db/database_test.go | 5 +++-- db/document.go | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/db/database_test.go b/db/database_test.go index 3e87cf3fd4..7012ea4ba0 100644 --- a/db/database_test.go +++ b/db/database_test.go @@ -544,7 +544,7 @@ func TestGetRemovalMultiChannel(t *testing.T) { // Try with a user who has access to this revision. collection.user = userAlice body, err = collection.Get1xRevBody(ctx, "doc1", rev2ID, true, nil) - // FIXME: Returns no error and returns document! + // FIXME (bbrks): Returns removal with no error assertHTTPError(t, err, 404) require.Nil(t, body) @@ -692,7 +692,8 @@ func TestDeltaSyncWhenToRevIsChannelRemoval(t *testing.T) { require.NoError(t, db.DbStats.InitDeltaSyncStats()) delta, redactedRev, err = collection.GetDelta(ctx, "doc1", rev1ID, rev2ID) - require.Equal(t, base.HTTPErrorf(404, "missing"), err) + // FIXME (bbrks): Returns no error and returns document! + assert.Equal(t, base.HTTPErrorf(404, "missing"), err) assert.Nil(t, delta) assert.Nil(t, redactedRev) } diff --git a/db/document.go b/db/document.go index f8b20913db..b45783c8d7 100644 --- a/db/document.go +++ b/db/document.go @@ -1003,6 +1003,7 @@ func (doc *Document) IsChannelRemoval(ctx context.Context, revID string) (bodyBy activeChannels := make(base.Set) // Add active channels to the channel set if the the revision is available in the revision tree. + // TODO (bbrks): Should be looking elsewhere in SyncData for non-leaf channel information. if revInfo, ok := doc.History[revID]; ok { for channel, _ := range revInfo.Channels { activeChannels[channel] = struct{}{} From 5ce9e153e41b5ab5c8e774b85b53a2212ebf3f6d Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Tue, 15 Aug 2023 16:13:31 +0100 Subject: [PATCH 08/27] Build activeChannels from IsChannelRemoval based on sync.Channels --- db/database_test.go | 2 -- db/document.go | 21 ++++++++------------- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/db/database_test.go b/db/database_test.go index 7012ea4ba0..98120a896d 100644 --- a/db/database_test.go +++ b/db/database_test.go @@ -544,7 +544,6 @@ func TestGetRemovalMultiChannel(t *testing.T) { // Try with a user who has access to this revision. collection.user = userAlice body, err = collection.Get1xRevBody(ctx, "doc1", rev2ID, true, nil) - // FIXME (bbrks): Returns removal with no error assertHTTPError(t, err, 404) require.Nil(t, body) @@ -692,7 +691,6 @@ func TestDeltaSyncWhenToRevIsChannelRemoval(t *testing.T) { require.NoError(t, db.DbStats.InitDeltaSyncStats()) delta, redactedRev, err = collection.GetDelta(ctx, "doc1", rev1ID, rev2ID) - // FIXME (bbrks): Returns no error and returns document! assert.Equal(t, base.HTTPErrorf(404, "missing"), err) assert.Nil(t, delta) assert.Nil(t, redactedRev) diff --git a/db/document.go b/db/document.go index b45783c8d7..d3c4f85b25 100644 --- a/db/document.go +++ b/db/document.go @@ -981,6 +981,7 @@ func (doc *Document) updateChannels(ctx context.Context, newChannels base.Set) ( // Set of channels returned from IsChannelRemoval are "Active" channels and NOT "Removed". func (doc *Document) IsChannelRemoval(ctx context.Context, revID string) (bodyBytes []byte, history Revisions, channels base.Set, isRemoval bool, isDelete bool, err error) { + activeChannels := make(base.Set) removedChannels := make(base.Set) // Iterate over the document's channel history, looking for channels that were removed at revID. If found, also identify whether the removal was a tombstone. @@ -990,26 +991,16 @@ func (doc *Document) IsChannelRemoval(ctx context.Context, revID string) (bodyBy if removal.Deleted == true { isDelete = true } + } else { + activeChannels[channel] = struct{}{} } } + // If no matches found, return isRemoval=false if len(removedChannels) == 0 { return nil, nil, nil, false, false, nil } - // Construct removal body - // doc ID and rev ID aren't required to be inserted here, as both of those are available in the request. - bodyBytes = []byte(RemovedRedactedDocument) - - activeChannels := make(base.Set) - // Add active channels to the channel set if the the revision is available in the revision tree. - // TODO (bbrks): Should be looking elsewhere in SyncData for non-leaf channel information. - if revInfo, ok := doc.History[revID]; ok { - for channel, _ := range revInfo.Channels { - activeChannels[channel] = struct{}{} - } - } - // Build revision history for revID revHistory, err := doc.History.getHistory(revID) if err != nil { @@ -1022,6 +1013,10 @@ func (doc *Document) IsChannelRemoval(ctx context.Context, revID string) (bodyBy } history = encodeRevisions(ctx, doc.ID, revHistory) + // Construct removal body + // doc ID and rev ID aren't required to be inserted here, as both of those are available in the request. + bodyBytes = []byte(RemovedRedactedDocument) + return bodyBytes, history, activeChannels, true, isDelete, nil } From f32d9383c79061604120da161fbaad2b6603407c Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Tue, 15 Aug 2023 16:28:19 +0100 Subject: [PATCH 09/27] goimports --- db/revtree_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/revtree_test.go b/db/revtree_test.go index 2a534e3415..33a434ae51 100644 --- a/db/revtree_test.go +++ b/db/revtree_test.go @@ -11,7 +11,6 @@ package db import ( "context" "fmt" - "github.com/davecgh/go-spew/spew" "log" "os" "sort" @@ -20,6 +19,7 @@ import ( "time" "github.com/couchbase/sync_gateway/base" + "github.com/davecgh/go-spew/spew" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) From 81143362637b189196c6d50b2b53f89b1dd96eb7 Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Tue, 12 Sep 2023 16:22:03 +0100 Subject: [PATCH 10/27] Remove leaf check from RevTree Unmarshal (assume JSON is always correct and isn't storing non-leaf channelmap entries) --- db/revtree.go | 12 ++++++------ db/revtree_test.go | 24 ------------------------ 2 files changed, 6 insertions(+), 30 deletions(-) diff --git a/db/revtree.go b/db/revtree.go index 1ef7c9bdc9..31e7d4953e 100644 --- a/db/revtree.go +++ b/db/revtree.go @@ -192,20 +192,20 @@ func (tree *RevTree) UnmarshalJSON(inputjson []byte) (err error) { (*tree)[rep.Revs[i]] = info } - if len(rep.ChannelsMap) > 0 || len(rep.Channels_Old) > 0 { - leaves := tree.Leaves() + if len(rep.ChannelsMap) > 0 { for iStr, channels := range rep.ChannelsMap { i, err := strconv.ParseInt(iStr, 10, 64) if err != nil { return err } info := (*tree)[rep.Revs[i]] - if _, isLeaf := leaves[info.ID]; isLeaf { - info.Channels = channels - } + info.Channels = channels } + } - // we shouldn't be in a situation where we have both channels and channelsMap populated, but we still need to handle the old format. + // we shouldn't be in a situation where we have both channels and channelsMap populated, but we still need to handle the old format + if len(rep.Channels_Old) > 0 { + leaves := tree.Leaves() for i, channels := range rep.Channels_Old { info := (*tree)[rep.Revs[i]] if _, isLeaf := leaves[info.ID]; isLeaf { diff --git a/db/revtree_test.go b/db/revtree_test.go index 33a434ae51..c6ca6be003 100644 --- a/db/revtree_test.go +++ b/db/revtree_test.go @@ -368,13 +368,6 @@ func TestRevTreeChannelMapLeafOnly(t *testing.T) { }) require.NoError(t, err) - // Intentionally keep a non-leaf channelsMap entry to ensure that unmarshalling strips it - treeJSON := []byte(`{ - "revs":["3-three","2-two","1-one","3-drei","4-four"], - "parents":[1,2,-1,1,0], - "channelsMap":{"0":["EN"],"3":["DE"],"4":["EN-gb","EN-us"]} -}`) - // marshal RevTree into storage format bytes, err := base.JSONMarshal(tree) require.NoError(t, err) @@ -382,23 +375,6 @@ func TestRevTreeChannelMapLeafOnly(t *testing.T) { var storedMap revTreeList require.NoError(t, base.JSONUnmarshal(bytes, &storedMap)) assert.Len(t, storedMap.ChannelsMap, 2, "expected only two channelsMap entries (for the leaf revisions)") - - // unmarshal treeJSON into RevTree to ensure non-leaf channels are stripped on unmarshal - var gotmap RevTree - require.NoError(t, base.JSONUnmarshal(treeJSON, &gotmap)) - - ri, err := gotmap.getInfo("3-three") - require.NoError(t, err) - assert.Nil(t, ri.Channels) // no channels on remarshalled RevTree for non-leaf revision - - ri, err = gotmap.getInfo("4-four") - require.NoError(t, err) - assert.Equal(t, rev4Channels, ri.Channels) // leaf revision channels - - ri, err = gotmap.getInfo("3-drei") - require.NoError(t, err) - assert.Equal(t, base.SetOf("DE"), ri.Channels) // leaf revision channels - } func TestRevTreeIsLeaf(t *testing.T) { From 1ae9dedc53ec8eb1446d5c63c95527cc0e6920ee Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Wed, 13 Sep 2023 13:13:58 +0100 Subject: [PATCH 11/27] Cleanup addRevision error formatting --- db/revtree.go | 17 ++++++++--------- db/revtree_test.go | 2 +- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/db/revtree.go b/db/revtree.go index 31e7d4953e..3423fdffbc 100644 --- a/db/revtree.go +++ b/db/revtree.go @@ -409,20 +409,19 @@ func (tree RevTree) findAncestorFromSet(revid string, ancestors []string) string } // Records a revision in a RevTree. -func (tree RevTree) addRevision(docid string, info RevInfo) (err error) { +func (tree RevTree) addRevision(docid string, info RevInfo) error { revid := info.ID if revid == "" { - err = errors.New(fmt.Sprintf("doc: %v, RevTree addRevision, empty revid is illegal", docid)) - return + return fmt.Errorf("doc: %v, RevTree addRevision, empty revid is illegal", docid) } if tree.contains(revid) { - err = errors.New(fmt.Sprintf("doc: %v, RevTree addRevision, already contains rev %q", docid, revid)) - return + return fmt.Errorf("doc: %v, RevTree addRevision, already contains rev %q", docid, revid) } - parent := info.Parent - if parent != "" && !tree.contains(parent) { - err = errors.New(fmt.Sprintf("doc: %v, RevTree addRevision, parent id %q is missing", docid, parent)) - return + if p := info.Parent; p != "" { + parent, ok := tree[p] + if !ok { + return fmt.Errorf("doc: %v, RevTree addRevision for rev %q, parent id %q is missing", docid, revid, p) + } } tree[revid] = &info return nil diff --git a/db/revtree_test.go b/db/revtree_test.go index c6ca6be003..76a997f6c2 100644 --- a/db/revtree_test.go +++ b/db/revtree_test.go @@ -336,7 +336,7 @@ func TestRevTreeAddRevisionWithMissingParent(t *testing.T) { assert.Equal(t, testmap, tempmap) err := tempmap.addRevision("testdoc", RevInfo{ID: "5-five", Parent: "4-four"}) - assert.Equal(t, fmt.Sprintf("doc: %v, RevTree addRevision, parent id %q is missing", "testdoc", "4-four"), err.Error()) + assert.Equal(t, fmt.Sprintf("doc: %v, RevTree addRevision for rev %q, parent id %q is missing", "testdoc", "5-five", "4-four"), err.Error()) } func TestRevTreeCompareRevIDs(t *testing.T) { From 8f70fdda669889ed433319cbf6acfef2ec8fd27d Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Wed, 13 Sep 2023 13:14:37 +0100 Subject: [PATCH 12/27] Skip leaf check when storing for ChannelMap - nil out parent channels on addRevision --- db/revtree.go | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/db/revtree.go b/db/revtree.go index 3423fdffbc..cd71e5e1b2 100644 --- a/db/revtree.go +++ b/db/revtree.go @@ -15,7 +15,6 @@ import ( "fmt" "math" "strconv" - "sync" "github.com/couchbase/sync_gateway/base" ) @@ -64,10 +63,6 @@ func (tree RevTree) MarshalJSON() ([]byte, error) { } revIndexes := map[string]int{"": -1} - // there's no way to lookup leaves by key without doing a full tree iteration, so we'll just do it once the first time we need it - var leaves map[string]*RevInfo - leavesOnce := sync.Once{} - i := 0 for _, info := range tree { revIndexes[info.ID] = i @@ -88,20 +83,12 @@ func (tree RevTree) MarshalJSON() ([]byte, error) { } } - // for leaf revisions we'll store channel information - // there's some duplication here for the current rev - // (active channels are stored outside the revtree) - // but special casing this doesn't seem worthwhile + // for non-winning leaf revisions we'll store channel information if len(info.Channels) > 0 { - leavesOnce.Do(func() { - leaves = tree.Leaves() - }) - if _, isLeaf := leaves[info.ID]; isLeaf { - if rep.ChannelsMap == nil { - rep.ChannelsMap = make(map[string]base.Set, 1) - } - rep.ChannelsMap[strconv.FormatInt(int64(i), 10)] = info.Channels + if rep.ChannelsMap == nil { + rep.ChannelsMap = make(map[string]base.Set, 1) } + rep.ChannelsMap[strconv.FormatInt(int64(i), 10)] = info.Channels } if info.Deleted { @@ -422,6 +409,7 @@ func (tree RevTree) addRevision(docid string, info RevInfo) error { if !ok { return fmt.Errorf("doc: %v, RevTree addRevision for rev %q, parent id %q is missing", docid, revid, p) } + parent.Channels = nil } tree[revid] = &info return nil From 84ad764a3db2e95534fb78dcfdde84a488977c48 Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Wed, 13 Sep 2023 14:03:41 +0100 Subject: [PATCH 13/27] Change TestRevTreeChannelMapLeafOnly assertion to non-winning leaf only --- db/revtree.go | 2 +- db/revtree_test.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/db/revtree.go b/db/revtree.go index cd71e5e1b2..e7272f6e3b 100644 --- a/db/revtree.go +++ b/db/revtree.go @@ -29,7 +29,7 @@ type RevInfo struct { Deleted bool depth uint32 Body []byte // Used when revision body stored inline (stores bodies) - Channels base.Set // Set only if the revision is a leaf revision (we don't store channel history per-revision) + Channels base.Set // Set only if the revision is a non-winning leaf revision (we don't store channel history per-revision) HasAttachments bool } diff --git a/db/revtree_test.go b/db/revtree_test.go index 76a997f6c2..b3136c821f 100644 --- a/db/revtree_test.go +++ b/db/revtree_test.go @@ -348,7 +348,7 @@ func TestRevTreeCompareRevIDs(t *testing.T) { assert.Equal(t, 1, compareRevIDs(ctx, "5-bbb", "1-zzz")) } -// TestRevTreeChannelMapLeafOnly ensures that only leaf revisions populate channel information from/to channelMap. +// TestRevTreeChannelMapLeafOnly ensures that only non-winning leaf revisions populate channel information from/to channelMap. func TestRevTreeChannelMapLeafOnly(t *testing.T) { // Add a 4th revision with channels to supersede rev 3-three @@ -374,7 +374,7 @@ func TestRevTreeChannelMapLeafOnly(t *testing.T) { // unmarshal back into revTreeList to ensure non-leaf channels are stripped on marshal for stored format var storedMap revTreeList require.NoError(t, base.JSONUnmarshal(bytes, &storedMap)) - assert.Len(t, storedMap.ChannelsMap, 2, "expected only two channelsMap entries (for the leaf revisions)") + assert.Len(t, storedMap.ChannelsMap, 1, "expected only one channelsMap entry (for the non-winning leaf revisions)") } func TestRevTreeIsLeaf(t *testing.T) { From dc8985ea35c11935cda66fe9cbdaddb239ec246c Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Wed, 13 Sep 2023 14:16:26 +0100 Subject: [PATCH 14/27] Skip ChannelsMap storage on Marshal for the winning revision (it's stored elsewhere outside of the RevTree) --- db/revtree.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/db/revtree.go b/db/revtree.go index e7272f6e3b..be0a9b6cd6 100644 --- a/db/revtree.go +++ b/db/revtree.go @@ -63,6 +63,8 @@ func (tree RevTree) MarshalJSON() ([]byte, error) { } revIndexes := map[string]int{"": -1} + winner, _, _ := tree.winningRevision() + i := 0 for _, info := range tree { revIndexes[info.ID] = i @@ -84,7 +86,7 @@ func (tree RevTree) MarshalJSON() ([]byte, error) { } // for non-winning leaf revisions we'll store channel information - if len(info.Channels) > 0 { + if winner != info.ID && len(info.Channels) > 0 { if rep.ChannelsMap == nil { rep.ChannelsMap = make(map[string]base.Set, 1) } From 4c8d9ed59273cf2a1a7987718565096b75a2c89d Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Wed, 13 Sep 2023 14:33:48 +0100 Subject: [PATCH 15/27] Move RevTree channel info into tests --- db/revtree_test.go | 55 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 10 deletions(-) diff --git a/db/revtree_test.go b/db/revtree_test.go index b3136c821f..cb6044fc4a 100644 --- a/db/revtree_test.go +++ b/db/revtree_test.go @@ -30,7 +30,7 @@ import ( // │ 1-one │──│ 2-two │──║3-three ║ // └────────┘ └────────┘ ╚════════╝ var testmap = RevTree{ - "3-three": {ID: "3-three", Parent: "2-two", Body: []byte(`{"foo":"bar"}`), Channels: base.SetOf("ABC", "CBS")}, + "3-three": {ID: "3-three", Parent: "2-two", Body: []byte(`{"foo":"bar"}`)}, "2-two": {ID: "2-two", Parent: "1-one"}, "1-one": {ID: "1-one"}, } @@ -45,10 +45,10 @@ var testmap = RevTree{ // └─│ 3-drei │ // └────────┘ var branchymap = RevTree{ - "3-three": {ID: "3-three", Parent: "2-two", Channels: base.SetOf("EN")}, // winner because higher ASCII value (d vs. t) + "3-three": {ID: "3-three", Parent: "2-two"}, // winner because higher ASCII value (d vs. t) "2-two": {ID: "2-two", Parent: "1-one"}, "1-one": {ID: "1-one"}, - "3-drei": {ID: "3-drei", Parent: "2-two", Channels: base.SetOf("DE")}, + "3-drei": {ID: "3-drei", Parent: "2-two"}, } // multiroot is a revtree with multiple roots (disconnected branches) @@ -229,17 +229,47 @@ func TestRevTreeUnmarshalOldFormat(t *testing.T) { "bodies": ["{\"foo\":\"bar\"}", "", ""], "channels": [["ABC", "CBS"], null, ["ABC"]] }` - assertRevTreeUnmarshal(t, testJSON, testmap) + tree := testmap.copy() + + // set desired channels + ri, err := tree.getInfo("3-three") + require.NoError(t, err) + ri.Channels = base.SetOf("ABC", "CBS") + + assertRevTreeUnmarshal(t, testJSON, tree) } -func TestRevTreeUnmarshal(t *testing.T) { - // 'channels' is an old revtree property that stored channel history for previous revisions. +func TestRevTreeUnmarshalChannelMap(t *testing.T) { // we moved non-winning leaf revision channel information into a 'channelMap' property instead to handle the case where documents are in conflict. + const testJSON = `{ + "revs": ["3-drei", "3-three", "2-two", "1-one"], + "parents": [2, 2, 3, -1], + "bodies": ["{\"foo\":\"buzz\"}", "{\"foo\":\"bar\"}", "", ""], + "channels": [["DE"], ["EN"], null, ["ABC"]] +}` + tree := testmap.copy() + + // set desired channels + ri, err := tree.getInfo("3-three") + require.NoError(t, err) + ri.Channels = base.SetOf("EN") + + err = tree.addRevision("", RevInfo{ + ID: "3-drei", + Parent: "2-two", + Body: []byte(`{"foo":"buzz"}`), + Channels: base.SetOf("DE"), + }) + require.NoError(t, err) + + assertRevTreeUnmarshal(t, testJSON, tree) +} + +func TestRevTreeUnmarshal(t *testing.T) { const testJSON = `{ "revs": ["3-three", "2-two", "1-one"], "parents": [1, 2, -1], - "bodymap": {"0":"{\"foo\":\"bar\"}"}, - "channelsMap": {"0": ["ABC", "CBS"]} + "bodymap": {"0":"{\"foo\":\"bar\"}"} }` assertRevTreeUnmarshal(t, testJSON, testmap) } @@ -359,18 +389,23 @@ func TestRevTreeChannelMapLeafOnly(t *testing.T) { // └────────┘ └────────┘ │ ┌─────────────┐ // └─│ 3-drei (DE) │ // └─────────────┘ - rev4Channels := base.SetOf("EN-us", "EN-gb") tree := branchymap.copy() err := tree.addRevision(t.Name(), RevInfo{ ID: "4-four", Parent: "3-three", - Channels: rev4Channels, + Channels: base.SetOf("EN-us", "EN-gb"), }) require.NoError(t, err) + // insert channel into tree - we don't store it in the globals because each test requires different channel data. + ri, err := tree.getInfo("3-drei") + require.NoError(t, err) + ri.Channels = base.SetOf("DE") + // marshal RevTree into storage format bytes, err := base.JSONMarshal(tree) require.NoError(t, err) + // unmarshal back into revTreeList to ensure non-leaf channels are stripped on marshal for stored format var storedMap revTreeList require.NoError(t, base.JSONUnmarshal(bytes, &storedMap)) From 12da4520eaae828c64488c8c18c7d5e20e4530d4 Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Wed, 13 Sep 2023 15:30:23 +0100 Subject: [PATCH 16/27] Change now false assertion in TestSyncFnOnPush --- db/database_test.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/db/database_test.go b/db/database_test.go index 98120a896d..39c04058f5 100644 --- a/db/database_test.go +++ b/db/database_test.go @@ -1464,7 +1464,10 @@ func TestSyncFnOnPush(t *testing.T) { "public": &channels.ChannelRemoval{Seq: 2, RevID: "4-four"}, }, doc.Channels) - assert.Equal(t, base.SetOf("clibup"), doc.History["4-four"].Channels) + // We no longer store channels for the winning revision in the RevTree, + // so don't expect it to be in doc.History like it used to be... + // The above assertion ensured the doc was *actually* in the correct channel. + assert.Nil(t, doc.History["4-four"].Channels) } func TestInvalidChannel(t *testing.T) { From 9359d53f60d6dc82560e086f044762ac0780d4c7 Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Wed, 13 Sep 2023 15:30:49 +0100 Subject: [PATCH 17/27] Handle winning revision logic in authorizeDoc --- db/crud.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/db/crud.go b/db/crud.go index fc5d147da4..0372307c27 100644 --- a/db/crud.go +++ b/db/crud.go @@ -537,7 +537,16 @@ func (col *DatabaseCollectionWithUser) authorizeDoc(doc *Document, revid string) if revid == "" { revid = doc.CurrentRev } - if rev := doc.History[revid]; rev != nil { + + if revid == doc.CurrentRev { + ch := base.SetOf() + for channelName, channelRemoval := range doc.Channels { + if channelRemoval == nil || channelRemoval.Seq == 0 { + ch.Add(channelName) + } + } + return col.user.AuthorizeAnyCollectionChannel(col.ScopeName, col.Name, ch) + } else if rev := doc.History[revid]; rev != nil { // Authenticate against specific revision: return col.user.AuthorizeAnyCollectionChannel(col.ScopeName, col.Name, rev.Channels) } else { From 29969e4150bf6deabdfff39463d718dfd293ef12 Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Wed, 13 Sep 2023 15:41:47 +0100 Subject: [PATCH 18/27] Make revCacheLoaderForDocument work for winning revision channels not in history --- db/crud.go | 7 +------ db/document.go | 11 +++++++++++ db/revision_cache_interface.go | 7 ++++++- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/db/crud.go b/db/crud.go index 0372307c27..abdaa9793a 100644 --- a/db/crud.go +++ b/db/crud.go @@ -539,12 +539,7 @@ func (col *DatabaseCollectionWithUser) authorizeDoc(doc *Document, revid string) } if revid == doc.CurrentRev { - ch := base.SetOf() - for channelName, channelRemoval := range doc.Channels { - if channelRemoval == nil || channelRemoval.Seq == 0 { - ch.Add(channelName) - } - } + ch := doc.currentChannels() return col.user.AuthorizeAnyCollectionChannel(col.ScopeName, col.Name, ch) } else if rev := doc.History[revid]; rev != nil { // Authenticate against specific revision: diff --git a/db/document.go b/db/document.go index d3c4f85b25..9c791f9146 100644 --- a/db/document.go +++ b/db/document.go @@ -1213,3 +1213,14 @@ func (doc *Document) MarshalWithXattr() (data []byte, xdata []byte, err error) { return data, xdata, nil } + +// Returns a set of the current (winning revision's) channels for the document. +func (doc *Document) currentChannels() base.Set { + ch := base.SetOf() + for channelName, channelRemoval := range doc.Channels { + if channelRemoval == nil || channelRemoval.Seq == 0 { + ch.Add(channelName) + } + } + return ch +} diff --git a/db/revision_cache_interface.go b/db/revision_cache_interface.go index cd8ba32b39..16f3d0a5b0 100644 --- a/db/revision_cache_interface.go +++ b/db/revision_cache_interface.go @@ -283,7 +283,12 @@ func revCacheLoaderForDocument(ctx context.Context, backingStore RevisionCacheBa return bodyBytes, body, history, channels, removed, nil, deleted, nil, getHistoryErr } history = encodeRevisions(ctx, doc.ID, validatedHistory) - channels = doc.History[revid].Channels + + if doc.CurrentRev == revid { + channels = doc.currentChannels() + } else { + channels = doc.History[revid].Channels + } return bodyBytes, body, history, channels, removed, attachments, deleted, doc.Expiry, err } From eaec520d3dd83c36f3e0642c7fb11c10fdcc3719 Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Thu, 14 Sep 2023 09:47:29 +0100 Subject: [PATCH 19/27] Fix missing context --- db/revtree.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/revtree.go b/db/revtree.go index be0a9b6cd6..4ef7be53f0 100644 --- a/db/revtree.go +++ b/db/revtree.go @@ -63,7 +63,7 @@ func (tree RevTree) MarshalJSON() ([]byte, error) { } revIndexes := map[string]int{"": -1} - winner, _, _ := tree.winningRevision() + winner, _, _ := tree.winningRevision(context.TODO()) i := 0 for _, info := range tree { From 3bfaaeae01806cfccbf007741ab34f5a50e17299 Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Mon, 18 Sep 2023 19:16:30 +0100 Subject: [PATCH 20/27] Store set of currentRevChannels on Document at unmarshal time --- db/crud.go | 18 ++++++------------ db/document.go | 17 +++++++++++++++++ db/revision_cache_interface.go | 6 ++---- db/revision_cache_test.go | 4 ++-- rest/access_test.go | 2 +- rest/changestest/changes_api_test.go | 2 +- 6 files changed, 29 insertions(+), 20 deletions(-) diff --git a/db/crud.go b/db/crud.go index abdaa9793a..5da9585240 100644 --- a/db/crud.go +++ b/db/crud.go @@ -529,25 +529,19 @@ func (db *DatabaseCollectionWithUser) Get1xRevAndChannels(ctx context.Context, d } // Returns an HTTP 403 error if the User is not allowed to access any of this revision's channels. -func (col *DatabaseCollectionWithUser) authorizeDoc(doc *Document, revid string) error { +func (col *DatabaseCollectionWithUser) authorizeDoc(ctx context.Context, doc *Document, revid string) error { user := col.user if doc == nil || user == nil { return nil // A nil User means access control is disabled } - if revid == "" { - revid = doc.CurrentRev - } - if revid == doc.CurrentRev { - ch := doc.currentChannels() - return col.user.AuthorizeAnyCollectionChannel(col.ScopeName, col.Name, ch) - } else if rev := doc.History[revid]; rev != nil { - // Authenticate against specific revision: - return col.user.AuthorizeAnyCollectionChannel(col.ScopeName, col.Name, rev.Channels) - } else { + channelsForRev, ok := doc.channelsForRev(revid) + if !ok { // No such revision; let the caller proceed and return a 404 return nil } + + return col.user.AuthorizeAnyCollectionChannel(col.ScopeName, col.Name, channelsForRev) } // Gets a revision of a document. If it's obsolete it will be loaded from the database if possible. @@ -685,7 +679,7 @@ func (db *DatabaseCollectionWithUser) getAncestorJSON(ctx context.Context, doc * // instead returns a minimal deletion or removal revision to let them know it's gone. func (db *DatabaseCollectionWithUser) get1xRevFromDoc(ctx context.Context, doc *Document, revid string, listRevisions bool) (bodyBytes []byte, removed bool, err error) { var attachments AttachmentsMeta - if err := db.authorizeDoc(doc, revid); err != nil { + if err := db.authorizeDoc(ctx, doc, revid); err != nil { // As a special case, you don't need channel access to see a deletion revision, // otherwise the client's replicator can't process the deletion (since deletions // usually aren't on any channels at all!) But don't show the full body. (See #59) diff --git a/db/document.go b/db/document.go index 9c791f9146..712cde3215 100644 --- a/db/document.go +++ b/db/document.go @@ -181,6 +181,8 @@ type Document struct { RevID string DocAttachments AttachmentsMeta inlineSyncData bool + + currentRevChannels base.Set // A base.Set of the current revision's channels (determined by SyncData.Channels at UnmarshalJSON time) } type historyOnlySyncData struct { @@ -1214,6 +1216,21 @@ func (doc *Document) MarshalWithXattr() (data []byte, xdata []byte, err error) { return data, xdata, nil } +// channelsForRev returns the set of channels the given revision is in for the document +// Channel information is only stored for leaf nodes in the revision tree, as we don't keep full history of channel information +func (doc *Document) channelsForRev(revid string) (base.Set, bool) { + if revid == "" || doc.CurrentRev == revid { + return doc.currentRevChannels, true + } + + if rev, ok := doc.History[revid]; ok { + return rev.Channels, true + } + + // no rev + return nil, false +} + // Returns a set of the current (winning revision's) channels for the document. func (doc *Document) currentChannels() base.Set { ch := base.SetOf() diff --git a/db/revision_cache_interface.go b/db/revision_cache_interface.go index 16f3d0a5b0..31ff4d5ebe 100644 --- a/db/revision_cache_interface.go +++ b/db/revision_cache_interface.go @@ -284,10 +284,8 @@ func revCacheLoaderForDocument(ctx context.Context, backingStore RevisionCacheBa } history = encodeRevisions(ctx, doc.ID, validatedHistory) - if doc.CurrentRev == revid { - channels = doc.currentChannels() - } else { - channels = doc.History[revid].Channels + if revChannels, ok := doc.channelsForRev(revid); ok { + channels = revChannels } return bodyBytes, body, history, channels, removed, attachments, deleted, doc.Expiry, err diff --git a/db/revision_cache_test.go b/db/revision_cache_test.go index 1451d353d9..3bae02b261 100644 --- a/db/revision_cache_test.go +++ b/db/revision_cache_test.go @@ -139,7 +139,7 @@ func TestBackingStore(t *testing.T) { assert.NoError(t, err) assert.Equal(t, "Jens", docRev.DocID) assert.NotNil(t, docRev.History) - assert.NotNil(t, docRev.Channels) + assert.NotNil(t, docRev.Channels) // FIXME (bbrks): Failing test w/ revcache changes assert.Equal(t, int64(0), cacheHitCounter.Value()) assert.Equal(t, int64(1), cacheMissCounter.Value()) assert.Equal(t, int64(1), getDocumentCounter.Value()) @@ -159,7 +159,7 @@ func TestBackingStore(t *testing.T) { assert.NoError(t, err) assert.Equal(t, "Jens", docRev.DocID) assert.NotNil(t, docRev.History) - assert.NotNil(t, docRev.Channels) + assert.NotNil(t, docRev.Channels) // FIXME (bbrks): Failing test w/ revcache changes assert.Equal(t, int64(1), cacheHitCounter.Value()) assert.Equal(t, int64(2), cacheMissCounter.Value()) assert.Equal(t, int64(2), getDocumentCounter.Value()) diff --git a/rest/access_test.go b/rest/access_test.go index 8abbee7a55..62e033a7b5 100644 --- a/rest/access_test.go +++ b/rest/access_test.go @@ -716,7 +716,7 @@ func TestAllDocsAccessControl(t *testing.T) { allDocsResult = allDocsResponse{} err = base.JSONUnmarshal(response.Body.Bytes(), &allDocsResult) require.NoError(t, err) - require.Len(t, allDocsResult.Rows, 3) + require.Len(t, allDocsResult.Rows, 3) // FIXME (bbrks): Forbidden/403 with revcache changes assert.Equal(t, "doc3", allDocsResult.Rows[0].ID) assert.Equal(t, "doc4", allDocsResult.Rows[1].ID) assert.Equal(t, "doc5", allDocsResult.Rows[2].ID) diff --git a/rest/changestest/changes_api_test.go b/rest/changestest/changes_api_test.go index 441f3b748a..3df909a92e 100644 --- a/rest/changestest/changes_api_test.go +++ b/rest/changestest/changes_api_test.go @@ -2161,7 +2161,7 @@ func TestChangesIncludeDocs(t *testing.T) { var resultBody db.Body assert.NoError(t, expectedBody.Unmarshal(expectedChange.Doc)) assert.NoError(t, resultBody.Unmarshal(result.Doc)) - db.AssertEqualBodies(t, expectedBody, resultBody) + db.AssertEqualBodies(t, expectedBody, resultBody) // FIXME (bbrks): Missing channels after revcache changes } else { assert.Equal(t, expectedChange.Doc, result.Doc) } From 220c275a9656b7eb9c065f9b89fadd3c36516a64 Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Thu, 2 Nov 2023 13:27:35 +0000 Subject: [PATCH 21/27] Move currentRevChannels logic down into SyncData type --- db/document.go | 35 +++++++++++++++++++++++++++++++++++ db/revision_cache_test.go | 10 +++++++++- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/db/document.go b/db/document.go index 712cde3215..a1c1c1ea85 100644 --- a/db/document.go +++ b/db/document.go @@ -95,6 +95,41 @@ type SyncData struct { addedRevisionBodies []string // revIDs of non-winning revision bodies that have been added (and so require persistence) removedRevisionBodyKeys map[string]string // keys of non-winning revisions that have been removed (and so may require deletion), indexed by revID + + currentRevChannels base.Set // A base.Set of the current revision's channels (determined by SyncData.Channels at UnmarshalJSON time) +} + +func (sd *SyncData) UnmarshalJSON(b []byte) error { + + // type alias avoids UnmarshalJSON stack overflow (forces fallback to standard JSON unmarshal instead of this one) + type stdSyncData SyncData + var tmp stdSyncData + + err := base.JSONUnmarshal(b, &tmp) + if err != nil { + return err + } + + *sd = SyncData(tmp) + + // determine current revision's channels and store in-memory (avoids Channels iteration at access-check time) + sd.currentRevChannels = sd.getCurrentChannels() + + return nil +} + +// determine set of current channels based on removal entries. +func (sd *SyncData) getCurrentChannels() base.Set { + if len(sd.Channels) > 0 { + ch := base.SetOf() + for channelName, channelRemoval := range sd.Channels { + if channelRemoval == nil || channelRemoval.Seq == 0 { + ch.Add(channelName) + } + } + return ch + } + return nil } func (sd *SyncData) HashRedact(salt string) SyncData { diff --git a/db/revision_cache_test.go b/db/revision_cache_test.go index 3bae02b261..87089eafb8 100644 --- a/db/revision_cache_test.go +++ b/db/revision_cache_test.go @@ -19,6 +19,7 @@ import ( "testing" "github.com/couchbase/sync_gateway/base" + "github.com/couchbase/sync_gateway/channels" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -47,9 +48,16 @@ func (t *testBackingStore) GetDocument(ctx context.Context, docid string, unmars doc.CurrentRev = "1-abc" doc.History = RevTree{ doc.CurrentRev: { - Channels: base.SetOf("*"), + ID: doc.CurrentRev, }, } + + doc.Channels = channels.ChannelMap{ + "*": &channels.ChannelRemoval{RevID: doc.CurrentRev}, + } + // currentRevChannels usually populated on JSON unmarshal + doc.currentRevChannels = doc.getCurrentChannels() + return doc, nil } From 52495bce86360debca3dc418358d9b80cc21cde9 Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Fri, 3 Nov 2023 12:35:17 +0000 Subject: [PATCH 22/27] Remove old FIXMEs --- db/revision_cache_test.go | 4 ++-- rest/changestest/changes_api_test.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/db/revision_cache_test.go b/db/revision_cache_test.go index 87089eafb8..0ff4c23e07 100644 --- a/db/revision_cache_test.go +++ b/db/revision_cache_test.go @@ -147,7 +147,7 @@ func TestBackingStore(t *testing.T) { assert.NoError(t, err) assert.Equal(t, "Jens", docRev.DocID) assert.NotNil(t, docRev.History) - assert.NotNil(t, docRev.Channels) // FIXME (bbrks): Failing test w/ revcache changes + assert.NotNil(t, docRev.Channels) assert.Equal(t, int64(0), cacheHitCounter.Value()) assert.Equal(t, int64(1), cacheMissCounter.Value()) assert.Equal(t, int64(1), getDocumentCounter.Value()) @@ -167,7 +167,7 @@ func TestBackingStore(t *testing.T) { assert.NoError(t, err) assert.Equal(t, "Jens", docRev.DocID) assert.NotNil(t, docRev.History) - assert.NotNil(t, docRev.Channels) // FIXME (bbrks): Failing test w/ revcache changes + assert.NotNil(t, docRev.Channels) assert.Equal(t, int64(1), cacheHitCounter.Value()) assert.Equal(t, int64(2), cacheMissCounter.Value()) assert.Equal(t, int64(2), getDocumentCounter.Value()) diff --git a/rest/changestest/changes_api_test.go b/rest/changestest/changes_api_test.go index 3df909a92e..441f3b748a 100644 --- a/rest/changestest/changes_api_test.go +++ b/rest/changestest/changes_api_test.go @@ -2161,7 +2161,7 @@ func TestChangesIncludeDocs(t *testing.T) { var resultBody db.Body assert.NoError(t, expectedBody.Unmarshal(expectedChange.Doc)) assert.NoError(t, resultBody.Unmarshal(result.Doc)) - db.AssertEqualBodies(t, expectedBody, resultBody) // FIXME (bbrks): Missing channels after revcache changes + db.AssertEqualBodies(t, expectedBody, resultBody) } else { assert.Equal(t, expectedChange.Doc, result.Doc) } From fd3e91a91dcf5bd7415897c7db91bc113b8ef5c5 Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Fri, 3 Nov 2023 12:46:44 +0000 Subject: [PATCH 23/27] simplify --- db/revtree.go | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/db/revtree.go b/db/revtree.go index 4ef7be53f0..5616879db9 100644 --- a/db/revtree.go +++ b/db/revtree.go @@ -181,15 +181,13 @@ func (tree *RevTree) UnmarshalJSON(inputjson []byte) (err error) { (*tree)[rep.Revs[i]] = info } - if len(rep.ChannelsMap) > 0 { - for iStr, channels := range rep.ChannelsMap { - i, err := strconv.ParseInt(iStr, 10, 64) - if err != nil { - return err - } - info := (*tree)[rep.Revs[i]] - info.Channels = channels + for iStr, channels := range rep.ChannelsMap { + i, err := strconv.ParseInt(iStr, 10, 64) + if err != nil { + return err } + info := (*tree)[rep.Revs[i]] + info.Channels = channels } // we shouldn't be in a situation where we have both channels and channelsMap populated, but we still need to handle the old format From eacbe39a3cef605ab77e4a85e6b02fc914c49631 Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Tue, 7 Nov 2023 18:07:15 +0000 Subject: [PATCH 24/27] Address PR comments (move logic into Unmarshal only - and don't set channel info for winning rev) --- db/blip_sync_context.go | 2 +- db/crud.go | 11 ++++++++--- db/database.go | 8 +++++--- db/document.go | 5 ++++- db/revtree.go | 8 ++++---- db/revtree_test.go | 40 +++++++++++++++++++++++++--------------- 6 files changed, 47 insertions(+), 27 deletions(-) diff --git a/db/blip_sync_context.go b/db/blip_sync_context.go index c3f0f59d59..1346c0198c 100644 --- a/db/blip_sync_context.go +++ b/db/blip_sync_context.go @@ -384,7 +384,7 @@ func (bsc *BlipSyncContext) handleChangesResponse(sender *blip.Sender, response sentSeqs = append(sentSeqs, seq) } } else { - base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Peer didn't want revision %s / %s (seq:%v)", base.UD(docID), revID, seq) + base.DebugfCtx(bsc.loggingCtx, base.KeySync, "Peer didn't want revision %s/%s (seq:%v)", base.UD(docID), revID, seq) if collectionCtx.sgr2PushAlreadyKnownSeqsCallback != nil { alreadyKnownSeqs = append(alreadyKnownSeqs, seq) } diff --git a/db/crud.go b/db/crud.go index 5da9585240..10a978f6ec 100644 --- a/db/crud.go +++ b/db/crud.go @@ -1752,7 +1752,8 @@ func (col *DatabaseCollectionWithUser) documentUpdateFunc(ctx context.Context, d return } - if len(channelSet) > 0 { + isWinningRev := doc.CurrentRev == newRevID + if len(channelSet) > 0 && !isWinningRev { doc.History[newRevID].Channels = channelSet } @@ -1779,7 +1780,7 @@ func (col *DatabaseCollectionWithUser) documentUpdateFunc(ctx context.Context, d return } } - _, err = doc.updateChannels(ctx, channelSet) + _, err = doc.updateChannels(ctx, isWinningRev, channelSet) if err != nil { return } @@ -2008,7 +2009,11 @@ func (db *DatabaseCollectionWithUser) updateAndReturnDoc(ctx context.Context, do return nil, "", err } - revChannels := doc.History[newRevID].Channels + revChannels, ok := doc.channelsForRev(newRevID) + if !ok { + // Should be unreachable, as we've already checked History[newRevID] above ... + return nil, "", base.RedactErrorf("unable to determine channels for %s/%s", base.UD(docid), newRevID) + } documentRevision := DocumentRevision{ DocID: docid, RevID: newRevID, diff --git a/db/database.go b/db/database.go index 8508e4bbf3..a683f49f3b 100644 --- a/db/database.go +++ b/db/database.go @@ -1734,9 +1734,11 @@ func (db *DatabaseCollectionWithUser) getResyncedDocument(ctx context.Context, d access = nil channels = nil } - rev.Channels = channels - if rev.ID == doc.CurrentRev { + isWinningRev := rev.ID == doc.CurrentRev + if !isWinningRev { + rev.Channels = channels + } else { if regenerateSequences { updatedUnusedSequences, err = db.assignSequence(ctx, 0, doc, unusedSequences) if err != nil { @@ -1745,7 +1747,7 @@ func (db *DatabaseCollectionWithUser) getResyncedDocument(ctx context.Context, d forceUpdate = true } - changedChannels, err := doc.updateChannels(ctx, channels) + changedChannels, err := doc.updateChannels(ctx, isWinningRev, channels) changed = len(doc.Access.updateAccess(ctx, doc, access)) + len(doc.RoleAccess.updateAccess(ctx, doc, roles)) + len(changedChannels) diff --git a/db/document.go b/db/document.go index a1c1c1ea85..7dca3c148c 100644 --- a/db/document.go +++ b/db/document.go @@ -977,7 +977,7 @@ func (doc *Document) addToChannelSetHistory(channelName string, historyEntry Cha // Updates the Channels property of a document object with current & past channels. // Returns the set of channels that have changed (document joined or left in this revision) -func (doc *Document) updateChannels(ctx context.Context, newChannels base.Set) (changedChannels base.Set, err error) { +func (doc *Document) updateChannels(ctx context.Context, isWinningRev bool, newChannels base.Set) (changedChannels base.Set, err error) { var changed []string oldChannels := doc.Channels if oldChannels == nil { @@ -1006,6 +1006,9 @@ func (doc *Document) updateChannels(ctx context.Context, newChannels base.Set) ( doc.updateChannelHistory(channel, doc.Sequence, true) } } + if isWinningRev { + doc.SyncData.currentRevChannels = newChannels + } if changed != nil { base.InfofCtx(ctx, base.KeyCRUD, "\tDoc %q / %q in channels %q", base.UD(doc.ID), doc.CurrentRev, base.UD(newChannels)) changedChannels, err = channels.SetFromArray(changed, channels.KeepStar) diff --git a/db/revtree.go b/db/revtree.go index 5616879db9..42367bd763 100644 --- a/db/revtree.go +++ b/db/revtree.go @@ -63,8 +63,6 @@ func (tree RevTree) MarshalJSON() ([]byte, error) { } revIndexes := map[string]int{"": -1} - winner, _, _ := tree.winningRevision(context.TODO()) - i := 0 for _, info := range tree { revIndexes[info.ID] = i @@ -86,7 +84,7 @@ func (tree RevTree) MarshalJSON() ([]byte, error) { } // for non-winning leaf revisions we'll store channel information - if winner != info.ID && len(info.Channels) > 0 { + if len(info.Channels) > 0 { if rep.ChannelsMap == nil { rep.ChannelsMap = make(map[string]base.Set, 1) } @@ -192,10 +190,12 @@ func (tree *RevTree) UnmarshalJSON(inputjson []byte) (err error) { // we shouldn't be in a situation where we have both channels and channelsMap populated, but we still need to handle the old format if len(rep.Channels_Old) > 0 { + winner, _, _ := tree.winningRevision(context.TODO()) leaves := tree.Leaves() for i, channels := range rep.Channels_Old { info := (*tree)[rep.Revs[i]] - if _, isLeaf := leaves[info.ID]; isLeaf { + // Ensure this leaf is not a winner (channels not stored in revtree for winning revision) + if _, isLeaf := leaves[info.ID]; isLeaf && info.ID != winner { // set only if we've not already populated from ChannelsMap (we shouldn't ever have both) if info.Channels != nil { return fmt.Errorf("RevTree leaf %q had channels set already (from channelsMap)", info.ID) diff --git a/db/revtree_test.go b/db/revtree_test.go index cb6044fc4a..9aa0f8ebe8 100644 --- a/db/revtree_test.go +++ b/db/revtree_test.go @@ -229,17 +229,18 @@ func TestRevTreeUnmarshalOldFormat(t *testing.T) { "bodies": ["{\"foo\":\"bar\"}", "", ""], "channels": [["ABC", "CBS"], null, ["ABC"]] }` - tree := testmap.copy() + expected := testmap.copy() - // set desired channels - ri, err := tree.getInfo("3-three") + // winning rev channel information no longer present in revtree, + // expect to drop it at unmarshal time if it's still in the raw JSON + ri, err := expected.getInfo("3-three") require.NoError(t, err) - ri.Channels = base.SetOf("ABC", "CBS") + ri.Channels = nil - assertRevTreeUnmarshal(t, testJSON, tree) + assertRevTreeUnmarshal(t, testJSON, expected) } -func TestRevTreeUnmarshalChannelMap(t *testing.T) { +func TestRevTreeUnmarshalOldFormatNonWinningRev(t *testing.T) { // we moved non-winning leaf revision channel information into a 'channelMap' property instead to handle the case where documents are in conflict. const testJSON = `{ "revs": ["3-drei", "3-three", "2-two", "1-one"], @@ -247,14 +248,16 @@ func TestRevTreeUnmarshalChannelMap(t *testing.T) { "bodies": ["{\"foo\":\"buzz\"}", "{\"foo\":\"bar\"}", "", ""], "channels": [["DE"], ["EN"], null, ["ABC"]] }` - tree := testmap.copy() + expected := testmap.copy() - // set desired channels - ri, err := tree.getInfo("3-three") + // winning rev channel information no longer present in revtree, + // expect to drop it at unmarshal time if it's still in the raw JSON + ri, err := expected.getInfo("3-three") require.NoError(t, err) - ri.Channels = base.SetOf("EN") + ri.Channels = nil - err = tree.addRevision("", RevInfo{ + // non-winning revisions do retain channel information, so populate this for the expected expected + err = expected.addRevision("", RevInfo{ ID: "3-drei", Parent: "2-two", Body: []byte(`{"foo":"buzz"}`), @@ -262,7 +265,7 @@ func TestRevTreeUnmarshalChannelMap(t *testing.T) { }) require.NoError(t, err) - assertRevTreeUnmarshal(t, testJSON, tree) + assertRevTreeUnmarshal(t, testJSON, expected) } func TestRevTreeUnmarshal(t *testing.T) { @@ -390,15 +393,22 @@ func TestRevTreeChannelMapLeafOnly(t *testing.T) { // └─│ 3-drei (DE) │ // └─────────────┘ tree := branchymap.copy() - err := tree.addRevision(t.Name(), RevInfo{ + + ri, err := tree.getInfo("3-three") + require.NoError(t, err) + // this wouldn't have been set normally (winning rev), + // but let's force it to ensure we're not storing old revs in ChannelsMap + ri.Channels = base.SetOf("EN") + + err = tree.addRevision(t.Name(), RevInfo{ ID: "4-four", Parent: "3-three", - Channels: base.SetOf("EN-us", "EN-gb"), + Channels: nil, // we don't store channels for winning revs }) require.NoError(t, err) // insert channel into tree - we don't store it in the globals because each test requires different channel data. - ri, err := tree.getInfo("3-drei") + ri, err = tree.getInfo("3-drei") require.NoError(t, err) ri.Channels = base.SetOf("DE") From 25c2fab2873c360a7e56540c90ab8d0a65b07c9f Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Tue, 21 Nov 2023 15:40:04 +0000 Subject: [PATCH 25/27] Return 404 from authorizeDoc when an uncached non-leaf rev is requested --- db/crud.go | 8 ++++-- db/database_test.go | 64 +++++++++++++++++++++++++++++++++++++++++++++ db/document.go | 13 ++++----- 3 files changed, 75 insertions(+), 10 deletions(-) diff --git a/db/crud.go b/db/crud.go index 10a978f6ec..46e9bfc0d6 100644 --- a/db/crud.go +++ b/db/crud.go @@ -537,8 +537,12 @@ func (col *DatabaseCollectionWithUser) authorizeDoc(ctx context.Context, doc *Do channelsForRev, ok := doc.channelsForRev(revid) if !ok { - // No such revision; let the caller proceed and return a 404 + // No such revision + // let the caller proceed and return a 404 return nil + } else if channelsForRev == nil { + // non-leaf (no channel info) - force 404 (caller would find the rev if it tried to look) + return ErrMissing } return col.user.AuthorizeAnyCollectionChannel(col.ScopeName, col.Name, channelsForRev) @@ -686,7 +690,7 @@ func (db *DatabaseCollectionWithUser) get1xRevFromDoc(ctx context.Context, doc * // Update: this applies to non-deletions too, since the client may have lost access to // the channel and gotten a "removed" entry in the _changes feed. It then needs to // incorporate that tombstone and for that it needs to see the _revisions property. - if revid == "" || doc.History[revid] == nil { + if revid == "" || doc.History[revid] == nil || err == ErrMissing { return nil, false, err } if doc.History[revid].Deleted { diff --git a/db/database_test.go b/db/database_test.go index 39c04058f5..8d17557caf 100644 --- a/db/database_test.go +++ b/db/database_test.go @@ -457,6 +457,70 @@ func TestIsServerless(t *testing.T) { } } +func TestUncachedOldRevisionChannel(t *testing.T) { + db, ctx := setupTestDB(t) + defer db.Close(ctx) + collection := GetSingleDatabaseCollectionWithUser(t, db) + collection.ChannelMapper = channels.NewChannelMapper(ctx, channels.DocChannelsSyncFunction, db.Options.JavascriptTimeout) + + auth := db.Authenticator(base.TestCtx(t)) + + userAlice, err := auth.NewUser("alice", "pass", base.SetOf("ABC")) + require.NoError(t, err, "Error creating user") + + collection.user = userAlice + + // Create the first revision of doc1. + rev1Body := Body{ + "k1": "v1", + "channels": []string{"ABC"}, + } + rev1ID, _, err := collection.Put(ctx, "doc1", rev1Body) + require.NoError(t, err, "Error creating doc") + + rev2Body := Body{ + "k2": "v2", + "channels": []string{"ABC"}, + BodyRev: rev1ID, + } + rev2ID, _, err := collection.Put(ctx, "doc1", rev2Body) + require.NoError(t, err, "Error creating doc") + + rev3Body := Body{ + "k3": "v3", + "channels": []string{"ABC"}, + BodyRev: rev2ID, + } + rev3ID, _, err := collection.Put(ctx, "doc1", rev3Body) + require.NoError(t, err, "Error creating doc") + require.NotEmpty(t, rev3ID, "Error creating doc") + + body, err := collection.Get1xRevBody(ctx, "doc1", rev2ID, true, nil) + require.NoError(t, err, "Error getting 1x rev body") + + // old rev was cached so still retains channel information + _, rev1Digest := ParseRevID(ctx, rev1ID) + _, rev2Digest := ParseRevID(ctx, rev2ID) + bodyExpected := Body{ + "k2": "v2", + "channels": []string{"ABC"}, + BodyRevisions: Revisions{ + RevisionsStart: 2, + RevisionsIds: []string{rev2Digest, rev1Digest}, + }, + BodyId: "doc1", + BodyRev: rev2ID, + } + require.Equal(t, bodyExpected, body) + + // Flush the revision cache to force load from backup revision + collection.FlushRevisionCacheForTest() + + // 404 because we lost the non-leaf channel information after cache flush + _, _, _, _, _, _, _, _, err = collection.Get1xRevAndChannels(ctx, "doc1", rev2ID, false) + assertHTTPError(t, err, 404) +} + // Test removal handling for unavailable multi-channel revisions. func TestGetRemovalMultiChannel(t *testing.T) { db, ctx := setupTestDB(t) diff --git a/db/document.go b/db/document.go index 7dca3c148c..e6eba19962 100644 --- a/db/document.go +++ b/db/document.go @@ -120,16 +120,13 @@ func (sd *SyncData) UnmarshalJSON(b []byte) error { // determine set of current channels based on removal entries. func (sd *SyncData) getCurrentChannels() base.Set { - if len(sd.Channels) > 0 { - ch := base.SetOf() - for channelName, channelRemoval := range sd.Channels { - if channelRemoval == nil || channelRemoval.Seq == 0 { - ch.Add(channelName) - } + ch := base.SetOf() + for channelName, channelRemoval := range sd.Channels { + if channelRemoval == nil || channelRemoval.Seq == 0 { + ch.Add(channelName) } - return ch } - return nil + return ch } func (sd *SyncData) HashRedact(salt string) SyncData { From 1c0e4cdc8d13e2af53cad869bf48dd3e578deefe Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Tue, 21 Nov 2023 16:58:29 +0000 Subject: [PATCH 26/27] use errors.Is instead of == --- db/crud.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/db/crud.go b/db/crud.go index 46e9bfc0d6..0b656ae80d 100644 --- a/db/crud.go +++ b/db/crud.go @@ -11,6 +11,7 @@ package db import ( "bytes" "context" + "errors" "fmt" "math" "net/http" @@ -21,7 +22,7 @@ import ( "github.com/couchbase/sync_gateway/auth" "github.com/couchbase/sync_gateway/base" "github.com/couchbase/sync_gateway/channels" - "github.com/pkg/errors" + pkgerrors "github.com/pkg/errors" ) const ( @@ -690,7 +691,7 @@ func (db *DatabaseCollectionWithUser) get1xRevFromDoc(ctx context.Context, doc * // Update: this applies to non-deletions too, since the client may have lost access to // the channel and gotten a "removed" entry in the _changes feed. It then needs to // incorporate that tombstone and for that it needs to see the _revisions property. - if revid == "" || doc.History[revid] == nil || err == ErrMissing { + if revid == "" || doc.History[revid] == nil || errors.Is(err, ErrMissing) { return nil, false, err } if doc.History[revid].Deleted { @@ -1559,7 +1560,7 @@ func (db *DatabaseCollectionWithUser) addAttachments(ctx context.Context, newAtt if errors.Is(err, ErrAttachmentTooLarge) || err.Error() == "document value was too large" { err = base.HTTPErrorf(http.StatusRequestEntityTooLarge, "Attachment too large") } else { - err = errors.Wrap(err, "Error adding attachment") + err = pkgerrors.Wrap(err, "Error adding attachment") } } return err From 5698d3e5ca6a6756fad374243f633807d788ec4c Mon Sep 17 00:00:00 2001 From: Ben Brooks Date: Tue, 9 Jan 2024 11:52:18 +0000 Subject: [PATCH 27/27] Fix duplicate UnmarshalJSON func for SyncData --- db/document.go | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/db/document.go b/db/document.go index e6eba19962..b67de0336a 100644 --- a/db/document.go +++ b/db/document.go @@ -99,25 +99,6 @@ type SyncData struct { currentRevChannels base.Set // A base.Set of the current revision's channels (determined by SyncData.Channels at UnmarshalJSON time) } -func (sd *SyncData) UnmarshalJSON(b []byte) error { - - // type alias avoids UnmarshalJSON stack overflow (forces fallback to standard JSON unmarshal instead of this one) - type stdSyncData SyncData - var tmp stdSyncData - - err := base.JSONUnmarshal(b, &tmp) - if err != nil { - return err - } - - *sd = SyncData(tmp) - - // determine current revision's channels and store in-memory (avoids Channels iteration at access-check time) - sd.currentRevChannels = sd.getCurrentChannels() - - return nil -} - // determine set of current channels based on removal entries. func (sd *SyncData) getCurrentChannels() base.Set { ch := base.SetOf()