tulip · eparker-tulip · Nov 12, 2024 · Oct 30, 2024 · Nov 8, 2024 · Nov 12, 2024
diff --git a/lib/oplog/tail.go b/lib/oplog/tail.go
@@ -37,12 +37,10 @@ type Tailer struct {
 // Raw oplog entry from Mongo
 type rawOplogEntry struct {
 	Timestamp    primitive.Timestamp `bson:"ts"`
-	HistoryID    int64               `bson:"h"`
-	MongoVersion int                 `bson:"v"`
 	Operation    string              `bson:"op"`
 	Namespace    string              `bson:"ns"`
 	Doc          bson.Raw            `bson:"o"`
-	Update       rawOplogEntryID     `bson:"o2"`
+	Update       bson.Raw	    	 `bson:"o2"`
 }
 
 // Parsed Cursor Result
@@ -52,10 +50,6 @@ type cursorResultStatus struct {
 	DidLosePosition bool
 }
 
-type rawOplogEntryID struct {
-	ID interface{} `bson:"_id"`
-}
-
 const requeryDuration = time.Second
 
 var (
@@ -214,7 +208,7 @@ func (tailer *Tailer) tailOnce(out []PublisherChannels, stop <-chan bool, readOr
 					continue
 				}
 
-				ts, pubs, sendMetricsData := tailer.unmarshalEntry(rawData, tailer.Denylist, readOrdinal)
+				ts, pubs, sendMetricsData := tailer.processEntry(rawData, readOrdinal)
 
 				if ts != nil {
 					lastTimestamp = *ts
@@ -366,16 +360,13 @@ func closeCursor(cursor *mongo.Cursor) {
 	}
 }
 
-// unmarshalEntry unmarshals a single entry from the oplog.
+// processEntry processes a single entry from the oplog.
 //
 // The timestamp of the entry is returned so that tailOnce knows the timestamp of the last entry it read, even if it
 // ignored it or failed at some later step.
-func (tailer *Tailer) unmarshalEntry(rawData bson.Raw, denylist *sync.Map, readOrdinal int) (timestamp *primitive.Timestamp, pubs []*redispub.Publication, sendMetricsData func()) {
-	var result rawOplogEntry
-
-	err := bson.Unmarshal(rawData, &result)
-	if err != nil {
-		log.Log.Errorw("Error unmarshalling oplog entry", "error", err)
+func (tailer *Tailer) processEntry(rawData bson.Raw, readOrdinal int) (timestamp *primitive.Timestamp, pubs []*redispub.Publication, sendMetricsData func()) {
+	result := tailer.unmarshalEntryMetadata(rawData)
+	if result == nil {
 		return
 	}
 
@@ -387,6 +378,10 @@ func (tailer *Tailer) unmarshalEntry(rawData bson.Raw, denylist *sync.Map, readO
 	status := "ignored"
 	database := "(no database)"
 	messageLen := float64(len(rawData))
+
+	if len(entries) > 0 {
+		database = entries[0].Database
+	}
 
 	sendMetricsData = func() {
 		// TODO: remove these in a future version
@@ -398,17 +393,6 @@ func (tailer *Tailer) unmarshalEntry(rawData bson.Raw, denylist *sync.Map, readO
 		metricLastReceivedStaleness.WithLabelValues(strconv.Itoa(readOrdinal)).Set(float64(time.Since(time.Unix(int64(timestamp.T), 0))))
 	}
 
-	if len(entries) > 0 {
-		database = entries[0].Database
-	}
-
-	if _, denied := denylist.Load(database); denied {
-		log.Log.Debugw("Skipping oplog entry", "database", database)
-		metricOplogEntriesFiltered.WithLabelValues(database).Add(1)
-
-		return
-	}
-
 	type errEntry struct {
 		err error
 		op  *oplogEntry
@@ -483,8 +467,87 @@ func (tailer *Tailer) getStartTime(maxOrdinal int, getTimestampOfLastOplogEntry
 	return primitive.Timestamp{T: uint32(time.Now().Unix() << 32)}
 }
 
+func parseID(idRaw bson.RawValue) (id interface{}, err error) {
+	if idRaw.IsZero() {
+		log.Log.Error("failed to get objectId: _id is empty or not set")
+		err = errors.New("empty or missing objectId")
+		return
+	}
+	err = idRaw.Unmarshal(&id)
+	if err != nil {
+		log.Log.Errorf("failed to unmarshal objectId: %v", err)
+	}
+	return
+}
+
+// unmarshalEntryMetadata processes the top-level data from an entry and returns a rawOplogEntry object.
+// This avoids using bson.Unmarshal on the whole document as that has very poor performance, even with the
+// bson.Raw type to limit depth.  While messy, using these raw bson methods here provides far better performance.
+func (tailer *Tailer) unmarshalEntryMetadata(rawData bson.Raw) *rawOplogEntry {
+	var result rawOplogEntry
+	var ok bool
+	nsLookup, err := rawData.LookupErr("ns");
+	if err == nil {
+		result.Namespace, ok = nsLookup.StringValueOK()
+		if !ok {
+			// this means there was a type mismatch
+			log.Log.Error("Error unmarshalling oplog namespace entry")
+			return nil
+		}
+	}
+
+	// filter if db is in denylist
+	if len(result.Namespace) > 0 && result.Namespace != "admin.$cmd" {
+		db, _ := parseNamespace(result.Namespace)
+		if _, denied := tailer.Denylist.Load(db); denied {
+			log.Log.Debugw("Skipping oplog entry", "database", db)
+			metricOplogEntriesFiltered.WithLabelValues(db).Add(1)
+			return nil
+		}
+	}
+
+	tsLookup, err := rawData.LookupErr("ts")
+	if err == nil {
+		t, i, ok := tsLookup.TimestampOK()
+		if !ok {
+			log.Log.Error("Error unmarshalling oplog timestamp entry")
+			return nil
+		}
+		result.Timestamp = primitive.Timestamp{T: t, I: i}
+	}
+
+	opLookup, err := rawData.LookupErr("op")
+	if err == nil {
+		result.Operation, ok = opLookup.StringValueOK()
+		if !ok {
+			log.Log.Error("Error unmarshalling oplog operation entry")
+			return nil
+		}
+	}
+
+	oLookup, err := rawData.LookupErr("o")
+	if err == nil {
+		result.Doc, ok = oLookup.DocumentOK()
+		if !ok {
+			log.Log.Error("Error unmarshalling oplog document entry")
+			return nil
+		}
+	}
+
+	o2Lookup, err := rawData.LookupErr("o2")
+	if err == nil {
+		result.Update, ok = o2Lookup.DocumentOK()
+		if !ok {
+			log.Log.Error("Error unmarshalling oplog update entry")
+			return nil
+		}
+	}
+
+	return &result
+}
+
 // converts a rawOplogEntry to an oplogEntry
-func (tailer *Tailer) parseRawOplogEntry(entry rawOplogEntry, txIdx *uint) []oplogEntry {
+func (tailer *Tailer) parseRawOplogEntry(entry *rawOplogEntry, txIdx *uint) []oplogEntry {
 	if txIdx == nil {
 		idx := uint(0)
 		txIdx = &idx
@@ -505,19 +568,14 @@ func (tailer *Tailer) parseRawOplogEntry(entry rawOplogEntry, txIdx *uint) []opl
 
 		out.Database, out.Collection = parseNamespace(out.Namespace)
 
+		var errID error
 		if out.Operation == operationUpdate {
-			out.DocID = entry.Update.ID
+			out.DocID, errID = parseID(entry.Update.Lookup("_id"))
 		} else {
-			idLookup := entry.Doc.Lookup("_id")
-			if idLookup.IsZero() {
-				log.Log.Error("failed to get objectId: _id is empty or not set")
-				return nil
-			}
-			err := idLookup.Unmarshal(&out.DocID)
-			if err != nil {
-				log.Log.Errorf("failed to unmarshal objectId: %v", err)
-				return nil
-			}
+			out.DocID, errID = parseID(entry.Doc.Lookup("_id"))
+		}
+		if errID != nil {
+			return nil
 		}
 
 		return []oplogEntry{out}
@@ -527,20 +585,36 @@ func (tailer *Tailer) parseRawOplogEntry(entry rawOplogEntry, txIdx *uint) []opl
 			return nil
 		}
 
-		var txData struct {
-			ApplyOps []rawOplogEntry `bson:"applyOps"`
+		applyOpsLookup, err := entry.Doc.LookupErr("applyOps")
+		if err != nil {
+			log.Log.Errorf("Looking up transaction data: %v", err)
+			return nil
+		}
+
+		applyOpsValues, ok := applyOpsLookup.ArrayOK()
+		if !ok {
+			log.Log.Error("Failed to access transaction data as array")
+			return nil
 		}
 
-		if err := bson.Unmarshal(entry.Doc, &txData); err != nil {
-			log.Log.Errorf("unmarshaling transaction data: %v", err)
+		applyOpsArray, err := applyOpsValues.Values()
+		if err != nil {
+			log.Log.Errorf("Getting transaction ops array: %v", err)
 			return nil
 		}
 
 		var ret []oplogEntry
-
-		for _, v := range txData.ApplyOps {
-			v.Timestamp = entry.Timestamp
-			ret = append(ret, tailer.parseRawOplogEntry(v, txIdx)...)
+		for _, rawEntry := range applyOpsArray {
+			rawDoc, ok := rawEntry.DocumentOK()
+			if ok {
+				v := tailer.unmarshalEntryMetadata(rawDoc)
+				if v != nil {
+					v.Timestamp = entry.Timestamp
+					ret = append(ret, tailer.parseRawOplogEntry(v, txIdx)...)
+				}
+			} else {
+				log.Log.Error("Getting transaction op doc")
+			}
 		}
 
 		return ret

diff --git a/lib/oplog/tail_test.go b/lib/oplog/tail_test.go
@@ -104,16 +104,6 @@ func TestGetStartTime(t *testing.T) {
 	}
 }
 
-func mustRaw(t *testing.T, data interface{}) bson.Raw {
-	b, err := bson.Marshal(data)
-	require.NoError(t, err)
-
-	var raw bson.Raw
-	require.NoError(t, bson.Unmarshal(b, &raw))
-
-	return raw
-}
-
 func TestParseRawOplogEntry(t *testing.T) {
 	tests := map[string]struct {
 		in   rawOplogEntry
@@ -124,7 +114,7 @@ func TestParseRawOplogEntry(t *testing.T) {
 				Timestamp: primitive.Timestamp{T: 1234},
 				Operation: "i",
 				Namespace: "foo.Bar",
-				Doc:       mustRaw(t, map[string]interface{}{"_id": "someid", "foo": "bar"}),
+				Doc:       rawBson(t, map[string]interface{}{"_id": "someid", "foo": "bar"}),
 			},
 			want: []oplogEntry{{
 				Timestamp:  primitive.Timestamp{T: 1234},
@@ -141,8 +131,8 @@ func TestParseRawOplogEntry(t *testing.T) {
 				Timestamp: primitive.Timestamp{T: 1234},
 				Operation: "u",
 				Namespace: "foo.Bar",
-				Doc:       mustRaw(t, map[string]interface{}{"new": "data"}),
-				Update:    rawOplogEntryID{ID: "updateid"},
+				Doc:       rawBson(t, map[string]interface{}{"new": "data"}),
+				Update:    rawBson(t, map[string]interface{}{"_id": "updateid"}),
 			},
 			want: []oplogEntry{{
 				Timestamp:  primitive.Timestamp{T: 1234},
@@ -159,7 +149,7 @@ func TestParseRawOplogEntry(t *testing.T) {
 				Timestamp: primitive.Timestamp{T: 1234},
 				Operation: "d",
 				Namespace: "foo.Bar",
-				Doc:       mustRaw(t, map[string]interface{}{"_id": "someid"}),
+				Doc:       rawBson(t, map[string]interface{}{"_id": "someid"}),
 			},
 			want: []oplogEntry{{
 				Timestamp:  primitive.Timestamp{T: 1234},
@@ -176,7 +166,7 @@ func TestParseRawOplogEntry(t *testing.T) {
 				Timestamp: primitive.Timestamp{T: 1234},
 				Operation: "c",
 				Namespace: "foo.$cmd",
-				Doc:       mustRaw(t, map[string]interface{}{"drop": "Foo"}),
+				Doc:       rawBson(t, map[string]interface{}{"drop": "Foo"}),
 			},
 			want: nil,
 		},
@@ -185,47 +175,51 @@ func TestParseRawOplogEntry(t *testing.T) {
 				Timestamp: primitive.Timestamp{T: 1234},
 				Operation: "c",
 				Namespace: "admin.$cmd",
-				Doc: mustRaw(t, map[string]interface{}{
+				Doc: rawBson(t, map[string]interface{}{
 					"applyOps": []rawOplogEntry{
 						{
 							Timestamp: primitive.Timestamp{T: 1234},
 							Operation: "c",
 							Namespace: "admin.$cmd",
-							Doc: mustRaw(t, map[string]interface{}{
+							Doc: rawBson(t, map[string]interface{}{
 								"applyOps": []rawOplogEntry{
 									{
 										Operation: "i",
 										Namespace: "foo.Bar",
-										Doc: mustRaw(t, map[string]interface{}{
+										Doc: rawBson(t, map[string]interface{}{
 											"_id": "id1",
 											"foo": "baz",
 										}),
+										Update: rawBson(t, map[string]interface{}{}),
 									},
 								},
 							}),
+							Update: rawBson(t, map[string]interface{}{}),
 						},
 						{
 							Operation: "i",
 							Namespace: "foo.Bar",
-							Doc: mustRaw(t, map[string]interface{}{
+							Doc: rawBson(t, map[string]interface{}{
 								"_id": "id1",
 								"foo": "bar",
 							}),
+							Update: rawBson(t, map[string]interface{}{}),
 						},
 						{
 							Operation: "u",
 							Namespace: "foo.Bar",
-							Doc: mustRaw(t, map[string]interface{}{
+							Doc: rawBson(t, map[string]interface{}{
 								"foo": "quux",
 							}),
-							Update: rawOplogEntryID{"id2"},
+							Update: rawBson(t, map[string]interface{}{"_id": "id2"}),
 						},
 						{
 							Operation: "d",
 							Namespace: "foo.Bar",
-							Doc: mustRaw(t, map[string]interface{}{
+							Doc: rawBson(t, map[string]interface{}{
 								"_id": "id3",
 							}),
+							Update: rawBson(t, map[string]interface{}{}),
 						},
 					},
 				}),
@@ -287,7 +281,7 @@ func TestParseRawOplogEntry(t *testing.T) {
 
 	for testName, test := range tests {
 		t.Run(testName, func(t *testing.T) {
-			got := (&Tailer{Denylist: &sync.Map{}}).parseRawOplogEntry(test.in, nil)
+			got := (&Tailer{Denylist: &sync.Map{}}).parseRawOplogEntry(&test.in, nil)
 
 			if diff := pretty.Compare(parseEntry(t, got), parseEntry(t, test.want)); diff != "" {
 				t.Errorf("Got incorrect result (-got +want)\n%s", diff)