From 3c04ec529fd86ce6bd5518f1fc571d53a5cec2e5 Mon Sep 17 00:00:00 2001 From: Jason Wilder Date: Fri, 1 Jul 2016 20:22:54 -0600 Subject: [PATCH] Fix panic in full compactions due to duplicate data in blocks Due to a bug in compactions, it's possible some blocks may have duplicate points stored. If those blocks are decoded and re-compacted, an assertion panic could trigger. We now dedup those blocks if necessary to remove the duplicate points and avoid the panic. --- tsdb/engine/tsm1/encoding.gen.go | 115 ++++++++++++++++++++++++++ tsdb/engine/tsm1/encoding.gen.go.tmpl | 23 ++++++ tsdb/engine/tsm1/encoding_test.go | 35 +++++++- 3 files changed, 172 insertions(+), 1 deletion(-) diff --git a/tsdb/engine/tsm1/encoding.gen.go b/tsdb/engine/tsm1/encoding.gen.go index 0b159f7fc16..eb993a8395f 100644 --- a/tsdb/engine/tsm1/encoding.gen.go +++ b/tsdb/engine/tsm1/encoding.gen.go @@ -30,6 +30,18 @@ func (a Values) Size() int { return sz } +func (a Values) ordered() bool { + if len(a) <= 1 { + return true + } + for i := 1; i < len(a); i++ { + if av, ab := a[i-1].UnixNano(), a[i].UnixNano(); av >= ab { + return false + } + } + return true +} + func (a Values) assertOrdered() { if len(a) <= 1 { return @@ -98,6 +110,17 @@ func (a Values) Merge(b Values) Values { return a } + // Normally, both a and b should not contain duplicates. Due to a bug in older versions, it's + // possible stored blocks might contain duplicate values. Remove them if they exists before + // merging. + if !a.ordered() { + a = a.Deduplicate() + } + + if !b.ordered() { + b = b.Deduplicate() + } + if a[len(a)-1].UnixNano() < b[0].UnixNano() { return append(a, b...) 
} @@ -178,6 +201,18 @@ func (a FloatValues) Size() int { return sz } +func (a FloatValues) ordered() bool { + if len(a) <= 1 { + return true + } + for i := 1; i < len(a); i++ { + if av, ab := a[i-1].UnixNano(), a[i].UnixNano(); av >= ab { + return false + } + } + return true +} + func (a FloatValues) assertOrdered() { if len(a) <= 1 { return @@ -246,6 +281,17 @@ func (a FloatValues) Merge(b FloatValues) FloatValues { return a } + // Normally, both a and b should not contain duplicates. Due to a bug in older versions, it's + // possible stored blocks might contain duplicate values. Remove them if they exists before + // merging. + if !a.ordered() { + a = a.Deduplicate() + } + + if !b.ordered() { + b = b.Deduplicate() + } + if a[len(a)-1].UnixNano() < b[0].UnixNano() { return append(a, b...) } @@ -326,6 +372,18 @@ func (a IntegerValues) Size() int { return sz } +func (a IntegerValues) ordered() bool { + if len(a) <= 1 { + return true + } + for i := 1; i < len(a); i++ { + if av, ab := a[i-1].UnixNano(), a[i].UnixNano(); av >= ab { + return false + } + } + return true +} + func (a IntegerValues) assertOrdered() { if len(a) <= 1 { return @@ -394,6 +452,17 @@ func (a IntegerValues) Merge(b IntegerValues) IntegerValues { return a } + // Normally, both a and b should not contain duplicates. Due to a bug in older versions, it's + // possible stored blocks might contain duplicate values. Remove them if they exists before + // merging. + if !a.ordered() { + a = a.Deduplicate() + } + + if !b.ordered() { + b = b.Deduplicate() + } + if a[len(a)-1].UnixNano() < b[0].UnixNano() { return append(a, b...) 
} @@ -474,6 +543,18 @@ func (a StringValues) Size() int { return sz } +func (a StringValues) ordered() bool { + if len(a) <= 1 { + return true + } + for i := 1; i < len(a); i++ { + if av, ab := a[i-1].UnixNano(), a[i].UnixNano(); av >= ab { + return false + } + } + return true +} + func (a StringValues) assertOrdered() { if len(a) <= 1 { return @@ -542,6 +623,17 @@ func (a StringValues) Merge(b StringValues) StringValues { return a } + // Normally, both a and b should not contain duplicates. Due to a bug in older versions, it's + // possible stored blocks might contain duplicate values. Remove them if they exists before + // merging. + if !a.ordered() { + a = a.Deduplicate() + } + + if !b.ordered() { + b = b.Deduplicate() + } + if a[len(a)-1].UnixNano() < b[0].UnixNano() { return append(a, b...) } @@ -622,6 +714,18 @@ func (a BooleanValues) Size() int { return sz } +func (a BooleanValues) ordered() bool { + if len(a) <= 1 { + return true + } + for i := 1; i < len(a); i++ { + if av, ab := a[i-1].UnixNano(), a[i].UnixNano(); av >= ab { + return false + } + } + return true +} + func (a BooleanValues) assertOrdered() { if len(a) <= 1 { return @@ -690,6 +794,17 @@ func (a BooleanValues) Merge(b BooleanValues) BooleanValues { return a } + // Normally, both a and b should not contain duplicates. Due to a bug in older versions, it's + // possible stored blocks might contain duplicate values. Remove them if they exists before + // merging. + if !a.ordered() { + a = a.Deduplicate() + } + + if !b.ordered() { + b = b.Deduplicate() + } + if a[len(a)-1].UnixNano() < b[0].UnixNano() { return append(a, b...) 
} diff --git a/tsdb/engine/tsm1/encoding.gen.go.tmpl b/tsdb/engine/tsm1/encoding.gen.go.tmpl index 6d5dfc3446b..58355e59097 100644 --- a/tsdb/engine/tsm1/encoding.gen.go.tmpl +++ b/tsdb/engine/tsm1/encoding.gen.go.tmpl @@ -26,6 +26,18 @@ func (a {{.Name}}Values) Size() int { return sz } +func (a {{.Name}}Values) ordered() bool { + if len(a) <= 1 { + return true + } + for i := 1; i < len(a); i++ { + if av, ab := a[i-1].UnixNano(), a[i].UnixNano(); av >= ab { + return false + } + } + return true +} + func (a {{.Name}}Values) assertOrdered() { if len(a) <= 1 { return @@ -95,6 +107,17 @@ func (a {{.Name}}Values) Merge(b {{.Name}}Values) {{.Name}}Values { return a } + // Normally, both a and b should not contain duplicates. Due to a bug in older versions, it's + // possible stored blocks might contain duplicate values. Remove them if they exists before + // merging. + if !a.ordered() { + a = a.Deduplicate() + } + + if !b.ordered() { + b = b.Deduplicate() + } + if a[len(a)-1].UnixNano() < b[0].UnixNano() { return append(a, b...) 
} diff --git a/tsdb/engine/tsm1/encoding_test.go b/tsdb/engine/tsm1/encoding_test.go index 3c63b9bf59a..e1809224f98 100644 --- a/tsdb/engine/tsm1/encoding_test.go +++ b/tsdb/engine/tsm1/encoding_test.go @@ -285,6 +285,40 @@ func TestValues_MergeFloat(t *testing.T) { tsm1.NewValue(2, 2.1), }, }, + { + a: []tsm1.Value{ + tsm1.NewValue(0, 0.0), + tsm1.NewValue(1, 1.1), + tsm1.NewValue(2, 2.1), + }, + b: []tsm1.Value{ + tsm1.NewValue(2, 2.2), + tsm1.NewValue(2, 2.2), // duplicate data + }, + exp: []tsm1.Value{ + tsm1.NewValue(0, 0.0), + tsm1.NewValue(1, 1.1), + tsm1.NewValue(2, 2.2), + }, + }, + { + a: []tsm1.Value{ + tsm1.NewValue(0, 0.0), + tsm1.NewValue(1, 1.1), + tsm1.NewValue(1, 1.1), // duplicate data + tsm1.NewValue(2, 2.1), + }, + b: []tsm1.Value{ + tsm1.NewValue(2, 2.2), + tsm1.NewValue(2, 2.2), // duplicate data + }, + exp: []tsm1.Value{ + tsm1.NewValue(0, 0.0), + tsm1.NewValue(1, 1.1), + tsm1.NewValue(2, 2.2), + }, + }, + { a: []tsm1.Value{ tsm1.NewValue(1, 1.1), @@ -454,7 +488,6 @@ func TestValues_MergeFloat(t *testing.T) { for i, test := range tests { got := tsm1.Values(test.a).Merge(test.b) - spew.Dump(got) if exp, got := len(test.exp), len(got); exp != got { t.Fatalf("test(%d): value length mismatch: exp %v, got %v", i, exp, got)