From b933ef1add0b9ebb756b52a03ba5017b68c486e4 Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Mon, 15 Aug 2022 14:27:17 +0300 Subject: [PATCH] Implement jerasure algorithm of matrix generation for interoperability --- options.go | 17 +++++++++- reedsolomon.go | 83 +++++++++++++++++++++++++++++++++++++++++++++ reedsolomon_test.go | 29 ++++++++++++++++ 3 files changed, 128 insertions(+), 1 deletion(-) diff --git a/options.go b/options.go index 0a33397e..ec79ec76 100644 --- a/options.go +++ b/options.go @@ -16,6 +16,7 @@ type options struct { perRound int useAVX512, useAVX2, useSSSE3, useSSE2 bool + useJerasureMatrix bool usePAR1Matrix bool useCauchy bool fastOneParity bool @@ -163,12 +164,25 @@ func WithAVX512(enabled bool) Option { } } +// WithJerasureMatrix causes the encoder to build the Reed-Solomon-Vandermonde +// matrix in the same way as done by the Jerasure library. +// The first row and column of the coding matrix contain only 1's in this method, +// so the first parity chunk is always equal to the XOR of all data chunks. +func WithJerasureMatrix() Option { + return func(o *options) { + o.useJerasureMatrix = true + o.usePAR1Matrix = false + o.useCauchy = false + } +} + // WithPAR1Matrix causes the encoder to build the matrix how PARv1 // does. Note that the method they use is buggy, and may lead to cases // where recovery is impossible, even if there are enough parity // shards. func WithPAR1Matrix() Option { return func(o *options) { + o.useJerasureMatrix = false o.usePAR1Matrix = true o.useCauchy = false } @@ -180,8 +194,9 @@ func WithPAR1Matrix() Option { // but will result in slightly faster start-up time. 
func WithCauchyMatrix() Option { return func(o *options) { - o.useCauchy = true + o.useJerasureMatrix = false o.usePAR1Matrix = false + o.useCauchy = true } } diff --git a/reedsolomon.go b/reedsolomon.go index 3c8e03ac..4f6afb67 100644 --- a/reedsolomon.go +++ b/reedsolomon.go @@ -191,6 +191,87 @@ func buildMatrix(dataShards, totalShards int) (matrix, error) { return vm.Multiply(topInv) } +// buildMatrixJerasure creates the same encoding matrix as the Jerasure library. +// +// The top square of the matrix is guaranteed to be an identity +// matrix, which means that the data shards are unchanged after +// encoding. +func buildMatrixJerasure(dataShards, totalShards int) (matrix, error) { + // Start with a Vandermonde matrix. This matrix would work, + // in theory, but doesn't have the property that the data + // shards are unchanged after encoding. + vm, err := vandermonde(totalShards, dataShards) + if err != nil { + return nil, err + } + + // Jerasure does this: + // first row is always 100..00 + vm[0][0] = 1 + for i := 1; i < dataShards; i++ { + vm[0][i] = 0 + } + // last row is always 000..01 + for i := 0; i < dataShards-1; i++ { + vm[totalShards-1][i] = 0 + } + vm[totalShards-1][dataShards-1] = 1 + + for i := 0; i < dataShards; i++ { + // Find the row where i'th col is not 0 + r := i + for ; r < totalShards && vm[r][i] == 0; r++ { + } + if r != i { + // Swap it with i'th row if not already + t := vm[r] + vm[r] = vm[i] + vm[i] = t + } + // Multiply by the inverted matrix (same as vm.Multiply(vm[0:dataShards].Invert())) + if vm[i][i] != 1 { + // Make vm[i][i] = 1 by dividing the column by vm[i][i] + tmp := galDivide(1, vm[i][i]) + for j := 0; j < totalShards; j++ { + vm[j][i] = galMultiply(vm[j][i], tmp) + } + } + for j := 0; j < dataShards; j++ { + // Make vm[i][j] = 0 where j != i by adding vm[i][j]*vm[.][i] to each column + tmp := vm[i][j] + if j != i && tmp != 0 { + for r := 0; r < totalShards; r++ { + vm[r][j] = galAdd(vm[r][j], galMultiply(tmp, vm[r][i])) + } + } 
+ } + } + + // Make vm[dataShards] row all ones - divide each column j by vm[dataShards][j] + for j := 0; j < dataShards; j++ { + tmp := vm[dataShards][j] + if tmp != 1 { + tmp = galDivide(1, tmp) + for i := dataShards; i < totalShards; i++ { + vm[i][j] = galMultiply(vm[i][j], tmp) + } + } + } + + // Make vm[dataShards...totalShards-1][0] column all ones - divide each row + for i := dataShards + 1; i < totalShards; i++ { + tmp := vm[i][0] + if tmp != 1 { + tmp = galDivide(1, tmp) + for j := 0; j < dataShards; j++ { + vm[i][j] = galMultiply(vm[i][j], tmp) + } + } + } + + return vm, nil +} + // buildMatrixPAR1 creates the matrix to use for encoding according to // the PARv1 spec, given the number of data shards and the number of // total shards. Note that the method they use is buggy, and may lead @@ -323,6 +404,8 @@ func New(dataShards, parityShards int, opts ...Option) (Encoder, error) { r.m, err = buildMatrixCauchy(dataShards, r.Shards) case r.o.usePAR1Matrix: r.m, err = buildMatrixPAR1(dataShards, r.Shards) + case r.o.useJerasureMatrix: + r.m, err = buildMatrixJerasure(dataShards, r.Shards) default: r.m, err = buildMatrix(dataShards, r.Shards) } diff --git a/reedsolomon_test.go b/reedsolomon_test.go index 60049853..57cb0bde 100644 --- a/reedsolomon_test.go +++ b/reedsolomon_test.go @@ -110,6 +110,35 @@ func findSingularSubMatrix(m matrix) (matrix, error) { return nil, nil } +func TestBuildMatrixJerasure(t *testing.T) { + totalShards := 12 + dataShards := 8 + m, err := buildMatrixJerasure(dataShards, totalShards) + if err != nil { + t.Fatal(err) + } + refMatrix := matrix{ + {1, 1, 1, 1, 1, 1, 1, 1}, + {1, 55, 39, 73, 84, 181, 225, 217}, + {1, 39, 217, 161, 92, 60, 172, 90}, + {1, 172, 70, 235, 143, 34, 200, 101}, + } + for i := 0; i < 8; i++ { + for j := 0; j < 8; j++ { + if i != j && m[i][j] != 0 || i == j && m[i][j] != 1 { + t.Fatal("Top part of the matrix is not identity") + } + } + } + for i := 0; i < 4; i++ { + for j := 0; j < 8; j++ { + if m[8+i][j] != 
refMatrix[i][j] { + t.Fatal("Coding matrix for EC 8+4 differs from Jerasure") + } + } + } +} + func TestBuildMatrixPAR1Singular(t *testing.T) { totalShards := 8 dataShards := 4