feat(erasure_coding): introduce erasure coding for PoV Distributor (#…

…3281) Co-authored-by: Eclésio Junior <eclesiomelo.1@gmail.com> Co-authored-by: Edward Mack <emack@pop-os.localdomain>
ChainSafe · Jul 12, 2023 · 467af59 · 467af59
1 parent 2f4b165
commit 467af59
Show file tree

Hide file tree

Showing 4 changed files with 205 additions and 0 deletions.
diff --git a/go.mod b/go.mod
@@ -26,6 +26,7 @@ require (
 	github.com/ipfs/go-ds-badger2 v0.1.3
 	github.com/jpillora/ipfilter v1.2.9
 	github.com/klauspost/compress v1.16.5
+	github.com/klauspost/reedsolomon v1.11.8
 	github.com/libp2p/go-libp2p v0.27.7
 	github.com/libp2p/go-libp2p-kad-dht v0.24.2
 	github.com/minio/sha256-simd v1.0.1

diff --git a/go.sum b/go.sum
@@ -429,6 +429,8 @@ github.com/klauspost/compress v1.16.5 h1:IFV2oUNUzZaz+XyusxpLzpzS8Pt5rh0Z16For/d
 github.com/klauspost/compress v1.16.5/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
 github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg=
 github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
+github.com/klauspost/reedsolomon v1.11.8 h1:s8RpUW5TK4hjr+djiOpbZJB4ksx+TdYbRH7vHQpwPOY=
+github.com/klauspost/reedsolomon v1.11.8/go.mod h1:4bXRN+cVzMdml6ti7qLouuYi32KHJ5MGv0Qd8a47h6A=
 github.com/koron/go-ssdp v0.0.0-20191105050749-2e1c40ed0b5d/go.mod h1:5Ky9EC2xfoUKUor0Hjgi2BJhCSXJfMOFlmyYrVKGQMk=
 github.com/koron/go-ssdp v0.0.4 h1:1IDwrghSKYM7yLf7XCzbByg2sJ/JcNOZRXS2jczTwz0=
 github.com/koron/go-ssdp v0.0.4/go.mod h1:oDXq+E5IL5q0U8uSBcoAXzTzInwy5lEgC91HoKtbmZk=

diff --git a/lib/erasure/erasure.go b/lib/erasure/erasure.go
@@ -0,0 +1,70 @@
+// Copyright 2021 ChainSafe Systems (ON)
+// SPDX-License-Identifier: LGPL-3.0-only
+
+package erasure
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+
+	"github.com/klauspost/reedsolomon"
+)
+
+// ErrNotEnoughValidators is returned by recoveryThreshold when the validator
+// count is less than 2: nothing can be encoded for zero or one validator.
+var ErrNotEnoughValidators = errors.New("expected at least 2 validators")
+
+// ObtainChunks erasure-codes data for validatorsQty validators. The data is
+// split into validatorsQty data chunks, and recoveryThreshold(validatorsQty)
+// parity chunks are appended so that lost chunks can be reconstructed.
+// It returns ErrNotEnoughValidators when validatorsQty is less than 2, and
+// otherwise passes on any error reported by the reedsolomon encoder.
+func ObtainChunks(validatorsQty int, data []byte) ([][]byte, error) {
+	parityQty, err := recoveryThreshold(validatorsQty)
+	if err != nil {
+		return nil, err
+	}
+
+	encoder, err := reedsolomon.New(validatorsQty, parityQty)
+	if err != nil {
+		return nil, fmt.Errorf("creating new reed solomon failed: %w", err)
+	}
+
+	// Split yields the data chunks plus empty slots for the parity chunks.
+	chunks, err := encoder.Split(data)
+	if err != nil {
+		return nil, err
+	}
+
+	// Encode fills in the parity chunks.
+	if err := encoder.Encode(chunks); err != nil {
+		return nil, err
+	}
+
+	return chunks, nil
+}
+
+// Reconstruct the missing data from a set of chunks
+func Reconstruct(validatorsQty, originalDataLen int, chunks [][]byte) ([]byte, error) {
+	recoveryThres, err := recoveryThreshold(validatorsQty)
+	if err != nil {
+		return nil, err
+	}
+
+	enc, err := reedsolomon.New(validatorsQty, recoveryThres)
+	if err != nil {
+		return nil, err
+	}
+	err = enc.Reconstruct(chunks)
+	if err != nil {
+		return nil, err
+	}
+	buf := new(bytes.Buffer)
+	err = enc.Join(buf, chunks, originalDataLen)
+	return buf.Bytes(), err
+}
+
+// recoveryThreshold returns the maximum number of shards/chunks that may be lost
+// while the full initial data can still be reconstructed. It is computed as
+// (validators-1)/3 + 1 using integer division, and the total number of chunks
+// is validators + recoveryThreshold. ErrNotEnoughValidators is returned when
+// validators is less than 2.
+func recoveryThreshold(validators int) (int, error) {
+	if validators < 2 {
+		return 0, ErrNotEnoughValidators
+	}
+
+	return (validators-1)/3 + 1, nil
+}
diff --git a/lib/erasure/erasure_test.go b/lib/erasure/erasure_test.go
@@ -0,0 +1,132 @@
+// Copyright 2021 ChainSafe Systems (ON)
+// SPDX-License-Identifier: LGPL-3.0-only
+
+package erasure
+
+import (
+	"testing"
+
+	"github.com/klauspost/reedsolomon"
+	"github.com/stretchr/testify/assert"
+)
+
+// testData is the 36-byte payload that the tests encode and expect to recover.
+var testData = []byte("this is a test of the erasure coding")
+// expectedChunks holds the 14 chunks expected from ObtainChunks(10, testData):
+// nine 4-byte chunks carrying testData, one all-zero padding chunk, and four
+// parity chunks.
+var expectedChunks = [][]byte{{116, 104, 105, 115}, {32, 105, 115, 32}, {97, 32, 116, 101}, {115, 116, 32, 111},
+	{102, 32, 116, 104}, {101, 32, 101, 114}, {97, 115, 117, 114}, {101, 32, 99, 111}, {100, 105, 110, 103},
+	{0, 0, 0, 0}, {133, 189, 154, 178}, {88, 245, 245, 220}, {59, 208, 165, 70}, {127, 213, 208, 179}}
+
+// Copies of expectedChunks with some data chunks replaced by empty slices to
+// simulate lost chunks: indices 2 and 6 in missing2Chunks, indices 2, 4 and 6
+// in missing3Chunks, and indices 0, 1, 2, 4 and 6 in missing5Chunks.
+var missing2Chunks = [][]byte{{116, 104, 105, 115}, {32, 105, 115, 32}, {}, {115, 116, 32, 111},
+	{102, 32, 116, 104}, {101, 32, 101, 114}, {}, {101, 32, 99, 111}, {100, 105, 110, 103},
+	{0, 0, 0, 0}, {133, 189, 154, 178}, {88, 245, 245, 220}, {59, 208, 165, 70}, {127, 213, 208, 179}}
+var missing3Chunks = [][]byte{{116, 104, 105, 115}, {32, 105, 115, 32}, {}, {115, 116, 32, 111},
+	{}, {101, 32, 101, 114}, {}, {101, 32, 99, 111}, {100, 105, 110, 103}, {0, 0, 0, 0}, {133, 189, 154, 178},
+	{88, 245, 245, 220}, {59, 208, 165, 70}, {127, 213, 208, 179}}
+var missing5Chunks = [][]byte{{}, {}, {}, {115, 116, 32, 111},
+	{}, {101, 32, 101, 114}, {}, {101, 32, 99, 111}, {100, 105, 110, 103}, {0, 0, 0, 0}, {133, 189, 154, 178},
+	{88, 245, 245, 220}, {59, 208, 165, 70}, {127, 213, 208, 179}}
+
+// TestObtainChunks checks the chunks and errors returned by ObtainChunks for
+// valid data, nil data, and a validator count that is too small.
+func TestObtainChunks(t *testing.T) {
+	type args struct {
+		validatorsQty int
+		data          []byte
+	}
+	testCases := map[string]struct {
+		args          args
+		expectedValue [][]byte
+		expectedError error
+	}{
+		"happy_path": {
+			args:          args{validatorsQty: 10, data: testData},
+			expectedValue: expectedChunks,
+		},
+		"nil_data": {
+			args:          args{validatorsQty: 10, data: nil},
+			expectedError: reedsolomon.ErrShortData,
+		},
+		"not_enough_validators": {
+			args:          args{validatorsQty: 1, data: testData},
+			expectedError: ErrNotEnoughValidators,
+		},
+	}
+	for name, tc := range testCases {
+		t.Run(name, func(t *testing.T) {
+			chunks, err := ObtainChunks(tc.args.validatorsQty, tc.args.data)
+
+			if tc.expectedError == nil {
+				assert.NoError(t, err)
+				// The threshold error is ignored: this branch only covers
+				// cases where ObtainChunks is expected to accept the count.
+				parityQty, _ := recoveryThreshold(tc.args.validatorsQty)
+				assert.Equal(t, tc.args.validatorsQty+parityQty, len(chunks))
+			} else {
+				assert.EqualError(t, err, tc.expectedError.Error())
+			}
+
+			assert.Equal(t, tc.expectedValue, chunks)
+		})
+	}
+}
+
+// TestReconstruct checks the data returned by Reconstruct and the state of the
+// chunk slice afterwards, both when enough chunks are present and when they
+// are not.
+//
+// Reconstruct fills missing chunks in place, so every case runs against its
+// own deep copy of the fixture. Without the copy the outcome depends on the
+// random iteration order of the test map, because one case would observe the
+// chunks already repaired by another.
+func TestReconstruct(t *testing.T) {
+	type args struct {
+		validatorsQty int
+		chunks        [][]byte
+	}
+	tests := map[string]struct {
+		args
+		expectedData   []byte
+		expectedChunks [][]byte
+		expectedError  error
+	}{
+		"missing_2_chunks": {
+			args: args{
+				validatorsQty: 10,
+				chunks:        missing2Chunks,
+			},
+			expectedData:   testData,
+			expectedChunks: expectedChunks,
+		},
+		"missing_2_chunks,_validator_qty_3": {
+			args: args{
+				validatorsQty: 3,
+				chunks:        missing2Chunks,
+			},
+			expectedError: reedsolomon.ErrTooFewShards,
+			// Reconstruction fails, so the input must be left untouched.
+			expectedChunks: missing2Chunks,
+		},
+		"missing_3_chunks": {
+			args: args{
+				validatorsQty: 10,
+				chunks:        missing3Chunks,
+			},
+			expectedData:   testData,
+			expectedChunks: expectedChunks,
+		},
+		"missing_5_chunks": {
+			args: args{
+				validatorsQty: 10,
+				chunks:        missing5Chunks,
+			},
+			expectedChunks: missing5Chunks,
+			expectedError:  reedsolomon.ErrTooFewShards,
+		},
+	}
+
+	// cloneChunks deep-copies a fixture; empty chunks stay empty and non-nil
+	// so that they still compare equal to the fixture they were copied from.
+	cloneChunks := func(src [][]byte) [][]byte {
+		dst := make([][]byte, len(src))
+		for i, chunk := range src {
+			dst[i] = append([]byte{}, chunk...)
+		}
+		return dst
+	}
+
+	for name, tt := range tests {
+		t.Run(name, func(t *testing.T) {
+			chunks := cloneChunks(tt.args.chunks)
+
+			data, err := Reconstruct(tt.args.validatorsQty, len(testData), chunks)
+			if tt.expectedError != nil {
+				assert.EqualError(t, err, tt.expectedError.Error())
+			} else {
+				assert.NoError(t, err)
+			}
+			assert.Equal(t, tt.expectedData, data)
+			assert.Equal(t, tt.expectedChunks, chunks)
+		})
+	}
+}