From 5cfd062e4b91a5a9c4bed5d76ffe64f1ec3c3954 Mon Sep 17 00:00:00 2001 From: Deaa Sarsour Date: Sat, 27 Jan 2024 13:06:04 +0400 Subject: [PATCH 1/2] feat : refactor mock filesystem + another parts feat : create sstable builder --- dbms/dbms_test.go | 4 +- dbms/memtable.go | 2 +- dbms/memtable_test.go | 4 +- memtable/memtable.go | 30 ++++- .../memtable_test.go} | 45 +------ sstable/builder.go | 118 ++++++++++++++++++ sstable/sstable.go | 16 +++ sstable/sstable_test.go | 29 +++++ storage/metadata_test.go | 8 +- test/data/memtable/basic_1.log | 10 +- test/data/memtable/memtable_to_sstable_1.log | 5 + test/data/sstable/sstable_1.sst | 7 ++ test/util/mockfilesystem/builder.go | 36 ++++++ .../util/mockfilesystem}/directory_mock.go | 9 +- .../util/mockfilesystem}/file_mock.go | 13 +- .../mockfilesystem/testutilfilesystem_test.go | 10 +- test/util/mockmemtable/builder.go | 32 +++++ test/util/{ => testdatafile}/testdata.go | 4 +- util/keyvalue.go | 4 +- 19 files changed, 298 insertions(+), 88 deletions(-) rename memtable/{memtable_rw_test.go => test/memtable_test.go} (51%) create mode 100644 sstable/builder.go create mode 100644 sstable/sstable.go create mode 100644 sstable/sstable_test.go create mode 100644 test/data/memtable/memtable_to_sstable_1.log create mode 100644 test/data/sstable/sstable_1.sst create mode 100644 test/util/mockfilesystem/builder.go rename {filesystem/mock => test/util/mockfilesystem}/directory_mock.go (90%) rename {filesystem/mock => test/util/mockfilesystem}/file_mock.go (54%) rename filesystem/mock/filesystem_mock_test.go => test/util/mockfilesystem/testutilfilesystem_test.go (85%) create mode 100644 test/util/mockmemtable/builder.go rename test/util/{ => testdatafile}/testdata.go (77%) diff --git a/dbms/dbms_test.go b/dbms/dbms_test.go index 1658b7b..e54f7d1 100644 --- a/dbms/dbms_test.go +++ b/dbms/dbms_test.go @@ -1,9 +1,9 @@ package dbms import ( - filesystem "sstable/filesystem" - mockfilesystem "sstable/filesystem/mock" + 
"sstable/filesystem" "sstable/storage" + "sstable/test/util/mockfilesystem" ) func createDummyDbmsWithDirectory(rootDirectory filesystem.DirectoryOperation, metadata *DatabaseMetadata) *DatabaseManagementSystem { diff --git a/dbms/memtable.go b/dbms/memtable.go index de33e97..5ad3457 100644 --- a/dbms/memtable.go +++ b/dbms/memtable.go @@ -13,7 +13,7 @@ func generateMemtableName() string { func (dbms *DatabaseManagementSystem) loadMemtableFromFile(memtableFile filesystem.FileOperation) error { memtable := memtable.NewMemoryTable(memtableFile) if err := memtable.LoadMemoryTable(); err == nil { - dbms.memoryTable.Store(&memtable) + dbms.memoryTable.Store(memtable) return nil } else { return err diff --git a/dbms/memtable_test.go b/dbms/memtable_test.go index c93d2fc..73a423f 100644 --- a/dbms/memtable_test.go +++ b/dbms/memtable_test.go @@ -1,7 +1,7 @@ package dbms import ( - filesystem "sstable/filesystem/mock" + "sstable/test/util/mockfilesystem" "testing" "github.com/stretchr/testify/assert" @@ -22,7 +22,7 @@ func TestMemtableEmpty(t *testing.T) { func TestMemtableLoad(t *testing.T) { //arrange - rootDirectory := filesystem.NewDummyDirectory() + rootDirectory := mockfilesystem.NewDummyDirectory() key, value := "k", "v" //act diff --git a/memtable/memtable.go b/memtable/memtable.go index 462e42c..ab2c306 100644 --- a/memtable/memtable.go +++ b/memtable/memtable.go @@ -7,14 +7,26 @@ import ( "strings" ) +type MemoryTableIO interface { + Read(key string) any + Write(key string, value any) error +} + +type MemoryTableLowLevel interface { + GetRecords() map[string]any + IsLoaded() bool + LoadMemoryTable() error +} + type MemoryTable struct { - file filesystem.FileOperation - records map[string]any + file filesystem.FileOperation + records map[string]any + isLoaded bool } -func NewMemoryTable(file filesystem.FileOperation) MemoryTable { +func NewMemoryTable(file filesystem.FileOperation) *MemoryTable { records := make(map[string]any) - return MemoryTable{file: file, 
records: records} + return &MemoryTable{file: file, records: records} } func (memtable *MemoryTable) Read(key string) any { @@ -55,9 +67,18 @@ func (memtable *MemoryTable) LoadMemoryTable() error { content := string(bytes) memtable.enrichRecordsFromContent(content) + memtable.isLoaded = true return nil } +func (memtable *MemoryTable) IsLoaded() bool { + return memtable.isLoaded +} + +func (memtable *MemoryTable) GetRecords() map[string]any { + return memtable.records +} + func (memtable *MemoryTable) enrichRecordsFromContent(content string) { lines := strings.Split(content, "\n") for _, line := range lines { @@ -67,5 +88,4 @@ func (memtable *MemoryTable) enrichRecordsFromContent(content string) { memtable.records[keyValue.Key] = keyValue.Value } } - } diff --git a/memtable/memtable_rw_test.go b/memtable/test/memtable_test.go similarity index 51% rename from memtable/memtable_rw_test.go rename to memtable/test/memtable_test.go index f8b698a..9276be4 100644 --- a/memtable/memtable_rw_test.go +++ b/memtable/test/memtable_test.go @@ -1,11 +1,9 @@ -package memtable +package testmemtable import ( "encoding/json" - "path" - filesystem "sstable/filesystem" - mockfilesystem "sstable/filesystem/mock" + "sstable/test/util/mockmemtable" "sstable/util" "strings" "testing" @@ -13,39 +11,8 @@ import ( "github.com/stretchr/testify/assert" ) -const MEMTABLE_TEST_DATA_FOLDER = "memtable" const BASIC_TEST_DATA = "basic_1.log" -func createDummyFile(dataFileName string) filesystem.FileOperation { - fullPath := path.Join(MEMTABLE_TEST_DATA_FOLDER, dataFileName) - var fileOperation filesystem.FileOperation = mockfilesystem.NewDummyFileFromAnotherFile(fullPath) - return fileOperation -} - -func createEmptyFile() filesystem.FileOperation { - var fileOperation filesystem.FileOperation = mockfilesystem.NewDummyFile("") - return fileOperation -} - -func createDummyMemoryTable(dataFileName string) MemoryTable { - dummyFile := createDummyFile(dataFileName) - memtableInstance := 
NewMemoryTable(dummyFile) - return memtableInstance -} - -func createReadyBasicDummyMemoryTable() MemoryTable { - memoryTable := createDummyMemoryTable(BASIC_TEST_DATA) - memoryTable.LoadMemoryTable() - return memoryTable -} - -func createReadyEmptyDummyMemoryTable() (MemoryTable, filesystem.FileOperation) { - fileOperation := createEmptyFile() - memoryTable := NewMemoryTable(fileOperation) - memoryTable.LoadMemoryTable() - return memoryTable, fileOperation -} - func getKeyValueJson(key string, value any) string { keyValue := util.KeyValueObject{Key: key, Value: value} @@ -63,7 +30,7 @@ func TestMemtableReadInt(t *testing.T) { expectValue := 14 //act - sut := createReadyBasicDummyMemoryTable() + sut := mockmemtable.NewReadyBasicMemtable() result := sut.Read("score#deea") //assert @@ -77,7 +44,7 @@ func TestMemtableReadString(t *testing.T) { expectValue := "deeax99" //act - sut := createReadyBasicDummyMemoryTable() + sut := mockmemtable.NewReadyBasicMemtable() result := sut.Read("nickname#deea") //assert @@ -95,7 +62,7 @@ func TestMemtableReadObject(t *testing.T) { } //act - sut := createReadyBasicDummyMemoryTable() + sut := mockmemtable.NewReadyBasicMemtable() result := sut.Read("profile#deea") //assert @@ -104,7 +71,7 @@ func TestMemtableReadObject(t *testing.T) { func TestMemtableWrite(t *testing.T) { //arrange - memoryTable, dummyFile := createReadyEmptyDummyMemoryTable() + memoryTable, dummyFile := mockmemtable.NewReadyEmptyMemtable() key := "user_score#deeax99" value := 10.0 expectedJson := getKeyValueJson(key, value) diff --git a/sstable/builder.go b/sstable/builder.go new file mode 100644 index 0000000..a27ef1b --- /dev/null +++ b/sstable/builder.go @@ -0,0 +1,118 @@ +package sstable + +import ( + "encoding/json" + "sort" + "sstable/filesystem" + "sstable/memtable" +) + +const SEALED = "SEALED!!!" 
+ +func NewSSTable(file filesystem.FileOperation) *SSTable { + return &SSTable{file: file} +} + +func FlushSSTable(memtable memtable.MemoryTableLowLevel, file filesystem.FileOperation) (*SSTable, error) { + + if !memtable.IsLoaded() { + if err := memtable.LoadMemoryTable(); err != nil { + return nil, err + } + } + + records := memtable.GetRecords() + + if err := flushSSTableMetadata(records, file); err != nil { + return nil, err + } + + if err := flushMemtableRecords(records, file); err != nil { + return nil, err + } + + if err := sealSSTableFile(file); err != nil { + return nil, err + } + + return NewSSTable(file), nil +} + +func sealSSTableFile(file filesystem.FileOperation) error { + return file.AppendBytes([]byte(SEALED)) +} + +func flushSSTableMetadata(records map[string]any, file filesystem.FileOperation) error { + metadata := sstableMetadata{ + RowCount: len(records), + } + if serialized, err := json.Marshal(metadata); err == nil { + if err := file.AppendBytes(serialized); err != nil { + return err + } + if err := file.AppendBytes([]byte("\n")); err != nil { + return err + } + return nil + } else { + return nil + } +} + +func flushMemtableRecords(records map[string]any, file filesystem.FileOperation) error { + sortedRecords := getRecordsSorted(records) + + if serializedRecords, err := serializeSSTableRecords(sortedRecords); err == nil { + if err := flushSerializedRecords(serializedRecords, file); err != nil { + return err + } + } else { + return err + } + + return nil +} + +func flushSerializedRecords(records [][]byte, file filesystem.FileOperation) error { + for _, serializedRecord := range records { + if err := file.AppendBytes(serializedRecord); err != nil { + return err + } + if err := file.AppendBytes([]byte("\n")); err != nil { + return err + } + } + return nil +} + +func getRecordsSorted(records map[string]any) []*sstableRecord { + results := make([]*sstableRecord, len(records)) + + index := 0 + for k, v := range records { + results[index] = 
&sstableRecord{ + Key: k, + Value: v, + } + index++ + } + + sort.Slice(results, func(i, j int) bool { + return results[i].Key < results[j].Key + }) + + return results +} + +func serializeSSTableRecords(records []*sstableRecord) ([][]byte, error) { + results := make([][]byte, len(records)) + for i, record := range records { + if serialized, err := json.Marshal(record); err == nil { + results[i] = serialized + } else { + return nil, err + } + } + + return results, nil +} diff --git a/sstable/sstable.go b/sstable/sstable.go new file mode 100644 index 0000000..1c97d81 --- /dev/null +++ b/sstable/sstable.go @@ -0,0 +1,16 @@ +package sstable + +import "sstable/filesystem" + +type sstableMetadata struct { + RowCount int `json:"row_count"` +} + +type sstableRecord struct { + Key string `json:"key"` + Value any `json:"value"` +} + +type SSTable struct { + file filesystem.FileOperation +} diff --git a/sstable/sstable_test.go b/sstable/sstable_test.go new file mode 100644 index 0000000..583e3c0 --- /dev/null +++ b/sstable/sstable_test.go @@ -0,0 +1,29 @@ +package sstable + +import ( + "sstable/test/util/mockfilesystem" + "sstable/test/util/mockmemtable" + "sstable/test/util/testdatafile" + "testing" + + "github.com/stretchr/testify/assert" +) + +const MEMTABLE_FILE_NAME string = "memtable_to_sstable_1.log" + +func TestCreateSSTable(t *testing.T) { + //arrange + expectedContent := testdatafile.ReadTestData("sstable/sstable_1.sst") + mockmemtable.NewReadyMemtable(MEMTABLE_FILE_NAME) + memtable := mockmemtable.NewReadyMemtable(MEMTABLE_FILE_NAME) + sstableFile := mockfilesystem.NewEmptyFile() + + //act + _, err := FlushSSTable(memtable, sstableFile) + bytes, _ := sstableFile.ReadAll() + actualContent := string(bytes) + + //assert + assert.Nil(t, err) + assert.Equal(t, expectedContent, actualContent) +} diff --git a/storage/metadata_test.go b/storage/metadata_test.go index 2434140..66cf998 100644 --- a/storage/metadata_test.go +++ b/storage/metadata_test.go @@ -2,7 +2,7 @@ 
package storage import ( "encoding/json" - filesystemmock "sstable/filesystem/mock" + "sstable/test/util/mockfilesystem" "testing" "github.com/stretchr/testify/assert" @@ -15,7 +15,7 @@ type metadata struct { func TestMetadata(t *testing.T) { //arrange - rootDirectory := filesystemmock.NewDummyDirectory() + rootDirectory := mockfilesystem.NewDummyDirectory() storageState, _ := NewStorageState(rootDirectory) metadataInstance := metadata{ MemtableName: "test_123", @@ -36,7 +36,7 @@ func TestMetadata(t *testing.T) { func TestMetadataInitializer(t *testing.T) { //arrange - rootDirectory := filesystemmock.NewDummyDirectory() + rootDirectory := mockfilesystem.NewDummyDirectory() storageState, _ := NewStorageState(rootDirectory) metadataInstance := metadata{ MemtableName: "test_123", @@ -57,7 +57,7 @@ func TestMetadataInitializer(t *testing.T) { func TestMetadataEmptry(t *testing.T) { //arrange - rootDirectory := filesystemmock.NewDummyDirectory() + rootDirectory := mockfilesystem.NewDummyDirectory() storageState, _ := NewStorageState(rootDirectory) //act diff --git a/test/data/memtable/basic_1.log b/test/data/memtable/basic_1.log index a6b0b29..303d477 100644 --- a/test/data/memtable/basic_1.log +++ b/test/data/memtable/basic_1.log @@ -1,6 +1,6 @@ -{ "Key" : "score#deea" , "Value" : 0} -{ "Key" : "score#player" , "Value" : 3} -{ "Key" : "score#deea" , "Value" : 14} -{ "Key" : "profile#deea" , "Value" : {"name" : "deea" , "user_id" : 12 , "metadata" : {"last_login" : 7}} } -{ "Key" : "nickname#deea" , "Value" : "deeax99" } +{ "key" : "score#deea" , "value" : 0} +{ "key" : "score#player" , "value" : 3} +{ "key" : "score#deea" , "value" : 14} +{ "key" : "profile#deea" , "value" : {"name" : "deea" , "user_id" : 12 , "metadata" : {"last_login" : 7}} } +{ "key" : "nickname#deea" , "value" : "deeax99" } diff --git a/test/data/memtable/memtable_to_sstable_1.log b/test/data/memtable/memtable_to_sstable_1.log new file mode 100644 index 0000000..541df2f --- /dev/null +++ 
b/test/data/memtable/memtable_to_sstable_1.log @@ -0,0 +1,5 @@ +{ "key" : "score#c" , "value" : 9} +{ "key" : "score#a" , "value" : 11} +{ "key" : "score#b" , "value" : 6} +{ "key" : "score#e" , "value" : 4} +{ "key" : "score#d" , "value" : 2} \ No newline at end of file diff --git a/test/data/sstable/sstable_1.sst b/test/data/sstable/sstable_1.sst new file mode 100644 index 0000000..90fd747 --- /dev/null +++ b/test/data/sstable/sstable_1.sst @@ -0,0 +1,7 @@ +{"row_count":5} +{"key":"score#a","value":11} +{"key":"score#b","value":6} +{"key":"score#c","value":9} +{"key":"score#d","value":2} +{"key":"score#e","value":4} +SEALED!!! \ No newline at end of file diff --git a/test/util/mockfilesystem/builder.go b/test/util/mockfilesystem/builder.go new file mode 100644 index 0000000..88d6f48 --- /dev/null +++ b/test/util/mockfilesystem/builder.go @@ -0,0 +1,36 @@ +package mockfilesystem + +import ( + "path" + "sstable/filesystem" + "sstable/test/util/testdatafile" +) + +const MEMTABLE_TEST_DATA_FOLDER = "memtable" + +func NewDummyFile(content string) *DummyFile { + return &DummyFile{content: content} +} + +func NewDummyFileFromAnotherFile(filePaths string) *DummyFile { + content := testdatafile.ReadTestData(filePaths) + return &DummyFile{content: content} +} + +func NewDummyDirectory() *DummyDirectory { + return &DummyDirectory{ + subDirectories: make(map[string]*DummyDirectory), + files: make(map[string]filesystem.FileOperation), + } +} + +func NewDummyFileFromMemtableFolder(dataFileName string) filesystem.FileOperation { + fullPath := path.Join(MEMTABLE_TEST_DATA_FOLDER, dataFileName) + var fileOperation filesystem.FileOperation = NewDummyFileFromAnotherFile(fullPath) + return fileOperation +} + +func NewEmptyFile() filesystem.FileOperation { + var fileOperation filesystem.FileOperation = NewDummyFile("") + return fileOperation +} diff --git a/filesystem/mock/directory_mock.go b/test/util/mockfilesystem/directory_mock.go similarity index 90% rename from 
filesystem/mock/directory_mock.go rename to test/util/mockfilesystem/directory_mock.go index cd665ff..e262272 100644 --- a/filesystem/mock/directory_mock.go +++ b/test/util/mockfilesystem/directory_mock.go @@ -1,4 +1,4 @@ -package filesystem +package mockfilesystem import ( "errors" @@ -10,13 +10,6 @@ type DummyDirectory struct { files map[string]filesystem.FileOperation } -func NewDummyDirectory() *DummyDirectory { - return &DummyDirectory{ - subDirectories: make(map[string]*DummyDirectory), - files: make(map[string]filesystem.FileOperation), - } -} - func (directory DummyDirectory) GetFiles() ([]string, error) { filesName := make([]string, len(directory.files)) diff --git a/filesystem/mock/file_mock.go b/test/util/mockfilesystem/file_mock.go similarity index 54% rename from filesystem/mock/file_mock.go rename to test/util/mockfilesystem/file_mock.go index 870ad3c..f21e3a2 100644 --- a/filesystem/mock/file_mock.go +++ b/test/util/mockfilesystem/file_mock.go @@ -1,20 +1,9 @@ -package filesystem - -import testdata "sstable/test/util" +package mockfilesystem type DummyFile struct { content string } -func NewDummyFile(content string) *DummyFile { - return &DummyFile{content: content} -} - -func NewDummyFileFromAnotherFile(filePaths string) *DummyFile { - content := testdata.ReadTestData(filePaths) - return &DummyFile{content: content} -} - func (file *DummyFile) Open() error { return nil } diff --git a/filesystem/mock/filesystem_mock_test.go b/test/util/mockfilesystem/testutilfilesystem_test.go similarity index 85% rename from filesystem/mock/filesystem_mock_test.go rename to test/util/mockfilesystem/testutilfilesystem_test.go index 45c2bab..31a33df 100644 --- a/filesystem/mock/filesystem_mock_test.go +++ b/test/util/mockfilesystem/testutilfilesystem_test.go @@ -1,8 +1,6 @@ -package filesystem_test +package mockfilesystem import ( - mockfilesystem "sstable/filesystem/mock" - "testing" "github.com/stretchr/testify/assert" @@ -14,7 +12,7 @@ func TestMockBasic(t 
*testing.T) { file2ExpectedContent := []byte("file 2 :)") //act - root := mockfilesystem.NewDummyDirectory() + root := NewDummyDirectory() root.CreateFile("file1.txt", file1ExpectedContent) root.CreateFile("file2.txt", file2ExpectedContent) @@ -38,7 +36,7 @@ func TestMockDeletion(t *testing.T) { file1ExpectedContent := []byte("file 1 :)") //act - root := mockfilesystem.NewDummyDirectory() + root := NewDummyDirectory() root.CreateFile("file1.txt", file1ExpectedContent) root.DeleteFile("file1.txt") @@ -58,7 +56,7 @@ func TestAppend(t *testing.T) { expectedContent := []byte("Hello World!") //act - file := mockfilesystem.NewDummyFile(string(part1Content)) + file := NewDummyFile(string(part1Content)) file.AppendBytes(part2Content) actualContent, _ := file.ReadAll() diff --git a/test/util/mockmemtable/builder.go b/test/util/mockmemtable/builder.go new file mode 100644 index 0000000..f9309ef --- /dev/null +++ b/test/util/mockmemtable/builder.go @@ -0,0 +1,32 @@ +package mockmemtable + +import ( + "sstable/filesystem" + "sstable/memtable" + "sstable/test/util/mockfilesystem" +) + +const BASIC_TEST_DATA = "basic_1.log" + +func NewMemtable(dataFileName string) *memtable.MemoryTable { + dummyFile := mockfilesystem.NewDummyFileFromMemtableFolder(dataFileName) + memtableInstance := memtable.NewMemoryTable(dummyFile) + return memtableInstance +} + +func NewReadyMemtable(dataFileName string) *memtable.MemoryTable { + memoryTable := NewMemtable(dataFileName) + memoryTable.LoadMemoryTable() + return memoryTable +} + +func NewReadyBasicMemtable() *memtable.MemoryTable { + return NewReadyMemtable(BASIC_TEST_DATA) +} + +func NewReadyEmptyMemtable() (*memtable.MemoryTable, filesystem.FileOperation) { + fileOperation := mockfilesystem.NewEmptyFile() + memoryTable := memtable.NewMemoryTable(fileOperation) + memoryTable.LoadMemoryTable() + return memoryTable, fileOperation +} diff --git a/test/util/testdata.go b/test/util/testdatafile/testdata.go similarity index 77% rename from 
test/util/testdata.go rename to test/util/testdatafile/testdata.go index c2e9b71..56b4895 100644 --- a/test/util/testdata.go +++ b/test/util/testdatafile/testdata.go @@ -1,4 +1,4 @@ -package testdata +package testdatafile import ( "os" @@ -11,7 +11,7 @@ const TEST_DATA_ROOT = "/test/data" func ReadTestData(filePath string) string { _, b, _, _ := runtime.Caller(0) - basepath := filepath.Dir(filepath.Dir(filepath.Dir(b))) + basepath := filepath.Dir(filepath.Dir(filepath.Dir(filepath.Dir(b)))) fullPath := path.Join(basepath, TEST_DATA_ROOT, filePath) diff --git a/util/keyvalue.go b/util/keyvalue.go index da20a0f..a47ddf3 100644 --- a/util/keyvalue.go +++ b/util/keyvalue.go @@ -1,6 +1,6 @@ package util type KeyValueObject struct { - Key string - Value any + Key string `json:"key"` + Value any `json:"value"` } From c960353620bf0056a9378ee0250c461f3c1f8cea Mon Sep 17 00:00:00 2001 From: Deaa Sarsour Date: Sun, 28 Jan 2024 12:59:50 +0400 Subject: [PATCH 2/2] feat : add sstable reader --- filesystem/file.go | 2 + sstable/builder.go | 51 ++++---- sstable/reader.go | 113 ++++++++++++++++++ sstable/sstable.go | 3 +- sstable/sstable_test.go | 29 ----- sstable/test/sstable_test.go | 51 ++++++++ test/data/memtable/memtable_to_sstable_1.log | 2 +- test/data/sstable/sstable_1.sst | 4 +- test/util/mockfilesystem/builder.go | 14 +-- test/util/mockfilesystem/file_mock.go | 30 ++++- .../mockfilesystem/testutilfilesystem_test.go | 88 ++++++++++++++ test/util/mocksstable/builder.go | 13 ++ test/util/testdatafile/testdata.go | 12 ++ util/slice.go | 8 ++ 14 files changed, 351 insertions(+), 69 deletions(-) create mode 100644 sstable/reader.go delete mode 100644 sstable/sstable_test.go create mode 100644 sstable/test/sstable_test.go create mode 100644 test/util/mocksstable/builder.go diff --git a/filesystem/file.go b/filesystem/file.go index 3be2a02..375dacc 100644 --- a/filesystem/file.go +++ b/filesystem/file.go @@ -5,4 +5,6 @@ type FileOperation interface { Close() error 
AppendBytes(bytes []byte) error ReadAll() ([]byte, error) + ReadAt(bytes []byte, offset int) (int, error) + Size() (int, error) } diff --git a/sstable/builder.go b/sstable/builder.go index a27ef1b..f7ee138 100644 --- a/sstable/builder.go +++ b/sstable/builder.go @@ -15,19 +15,23 @@ func NewSSTable(file filesystem.FileOperation) *SSTable { func FlushSSTable(memtable memtable.MemoryTableLowLevel, file filesystem.FileOperation) (*SSTable, error) { - if !memtable.IsLoaded() { - if err := memtable.LoadMemoryTable(); err != nil { - return nil, err - } + if err := loadMemtableIfNeeded(memtable); err != nil { + return nil, err } records := memtable.GetRecords() + sortedRecords := getRecordsSorted(records) + serializedRecords, err := serializeSSTableRecords(sortedRecords) + + if err != nil { + return nil, err + } - if err := flushSSTableMetadata(records, file); err != nil { + if err := flushSSTableMetadata(serializedRecords, file); err != nil { return nil, err } - if err := flushMemtableRecords(records, file); err != nil { + if err := flushSerializedRecords(serializedRecords, file); err != nil { return nil, err } @@ -38,14 +42,31 @@ func FlushSSTable(memtable memtable.MemoryTableLowLevel, file filesystem.FileOpe return NewSSTable(file), nil } +func loadMemtableIfNeeded(memtable memtable.MemoryTableLowLevel) error { + if !memtable.IsLoaded() { + if err := memtable.LoadMemoryTable(); err != nil { + return err + } + } + return nil +} + func sealSSTableFile(file filesystem.FileOperation) error { return file.AppendBytes([]byte(SEALED)) } -func flushSSTableMetadata(records map[string]any, file filesystem.FileOperation) error { +func flushSSTableMetadata(records [][]byte, file filesystem.FileOperation) error { + recordsCount := len(records) + metadata := sstableMetadata{ - RowCount: len(records), + RowCount: recordsCount, + RowOffsets: make([]int, recordsCount), + } + + for i := range records { + metadata.RowOffsets[i] = len(records[i]) } + if serialized, err := 
json.Marshal(metadata); err == nil { if err := file.AppendBytes(serialized); err != nil { return err @@ -59,20 +80,6 @@ func flushSSTableMetadata(records map[string]any, file filesystem.FileOperation) } } -func flushMemtableRecords(records map[string]any, file filesystem.FileOperation) error { - sortedRecords := getRecordsSorted(records) - - if serializedRecords, err := serializeSSTableRecords(sortedRecords); err == nil { - if err := flushSerializedRecords(serializedRecords, file); err != nil { - return err - } - } else { - return err - } - - return nil -} - func flushSerializedRecords(records [][]byte, file filesystem.FileOperation) error { for _, serializedRecord := range records { if err := file.AppendBytes(serializedRecord); err != nil { diff --git a/sstable/reader.go b/sstable/reader.go new file mode 100644 index 0000000..027a53f --- /dev/null +++ b/sstable/reader.go @@ -0,0 +1,113 @@ +package sstable + +import ( + "encoding/json" + "errors" + "io" + "sstable/filesystem" +) + +func GetEndOfLineIndex(arr []byte) int { + for i := range arr { + if arr[i] == '\n' { + return i + } + } + + return -1 +} + +func readReadLine(rowStartOffset int, file filesystem.FileOperation) ([]byte, error) { + + rowContent := make([]byte, 0) + dest := make([]byte, 1024) + + currentOffset := rowStartOffset + for { + receivedBytes, err := file.ReadAt(dest, currentOffset) + + if err != nil && err != io.EOF { + return nil, err + } + + if receivedBytes == 0 { + return nil, errors.New("cant find EOL") + } + eofIndex := GetEndOfLineIndex(dest) + + if eofIndex != -1 { + rowContent = append(rowContent, dest[:eofIndex]...) + break + } else { + rowContent = append(rowContent, dest...) 
+ } + + currentOffset += receivedBytes + } + + return rowContent, nil +} + +func (sstable *SSTable) readMetadata() (*sstableMetadata, int, error) { + if metadataBytes, err := readReadLine(0, sstable.file); err == nil { + var metadata *sstableMetadata + json.Unmarshal(metadataBytes, &metadata) + return metadata, len(metadataBytes) + 1, nil + } else { + return nil, 0, err + } +} + +func getRowsOffset(metadata *sstableMetadata, firstRecordsOffset int) []int { + rawsCount := metadata.RowCount + offsets := make([]int, rawsCount) + currentOffset := firstRecordsOffset + for i := 0; i < rawsCount; i++ { + offsets[i] = currentOffset + currentOffset += metadata.RowOffsets[i] + 1 + } + + return offsets +} + +func (sstable *SSTable) searchFileBinarySearch(key string, metadata *sstableMetadata, firstRecordsOffset int) (any, error) { + rowsOffset := getRowsOffset(metadata, firstRecordsOffset) + file := sstable.file + left := 0 + right := metadata.RowCount - 1 + for right >= left { + mid := (left + right) / 2 + dest := make([]byte, metadata.RowOffsets[mid]) + rowBytesLen, err := file.ReadAt(dest, rowsOffset[mid]) + if err != nil { + return nil, err + } + if rowBytesLen != len(dest) { + return nil, errors.New("row isn't complete") + } + + var row sstableRecord + err = json.Unmarshal(dest, &row) + if err != nil { + return nil, err + } + + if row.Key == key { + return row.Value, nil + } else if row.Key < key { + left = mid + 1 + } else { + right = mid - 1 + } + } + + return nil, nil +} + +func (sstable *SSTable) Read(key string) (any, error) { + if metadata, firstRecordsOffset, err := sstable.readMetadata(); err == nil { + return sstable.searchFileBinarySearch(key, metadata, firstRecordsOffset) + } else { + return nil, err + } +} diff --git a/sstable/sstable.go b/sstable/sstable.go index 1c97d81..7d1b8d3 100644 --- a/sstable/sstable.go +++ b/sstable/sstable.go @@ -3,7 +3,8 @@ package sstable import "sstable/filesystem" type sstableMetadata struct { - RowCount int `json:"row_count"` 
+ RowCount int `json:"row_count"` + RowOffsets []int `json:"row_offsets"` } type sstableRecord struct { diff --git a/sstable/sstable_test.go b/sstable/sstable_test.go deleted file mode 100644 index 583e3c0..0000000 --- a/sstable/sstable_test.go +++ /dev/null @@ -1,29 +0,0 @@ -package sstable - -import ( - "sstable/test/util/mockfilesystem" - "sstable/test/util/mockmemtable" - "sstable/test/util/testdatafile" - "testing" - - "github.com/stretchr/testify/assert" -) - -const MEMTABLE_FILE_NAME string = "memtable_to_sstable_1.log" - -func TestCreateSSTable(t *testing.T) { - //arrange - expectedContent := testdatafile.ReadTestData("sstable/sstable_1.sst") - mockmemtable.NewReadyMemtable(MEMTABLE_FILE_NAME) - memtable := mockmemtable.NewReadyMemtable(MEMTABLE_FILE_NAME) - sstableFile := mockfilesystem.NewEmptyFile() - - //act - _, err := FlushSSTable(memtable, sstableFile) - bytes, _ := sstableFile.ReadAll() - actualContent := string(bytes) - - //assert - assert.Nil(t, err) - assert.Equal(t, expectedContent, actualContent) -} diff --git a/sstable/test/sstable_test.go b/sstable/test/sstable_test.go new file mode 100644 index 0000000..1b6308b --- /dev/null +++ b/sstable/test/sstable_test.go @@ -0,0 +1,51 @@ +package testsstable + +import ( + "sstable/sstable" + "sstable/test/util/mockfilesystem" + "sstable/test/util/mockmemtable" + "sstable/test/util/mocksstable" + "sstable/test/util/testdatafile" + "testing" + + "github.com/stretchr/testify/assert" +) + +const MEMTABLE_FILE_NAME = "memtable_to_sstable_1.log" +const SSTABLE_FILE_NAME = "sstable_1.sst" + +func TestCreateSSTable(t *testing.T) { + //arrange + expectedContent := testdatafile.ReadSSTableData(SSTABLE_FILE_NAME) + memtable := mockmemtable.NewReadyMemtable(MEMTABLE_FILE_NAME) + sstableFile := mockfilesystem.NewEmptyFile() + + //act + _, err := sstable.FlushSSTable(memtable, sstableFile) + bytes, _ := sstableFile.ReadAll() + actualContent := string(bytes) + + //assert + assert.Nil(t, err) + assert.Equal(t, 
expectedContent, actualContent) +} + +func TestReadSSTable(t *testing.T) { + //arrange + sstable := mocksstable.NewSSTable(SSTABLE_FILE_NAME) + fixtures := map[string]any{ + "score#a": 11.0, + "score#c": 9.0, + "score#e": nil, + "score#f": 4.0, + "score#h": nil, + } + + for key, expectedValue := range fixtures { + //act + actual, err := sstable.Read(key) + //assert + assert.Equal(t, expectedValue, actual) + assert.Nil(t, err) + } +} diff --git a/test/data/memtable/memtable_to_sstable_1.log b/test/data/memtable/memtable_to_sstable_1.log index 541df2f..de869bd 100644 --- a/test/data/memtable/memtable_to_sstable_1.log +++ b/test/data/memtable/memtable_to_sstable_1.log @@ -1,5 +1,5 @@ { "key" : "score#c" , "value" : 9} { "key" : "score#a" , "value" : 11} { "key" : "score#b" , "value" : 6} -{ "key" : "score#e" , "value" : 4} +{ "key" : "score#f" , "value" : 4} { "key" : "score#d" , "value" : 2} \ No newline at end of file diff --git a/test/data/sstable/sstable_1.sst b/test/data/sstable/sstable_1.sst index 90fd747..b3113e7 100644 --- a/test/data/sstable/sstable_1.sst +++ b/test/data/sstable/sstable_1.sst @@ -1,7 +1,7 @@ -{"row_count":5} +{"row_count":5,"row_offsets":[28,27,27,27,27]} {"key":"score#a","value":11} {"key":"score#b","value":6} {"key":"score#c","value":9} {"key":"score#d","value":2} -{"key":"score#e","value":4} +{"key":"score#f","value":4} SEALED!!! 
// dummyFileError is a string-backed error type. It lets this mock declare
// constant sentinel errors without importing any additional package.
type dummyFileError string

// Error implements the error interface.
func (e dummyFileError) Error() string { return string(e) }

// errNegativeOffset is returned by ReadAt when the requested offset is negative.
const errNegativeOffset = dummyFileError("mockfilesystem: negative offset")

// DummyFile is an in-memory file used by tests wherever a
// filesystem.FileOperation is expected. Its whole state is a byte slice.
type DummyFile struct {
	content []byte
}

// NewDummyFile returns a DummyFile whose initial content is the bytes of content.
func NewDummyFile(content string) *DummyFile {
	return &DummyFile{content: []byte(content)}
}

// AppendBytes appends bytes to the end of the file content. It always returns nil.
func (file *DummyFile) AppendBytes(bytes []byte) error {
	file.content = append(file.content, bytes...)
	return nil
}

// ReadAll returns a snapshot of the complete file content.
//
// The returned slice is a copy, so callers may modify it (or keep it across
// later AppendBytes calls) without affecting the mock. A nil content is
// reported as nil, and an empty one as an empty slice.
func (file *DummyFile) ReadAll() ([]byte, error) {
	if file.content == nil {
		return nil, nil
	}
	snapshot := make([]byte, len(file.content))
	copy(snapshot, file.content)
	return snapshot, nil
}

// ReadAt copies up to len(dest) bytes, starting at byte offset, into dest and
// returns the number of bytes copied.
//
// Results:
//   - offset < 0: (0, errNegativeOffset); the call no longer panics.
//   - offset beyond the end of the content: (0, io.EOF).
//   - fewer than len(dest) bytes available: (n, io.EOF) with n bytes copied;
//     the remainder of dest is left untouched.
//   - otherwise: (len(dest), nil).
func (file *DummyFile) ReadAt(dest []byte, offset int) (int, error) {
	switch {
	case offset < 0:
		return 0, errNegativeOffset
	case offset > len(file.content):
		return 0, io.EOF
	}

	// copy clamps to the shorter of the two slices, which replaces the
	// manual inclusive start/end index arithmetic.
	n := copy(dest, file.content[offset:])
	if n < len(dest) {
		return n, io.EOF
	}
	return n, nil
}

// Size returns the current content length in bytes. It always returns a nil error.
func (file *DummyFile) Size() (int, error) {
	return len(file.content), nil
}
DummyFile{content: []byte{0x01, 0x77, 0xFF}} + + //act + dest := make([]byte, 2) + n, err := file.ReadAt(dest, 10) + + //assert + assert.Equal(t, 0, n) + assert.NotNil(t, err) +} + +func TestReadAtEOF(t *testing.T) { + //arrange + file := DummyFile{content: []byte{0x01, 0x77, 0xFF}} + expectedDest := []byte{0x01, 0x77, 0xFF, 0x0, 0x0} + + //act + dest := make([]byte, 5) + n, err := file.ReadAt(dest, 0) + + //assert + assert.Equal(t, 3, n) + assert.Equal(t, expectedDest, dest) + assert.NotNil(t, err) +} + +func TestReadAtLastByte(t *testing.T) { + //arrange + file := DummyFile{content: []byte{0x01, 0x77, 0xFF}} + expectedDest := []byte{0xFF} + + //act + dest := make([]byte, 1) + n, err := file.ReadAt(dest, 2) + + //assert + assert.Equal(t, 1, n) + assert.Equal(t, expectedDest, dest) + assert.Nil(t, err) +} diff --git a/test/util/mocksstable/builder.go b/test/util/mocksstable/builder.go new file mode 100644 index 0000000..412dffa --- /dev/null +++ b/test/util/mocksstable/builder.go @@ -0,0 +1,13 @@ +package mocksstable + +import ( + "sstable/sstable" + "sstable/test/util/mockfilesystem" + "sstable/test/util/testdatafile" +) + +func NewSSTable(fileName string) *sstable.SSTable { + content := testdatafile.ReadSSTableData(fileName) + file := mockfilesystem.NewDummyFile(content) + return sstable.NewSSTable(file) +} diff --git a/test/util/testdatafile/testdata.go b/test/util/testdatafile/testdata.go index 56b4895..d147cdc 100644 --- a/test/util/testdatafile/testdata.go +++ b/test/util/testdatafile/testdata.go @@ -8,6 +8,8 @@ import ( ) const TEST_DATA_ROOT = "/test/data" +const MEMTABLE_TEST_DATA_FOLDER = "memtable" +const SSTABLE_TEST_DATA_FOLDER = "sstable" func ReadTestData(filePath string) string { _, b, _, _ := runtime.Caller(0) @@ -23,3 +25,13 @@ func ReadTestData(filePath string) string { return string(bytes) } + +func ReadSSTableData(fileName string) string { + fullPath := path.Join(SSTABLE_TEST_DATA_FOLDER, fileName) + return ReadTestData(fullPath) +} + +func 
// DeepCopy copies the elements src[startIndex..endIndex] (both bounds
// inclusive) into the beginning of dest and returns dest.
//
// Elements are copied by plain assignment, so for element types that contain
// pointers, slices or maps the copy is shallow.
//
// If endIndex < startIndex the range is empty and dest is returned unchanged.
// If the range does not lie inside src, or dest is shorter than the range,
// the function panics with a runtime bounds error before dest is modified;
// a failed call therefore never leaves dest partially overwritten.
func DeepCopy[T any](src []T, dest []T, startIndex, endIndex int) []T {
	count := endIndex - startIndex + 1
	if count <= 0 {
		return dest
	}

	// Validate both slices up front. Indexing (rather than slicing) checks
	// against len, not cap, so elements hidden in spare capacity are never
	// read or written.
	_ = src[endIndex]
	_ = dest[count-1]

	copy(dest, src[startIndex:endIndex+1])
	return dest
}