From 6d5ef0f0193c8c00a5242dfacb411b0a3eb56fa7 Mon Sep 17 00:00:00 2001
From: Karan Sharma
Date: Fri, 25 Nov 2022 11:49:24 +0530
Subject: [PATCH] feat: init project

---
 TODO.md              |  46 ++++++++++++
 examples/main.go     |  30 ++++++++
 go.mod               |   3 +
 go.sum               |   0
 pkg/barrel/barrel.go | 174 +++++++++++++++++++++++++++++++++++++++++++
 pkg/barrel/header.go |  47 ++++++++++++
 pkg/barrel/keydir.go |  12 +++
 7 files changed, 312 insertions(+)
 create mode 100644 TODO.md
 create mode 100644 examples/main.go
 create mode 100644 go.mod
 create mode 100644 go.sum
 create mode 100644 pkg/barrel/barrel.go
 create mode 100644 pkg/barrel/header.go
 create mode 100644 pkg/barrel/keydir.go

diff --git a/TODO.md b/TODO.md
new file mode 100644
index 0000000..1030e19
--- /dev/null
+++ b/TODO.md
@@ -0,0 +1,46 @@
+## Implementation Notes
+
+### Initialising
+
+barrel.Open(dir="/data/barrel")
+
+Create a `barrel.db` file inside `/data/barrel` which is the working data directory.
+
+- timer.Timer -> check if file needs to be rotated (5MB)
+- rename current active file
+- create new file
+
+- timer.Timer -> merge all these files 30 minutes
+- loop over all inactive files
+- delete records not required
+
+
+.Put("hello") -> "world"
+
+.Put("hello") -> "bye"
+
+
+### Writing
+
+- [x] Encode the header
+- [x] Flush to a file
+- [ ] Add TTL
+- [ ] Add Checksum
+- [x] Organize methods as Encoder/Decoder package
+- [x] Add KeyDir struct
+  - [x] Get the file offset and add it to the hashmap
+
+### Reading
+
+- [x] Check in keydir
+- [x] decode and return to user
+
+### Background
+
+- [ ] Merge old files
+- [ ]
+
+### Starting program
+
+## Test Cases
+
diff --git a/examples/main.go b/examples/main.go
new file mode 100644
index 0000000..3d6d378
--- /dev/null
+++ b/examples/main.go
@@ -0,0 +1,30 @@
+package main
+
+import (
+	"fmt"
+
+	"github.com/mr-karan/barreldb/pkg/barrel"
+)
+
+func main() {
+	// Named db rather than barrel so the variable does not shadow the package.
+	db, err := barrel.Init(".")
+	if err != nil {
+		panic(err)
+	}
+	defer db.Close()
+
+	if err := db.Put("hello", []byte("world!")); err != nil {
+		panic(err)
+	}
+	// if err := db.Put("hello", []byte("world!")); err != nil {
+	// 	panic(err)
+	// }
+
+	val, err := db.Get("hello")
+	if err != nil {
+		panic(err)
+	}
+
+	fmt.Println(string(val))
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..a4d1dc1
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module github.com/mr-karan/barreldb
+
+go 1.19
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..e69de29
diff --git a/pkg/barrel/barrel.go b/pkg/barrel/barrel.go
new file mode 100644
index 0000000..9852e4a
--- /dev/null
+++ b/pkg/barrel/barrel.go
@@ -0,0 +1,174 @@
+package barrel
+
+import (
+	"bytes"
+	"fmt"
+	"os"
+	"path/filepath"
+	"sync"
+	"time"
+)
+
+const (
+	// ACTIVE_DATAFILE is the name of the data file that records are appended to.
+	ACTIVE_DATAFILE = "barrel.db"
+	// BARREL_LOCKFILE is the name reserved for the lock file; it is not used yet.
+	BARREL_LOCKFILE = "barrel.lock"
+)
+
+// Barrel is an append-only key/value store backed by a single data file.
+// The embedded mutex serialises Put and Get.
+type Barrel struct {
+	sync.Mutex
+
+	activeFile string   // Path of the active data file.
+	file       *os.File // Append-only handle used for writes.
+	reader     *os.File // Read-only handle used for lookups.
+	keydir     KeyDir   // In-memory index from key to record metadata.
+	offset     int      // Size of the active file, i.e. where the next record starts.
+}
+
+// Init opens the data file inside dir, creating it if needed, and returns a
+// Barrel ready for use. The KeyDir starts out empty, so records written by an
+// earlier run cannot be looked up until they are written again.
+func Init(dir string) (*Barrel, error) {
+	// If the file doesn't exist, create it, or append to the file.
+	activeFile := filepath.Join(dir, ACTIVE_DATAFILE)
+	f, err := os.OpenFile(activeFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
+	if err != nil {
+		return nil, fmt.Errorf("error opening file handler: %w", err)
+	}
+
+	// Use stat to get file size in bytes.
+	stat, err := f.Stat()
+	if err != nil {
+		// Don't leak the write handle on failure.
+		f.Close()
+		return nil, fmt.Errorf("error fetching file stats: %w", err)
+	}
+
+	// Open a second, read-only handle for reading the db file.
+	reader, err := os.Open(activeFile)
+	if err != nil {
+		f.Close()
+		return nil, fmt.Errorf("error opening reader for db: %w", err)
+	}
+
+	// TODO: Do lockfile shenanigans. Ensure only one process can write to barrel.db at a time.
+
+	barrel := &Barrel{
+		activeFile: activeFile,
+		file:       f,
+		reader:     reader,
+		keydir:     make(KeyDir),
+		offset:     int(stat.Size()),
+	}
+
+	return barrel, nil
+}
+
+// Close releases both file handles. Close errors are deliberately ignored:
+// Put syncs every record to disk, so nothing is left buffered at this point.
+func (b *Barrel) Close() {
+	b.file.Close()
+	b.reader.Close()
+}
+
+// Put appends a record for k to the data file, syncs it to disk and indexes
+// it in the KeyDir. A later Put for the same key replaces the index entry.
+func (b *Barrel) Put(k string, val []byte) error {
+	b.Lock()
+	defer b.Unlock()
+
+	// Prepare header.
+	header := Header{
+		Timestamp: uint32(time.Now().Unix()),
+		KeySize:   uint32(len(k)),
+		ValSize:   uint32(len(val)),
+	}
+
+	// Create a buffer for writing data to it, sized for the whole record.
+	// TODO: Create a buffer pool.
+	buf := bytes.NewBuffer(make([]byte, 0, HEADER_SIZE+len(k)+len(val)))
+
+	// Encode header.
+	if err := header.encode(buf); err != nil {
+		return fmt.Errorf("error encoding header: %w", err)
+	}
+
+	// Write key/value.
+	buf.WriteString(k)
+	buf.Write(val)
+
+	// Append to underlying file.
+	n, err := b.file.Write(buf.Bytes())
+	if err != nil {
+		// Account for a partial write so that offset still matches the file size.
+		b.offset += n
+		return fmt.Errorf("error writing data to file: %w", err)
+	}
+
+	// Add entry to KeyDir.
+	// We just save the value of key and some metadata for faster lookups.
+	// The value is only stored in disk.
+	b.keydir[k] = Meta{
+		Timestamp:  int(header.Timestamp),
+		RecordSize: n,
+		RecordPos:  b.offset,
+		FileID:     "TODO",
+	}
+
+	// Increase the offset of the current active file.
+	b.offset += n
+
+	// Ensure filesystem's in memory buffer is flushed to disk.
+	if err := b.file.Sync(); err != nil {
+		return fmt.Errorf("error syncing file to disk: %w", err)
+	}
+
+	return nil
+}
+
+// Get returns the most recently written value for k, read back from disk.
+// It returns an error if the key is unknown or the record cannot be read.
+func (b *Barrel) Get(k string) ([]byte, error) {
+	b.Lock()
+	defer b.Unlock()
+
+	// Check for entry in KeyDir.
+	meta, ok := b.keydir[k]
+	if !ok {
+		return nil, fmt.Errorf("error finding data for the given key")
+	}
+
+	// Initialise a buffer for reading data.
+	record := make([]byte, meta.RecordSize)
+
+	// Read the record from the position it was written at. Deriving the
+	// position from the end of the file only works for the newest record.
+	n, err := b.reader.ReadAt(record, int64(meta.RecordPos))
+	if err != nil {
+		return nil, fmt.Errorf("error reading data from file: %w", err)
+	}
+
+	// Check if the size of bytes read matches the record size.
+	if n != meta.RecordSize {
+		return nil, fmt.Errorf("error fetching record, invalid size")
+	}
+
+	// Decode the header.
+	var header Header
+	if err := header.decode(record); err != nil {
+		return nil, fmt.Errorf("error decoding header: %w", err)
+	}
+
+	// Reject a header that disagrees with the indexed size before slicing.
+	if HEADER_SIZE+int(header.KeySize)+int(header.ValSize) != meta.RecordSize {
+		return nil, fmt.Errorf("error fetching record, invalid header")
+	}
+
+	// Get the offset position in record to start reading the value from.
+	valPos := meta.RecordSize - int(header.ValSize)
+
+	return record[valPos:], nil
+}
diff --git a/pkg/barrel/header.go b/pkg/barrel/header.go
new file mode 100644
index 0000000..c523819
--- /dev/null
+++ b/pkg/barrel/header.go
@@ -0,0 +1,47 @@
+package barrel
+
+import (
+	"bytes"
+	"encoding/binary"
+)
+
+const (
+	// HEADER_SIZE is the size in bytes of the fixed width header of every record.
+	HEADER_SIZE = 12
+)
+
+/*
+Record is a binary representation of how each record is persisted in the disk.
+The first three fields have a fixed size of 4 bytes (so 4+4+4=12 bytes fixed width "Header").
+Key size = 4 bytes which means the max size of key can be (2^32)-1 = ~4.29GB.
+Value size = 4 bytes which means the max size of value can be (2^32)-1 = ~4.29GB.
+Each entry cannot exceed more than ~8.6GB as a theoretical limit.
+In a practical sense, this is also constrained by the memory of the underlying VM
+where this program would run.
+------------------------------------------------------
+| time(4) | key_size(4) | val_size(4) | key | val |
+------------------------------------------------------
+*/
+type Record struct {
+	Header Header
+	Key    string
+	Value  []byte
+}
+
+// Header represents the fixed width fields present at the start of every record.
+type Header struct {
+	// TODO: Add Expiry and CRC.
+	Timestamp uint32
+	KeySize   uint32
+	ValSize   uint32
+}
+
+// encode writes the header to buf as three little-endian uint32 values.
+func (h *Header) encode(buf *bytes.Buffer) error {
+	return binary.Write(buf, binary.LittleEndian, h)
+}
+
+// decode fills the header from the first HEADER_SIZE bytes of record.
+func (h *Header) decode(record []byte) error {
+	return binary.Read(bytes.NewReader(record), binary.LittleEndian, h)
+}
diff --git a/pkg/barrel/keydir.go b/pkg/barrel/keydir.go
new file mode 100644
index 0000000..bdbc61d
--- /dev/null
+++ b/pkg/barrel/keydir.go
@@ -0,0 +1,12 @@
+package barrel
+
+// Meta is the per-key metadata held in memory.
+type Meta struct {
+	Timestamp  int    // Unix time (seconds) at which the record was written.
+	RecordSize int    // Total size of the record on disk: header + key + value.
+	RecordPos  int    // Byte offset in the data file where the record starts.
+	FileID     string // Identifier of the file holding the record.
+}
+
+// KeyDir maps every key to the metadata needed to read its latest record.
+type KeyDir map[string]Meta