Bin layout #15

Open · wants to merge 5 commits into master

5 changes: 5 additions & 0 deletions layout.md
@@ -0,0 +1,5 @@
+HEADER
+number of keys (8-bit) | timestamp (64-bit) -> 9 bytes
+
+BLOCK
+block size (16-bit) | record size (7-bit) | tombstone? (1-bit) | timestamp (64-bit) | key size (8-bit) | key (x bytes) | value size (8-bit) | value (y bytes)
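
For reference, one block in this layout can be produced and decoded with Ruby's Array#pack / String#unpack. A minimal round-trip sketch (hypothetical key and value, not part of the PR):

    key   = 'user:1'
    value = 'alice'
    timestamp = Time.now.to_i

    record_size = key.bytesize + value.bytesize  # must fit in 7 bits (<= 127)
    rst         = (record_size << 1) | 0         # record size composed with the tombstone bit
    block_size  = 13 + record_size               # 13 fixed bytes per block

    row    = [block_size, rst, timestamp, key.bytesize, key, value.bytesize, value]
    packed = row.pack("SCQCa#{key.bytesize}Ca#{value.bytesize}")

    # Decoding reverses the composition: the low bit is the tombstone flag.
    _size, rst2, _ts, _ksize = packed.unpack('SCQC')
    tombstone = rst2 & 1   # => 0
    rsize     = rst2 >> 1  # => record_size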
23 changes: 16 additions & 7 deletions lib/amnesia/segment.rb
@@ -48,17 +48,26 @@ def store(hash_input)
     end
 
     def populate_index_structure
-      lines = File.readlines(@storage.filename)
-      byte_offset = 0
+      fd = File.open(@storage.filename, 'rb')
+
+      fd.seek(9, IO::SEEK_CUR) # skip the 9-byte header
 
-      lines.each do |line|
-        record_key, = line.split(',', 2)
-        record_size = line.bytesize
+      until fd.eof?
+        _block_size, _record_size_tombstone, _timestamp, key_size = fd.read(12).unpack('SCQC')
+        record_key = fd.read(key_size)
 
-        @index_structure.add(record_key, [byte_offset, record_size - 1])
+        value_size, = fd.read(1).unpack('C')
 
-        byte_offset += line.bytesize
+        puts "Adding index entry\nKey -> #{record_key}\nFile offset -> #{fd.pos}\nValue size -> #{value_size}"
+
+        # fd.pos now points at the first byte of the value
+        @index_structure.add(record_key, [fd.pos, value_size])
+
+        fd.seek(value_size, IO::SEEK_CUR)
       end
+
+      pp @index_structure
+
+      fd.close
     end
   end
 end
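
Each index entry now stores the absolute file offset of a value and its size, so a lookup becomes a single positioned read instead of a line scan. A sketch of how such an entry could be consumed (hypothetical index accessor, mirroring record_from_index in storage.rb):

    offset, value_size = @index_structure.get('user:1') # hypothetical accessor name
    value = File.binread(@storage.filename, value_size, offset)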
17 changes: 5 additions & 12 deletions lib/amnesia/segment_handler.rb
@@ -32,18 +32,7 @@ def compact
     end
 
     def flush(items)
-      # TODO: Use the storage class for that
-      filename = "./_data/#{Time.now.to_i}.segment"
-
-      File.open(filename, 'w') do |f|
-        items.each { |(key, value)| f.write("#{key},#{value}\n") }
-      end
-
-      @segments.unshift(Amnesia::Segment.new(filename))
-
-      compact if @segments.length == 2
-
-      :finished_flushing
+      create_segment("./_data/#{Time.now.to_i}.segment", items)
     end

     # TODO: remove this method
@@ -83,6 +72,10 @@ def load_segments(filenames)
 
     private
 
+    def create_segment(filename, items)
+      @segments.unshift(Amnesia::Segment.new(filename, items: items))
+    end
+
     def start_segment
       filename = "./_data/#{Time.now.to_i}.segment"
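
With flush reduced to delegation, building a binary segment is one call through Segment into Storage#populate_data. Hypothetical usage, assuming a SegmentHandler instance named handler:

    handler.flush([%w[user:1 alice], ['user:2', '']]) # empty value marks a tombstone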
80 changes: 67 additions & 13 deletions lib/amnesia/storage.rb
@@ -2,6 +2,8 @@ module Amnesia
   class Storage
     attr_reader :filename
 
+    FIXED_AMOUNT_OF_BYTE_PER_BLOCK = 13
+
     def initialize(filename, items: nil)
       @filename = filename
       populate_data(items) unless items.nil? || items.empty?
@@ -30,7 +32,7 @@ def delete(key)
     end
 
     def get(key, index_entry: nil)
-      return record_from_index(index_entry) unless index_entry.nil?
+      return record_from_index(index_entry, key) unless index_entry.nil?
 
       record_from_scan(key)
     end
@@ -54,28 +56,80 @@ def file_exists?
     private
 
     def populate_data(items)
-      data_block = items.map { |(key, value)| "#{key},#{value}\n" }.join('')
+      num_keys = items.length
+      creation_timestamp = Time.now.to_i
+
+      header = [num_keys, creation_timestamp].pack('CQ')
+
+      data_blocks = items.map do |(key, value)|
+        is_tombstone = value.empty? ? 1 : 0
+        key_size = key.bytesize
+        value_size = value.bytesize
+        record_size = key_size + value_size
+        # record size and tombstone share one byte: 7 bits of size, low bit is the flag
+        record_size_tombstone_composition = (record_size << 1) | is_tombstone
+
+        block_size = FIXED_AMOUNT_OF_BYTE_PER_BLOCK + record_size
+
+        row = [block_size, record_size_tombstone_composition, creation_timestamp, key_size, key, value_size, value]
+
+        row.pack("SCQCa#{key_size}Ca#{value_size}")
+      end.join
 
-      create_db_file(data_block)
+      File.binwrite(filename, "#{header}#{data_blocks}")
     end
 
-    def record_from_scan(key)
-      lines = File.readlines(filename)
+    def record_from_scan(searching_key)
+      handler = File.open(filename, 'rb')
+
+      handler.seek(9, IO::SEEK_CUR) # skip the 9-byte header
+
+      result = nil
+
+      until handler.eof?
+        block_seek = 12
+        block_size, record_size_tombstone, _timestamp, key_size = handler.read(block_seek).unpack('SCQC')
+
+        key = handler.read(key_size)
+
+        puts "Key Size -> #{key_size} // Key -> #{key}\n\n"
+
+        if searching_key == key
+          is_tombstone = record_size_tombstone & 1
+
+          value_size, = handler.read(1).unpack('C')
+          value, = handler.read(value_size).unpack('a*')
+
+          result = "#{key},#{value}\n" # keep the old "key,value" shape for compatibility
+          result = "#{key},\n" if is_tombstone == 1
+
+          break
+        else
+          # Jump to the next block: block size minus the bytes already read
+          # (the 12-byte fixed prefix plus the key itself).
+          handler.seek(block_size - (key_size + block_seek), IO::SEEK_CUR)
+        end
+      end
 
-      record = lines.filter do |line|
-        record_key, = line.split(',', 2)
-        record_key == key
-      end.last
+      handler.close
 
-      parse_record(record)
+      parse_record(result)
    end
 
-    def record_from_index(index_entry)
+    def record_from_index(index_entry, key)
       offset, size = index_entry
 
-      record = File.read(filename, size, offset)
+      value = File.binread(filename, size, offset)
 
-      parse_record(record)
+      parse_record("#{key},#{value}\n")
     end
   end
 end
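
The skip arithmetic in record_from_scan follows directly from the layout: a block holds 13 fixed bytes plus the record, and by the time a key has been compared, 12 fixed bytes plus the key are already consumed. A worked check (values chosen for illustration):

    # key 'user:1' (6 bytes), value 'alice' (5 bytes)
    record_size  = 6 + 5               # 11
    block_size   = 13 + record_size    # 24
    already_read = 12 + 6              # fixed prefix + key
    skip = block_size - already_read   # 6 => value_size byte (1) + value (5)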