Skip to content

Commit

Permalink
Add chunks with metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
Moeki Kawakami committed Sep 26, 2023
1 parent 5841dca commit 8bb0f67
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 4 deletions.
2 changes: 1 addition & 1 deletion lib/baran/character_text_splitter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ def splitted(text)
merged(splits, @separator)
end
end
end
end
6 changes: 5 additions & 1 deletion lib/baran/text_splitter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ def chunks(text)
chunks
end

# Splits +text+ via #chunks and returns the result together with the
# caller-supplied metadata.
#
# @param text the text handed unchanged to #chunks
# @param metadata an arbitrary object returned as-is (not copied) under :metadata
# @return [Hash] a two-key hash: :chunks holds whatever #chunks(text) returns,
#   :metadata holds the +metadata+ argument
def chunks_with_metadata(text:, metadata:)
  pieces = chunks(text)
  { chunks: pieces, metadata: metadata }
end

def joined(items, separator)
text = items.join(separator).strip
text.empty? ? nil : text
Expand Down Expand Up @@ -56,4 +60,4 @@ def merged(splits, separator)
results
end
end
end
end
2 changes: 1 addition & 1 deletion test/test_character_text_splitter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ def test_chunks

assert_equal(chunks.length, 3)
end
end
end
2 changes: 1 addition & 1 deletion test/test_recursive_character_text_splitter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ def test_empty_chunks

assert_equal(chunks.length, 6)
end
end
end
9 changes: 9 additions & 0 deletions test/test_text_splitter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,15 @@ def test_chunks
assert_equal 'text', documents[0][:text]
end

# chunks_with_metadata should expose the split chunks under :chunks and
# hand the supplied metadata back under :metadata.
def test_chunks_with_metadata
  result = @test_splitter.chunks_with_metadata(text: 'text one', metadata: { page: 1 })

  assert_equal 2, result[:chunks].size
  assert_equal({ page: 1 }, result[:metadata])
end

def test_joined
items = ['one', 'two', 'three']
separator = ' '
Expand Down

0 comments on commit 8bb0f67

Please sign in to comment.