diff --git a/lib/baran/character_text_splitter.rb b/lib/baran/character_text_splitter.rb index 7de8729..cf1c7b9 100644 --- a/lib/baran/character_text_splitter.rb +++ b/lib/baran/character_text_splitter.rb @@ -14,4 +14,4 @@ def splitted(text) merged(splits, @separator) end end -end \ No newline at end of file +end diff --git a/lib/baran/text_splitter.rb b/lib/baran/text_splitter.rb index 1f27861..8b6eea1 100644 --- a/lib/baran/text_splitter.rb +++ b/lib/baran/text_splitter.rb @@ -14,12 +14,14 @@ def splitted(text) raise NotImplementedError, "splitted method should be implemented in a subclass" end - def chunks(text) + def chunks(text, metadata: nil) cursor = 0 chunks = [] splitted(text).compact.each do |chunk| - chunks << { text: chunk, cursor: cursor } + entry = { text: chunk, cursor: cursor } + entry[:metadata] = metadata if metadata + chunks << entry cursor += chunk.length end @@ -56,4 +58,4 @@ def merged(splits, separator) results end end -end \ No newline at end of file +end diff --git a/test/test_character_text_splitter.rb b/test/test_character_text_splitter.rb index d6537fc..aa3e6bd 100644 --- a/test/test_character_text_splitter.rb +++ b/test/test_character_text_splitter.rb @@ -13,4 +13,4 @@ def test_chunks assert_equal(chunks.length, 3) end -end \ No newline at end of file +end diff --git a/test/test_recursive_character_text_splitter.rb b/test/test_recursive_character_text_splitter.rb index d411c38..67b002e 100644 --- a/test/test_recursive_character_text_splitter.rb +++ b/test/test_recursive_character_text_splitter.rb @@ -20,4 +20,4 @@ def test_empty_chunks assert_equal(chunks.length, 6) end -end \ No newline at end of file +end diff --git a/test/test_text_splitter.rb b/test/test_text_splitter.rb index 88b3a70..d069049 100644 --- a/test/test_text_splitter.rb +++ b/test/test_text_splitter.rb @@ -55,6 +55,14 @@ def test_chunks assert_equal 'text', documents[0][:text] end + def test_chunks_with_metadata + text = 'text one' + metadata = { page: 1 } + documents = 
@test_splitter.chunks(text, metadata: metadata) + + assert_equal({ page: 1 }, documents[0][:metadata]) + end + def test_joined items = ['one', 'two', 'three'] separator = ' '