diff --git a/lib/baran/text_splitter.rb b/lib/baran/text_splitter.rb index 8b6eea1..08a7f39 100644 --- a/lib/baran/text_splitter.rb +++ b/lib/baran/text_splitter.rb @@ -22,7 +22,7 @@ def chunks(text, metadata: nil) chunk = { text: chunk, cursor: cursor } chunk[:metadata] = metadata if metadata chunks << chunk - cursor += chunk.length + cursor += chunk[:text].length end chunks diff --git a/test/test_text_splitter.rb b/test/test_text_splitter.rb index d069049..ef8c201 100644 --- a/test/test_text_splitter.rb +++ b/test/test_text_splitter.rb @@ -53,6 +53,7 @@ def test_chunks assert_equal 2, documents.size assert_equal 'text', documents[0][:text] + assert_equal 4, documents[1][:cursor] end def test_chunks_with_metadata