From 0a791f28749ffc6afb8dec38d9ad5246dfb47963 Mon Sep 17 00:00:00 2001 From: Moeki Kawakami Date: Sat, 9 Mar 2024 09:38:56 +0900 Subject: [PATCH] Fix cursor --- lib/baran/text_splitter.rb | 2 +- test/test_text_splitter.rb | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/baran/text_splitter.rb b/lib/baran/text_splitter.rb index 8b6eea1..08a7f39 100644 --- a/lib/baran/text_splitter.rb +++ b/lib/baran/text_splitter.rb @@ -22,7 +22,7 @@ def chunks(text, metadata: nil) chunk = { text: chunk, cursor: cursor } chunk[:metadata] = metadata if metadata chunks << chunk - cursor += chunk.length + cursor += chunk[:text].length end chunks diff --git a/test/test_text_splitter.rb b/test/test_text_splitter.rb index d069049..ef8c201 100644 --- a/test/test_text_splitter.rb +++ b/test/test_text_splitter.rb @@ -53,6 +53,7 @@ def test_chunks assert_equal 2, documents.size assert_equal 'text', documents[0][:text] + assert_equal 4, documents[1][:cursor] end def test_chunks_with_metadata