@@ -82,9 +82,22 @@ public void testSplitText() {
 		assertThat(chunks.get(3).getText())
 			.isEqualTo("choose. It isn’t the lack of an exit, but the abundance of exits that is so disorienting.");
 
-		// Verify that the same, merged metadata is copied to all chunks.
-		assertThat(chunks.get(0).getMetadata()).isEqualTo(chunks.get(1).getMetadata());
-		assertThat(chunks.get(2).getMetadata()).isEqualTo(chunks.get(3).getMetadata());
+		// Verify that the original metadata is copied to all chunks (including
+		// chunk-specific fields)
+		assertThat(chunks.get(0).getMetadata()).containsKeys("key1", "key2", "parent_document_id", "chunk_index",
+				"total_chunks");
+		assertThat(chunks.get(1).getMetadata()).containsKeys("key1", "key2", "parent_document_id", "chunk_index",
+				"total_chunks");
+		assertThat(chunks.get(2).getMetadata()).containsKeys("key2", "key3", "parent_document_id", "chunk_index",
+				"total_chunks");
+		assertThat(chunks.get(3).getMetadata()).containsKeys("key2", "key3", "parent_document_id", "chunk_index",
+				"total_chunks");
+
+		// Verify chunk indices are correct
+		assertThat(chunks.get(0).getMetadata().get("chunk_index")).isEqualTo(0);
+		assertThat(chunks.get(1).getMetadata().get("chunk_index")).isEqualTo(1);
+		assertThat(chunks.get(2).getMetadata().get("chunk_index")).isEqualTo(0);
+		assertThat(chunks.get(3).getMetadata().get("chunk_index")).isEqualTo(1);
 		assertThat(chunks.get(0).getMetadata()).containsKeys("key1", "key2").doesNotContainKeys("key3");
 		assertThat(chunks.get(2).getMetadata()).containsKeys("key2", "key3").doesNotContainKeys("key1");
 
@@ -148,7 +161,6 @@ public void pageNoChunkSplit() {
 	@Test
 	public void pageWithChunkSplit() {
 		// given
-
 		var doc1 = new Document("1In the end, writing arises when man realizes that memory is not enough."
 				+ "1The most oppressive thing about the labyrinth is that you are constantly "
 				+ "1being forced to choose. It isn’t the lack of an exit, but the abundance of exits that is so disorienting.",
@@ -236,13 +248,137 @@ public void testSplitTextWithNullMetadata() {
 		assertThat(chunks.get(0).getText()).isEqualTo("In the end, writing arises when man");
 		assertThat(chunks.get(1).getText()).isEqualTo(" realizes that memory is not enough.");
 
-		// Verify that the same, merged metadata is copied to all chunks.
-		assertThat(chunks.get(0).getMetadata()).isEqualTo(chunks.get(1).getMetadata());
-		assertThat(chunks.get(1).getMetadata()).containsKeys("key1");
+		// Verify that the original metadata is copied to all chunks (with chunk-specific
+		// fields)
+		assertThat(chunks.get(0).getMetadata()).containsKeys("key1", "parent_document_id", "chunk_index",
+				"total_chunks");
+		assertThat(chunks.get(1).getMetadata()).containsKeys("key1", "parent_document_id", "chunk_index",
+				"total_chunks");
+
+		// Verify chunk indices are different
+		assertThat(chunks.get(0).getMetadata().get("chunk_index")).isEqualTo(0);
+		assertThat(chunks.get(1).getMetadata().get("chunk_index")).isEqualTo(1);
 
 		// Verify that the content formatters are copied from the parents to the chunks.
 		assertThat(chunks.get(0).getContentFormatter()).isSameAs(contentFormatter);
 		assertThat(chunks.get(1).getContentFormatter()).isSameAs(contentFormatter);
 	}
 
+	@Test
+	public void testScorePreservation() {
+		// given
+		Double originalScore = 0.95;
+		var doc = Document.builder()
+			.text("This is a test document that will be split into multiple chunks.")
+			.metadata(Map.of("source", "test.txt"))
+			.score(originalScore)
+			.build();
+
+		// when
+		List<Document> chunks = testTextSplitter.apply(List.of(doc));
+
+		// then
+		assertThat(chunks).hasSize(2);
+		assertThat(chunks.get(0).getScore()).isEqualTo(originalScore);
+		assertThat(chunks.get(1).getScore()).isEqualTo(originalScore);
+	}
+
+	@Test
+	public void testParentDocumentTracking() {
+		// given
+		var doc1 = new Document("First document content for testing splitting functionality.",
+				Map.of("source", "doc1.txt"));
+		var doc2 = new Document("Second document content for testing splitting functionality.",
+				Map.of("source", "doc2.txt"));
+
+		String originalId1 = doc1.getId();
+		String originalId2 = doc2.getId();
+
+		// when
+		List<Document> chunks = testTextSplitter.apply(List.of(doc1, doc2));
+
+		// then
+		assertThat(chunks).hasSize(4);
+
+		// Verify parent document tracking for doc1 chunks
+		assertThat(chunks.get(0).getMetadata().get("parent_document_id")).isEqualTo(originalId1);
+		assertThat(chunks.get(1).getMetadata().get("parent_document_id")).isEqualTo(originalId1);
+
+		// Verify parent document tracking for doc2 chunks
+		assertThat(chunks.get(2).getMetadata().get("parent_document_id")).isEqualTo(originalId2);
+		assertThat(chunks.get(3).getMetadata().get("parent_document_id")).isEqualTo(originalId2);
+	}
+
+	@Test
+	public void testChunkMetadataInformation() {
+		// given
+		var doc = new Document("This is a longer document that will be split into exactly two chunks for testing.",
+				Map.of("source", "test.txt"));
+
+		// when
+		List<Document> chunks = testTextSplitter.apply(List.of(doc));
+
+		// then
+		assertThat(chunks).hasSize(2);
+
+		// Verify chunk index and total chunks for first chunk
+		assertThat(chunks.get(0).getMetadata().get("chunk_index")).isEqualTo(0);
+		assertThat(chunks.get(0).getMetadata().get("total_chunks")).isEqualTo(2);
+
+		// Verify chunk index and total chunks for second chunk
+		assertThat(chunks.get(1).getMetadata().get("chunk_index")).isEqualTo(1);
+		assertThat(chunks.get(1).getMetadata().get("total_chunks")).isEqualTo(2);
+
+		// Verify original metadata is preserved
+		assertThat(chunks.get(0).getMetadata().get("source")).isEqualTo("test.txt");
+		assertThat(chunks.get(1).getMetadata().get("source")).isEqualTo("test.txt");
+	}
+
+	@Test
+	public void testEnhancedMetadataWithMultipleDocuments() {
+		// given
+		var doc1 = Document.builder()
+			.text("First document with score and metadata.")
+			.metadata(Map.of("type", "article", "priority", "high"))
+			.score(0.8)
+			.build();
+
+		var doc2 = Document.builder()
+			.text("Second document with different score.")
+			.metadata(Map.of("type", "report", "priority", "medium"))
+			.score(0.6)
+			.build();
+
+		String originalId1 = doc1.getId();
+		String originalId2 = doc2.getId();
+
+		// when
+		List<Document> chunks = testTextSplitter.apply(List.of(doc1, doc2));
+
+		// then
+		assertThat(chunks).hasSize(4);
+
+		// Verify first document chunks
+		for (int i = 0; i < 2; i++) {
+			Document chunk = chunks.get(i);
+			assertThat(chunk.getScore()).isEqualTo(0.8);
+			assertThat(chunk.getMetadata().get("parent_document_id")).isEqualTo(originalId1);
+			assertThat(chunk.getMetadata().get("chunk_index")).isEqualTo(i);
+			assertThat(chunk.getMetadata().get("total_chunks")).isEqualTo(2);
+			assertThat(chunk.getMetadata().get("type")).isEqualTo("article");
+			assertThat(chunk.getMetadata().get("priority")).isEqualTo("high");
+		}
+
+		// Verify second document chunks
+		for (int i = 2; i < 4; i++) {
+			Document chunk = chunks.get(i);
+			assertThat(chunk.getScore()).isEqualTo(0.6);
+			assertThat(chunk.getMetadata().get("parent_document_id")).isEqualTo(originalId2);
+			assertThat(chunk.getMetadata().get("chunk_index")).isEqualTo(i - 2);
+			assertThat(chunk.getMetadata().get("total_chunks")).isEqualTo(2);
+			assertThat(chunk.getMetadata().get("type")).isEqualTo("report");
+			assertThat(chunk.getMetadata().get("priority")).isEqualTo("medium");
+		}
+	}
+
 }
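
For context, the assertions above all exercise the same enrichment pattern: each chunk keeps its parent's metadata and score and gains parent_document_id, chunk_index, and total_chunks entries. The sketch below illustrates that pattern only; it is not this commit's actual TextSplitter change. The helper name createChunkDocuments is assumed, only Document.builder(), getId(), getMetadata(), and getScore() are taken from the test code itself, and the java.util imports (List, Map, ArrayList, HashMap) are assumed to be present in the enclosing class.

	// Illustration only: a sketch of the enrichment behaviour asserted above,
	// not the actual implementation in this commit. The helper name is assumed.
	private List<Document> createChunkDocuments(Document parent, List<String> chunkTexts) {
		List<Document> chunks = new ArrayList<>();
		for (int i = 0; i < chunkTexts.size(); i++) {
			// Start from a copy of the parent's metadata, then add chunk-specific fields.
			Map<String, Object> metadata = new HashMap<>(parent.getMetadata());
			metadata.put("parent_document_id", parent.getId()); // link each chunk back to its source document
			metadata.put("chunk_index", i); // 0-based position of the chunk within its parent
			metadata.put("total_chunks", chunkTexts.size()); // number of chunks produced from this parent
			chunks.add(Document.builder()
				.text(chunkTexts.get(i))
				.metadata(metadata)
				.score(parent.getScore()) // carry the parent's score over, as testScorePreservation expects
				.build());
		}
		return chunks;
	}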