Closed as not planned
Description
The main purpose of returning a `Stream` is to avoid returning a large `List`, which may lead to an OOM.
However, until the transaction has finished, the entities still exist in the persistence context and cannot be reclaimed by the GC,
so we need to evict each entity manually after it has been consumed. Here is a test case that verifies this:
package stream;
import java.util.stream.Stream;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.TestInstance;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.domain.EntityScan;
import org.springframework.boot.test.autoconfigure.orm.jpa.DataJpaTest;
import org.springframework.data.domain.Sort;
import org.springframework.data.jpa.domain.AbstractPersistable;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.JpaSpecificationExecutor;
import org.springframework.data.jpa.repository.config.EnableJpaRepositories;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.TestPropertySource;
import jakarta.persistence.Entity;
import jakarta.persistence.EntityManager;
/**
 * Compares the heap growth observed when consuming a repository {@link Stream} with and
 * without detaching each entity from the {@link EntityManager} as it is consumed.
 *
 * <p>The fixture persists 100000 {@link TestEntity} rows once per class, and each parameterized
 * run prints the difference in used heap measured before and after the stream is drained.
 */
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
@DataJpaTest(showSql = false)
@TestPropertySource(properties = "spring.jpa.properties.hibernate.jdbc.fetch_size=100")
@EnableJpaRepositories(basePackageClasses = StreamTests.TestEntityRepository.class, considerNestedRepositories = true)
@EntityScan(basePackageClasses = StreamTests.TestEntity.class)
@ContextConfiguration(classes = StreamTests.class)
class StreamTests {

	@Autowired
	TestEntityRepository repository;

	@Autowired
	EntityManager entityManager;

	/**
	 * Persists the entities that every test run streams over.
	 */
	@BeforeAll
	void prepare() {
		int size = 100000;
		for (int i = 0; i < size; i++) {
			TestEntity entity = new TestEntity();
			this.repository.save(entity);
		}
	}

	/**
	 * Drains the repository stream and prints how much the used heap grew while doing so.
	 *
	 * @param detaching when {@code true}, each entity is passed to
	 * {@link EntityManager#detach(Object)} as it flows through the stream
	 */
	@ParameterizedTest
	@ValueSource(booleans = { false, true })
	void testStreamMemoryUsage(boolean detaching) {
		long usedMemory = usedMemory();
		// Repository streams wrap data-store resources and must be closed after use,
		// hence try-with-resources instead of a bare local variable.
		try (Stream<TestEntity> source = this.repository.stream()) {
			Stream<TestEntity> stream = detaching ? source.peek(this.entityManager::detach) : source;
			stream.forEach(entity -> {
				// TODO do something with entity
			});
		}
		System.out.println(
				"Memory usage increased " + (detaching ? "with " : "without ") + " detaching: " + (usedMemory() - usedMemory));
	}

	/**
	 * Requests a garbage collection and returns the heap currently in use.
	 *
	 * @return total memory minus free memory, in bytes, as reported by {@link Runtime}
	 */
	private long usedMemory() {
		// System.gc() is only a request; the reported figure is therefore approximate.
		System.gc();
		Runtime runtime = Runtime.getRuntime();
		return runtime.totalMemory() - runtime.freeMemory();
	}

	/**
	 * Repository exposing all {@link TestEntity} instances as a stream ordered by {@code id}.
	 */
	interface TestEntityRepository extends JpaRepository<TestEntity, Long>, JpaSpecificationExecutor<TestEntity> {

		/**
		 * Streams every entity sorted by {@code id}.
		 *
		 * @return a stream that the caller is expected to close
		 */
		default Stream<TestEntity> stream() {
			// The specification returns a null predicate, i.e. no restriction is applied.
			return findBy((root, query, cb) -> null, q -> q.sortBy(Sort.by("id"))).stream();
		}

	}

	/**
	 * Minimal entity with no fields of its own beyond those inherited from {@link AbstractPersistable}.
	 */
	@Entity
	static class TestEntity extends AbstractPersistable<Long> {

	}

}
it will output like this:
Memory usage increased without detaching: 3060128
Memory usage increased with detaching: 302464
Spring Data JPA could return a `Stream` with `.peek(entityManager::detach)` already applied.
Alternatively, it could use a `StatelessSession` to fetch the stream. That is Hibernate-specific, though I think there is an equivalent in EclipseLink; we should migrate once a standard `StatelessEntityManager` is available.