From 083af9fce98738b0370b3d62134a363923d1c040 Mon Sep 17 00:00:00 2001 From: Dan King Date: Wed, 13 Sep 2023 17:30:07 -0400 Subject: [PATCH 1/2] [query] eliminate optimization that can blow RAM CHANGELOG: On some pipelines, since at least 0.2.58 (commit 23813afd5b), Hail could use essentially unbounded amounts of memory. This change removes "optimization" rules that accidentally caused that. --- hail/src/main/scala/is/hail/expr/ir/Simplify.scala | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/hail/src/main/scala/is/hail/expr/ir/Simplify.scala b/hail/src/main/scala/is/hail/expr/ir/Simplify.scala index 6f9747c3fe7..4c9d071a3ba 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Simplify.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Simplify.scala @@ -337,10 +337,11 @@ object Simplify { case ToArray(ToStream(a, _)) if a.typ.isInstanceOf[TSet] || a.typ.isInstanceOf[TDict] => CastToArray(a) - case ToStream(ToArray(s), _) if s.typ.isInstanceOf[TStream] => s - - case ToStream(Let(name, value, ToArray(x)), _) if x.typ.isInstanceOf[TStream] => - Let(name, value, x) + // // These rules need to a way to preserve the memory management semantics of the given ToStream + // case ToStream(ToArray(s), _) if s.typ.isInstanceOf[TStream] => s + // + // case ToStream(Let(name, value, ToArray(x)), _) if x.typ.isInstanceOf[TStream] => + // Let(name, value, x) case MakeNDArray(ToArray(someStream), shape, rowMajor, errorId) => MakeNDArray(someStream, shape, rowMajor, errorId) case MakeNDArray(ToStream(someArray, _), shape, rowMajor, errorId) => MakeNDArray(someArray, shape, rowMajor, errorId) From b8e70541c619db46a8058fe92061d6f5694a0a83 Mon Sep 17 00:00:00 2001 From: Dan King Date: Thu, 14 Sep 2023 15:02:35 -0400 Subject: [PATCH 2/2] optimize only if memory management is not required --- hail/src/main/scala/is/hail/expr/ir/Simplify.scala | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/hail/src/main/scala/is/hail/expr/ir/Simplify.scala b/hail/src/main/scala/is/hail/expr/ir/Simplify.scala index 4c9d071a3ba..aefae9be153 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Simplify.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Simplify.scala @@ -337,11 +337,10 @@ object Simplify { case ToArray(ToStream(a, _)) if a.typ.isInstanceOf[TSet] || a.typ.isInstanceOf[TDict] => CastToArray(a) - // // These rules need to a way to preserve the memory management semantics of the given ToStream - // case ToStream(ToArray(s), _) if s.typ.isInstanceOf[TStream] => s - // - // case ToStream(Let(name, value, ToArray(x)), _) if x.typ.isInstanceOf[TStream] => - // Let(name, value, x) + case ToStream(ToArray(s), false) if s.typ.isInstanceOf[TStream] => s + + case ToStream(Let(name, value, ToArray(x)), false) if x.typ.isInstanceOf[TStream] => + Let(name, value, x) case MakeNDArray(ToArray(someStream), shape, rowMajor, errorId) => MakeNDArray(someStream, shape, rowMajor, errorId) case MakeNDArray(ToStream(someArray, _), shape, rowMajor, errorId) => MakeNDArray(someArray, shape, rowMajor, errorId)