@@ -142,55 +142,32 @@ private[spark] object InstanceBlock {
142142 new Iterator [InstanceBlock ] {
143143 private var numCols = - 1L
144144 private val buff = mutable.ArrayBuilder .make[Instance ]
145- private var buffCnt = 0L
146- private var buffNnz = 0L
147- private var buffUnitWeight = true
148- private var block = Option .empty[InstanceBlock ]
149145
150- private def flush (): Unit = {
151- block = Some (InstanceBlock .fromInstances(buff.result()))
152- buff.clear()
153- buffCnt = 0L
154- buffNnz = 0L
155- buffUnitWeight = true
156- }
146+ override def hasNext : Boolean = iterator.hasNext
157147
158- private def blockify (): Unit = {
159- block = None
148+ override def next (): InstanceBlock = {
149+ buff.clear()
150+ var buffCnt = 0L
151+ var buffNnz = 0L
152+ var buffUnitWeight = true
153+ var blockMemUsage = 0L
160154
161- while (block.isEmpty && iterator.hasNext ) {
155+ while (iterator.hasNext && blockMemUsage < maxMemUsage ) {
162156 val instance = iterator.next()
163157 if (numCols < 0L ) numCols = instance.features.size
164158 require(numCols == instance.features.size)
165159 val nnz = instance.features.numNonzeros
166160
167- // Check if enough memory remains to add this instance to the block.
168- if (getBlockMemUsage(numCols, buffCnt + 1L , buffNnz + nnz,
169- buffUnitWeight && (instance.weight == 1 )) > maxMemUsage) {
170- // Check if this instance is too large
171- require(buffCnt > 0 , s " instance $instance exceeds memory limit $maxMemUsage, " +
172- s " please increase block size " )
173- flush()
174- }
175-
176161 buff += instance
177162 buffCnt += 1L
178163 buffNnz += nnz
179164 buffUnitWeight &&= (instance.weight == 1 )
165+ blockMemUsage = getBlockMemUsage(numCols, buffCnt, buffNnz, buffUnitWeight)
180166 }
181167
182- if (block.isEmpty && buffCnt > 0 ) flush()
183- }
184-
185- override def hasNext : Boolean = {
186- block.nonEmpty || { blockify(); block.nonEmpty }
187- }
188-
189- override def next (): InstanceBlock = {
190- if (block.isEmpty) blockify()
191- val ret = block.get
192- blockify()
193- ret
168+ // The block's memory usage may slightly exceed the threshold, which is acceptable.
169+ // This also ensures each block holds at least one row, even if a single row exceeds the limit.
170+ InstanceBlock .fromInstances(buff.result())
194171 }
195172 }
196173 }
0 commit comments