Skip to content

Commit

Permalink
block size exception
Browse files Browse the repository at this point in the history
  • Loading branch information
mandyshieh committed May 4, 2018
1 parent f0e639a commit 49bb968
Showing 1 changed file with 32 additions and 8 deletions.
40 changes: 32 additions & 8 deletions src/Microsoft.ML.Parquet/ParquetLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -390,9 +390,21 @@ public Cursor(ParquetLoader parent, Func<int, bool> predicate, IRandom rand)
Columns = _loader._columnsLoaded.Select(i => i.Name).ToArray()
};

int numBlocks = (int)Math.Ceiling(((decimal)parent.GetRowCount() / _readerOptions.Count));
int[] blockOrder = _rand == null ? Utils.GetIdentityPermutation(numBlocks) : Utils.GetRandomPermutation(rand, numBlocks);
_blockEnumerator = blockOrder.GetEnumerator();
try
{
int numBlocks = checked((int)Math.Ceiling(((decimal)parent.GetRowCount() / _readerOptions.Count)));
int[] blockOrder = _rand == null ? Utils.GetIdentityPermutation(numBlocks) : Utils.GetRandomPermutation(rand, numBlocks);
_blockEnumerator = blockOrder.GetEnumerator();
}
catch (Exception e)
{
if (e is OutOfMemoryException || e is OverflowException)
{
throw new InvalidDataException("Error due to too many blocks. Try increasing block size.", e);
}

throw;
}

_dataSetEnumerator = new int[0].GetEnumerator(); // Initialize an empty enumerator to get started
_columnValues = new IList[_actives.Length];
Expand Down Expand Up @@ -477,7 +489,7 @@ protected override bool MoveNextCore()
}
else if (_blockEnumerator.MoveNext())
{
_readerOptions.Offset = (int)_blockEnumerator.Current * _readerOptions.Count;
_readerOptions.Offset = (long)_blockEnumerator.Current * _readerOptions.Count;

// When current dataset runs out, read the next portion of the parquet file.
DataSet ds;
Expand All @@ -486,9 +498,21 @@ protected override bool MoveNextCore()
ds = ParquetReader.Read(_loader._parquetStream, _loader._parquetOptions, _readerOptions);
}

int[] dataSetOrder = _rand == null ? Utils.GetIdentityPermutation(ds.RowCount) : Utils.GetRandomPermutation(_rand, ds.RowCount);
_dataSetEnumerator = dataSetOrder.GetEnumerator();
_curDataSetRow = dataSetOrder[0];
try
{
int[] dataSetOrder = _rand == null ? Utils.GetIdentityPermutation(ds.RowCount) : Utils.GetRandomPermutation(_rand, ds.RowCount);
_dataSetEnumerator = dataSetOrder.GetEnumerator();
_curDataSetRow = dataSetOrder[0];
}
catch (Exception e)
{
if (e is OutOfMemoryException)
{
throw new InvalidDataException("Error caused because block size too big. Try decreasing block size.", e);
}

throw;
}

// Cache list for each active column
for (int i = 0; i < _actives.Length; i++)
Expand Down Expand Up @@ -671,4 +695,4 @@ private string ConvertListToString(IList list)
}
}
}
}
}

0 comments on commit 49bb968

Please sign in to comment.