Skip to content

Commit

Permalink
Merge pull request #435 from tony1223/master
Browse files Browse the repository at this point in the history
SXSSFWorkbook possible performance overhead #434
  • Loading branch information
tonyqus authored Dec 8, 2020
2 parents 963dd17 + de270ba commit 53f62a1
Show file tree
Hide file tree
Showing 3 changed files with 155 additions and 40 deletions.
65 changes: 58 additions & 7 deletions ooxml/XSSF/Streaming/SXSSFRow.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,13 @@ namespace NPOI.XSSF.Streaming
public class SXSSFRow : IRow, IComparable<SXSSFRow>
{
private SXSSFSheet _sheet; // parent sheet
private SortedDictionary<int, SXSSFCell> _cells = new SortedDictionary<int, SXSSFCell>();
private IDictionary<int, SXSSFCell> _cells = new Dictionary<int, SXSSFCell>();
private short _style = -1; // index of cell style in style table
private bool _zHeight; // row zero-height (this is somehow different than being hidden)
private float _height = -1;


private int _FirstCellNum = -1;
private int _LastCellNum = -1;
// use Boolean to have a tri-state for on/off/undefined
public bool? Hidden { get; set; }
public bool? Collapsed { get; set; }
Expand All @@ -43,7 +44,7 @@ public SXSSFRow(SXSSFSheet sheet)

public CellIterator AllCellsIterator()
{
return new CellIterator(LastCellNum, _cells);
return new CellIterator(LastCellNum, new SortedDictionary<int, SXSSFCell>(_cells));
}
public bool HasCustomHeight()
{
Expand All @@ -61,12 +62,11 @@ public short FirstCellNum
{
try
{
return (short) _cells.First().Key;
return (short) _FirstCellNum;
}
catch
{
return -1;

}
}
}
Expand Down Expand Up @@ -105,7 +105,7 @@ public short LastCellNum
{
get
{
return _cells.Count == 0 ? (short)-1 : Convert.ToInt16(_cells.Last().Key + 1);
return (short) _LastCellNum;

}
}
Expand Down Expand Up @@ -243,9 +243,23 @@ public ICell CreateCell(int column, CellType type)
CheckBounds(column);
SXSSFCell cell = new SXSSFCell(this, type);
_cells[column] = cell;
UpdateIndexWhenAdd(column);
return cell;
}

/// <summary>
/// Widens the cached first/last cell bounds of this row so that they
/// include <paramref name="cellnum"/>.
/// </summary>
/// <param name="cellnum">Column index of the cell that has just been stored.</param>
private void UpdateIndexWhenAdd(int cellnum)
{
    // -1 is the value the field starts with, i.e. no first cell recorded yet.
    bool firstIsUnset = _FirstCellNum == -1;
    if (firstIsUnset || cellnum < _FirstCellNum)
    {
        _FirstCellNum = cellnum;
    }

    // _LastCellNum is stored as "highest column index + 1".
    if (_LastCellNum <= cellnum)
    {
        _LastCellNum = cellnum + 1;
    }
}


/// <summary>
/// throws RuntimeException if the bounds are exceeded.
Expand Down Expand Up @@ -276,6 +290,7 @@ public ICell GetCell(int cellnum, MissingCellPolicy policy)
SXSSFCell cell = null;
if (_cells.ContainsKey(cellnum))
cell = _cells[cellnum];

switch (policy)
{
case MissingCellPolicy.RETURN_NULL_AND_BLANK:
Expand All @@ -292,7 +307,7 @@ public ICell GetCell(int cellnum, MissingCellPolicy policy)
}
public IEnumerator<ICell> GetEnumerator()
{
return new FilledCellIterator(_cells);
return new FilledCellIterator(new SortedDictionary<int, SXSSFCell>(_cells));
}

public void MoveCell(ICell cell, int newColumn)
Expand All @@ -304,7 +319,42 @@ public void RemoveCell(ICell cell)
{
int index = GetCellIndex((SXSSFCell)cell);
_cells.Remove(index);
if (index == _FirstCellNum)
{
InvalidateFirstCellNum();
}

if (index >= (_LastCellNum -1))
{
InvalidateLastCellNum();
}
}

/// <summary>
/// Recomputes the cached first cell index from the cells still held by this row.
/// </summary>
/// <remarks>
/// A row without cells is reset to -1, the same value the field is initialised with.
/// UpdateIndexWhenAdd only recognises -1 as "unset"; resetting to 0 here would leave
/// the first cell index stuck at 0 once the row is emptied and a cell is later
/// created at a higher column.
/// </remarks>
private void InvalidateFirstCellNum()
{
    // Min() scans every key, so this is only called when the first cell was removed.
    _FirstCellNum = _cells.Count == 0 ? -1 : _cells.Keys.Min();
}

/// <summary>
/// Recomputes the cached last cell bound (highest column index + 1) from the
/// cells still held by this row.
/// </summary>
/// <remarks>
/// A row without cells is reset to -1, the same value the field is initialised with,
/// so that an emptied row reports the same last cell number as a newly created one
/// instead of 0.
/// </remarks>
private void InvalidateLastCellNum()
{
    // Max() scans every key, so this is only called when the last cell was removed.
    _LastCellNum = _cells.Count == 0 ? -1 : _cells.Keys.Max() + 1;
}


/**
* Return the column number of a cell if it is in this row
* Otherwise return -1
Expand All @@ -325,6 +375,7 @@ public int GetCellIndex(SXSSFCell cell)
return -1;
}


IEnumerator IEnumerable.GetEnumerator()
{
throw new NotImplementedException();
Expand Down
117 changes: 90 additions & 27 deletions ooxml/XSSF/Streaming/SXSSFSheet.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,17 @@ public class SXSSFSheet : ISheet
internal XSSFSheet _sh;
private SXSSFWorkbook _workbook;
//private TreeMap<Integer, SXSSFRow> _rows = new TreeMap<Integer, SXSSFRow>();
private SortedDictionary<int, SXSSFRow> _rows = new SortedDictionary<int, SXSSFRow>();
private IDictionary<int, SXSSFRow> _rows = new Dictionary<int, SXSSFRow>();
private SheetDataWriter _writer;
private int _randomAccessWindowSize = SXSSFWorkbook.DEFAULT_WINDOW_SIZE;
private Lazy<AutoSizeColumnTracker> _autoSizeColumnTracker;
private int outlineLevelRow = 0;
private int lastFlushedRowNumber = -1;
private bool allFlushed = false;

private int _FirstRowNum = -1;
private int _LastRowNum = -1;


public SXSSFSheet(SXSSFWorkbook workbook, XSSFSheet xSheet)
{
Expand Down Expand Up @@ -190,7 +193,7 @@ public int FirstRowNum
{
if (_writer.NumberOfFlushedRows > 0)
return _writer.LowestIndexOfFlushedRows;
return _rows.Count == 0 ? 0 : _rows.Keys.First();
return _rows.Count == 0 ? 0 : _FirstRowNum;
}
}

Expand Down Expand Up @@ -305,7 +308,9 @@ public int LastRowNum
{
get
{
return _rows.Count == 0 ? 0 : _rows.Keys.Last();
if (_rows.Count == 0)
return _writer.NumberOfFlushedRows > 0 ? LastFlushedRowNumber : 0;
return _LastRowNum;
}
}

Expand Down Expand Up @@ -674,6 +679,9 @@ public IRow CreateRow(int rownum)

SXSSFRow newRow = new SXSSFRow(this);
_rows[rownum] = newRow;

UpdateIndexWhenAdd(rownum);

allFlushed = false;
if (_randomAccessWindowSize >= 0 && _rows.Count > _randomAccessWindowSize)
{
Expand All @@ -689,6 +697,19 @@ public IRow CreateRow(int rownum)
return newRow;
}

/// <summary>
/// Widens the cached first/last row numbers of the in-memory window so that
/// they include <paramref name="rownum"/>.
/// </summary>
/// <param name="rownum">Index of the row that has just been stored in the sheet.</param>
private void UpdateIndexWhenAdd(int rownum)
{
    // -1 is the value the field starts with, i.e. no first row recorded yet.
    bool firstIsUnset = _FirstRowNum == -1;
    if (firstIsUnset || rownum < _FirstRowNum)
    {
        _FirstRowNum = rownum;
    }

    // Unlike the cell bound on a row, the last row number is stored as-is (no +1).
    if (_LastRowNum < rownum)
    {
        _LastRowNum = rownum;
    }
}

public void CreateSplitPane(int xSplitPos, int ySplitPos, int leftmostColumn, int topRow, PanePosition activePane)
{
_sh.CreateSplitPane(xSplitPos, ySplitPos, leftmostColumn, topRow, activePane);
Expand Down Expand Up @@ -757,7 +778,7 @@ public List<IDataValidation> GetDataValidations()

public IEnumerator GetEnumerator()
{
return (IEnumerator<IRow>)_rows.Values.GetEnumerator();
return (IEnumerator<IRow>)new SortedDictionary<int,SXSSFRow>(_rows).Values.GetEnumerator();
}

public double GetMargin(MarginType margin)
Expand Down Expand Up @@ -889,6 +910,10 @@ public void RemoveMergedRegions(IList<int> indices)
}
public void RemoveRow(IRow row)
{
if (row == null)
{
throw new ArgumentException("Invalid row (null)");
}
if (row.Sheet != this)
{
throw new ArgumentException("Specified row does not belong to this sheet");
Expand All @@ -901,10 +926,57 @@ public void RemoveRow(IRow row)
toRemove.Add(kv.Key);
}
}

var invalidatedFirst = false;
var invalidatedLast = false;
foreach(var key in toRemove)
{
if (key == _FirstRowNum)
{
invalidatedFirst = true;
}

if (key >= (_LastRowNum -1))
{
invalidatedLast = true;
}
_rows.Remove(key);
}

if (invalidatedFirst)
{
InvalidateFirstRowNum();
}

if (invalidatedLast)
{
InvalidateLastRowNum();
}

}

/// <summary>
/// Recomputes the cached first row number from the rows currently held in memory;
/// falls back to -1 when no rows are held.
/// </summary>
private void InvalidateFirstRowNum()
{
    bool noRowsHeld = _rows.Count == 0;

    // Min() walks every key of the dictionary.
    _FirstRowNum = noRowsHeld ? -1 : _rows.Keys.Min();
}

/// <summary>
/// Recomputes the cached last row number from the rows currently held in memory;
/// falls back to -1 when no rows are held.
/// </summary>
private void InvalidateLastRowNum()
{
    bool noRowsHeld = _rows.Count == 0;

    // Max() walks every key of the dictionary.
    _LastRowNum = noRowsHeld ? -1 : _rows.Keys.Max();
}

public void RemoveRowBreak(int row)
Expand Down Expand Up @@ -1236,6 +1308,7 @@ public void ChangeRowNum(SXSSFRow row, int newRowNum)

RemoveRow(row);
_rows.Add(newRowNum, row);
UpdateIndexWhenAdd(newRowNum);
}

public bool Dispose()
Expand All @@ -1245,21 +1318,27 @@ public bool Dispose()
}
/**
* Specifies how many rows can be accessed at most via getRow().
* The exeeding rows (if any) are flushed to the disk while rows
* The exceeding rows (if any) are flushed to the disk while rows
* with lower index values are flushed first.
*/
private void FlushRows(int remaining, bool flushOnDisk)
{
    int writtenCount = 0;
    KeyValuePair<int, SXSSFRow>? mostRecent = null;

    // Hand rows to flushOneRow() one at a time until at most `remaining` are still held.
    while (_rows.Count > remaining)
    {
        mostRecent = flushOneRow();
        writtenCount++;
    }

    // Rows have left the dictionary, so the cached first/last row numbers are recomputed.
    InvalidateFirstRowNum();
    InvalidateLastRowNum();

    if (remaining == 0)
    {
        allFlushed = true;
    }

    //TODO: review this.
    if (flushOnDisk && mostRecent.HasValue)
    {
        // Report the count, the number of the last row written and its cell bound to the writer.
        KeyValuePair<int, SXSSFRow> last = mostRecent.Value;
        _writer.FlushRows(writtenCount, last.Key, last.Value.LastCellNum);
    }
}
Expand Down Expand Up @@ -1294,30 +1373,14 @@ public void FlushRows()
if (_rows.Count == 0)
return null;

var firstRow = _rows.FirstOrDefault();
var firstRowNum = _rows.Keys.Min();
// Update the best fit column widths for auto-sizing just before the rows are flushed
// _autoSizeColumnTracker.UpdateColumnWidths(row);
_writer.WriteRow(firstRow.Key, firstRow.Value);
_rows.Remove(firstRow.Key);
lastFlushedRowNumber = firstRow.Key;
return firstRow;
}

private void FlushOneRow()
{
KeyValuePair<int, SXSSFRow> firstRow = _rows.FirstOrDefault();
//KeyValuePair is struct, so check value instead of key
if (firstRow.Value != null)
{
int firstRowNum = firstRow.Key;
int rowIndex = firstRowNum;
SXSSFRow row = _rows[firstRowNum];
// Update the best fit column widths for auto-sizing just before the rows are flushed
//_autoSizeColumnTracker.UpdateColumnWidths(row);
_writer.WriteRow(rowIndex, row);
_rows.Remove(firstRowNum);
lastFlushedRowNumber = rowIndex;
}
var firstRow = _rows[firstRowNum];
_writer.WriteRow(firstRowNum, firstRow);
_rows.Remove(firstRowNum);
lastFlushedRowNumber = firstRowNum;
return new KeyValuePair<int, SXSSFRow>(firstRowNum,firstRow);
}

/* Gets "<sheetData>" document fragment*/
Expand Down
13 changes: 7 additions & 6 deletions ooxml/XSSF/Streaming/SheetDataWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public class SheetDataWriter
protected Stream OutputStream { get; private set; }
private int RowNum { get; set; }
public int NumberOfFlushedRows { get; set; }
public int LowestIndexOfFlushedRows { get; set; } // meaningful only of _numberOfFlushedRows>0
public int LowestIndexOfFlushedRows { get; set; } = -1; // meaningful only of _numberOfFlushedRows>0
public int NumberOfCellsOfLastFlushedRow { get; set; } // meaningful only of _numberOfFlushedRows>0
public int NumberLastFlushedRow = -1; // meaningful only of _numberOfFlushedRows>0

Expand Down Expand Up @@ -202,18 +202,19 @@ public void WriteRow(int rownum, SXSSFRow row)
}
EndRow();
}

if (LowestIndexOfFlushedRows == -1 || LowestIndexOfFlushedRows > rownum)
{
LowestIndexOfFlushedRows = rownum;
NumberOfFlushedRows++;
}
}

public void FlushRows(int rowCount, int lastRowNum, int lastRowCellsCount)
{
if (NumberOfFlushedRows == 0)
{
LowestIndexOfFlushedRows = lastRowNum;
}

NumberLastFlushedRow = Math.Max(lastRowNum, NumberLastFlushedRow);
NumberOfCellsOfLastFlushedRow = lastRowCellsCount;
NumberOfFlushedRows += rowCount;

_outputWriter.Flush();
OutputStream.Flush();
Expand Down

0 comments on commit 53f62a1

Please sign in to comment.