Skip to content

Commit

Permalink
Monkey patched openpyxl for ranges #33
Browse files Browse the repository at this point in the history
  • Loading branch information
Apkawa committed Jan 10, 2022
1 parent 6fbe6ff commit cc17892
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 2 deletions.
Binary file added tests/fixtures/merge_cells.xlsx
Binary file not shown.
43 changes: 43 additions & 0 deletions tests/test_merge_cells.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import pytest
from openpyxl.utils import range_boundaries
from xlsx2html import xlsx2html
from xlsx2html.parser.parser import XLSXParser


def test_monkey_patched():
    """The ``range_boundaries`` imported from openpyxl must carry the ``_monkey`` marker."""
    patch_marker = range_boundaries._monkey
    assert patch_marker


@pytest.mark.parametrize(
    "range_string,expect_result",
    [
        # whole column
        ("A:A", (1, 1, 1, 1048576)),
        # whole row
        ("2:2", (1, 2, 18278, 2)),
        # open-ended: from B2 down to the last row
        ("B2:B", (2, 2, 2, 1048576)),
        # open-ended: from D2 across to the last column
        ("D2:2", (4, 2, 18278, 2)),
    ],
)
def test_range_boundaries(range_string, expect_result):
    """Check that open-ended range strings resolve to fully bounded tuples.

    Each expected value is ``(min_col, min_row, max_col, max_row)``.

    The parameter is called ``range_string`` rather than ``range`` so the
    builtin ``range`` is not shadowed inside the test body.
    """
    assert range_boundaries(range_string) == expect_result


@pytest.mark.skip(reason="TODO optimize large merges")
def test_parse_merge_cells(fixture_file):
    """Parse the ``merge_cells.xlsx`` fixture and check the size of the parsed sheet."""
    workbook_path = fixture_file("merge_cells.xlsx")
    sheet = XLSXParser(filepath=workbook_path).get_sheet()

    assert sheet
    assert len(sheet.cols) == 4
    assert len(sheet.rows) == 11


@pytest.mark.skip(reason="TODO optimize large merges")
@pytest.mark.webtest()
def test_merge_cells(temp_file, browser, screenshot_regression, fixture_file):
    """Convert the ``merge_cells.xlsx`` fixture to HTML and run the screenshot regression."""
    # Fix the viewport first so the screenshot is taken at a known size.
    browser.driver.set_window_size(1280, 1024)

    html_path = temp_file()
    workbook_path = fixture_file("merge_cells.xlsx")
    xlsx2html(workbook_path, html_path, locale="en")

    page_url = "file://" + html_path
    browser.visit(page_url)
    screenshot_regression()
1 change: 1 addition & 0 deletions xlsx2html/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import warnings
from typing import TextIO

from . import monkey_patch # noqa
from .core import xlsx2html, XLSX2HTMLConverter

__all__ = ["xls2html", "xlsx2html", "XLSX2HTMLConverter"]
Expand Down
74 changes: 74 additions & 0 deletions xlsx2html/monkey_patch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# type: ignore
import functools

import openpyxl
from openpyxl import utils
from openpyxl.utils import cell
from openpyxl.worksheet.cell_range import CellRange

# Keep a reference to the function currently bound to
# ``openpyxl.utils.cell.range_boundaries`` so the wrapper below can delegate to it.
old_range_boundaries = cell.range_boundaries

# Upper bounds substituted when a range string leaves its end column or end row open.
# 18278 == 26 + 26**2 + 26**3, i.e. the index of the last three-letter column ("ZZZ").
MAX_COL_INDEX = 18278
# 1048576 == 2**20.  NOTE(review): presumably the Excel worksheet row limit - confirm.
MAX_ROW_INDEX = 1048576


@functools.wraps(old_range_boundaries)
def new_range_boundaries(range_string):
    """
    Resolve a range string to ``(min_col, min_row, max_col, max_row)``.

    Delegates to the original ``range_boundaries`` after completing ranges
    whose corners are only partly given:
        A:A  - a whole column
        2:2  - a whole row
        A2:A - column A from row 2 to the last row
        B3:3 - row 3 from column B to the last column
    """
    from openpyxl.utils.cell import ABSOLUTE_RE

    parsed = ABSOLUTE_RE.match(range_string)
    if parsed:
        start_col, start_row, separator, end_col, end_row = parsed.groups()
        if separator:
            # Complete every omitted corner so the original parser receives a
            # fully specified "A1:B2"-style range.
            start_col = start_col or utils.get_column_letter(1)
            start_row = start_row or "1"
            end_col = end_col or utils.get_column_letter(MAX_COL_INDEX)
            end_row = end_row or str(MAX_ROW_INDEX)
            range_string = start_col + start_row + ":" + end_col + end_row

    left, top, right, bottom = old_range_boundaries(range_string)
    # The original parser can still hand back empty bounds (e.g. when no
    # separator was present); replace those with the sheet limits.
    return (
        left or 1,
        top or 1,
        right or MAX_COL_INDEX,
        bottom or MAX_ROW_INDEX,
    )


# Marker attribute: lets callers (the test-suite asserts on it) verify that the
# ``range_boundaries`` they imported is the patched function.
new_range_boundaries._monkey = True
# Rebind the name in both namespaces that expose it.
# NOTE(review): a module that ran ``from openpyxl.utils import range_boundaries``
# before this point keeps its reference to the old function - presumably why
# ``CellRange.__init__`` is replaced separately below; confirm no other importer matters.
cell.range_boundaries = new_range_boundaries
utils.range_boundaries = new_range_boundaries
# NOTE(review): ``utils`` was imported from ``openpyxl`` above, so this looks like a
# no-op re-assignment - confirm before removing.
openpyxl.utils = utils


def CellRange__init__(
    self, range_string=None, min_col=None, min_row=None, max_col=None, max_row=None, title=None
):
    """
    Replacement constructor for ``CellRange``.

    When ``range_string`` is given it overrides the explicit bounds: a string
    containing ``"!"`` is split into a title and bounds by ``range_to_tuple``,
    any other string is resolved by ``new_range_boundaries``.

    The bounds and title are stored on ``self`` before validation, so they are
    already set when ``ValueError`` is raised for ``min_col > max_col`` or
    ``min_row > max_row``.
    """
    if range_string is not None:
        if "!" not in range_string:
            # Plain range: use the patched boundary parser.
            bounds = new_range_boundaries(range_string)
        else:
            from openpyxl.utils import range_to_tuple

            title, bounds = range_to_tuple(range_string)
        min_col, min_row, max_col, max_row = bounds

    self.min_col, self.min_row = min_col, min_row
    self.max_col, self.max_row = max_col, max_row
    self.title = title

    if min_col > max_col:
        raise ValueError(
            "{max_col} must be greater than {min_col}".format(min_col=min_col, max_col=max_col)
        )
    if min_row > max_row:
        raise ValueError(
            "{max_row} must be greater than {min_row}".format(min_row=min_row, max_row=max_row)
        )


# Install the replacement constructor on openpyxl's ``CellRange`` so that range
# strings without a sheet title are resolved by ``new_range_boundaries``.
CellRange.__init__ = CellRange__init__
4 changes: 2 additions & 2 deletions xlsx2html/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from openpyxl.drawing.image import Image
from openpyxl.utils import rows_from_range, column_index_from_string
from openpyxl.worksheet.worksheet import Worksheet

from xlsx2html.compat import OPENPYXL_24
from xlsx2html.parser.cell import CellInfo, Borders
from xlsx2html.parser.image import ImageInfo
Expand Down Expand Up @@ -64,6 +63,7 @@ def get_sheet(self, sheet: SheetNameType = None) -> ParserResult:
excluded_cells = set(ws.merged_cells)
else:
merged_cell_ranges = [cell_range.coord for cell_range in ws.merged_cells.ranges]
# TODO Optimize for large merge
excluded_cells = set(
[
cell
Expand All @@ -72,7 +72,7 @@ def get_sheet(self, sheet: SheetNameType = None) -> ParserResult:
for cell in rows
]
)

# TODO optimize for large merge
for cell_range in merged_cell_ranges:
cell_range_list = list(ws[cell_range])
m_cell = cell_range_list[0][0]
Expand Down

0 comments on commit cc17892

Please sign in to comment.