Skip to content

Commit

Permalink
Merge pull request #473 from BCDA-APS/471-openpyxl
Browse files: browse the repository at this point in the history
refactor to use openpyxl (instead of xlrd or pandas)
  • Loading branch information
prjemian authored Jan 12, 2021
2 parents 3af5638 + f987e48 commit 669c2be
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 85 deletions.
10 changes: 8 additions & 2 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,15 @@
Change History
##############

:1.4.1: release expected 2021-01-31
:1.4.1: release expected 2021-03-31

:1.4.0: release expected 2021-01-01
:1.4.0: release expected 2021-01-31

* `#472 <https://github.com/BCDA-APS/apstools/pull/472>`_
Respond to changes in upstream packages.

* package requirements
* auto-detection of command list format (Excel or text)

* `#463 <https://github.com/BCDA-APS/apstools/pull/463>`_
Remove deprecated features.
Expand Down
68 changes: 26 additions & 42 deletions apstools/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@
import json
import logging
import math
import openpyxl
import openpyxl.utils.exceptions
import ophyd
import os
import pandas
Expand All @@ -77,7 +79,6 @@
import threading
import time
import warnings
import xlrd
import zipfile

from .filewriters import _rebuild_scan_command
Expand All @@ -88,7 +89,7 @@
MAX_EPICS_STRINGOUT_LENGTH = 40


class ExcelReadError(xlrd.XLRDError): ...
class ExcelReadError(openpyxl.utils.exceptions.InvalidFileException): ...


def cleanupText(text):
Expand Down Expand Up @@ -1059,30 +1060,36 @@ def handleExcelRowEntry(self, entry): # subclass MUST override
def parse(self, labels_row_num=None, data_start_row_num=None, ignore_extra=True):
labels_row_num = labels_row_num or self.LABELS_ROW
try:
wb = openpyxl.load_workbook(self.fname)
ws = wb.worksheets[self.sheet_name]
if ignore_extra:
# ignore data outside of table in spreadsheet file
nrows, ncols = self.getTableBoundaries(labels_row_num)
xl = pandas.read_excel(
self.fname,
sheet_name=self.sheet_name,
skiprows=labels_row_num,
usecols=range(ncols),
nrows=nrows,
)
data = list(ws.rows)[labels_row_num:]
self.data_labels = []
for c in data[0]:
if c.value is None:
break
self.data_labels.append(c.value)
rows = []
for r in data[1:]:
if r[0].value is None:
break
rows.append(r[:len(self.data_labels)])
else:
xl = pandas.read_excel(
self.fname,
sheet_name=self.sheet_name,
header=None,
)
except xlrd.XLRDError as exc:
# use the whole sheet
rows = list(ws.rows)
# create the column titles
self.data_labels = [
f"Column_{i+1}"
for i in range(len(rows[0]))
]
except openpyxl.utils.exceptions.InvalidFileException as exc:
raise ExcelReadError(exc)
self.data_labels = list(map(str, xl.columns.values))
# unused: data_start_row_num = data_start_row_num or labels_row_num+1
for row_data in xl.values:
for row in rows:
entry = OrderedDict()
for _col, label in enumerate(self.data_labels):
entry[label] = self._getExcelColumnValue(row_data, _col)
entry[label] = row[_col].value
self.handle_single_entry(entry)
self.handleExcelRowEntry(entry)

Expand All @@ -1101,29 +1108,6 @@ def _isExcel_nan(self, value):
return False
return math.isnan(value)

def getTableBoundaries(self, labels_row_num=None):
    """
    Identify how many rows and columns are in the Excel spreadsheet table.

    The sheet named by ``self.sheet_name`` in file ``self.fname`` is read
    with ``pandas.read_excel()``, skipping ``labels_row_num`` rows first.
    The table is taken to end:

    * at the first column whose label starts with ``Unnamed: <index>``
      (the name pandas assigns to a column with no header text), and
    * at the first row for which ``self._isExcel_nan()`` is true for
      every cell within those columns.

    PARAMETERS

    labels_row_num
        *int* or ``None`` :
        Number of rows to skip before the row of column labels.
        Any falsy value (``None`` or ``0``) selects ``self.LABELS_ROW``.

    RETURNS

    tuple
        ``(nrows, ncols)`` : number of data rows and of labeled columns.
    """
    labels_row_num = labels_row_num or self.LABELS_ROW
    xl = pandas.read_excel(
        self.fname, sheet_name=self.sheet_name, skiprows=labels_row_num
    )

    ncols = len(xl.columns)
    for i, k in enumerate(xl.columns):
        # A header cell holding a number or a date yields a non-string
        # label; convert before testing the prefix so it cannot raise
        # AttributeError.
        if str(k).startswith(f"Unnamed: {i}"):
            # TODO: verify all values under this label are NaN
            ncols = i
            break

    values = xl.values
    nrows = len(values)
    for j, r in enumerate(values):
        # The first row that is empty across the table's columns ends the table.
        if all(self._isExcel_nan(value) for value in r[:ncols]):
            nrows = j
            break

    return nrows, ncols


class ExcelDatabaseFileGeneric(ExcelDatabaseFileBase):
"""
Expand Down
1 change: 0 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ dependencies:
- qt =5
- spec2nexus >=2021.1.7
- sphinx
- xlrd
- pip:
- bluesky_live
- ipython-genutils==0.2.0
Expand Down
32 changes: 16 additions & 16 deletions tests/test_commandlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@ def test_ExcelFile(self):
====== ====== =========================================
line # action parameters
====== ====== =========================================
1 row1 91, 26.0, 85.0, None, blank, 8.0
2 row2 9, 39.0, 29.0, 85.0, sample, 60.0
3 row3 54, None, 38.0, 3.0, blank, 76.0
4 row4 71, 36.0, 95.0, 83.0, foil, 12.0
5 row5 55, 75.0, 59.0, 84.0, DNA, 34.0
6 row6 18, 49.0, 31.0, 34.0, lecithin, 47.0
1 row1 91, 26, 85, None, blank, 8
2 row2 9, 39, 29, 85, sample, 60
3 row3 54, None, 38, 3, blank, 76
4 row4 71, 36, 95, 83, foil, 12
5 row5 55, 75, 59, 84, DNA, 34
6 row6 18, 49, 31, 34, lecithin, 47
7 row7 37, None, None, None, a big mix of stuff
8 row8 37, 80.0, 79.0, 45.0, salt water, 36.0
9 row9 72, 98.0, 67.0, 89.0, surprises, 49.0
8 row8 37, 80, 79, 45, salt water, 36
9 row9 72, 98, 67, 89, surprises, 49
====== ====== =========================================
""".strip()
self.compare_tables_as_str(expected, received)
Expand All @@ -59,17 +59,17 @@ def test_ExcelCommandList(self):
table = APS_utils.command_list_as_table(commands, show_raw=False)
received = str(table).strip()
expected = """
====== ============ =============================
====== ============ ===========================
line # action parameters
====== ============ =============================
====== ============ ===========================
1 mono_shutter open
2 USAXSscan 45.07, 98.3, 0.0, Water Blank
3 saxsExp 45.07, 98.3, 0.0, Water Blank
4 waxwsExp 45.07, 98.3, 0.0, Water Blank
5 USAXSscan 12, 12.0, 1.2, plastic
6 USAXSscan 12, 37.0, 0.1, Al foil
2 USAXSscan 45.07, 98.3, 0, Water Blank
3 saxsExp 45.07, 98.3, 0, Water Blank
4 waxwsExp 45.07, 98.3, 0, Water Blank
5 USAXSscan 12, 12, 1.2, plastic
6 USAXSscan 12, 37, 0.1, Al foil
7 mono_shutter close
====== ============ =============================
====== ============ ===========================
""".strip()
self.compare_tables_as_str(expected, received)

Expand Down
4 changes: 3 additions & 1 deletion tests/test_exceltable.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ def test_ExcelTable_normal_read(self):
self.assertEqual(xl.db["0"]["Unnamed: 7"], 8.0)

def test_ExcelTable_ignore_extra_false(self):
xl = apstools.utils.ExcelDatabaseFileGeneric(self.xl_file, ignore_extra=False)
xl = apstools.utils.ExcelDatabaseFileGeneric(
self.xl_file, ignore_extra=False
)
self.assertEqual(len(xl.db), 16) # rows
self.assertEqual(len(xl.db["0"]), 9) # columns

Expand Down
48 changes: 25 additions & 23 deletions tests/test_plans.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ def test_run_command_file(self):
'file line 12: SAXS 0 0 0 blank',
'no handling for line 12: SAXS 0 0 0 blank',
]
self.assertEqual(str(received), str(expected))
for r, e in zip(received, expected):
self.assertEqual(str(r), str(e))

filename = os.path.join(_test_path, "actions.xlsx")
with Capture_stdout() as received:
Expand All @@ -102,34 +103,35 @@ def test_run_command_file(self):
# print(f"|{received}|")
expected = [
f'Command file: {filename}',
'====== ============ =============================',
'line # action parameters ',
'====== ============ =============================',
'1 mono_shutter open ',
'2 USAXSscan 45.07, 98.3, 0.0, Water Blank',
'3 saxsExp 45.07, 98.3, 0.0, Water Blank',
'4 waxwsExp 45.07, 98.3, 0.0, Water Blank',
'5 USAXSscan 12, 12.0, 1.2, plastic ',
'6 USAXSscan 12, 37.0, 0.1, Al foil ',
'7 mono_shutter close ',
'====== ============ =============================',
'====== ============ ===========================',
'line # action parameters ',
'====== ============ ===========================',
'1 mono_shutter open ',
'2 USAXSscan 45.07, 98.3, 0, Water Blank',
'3 saxsExp 45.07, 98.3, 0, Water Blank',
'4 waxwsExp 45.07, 98.3, 0, Water Blank',
'5 USAXSscan 12, 12, 1.2, plastic ',
'6 USAXSscan 12, 37, 0.1, Al foil ',
'7 mono_shutter close ',
'====== ============ ===========================',
'',
"file line 1: ['mono_shutter', 'open', None, None, None]",
"no handling for line 1: ['mono_shutter', 'open', None, None, None]",
"file line 2: ['USAXSscan', 45.07, 98.3, 0.0, 'Water Blank']",
"no handling for line 2: ['USAXSscan', 45.07, 98.3, 0.0, 'Water Blank']",
"file line 3: ['saxsExp', 45.07, 98.3, 0.0, 'Water Blank']",
"no handling for line 3: ['saxsExp', 45.07, 98.3, 0.0, 'Water Blank']",
"file line 4: ['waxwsExp', 45.07, 98.3, 0.0, 'Water Blank']",
"no handling for line 4: ['waxwsExp', 45.07, 98.3, 0.0, 'Water Blank']",
"file line 5: ['USAXSscan', 12, 12.0, 1.2, 'plastic']",
"no handling for line 5: ['USAXSscan', 12, 12.0, 1.2, 'plastic']",
"file line 6: ['USAXSscan', 12, 37.0, 0.1, 'Al foil']",
"no handling for line 6: ['USAXSscan', 12, 37.0, 0.1, 'Al foil']",
"file line 2: ['USAXSscan', 45.07, 98.3, 0, 'Water Blank']",
"no handling for line 2: ['USAXSscan', 45.07, 98.3, 0, 'Water Blank']",
"file line 3: ['saxsExp', 45.07, 98.3, 0, 'Water Blank']",
"no handling for line 3: ['saxsExp', 45.07, 98.3, 0, 'Water Blank']",
"file line 4: ['waxwsExp', 45.07, 98.3, 0, 'Water Blank']",
"no handling for line 4: ['waxwsExp', 45.07, 98.3, 0, 'Water Blank']",
"file line 5: ['USAXSscan', 12, 12, 1.2, 'plastic']",
"no handling for line 5: ['USAXSscan', 12, 12, 1.2, 'plastic']",
"file line 6: ['USAXSscan', 12, 37, 0.1, 'Al foil']",
"no handling for line 6: ['USAXSscan', 12, 37, 0.1, 'Al foil']",
"file line 7: ['mono_shutter', 'close', None, None, None]",
"no handling for line 7: ['mono_shutter', 'close', None, None, None]"
]
self.assertEqual(str(received), str(expected))
for r, e in zip(received, expected):
self.assertEqual(str(r), str(e))


def suite(*args, **kw):
Expand Down

0 comments on commit 669c2be

Please sign in to comment.