forked from frictionlessdata/tabulator-py
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ODS support is based on the ezodf [1] library. It looks like ezodf has a bug [2] where the number of sheet rows and cells is calculated incorrectly. This is probably why I get extra rows and columns in the tests. [1] https://github.com/T0ha/ezodf [2] T0ha/ezodf#12 See: frictionlessdata#28
- Loading branch information
Showing
6 changed files
with
122 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
# -*- coding: utf-8 -*- | ||
from __future__ import division | ||
from __future__ import print_function | ||
from __future__ import absolute_import | ||
from __future__ import unicode_literals | ||
|
||
import ezodf | ||
from six import BytesIO | ||
|
||
from .. import helpers | ||
from . import api | ||
|
||
|
||
# Module API | ||
|
||
class ODSParser(api.Parser):
    """Parser for ODF spreadsheets, backed by the ``ezodf`` library.

    Args:
        sheet (int or str): number or name of the sheet to read.
            Numbering is 1-based, i.e. the first sheet is 1.
    """

    # Public

    options = [
        'sheet',
    ]

    def __init__(self, sheet=1):
        # Callers count sheets from 1; an integer is shifted to a 0-based
        # index, anything else is kept untouched as the lookup key.
        if isinstance(sheet, int):
            index = sheet - 1
        else:
            index = sheet
        self.__index = index
        self.__loader = None
        self.__bytes = None
        self.__book = None
        self.__sheet = None
        self.__extended_rows = None

    @property
    def closed(self):
        """Whether no byte stream is loaded or the loaded one is closed."""
        stream = self.__bytes
        if stream is None:
            return True
        return stream.closed

    def open(self, source, encoding, loader):
        """Load `source` through `loader` and select the configured sheet.

        Any previously opened stream is closed first. The document is
        parsed from the full content of the loaded byte stream.
        """
        self.close()
        self.__loader = loader
        self.__bytes = loader.load(source, encoding, mode='b')
        content = self.__bytes.read()
        self.__book = ezodf.opendoc(BytesIO(content))
        self.__sheet = self.__book.sheets[self.__index]
        self.reset()

    def close(self):
        """Close the underlying byte stream unless it is already closed."""
        if self.closed:
            return
        self.__bytes.close()

    def reset(self):
        """Reset the byte stream and start a fresh pass over the rows."""
        helpers.reset_stream(self.__bytes)
        # The sheet parsed in `open` is reused; only the iterator is rebuilt.
        self.__extended_rows = self.__iter_extended_rows()

    @property
    def extended_rows(self):
        """Iterator of ``(row_number, None, values)`` tuples."""
        return self.__extended_rows

    # Private

    def __iter_extended_rows(self):
        # Row numbers start at 1; the second tuple slot is always None.
        number = 0
        for row in self.__sheet.rows():
            number += 1
            values = [cell.value for cell in row]
            yield number, None, values
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# -*- coding: utf-8 -*- | ||
from __future__ import division | ||
from __future__ import print_function | ||
from __future__ import absolute_import | ||
from __future__ import unicode_literals | ||
|
||
import io | ||
from mock import Mock | ||
from tabulator import Stream | ||
from tabulator.parsers.ods import ODSParser | ||
|
||
|
||
# Tests | ||
|
||
def test_excelx_parser():
    """Exercise the open / iterate / reset / close cycle of ODSParser."""

    # Arrange: a mocked loader that hands back the fixture as a binary file.
    source = 'data/table.ods'
    encoding = None
    loader = Mock()
    loader.load = Mock(return_value=io.open(source, 'rb'))
    parser = ODSParser()

    expected_rows = [
        (1, None, ['id', 'name', None]),
        (2, None, [1.0, 'english', None]),
        (3, None, [2.0, '中国人', None]),
        (4, None, [None, None, None]),
        (5, None, [None, None, None]),
    ]

    # A fresh parser is closed until `open` is called.
    assert parser.closed
    parser.open(source, encoding, loader)
    assert not parser.closed

    # First pass yields every row of the fixture.
    rows = list(parser.extended_rows)
    assert rows == expected_rows

    # The iterator is exhausted now; `reset` makes the rows available again.
    leftover = list(parser.extended_rows)
    assert len(leftover) == 0
    parser.reset()
    replayed = list(parser.extended_rows)
    assert len(replayed) == 5

    parser.close()
    assert parser.closed
|
||
|
||
def test_stream_ods():
    """Read the ODS fixture through Stream with ``headers=1``, keyed."""
    expected_headers = ['id', 'name', None]
    expected_rows = [
        {'id': 1.0, 'name': 'english', None: None},
        {'id': 2.0, 'name': '中国人', None: None},
        {'id': None, 'name': None, None: None},
        {'id': None, 'name': None, None: None},
    ]
    with Stream('data/table.ods', headers=1) as stream:
        assert stream.headers == expected_headers
        keyed_rows = stream.read(keyed=True)
        assert keyed_rows == expected_rows