Skip to content

Commit

Permalink
Add ODS support
Browse files Browse the repository at this point in the history
ODS support is based on the ezodf [1] library.

It looks like ezodf has a bug [2] where the number of sheet rows and cells is
calculated incorrectly. This is probably why I get extra rows and
columns in the tests.

[1] https://github.com/T0ha/ezodf
[2] T0ha/ezodf#12

See: frictionlessdata#28
  • Loading branch information
sirex committed Oct 27, 2016
1 parent 731059f commit 748da3f
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 1 deletion.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Consistent interface for stream reading and writing tabular data (csv/xls/json/e

## Features

- supports various formats: csv/tsv/xls/xlsx/json/ndjson/native/etc
- supports various formats: csv/tsv/xls/xlsx/json/ndjson/ods/native/etc
- reads data from variables, filesystem or Internet
- streams data instead of using a lot of memory
- processes data via simple user processors
Expand Down
Binary file added data/table.ods
Binary file not shown.
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ def read(*paths):
'linear-tsv>=1.0,<2.0a',
'unicodecsv>=0.14,<1.0a',
'jsonlines>=1.1,<1.2',
'ezodf>=0.3,<0.4',
'lxml', # required by ezodf
]
TESTS_REQUIRE = [
'pylama',
Expand Down
1 change: 1 addition & 0 deletions tabulator/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
'tsv': 'tabulator.parsers.tsv.TSVParser',
'xls': 'tabulator.parsers.excel.ExcelParser',
'xlsx': 'tabulator.parsers.excelx.ExcelxParser',
'ods': 'tabulator.parsers.ods.ODSParser',
}

WRITERS = {
Expand Down
67 changes: 67 additions & 0 deletions tabulator/parsers/ods.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
from __future__ import unicode_literals

import ezodf
from six import BytesIO

from .. import helpers
from . import api


# Module API

class ODSParser(api.Parser):
    """Parser to parse ODF Spreadsheets.

    Args:
        sheet (int or str): sheet number or name.
            First sheet's number is 1.

    """

    # Public

    options = [
        'sheet',
    ]

    def __init__(self, sheet=1):
        # Sheet numbers are 1-based for callers but are used as a 0-based
        # index into the book's sheets; names are passed through unchanged.
        self.__index = sheet - 1 if isinstance(sheet, int) else sheet
        self.__loader = None
        self.__bytes = None
        self.__book = None
        self.__sheet = None
        self.__extended_rows = None

    @property
    def closed(self):
        """True when no byte stream has been loaded or it is closed."""
        return self.__bytes is None or self.__bytes.closed

    def open(self, source, encoding, loader):
        """Load the document and select the configured sheet.

        Any previously opened stream is closed first. The whole stream is
        read into memory and handed to ``ezodf.opendoc``.

        Args:
            source: forwarded to ``loader.load``.
            encoding: forwarded to ``loader.load``.
            loader: object whose ``load(source, encoding, mode='b')``
                returns a readable, closable binary stream.

        Raises:
            Whatever ``loader.load``, ``ezodf.opendoc``, the sheet lookup
            or ``reset`` raise. If the failure happens after the stream
            was loaded, the stream is closed before the error propagates.

        """
        self.close()
        self.__loader = loader
        self.__bytes = loader.load(source, encoding, mode='b')
        # A success flag with ``finally`` (rather than ``except``/``raise``)
        # re-raises the original error untouched on both Python 2 and 3.
        opened = False
        try:
            self.__book = ezodf.opendoc(BytesIO(self.__bytes.read()))
            self.__sheet = self.__book.sheets[self.__index]
            self.reset()
            opened = True
        finally:
            if not opened:
                # Do not leak the loaded stream when the document cannot
                # be parsed or the requested sheet does not exist.
                self.close()

    def close(self):
        """Close the loaded byte stream if it is still open."""
        if not self.closed:
            self.__bytes.close()

    def reset(self):
        """Reset the byte stream and restart row iteration."""
        helpers.reset_stream(self.__bytes)
        self.__extended_rows = self.__iter_extended_rows()

    @property
    def extended_rows(self):
        """Iterator created by the most recent ``reset`` call.

        It is a single generator, so it yields nothing more once it has
        been exhausted; call ``reset`` to iterate again.
        """
        return self.__extended_rows

    # Private

    def __iter_extended_rows(self):
        # Yields (row number starting at 1, None, list of cell values)
        # for every row reported by the selected sheet.
        for number, row in enumerate(self.__sheet.rows(), start=1):
            yield number, None, [cell.value for cell in row]
51 changes: 51 additions & 0 deletions tests/parsers/test_ods.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# -*- coding: utf-8 -*-
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
from __future__ import unicode_literals

import io
from mock import Mock
from tabulator import Stream
from tabulator.parsers.ods import ODSParser


# Tests

def test_excelx_parser():
    """Drive ODSParser directly through open / iterate / reset / close."""
    # NOTE(review): the name looks carried over from the Excel parser
    # tests; it is kept so existing test selections keep working.
    path = 'data/table.ods'
    handle = io.open(path, 'rb')
    fake_loader = Mock()
    fake_loader.load = Mock(return_value=handle)
    ods_parser = ODSParser()

    # NOTE(review): the trailing None column and the two all-None rows are
    # what the fixture currently produces - presumably an ezodf row/cell
    # counting artifact; confirm before relying on it.
    expected_rows = [
        (1, None, ['id', 'name', None]),
        (2, None, [1.0, 'english', None]),
        (3, None, [2.0, '中国人', None]),
        (4, None, [None, None, None]),
        (5, None, [None, None, None]),
    ]

    # Nothing is loaded yet, so the parser reports itself as closed.
    assert ods_parser.closed
    ods_parser.open(path, None, fake_loader)
    assert not ods_parser.closed

    first_pass = list(ods_parser.extended_rows)
    assert first_pass == expected_rows

    # The row iterator is exhausted after one pass...
    leftover = list(ods_parser.extended_rows)
    assert len(leftover) == 0
    # ...and reset() makes the rows available again.
    ods_parser.reset()
    second_pass = list(ods_parser.extended_rows)
    assert len(second_pass) == 5

    ods_parser.close()
    assert ods_parser.closed


def test_stream_ods():
    """Read the ODS fixture end to end through the public Stream API."""
    expected_headers = ['id', 'name', None]
    # NOTE(review): the None header/column and the two empty rows mirror
    # what the fixture currently yields; confirm whether that is intended.
    expected_rows = [
        {'id': 1.0, 'name': 'english', None: None},
        {'id': 2.0, 'name': '中国人', None: None},
        {'id': None, 'name': None, None: None},
        {'id': None, 'name': None, None: None},
    ]
    with Stream('data/table.ods', headers=1) as stream:
        assert stream.headers == expected_headers
        keyed_rows = stream.read(keyed=True)
        assert keyed_rows == expected_rows

0 comments on commit 748da3f

Please sign in to comment.