atlanhq · vinayak-mehta · Sep 28, 2018 · Sep 28, 2018 · Sep 28, 2018 · Sep 28, 2018
diff --git a/.gitignore b/.gitignore
@@ -5,6 +5,7 @@ __pycache__/
 build/
 dist/
 *.egg-info/
+.eggs/
 .coverage
 coverage.xml
 

diff --git a/.travis.yml b/.travis.yml
@@ -1,8 +1,9 @@
 language: python
 python:
   - "2.7"
+  - "3.6"
 before_install:
-  - sudo apt-get install python-tk ghostscript
+  - sudo apt-get install python-tk python3-tk ghostscript
 install:
   - pip install ".[dev]"
 script:

diff --git a/README.md b/README.md
@@ -43,14 +43,7 @@
 
 There's a [command-line interface](https://camelot-py.readthedocs.io/en/latest/user/cli.html) too!
 
----
-
-**Note:** Camelot only works with:
-
-- Python 2, with Python 3 support [on the way](https://github.com/socialcopsdev/camelot/issues/81).
-- Text-based PDFs and not scanned documents. If you can click-and-drag to select text in your table in a PDF viewer, then your PDF is text-based. Support for image-based PDFs using OCR is [planned](https://github.com/socialcopsdev/camelot/issues/101).
-
----
+**Note:** Camelot only works with text-based PDFs and not scanned documents. If you can click-and-drag to select text in your table in a PDF viewer, then your PDF is text-based.
 
 ## Why Camelot?
 
@@ -84,7 +77,7 @@ $ cd camelot
 $ pip install .
 </pre>
 
-Note: Use a [virtualenv](https://virtualenv.pypa.io/en/stable/) if you don't want to affect your global Python installation.
+**Note:** Use a [virtualenv](https://virtualenv.pypa.io/en/stable/) if you don't want to affect your global Python installation.
 
 ## Documentation
 

diff --git a/camelot/__version__.py b/camelot/__version__.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-VERSION = (0, 1, 2)
+VERSION = (0, 2, 0)
 
 __title__ = 'camelot-py'
 __description__ = 'PDF Table Extraction for Humans.'

diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py
@@ -13,7 +13,7 @@
 from ..core import Table
 from ..utils import (scale_image, scale_pdf, segments_in_bbox, text_in_bbox,
                      merge_close_lines, get_table_index, compute_accuracy,
-                     compute_whitespace, setup_logging, encode_)
+                     compute_whitespace, setup_logging)
 from ..image_processing import (adaptive_threshold, find_lines,
                                 find_table_contours, find_table_joints)
 
@@ -177,7 +177,7 @@ def _generate_image(self):
         gs_call = [
             "-q", "-sDEVICE=png16m", "-o", self.imagename, "-r600", self.filename
         ]
-        if "ghostscript" in subprocess.check_output(["gs", "-version"]).lower():
+        if "ghostscript" in subprocess.check_output(["gs", "-version"]).decode('utf-8').lower():
             gs_call.insert(0, "gs")
         else:
             gs_call.insert(0, "gsc")
@@ -284,7 +284,6 @@ def _generate_table(self, table_idx, cols, rows, **kwargs):
             table = Lattice._copy_spanning_text(table, copy_text=self.copy_text)
 
         data = table.data
-        data = encode_(data)
         table.df = pd.DataFrame(data)
         table.shape = table.df.shape
 

diff --git a/camelot/parsers/stream.py b/camelot/parsers/stream.py
@@ -10,7 +10,7 @@
 from .base import BaseParser
 from ..core import Table
 from ..utils import (text_in_bbox, get_table_index, compute_accuracy,
-                     compute_whitespace, setup_logging, encode_)
+                     compute_whitespace, setup_logging)
 
 
 logger = setup_logging(__name__)
@@ -323,7 +323,6 @@ def _generate_table(self, table_idx, cols, rows, **kwargs):
         accuracy = compute_accuracy([[100, pos_errors]])
 
         data = table.data
-        data = encode_(data)
         table.df = pd.DataFrame(data)
         table.shape = table.df.shape
 

diff --git a/camelot/utils.py b/camelot/utils.py
@@ -560,7 +560,7 @@ def get_table_index(table, t, direction, split_text=False, flag_size=False):
                     lt_col_overlap.append(abs(left - right) / abs(c[0] - c[1]))
                 else:
                     lt_col_overlap.append(-1)
-            if len(filter(lambda x: x != -1, lt_col_overlap)) == 0:
+            if len(list(filter(lambda x: x != -1, lt_col_overlap))) == 0:
                 text = t.get_text().strip('\n')
                 text_range = (t.x0, t.x1)
                 col_range = (table.cols[0][0], table.cols[-1][1])
@@ -669,22 +669,6 @@ def remove_empty(d):
     return d
 
 
-def encode_(ar):
-    """Encodes two-dimensional list into unicode.
-
-    Parameters
-    ----------
-    ar : list
-
-    Returns
-    -------
-    ar : list
-
-    """
-    ar = [[r.encode('utf-8') for r in row] for row in ar]
-    return ar
-
-
 def get_page_layout(filename, char_margin=1.0, line_margin=0.5, word_margin=0.1,
                detect_vertical=True, all_texts=True):
     """Returns a PDFMiner LTPage object and page dimension of a single
@@ -709,7 +693,7 @@ def get_page_layout(filename, char_margin=1.0, line_margin=0.5, word_margin=0.1,
         Dimension of pdf page in the form (width, height).
 
     """
-    with open(filename, 'r') as f:
+    with open(filename, 'rb') as f:
         parser = PDFParser(f)
         document = PDFDocument(parser)
         if not document.is_extractable:

diff --git a/docs/index.rst b/docs/index.rst
@@ -55,13 +55,7 @@ Release v\ |version|. (:ref:`Installation <install>`)
 
 There's a :ref:`command-line interface <cli>` too!
 
-.. note:: Camelot only works with:
-
-          - Python 2, with **Python 3** support `on the way`_.
-          - Text-based PDFs and not scanned documents. If you can click-and-drag to select text in your table in a PDF viewer, then your PDF is text-based. Support for image-based PDFs using **OCR** is `planned`_.
-
-.. _on the way: https://github.com/socialcopsdev/camelot/issues/81
-.. _planned: https://github.com/socialcopsdev/camelot/issues/101
+.. note:: Camelot only works with text-based PDFs and not scanned documents. If you can click-and-drag to select text in your table in a PDF viewer, then your PDF is text-based.
 
 Why Camelot?
 ------------

diff --git a/docs/user/install.rst b/docs/user/install.rst
@@ -14,6 +14,8 @@ For Ubuntu::
 
     $ apt install python-tk ghostscript
 
+.. note:: For Python 3, install ``python3-tk`` instead of ``python-tk``.
+
 For macOS::
 
     $ brew install tcl-tk ghostscript

diff --git a/requirements.txt b/requirements.txt
@@ -3,5 +3,5 @@ matplotlib==2.2.3
 numpy==1.13.3
 opencv-python==3.4.2.17
 pandas==0.23.4
-pdfminer==20140328
+pdfminer.six==20170720
 PyPDF2==1.26.0
diff --git a/setup.py b/setup.py
@@ -48,7 +48,8 @@ def setup_package():
                         # Trove classifiers
                         # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
                         'License :: OSI Approved :: MIT License',
-                        'Programming Language :: Python :: 2.7'
+                        'Programming Language :: Python :: 2.7',
+                        'Programming Language :: Python :: 3.6'
                     ])
 
     try: