Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make jpype optional #369

Merged
merged 1 commit into from
Nov 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ Ensure you have a Java runtime and set the PATH for it.
pip install tabula-py
```

If you want to leverage faster execution with jpype, install with the `jpype` extra.

```sh
pip install tabula-py[jpype]
```

### Example

tabula-py enables you to extract tables from a PDF into a DataFrame, or a JSON. It can also extract tables from a PDF and save the file as a CSV, a TSV, or a JSON.  
Expand Down
6 changes: 6 additions & 0 deletions docs/getting_started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ You can install tabula-py from PyPI with ``pip`` command.
pip install tabula-py


If you want to leverage faster execution with jpype, install with the ``jpype`` extra.

.. code-block:: bash

pip install tabula-py[jpype]

.. Note::
conda recipe on conda-forge is not maintained by us.
We recommend installing via ``pip`` to use the latest version of tabula-py.
Expand Down
24 changes: 23 additions & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,30 @@ def lint(session):


@nox.session
def tests(session):
@nox.parametrize(
    "python,jpype",
    [
        ("3.8", True),
        ("3.9", True),
        ("3.10", True),
        ("3.11", True),
        # ("3.12", False),
    ],
)
def tests(session, jpype):
    """Run the test suite for one parametrized (python, jpype) combination.

    Args:
        session: The nox session handed to this function.
        jpype: When truthy, delegate to ``tests_with_jpype``; otherwise
            delegate to ``tests_without_jpype``.
    """
    # Pick the runner first, then invoke it; only the selected helper is
    # looked up and called.
    run_suite = tests_with_jpype if jpype else tests_without_jpype
    run_suite(session)


def tests_without_jpype(session):
    """Install the project with only the ``test`` extra and run the table tests.

    Args:
        session: The nox session used to install the package and run pytest.
    """
    pytest_command = ("pytest", "-v", "tests/test_read_pdf_table.py")
    session.install(".[test]")
    session.run(*pytest_command)


def tests_with_jpype(session):
    """Install the project with the ``jpype`` and ``test`` extras and run the tests.

    Each test module is passed to its own ``pytest`` invocation, in the
    order listed below.

    Args:
        session: The nox session used to install the package and run pytest.
    """
    session.install(".[jpype,test]")
    for test_module in (
        "tests/test_read_pdf_table.py",
        "tests/test_read_pdf_jar_path.py",
        "tests/test_read_pdf_silent.py",
    ):
        session.run("pytest", "-v", test_module)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@ dependencies = [
"pandas >= 0.25.3",
"numpy",
"distro",
"jpype1",
]
dynamic = ["version"]

[project.optional-dependencies]
jpype = ["jpype1"]
dev = [
"pytest",
"flake8",
Expand Down
36 changes: 18 additions & 18 deletions tabula/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
from logging import getLogger
from typing import List, Optional

import jpype
import jpype.imports

from .errors import JavaNotFoundError
from .util import TabulaOption

Expand All @@ -27,35 +24,38 @@ def jar_path() -> str:

class TabulaVm:
def __init__(self, java_options: List[str], silent: Optional[bool]) -> None:
if not jpype.isJVMStarted():
jpype.addClassPath(jar_path())

# Workaround to enforce the silent option. See:
# https://github.com/tabulapdf/tabula-java/issues/231#issuecomment-397281157
if silent:
java_options.extend(
(
"-Dorg.slf4j.simpleLogger.defaultLogLevel=off",
"-Dorg.apache.commons.logging.Log"
"=org.apache.commons.logging.impl.NoOpLog",
try:
import jpype
import jpype.imports

if not jpype.isJVMStarted():
jpype.addClassPath(jar_path())

# Workaround to enforce the silent option. See:
# https://github.com/tabulapdf/tabula-java/issues/231#issuecomment-397281157
if silent:
java_options.extend(
(
"-Dorg.slf4j.simpleLogger.defaultLogLevel=off",
"-Dorg.apache.commons.logging.Log"
"=org.apache.commons.logging.impl.NoOpLog",
)
)
)

jpype.startJVM(*java_options, convertStrings=False)
jpype.startJVM(*java_options, convertStrings=False)

try:
import java.lang as lang
import technology.tabula as tabula
from org.apache.commons.cli import DefaultParser

self.tabula = tabula
self.parser = DefaultParser()
self.lang = lang

except (ModuleNotFoundError, ImportError) as e:
logger.warning(
"Error importing jpype dependencies. Fallback to subprocess."
)
logger.warning(jpype.java.lang.System.getProperty("java.class.path"))
logger.warning(e)
self.tabula = None
self.parse = None
Expand Down
3 changes: 2 additions & 1 deletion tests/test_read_pdf_jar_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from subprocess import CalledProcessError
from unittest.mock import patch

import jpype
import pytest

import tabula
Expand All @@ -19,5 +20,5 @@ def test_read_pdf_with_jar_path(self, jar_func):
# Fallback to subprocess
with pytest.raises(CalledProcessError):
tabula.read_pdf(self.pdf_path, encoding="utf-8")
file_name = Path(tabula.backend.jpype.getClassPath()).name
file_name = Path(jpype.getClassPath()).name
self.assertEqual(file_name, "tabula-java.jar")
7 changes: 2 additions & 5 deletions tests/test_read_pdf_silent.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,16 @@
import unittest
from unittest.mock import patch

import pytest

import tabula


class TestReadPdfJarPath(unittest.TestCase):
def setUp(self):
self.pdf_path = "tests/resources/data.pdf"

@patch("tabula.backend.jpype.startJVM")
@patch("jpype.startJVM")
def test_read_pdf_with_silent_true(self, jvm_func):
with pytest.raises(RuntimeError):
tabula.read_pdf(self.pdf_path, encoding="utf-8", silent=True)
tabula.read_pdf(self.pdf_path, encoding="utf-8", silent=True)

target_args = []
if platform.system() == "Darwin":
Expand Down