Skip to content
This repository has been archived by the owner on Jun 14, 2018. It is now read-only.

Commit

Permalink
Merge pull request #100 from ddddavidmartin/update_deprecated_psm_opt…
Browse files Browse the repository at this point in the history
…ion_string

Use '--psm' instead of '-psm', as the single-dash '-psm' spelling of the option was deprecated.
  • Loading branch information
jflesch authored Jun 11, 2018
2 parents 31fb5f1 + b95bdbc commit 2c41670
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 4 deletions.
7 changes: 4 additions & 3 deletions src/pyocr/builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import xml.dom.minidom
import logging

from .tesseract import psm_parameter
from .util import to_unicode

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -305,7 +306,7 @@ class TextBuilder(BaseBuilder):
def __init__(self, tesseract_layout=3, cuneiform_dotmatrix=False,
cuneiform_fax=False, cuneiform_singlecolumn=False):
file_ext = ["txt"]
tess_flags = ["-psm", str(tesseract_layout)]
tess_flags = [psm_parameter(), str(tesseract_layout)]
cun_args = ["-f", "text"]
# Add custom cuneiform parameters if needed
for par, arg in [(cuneiform_dotmatrix, "--dotmatrix"),
Expand Down Expand Up @@ -562,7 +563,7 @@ class WordBoxBuilder(BaseBuilder):

def __init__(self, tesseract_layout=1):
file_ext = ["html", "hocr"]
tess_flags = ["-psm", str(tesseract_layout)]
tess_flags = [psm_parameter(), str(tesseract_layout)]
tess_conf = ["hocr"]
cun_args = ["-f", "hocr"]
super(WordBoxBuilder, self).__init__(file_ext, tess_flags, tess_conf,
Expand Down Expand Up @@ -638,7 +639,7 @@ class LineBoxBuilder(BaseBuilder):

def __init__(self, tesseract_layout=1):
file_ext = ["html", "hocr"]
tess_flags = ["-psm", str(tesseract_layout)]
tess_flags = [psm_parameter(), str(tesseract_layout)]
tess_conf = ["hocr"]
cun_args = ["-f", "hocr"]
super(LineBoxBuilder, self).__init__(file_ext, tess_flags, tess_conf,
Expand Down
11 changes: 10 additions & 1 deletion src/pyocr/tesseract.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,15 @@ def can_detect_orientation():
)


def psm_parameter():
    """Pick the command-line flag used to pass the page segmentation mode.

    The first component of ``get_version()`` is treated as the major
    version: for values up to and including 3 the single-dash ``-psm``
    spelling is returned, otherwise the double-dash ``--psm`` spelling.

    Returns:
        The option string to place immediately before the layout value.
    """
    major = get_version()[0]
    # Anything above major version 3 gets the double-dash form.
    return "-psm" if major <= 3 else "--psm"


def detect_orientation(image, lang=None):
"""
Arguments:
Expand All @@ -178,7 +187,7 @@ def detect_orientation(image, lang=None):
"""
_set_environment()
with temp_dir() as tmpdir:
command = [TESSERACT_CMD, "input.bmp", 'stdout', "-psm", "0"]
command = [TESSERACT_CMD, "input.bmp", 'stdout', psm_parameter(), "0"]
version = get_version()
if version[0] >= 4:
# XXX: temporary fix to remove once Tesseract 4 is stable
Expand Down

0 comments on commit 2c41670

Please sign in to comment.