From c136838b46cf49f06ac1dc5f2f9bc16232c11213 Mon Sep 17 00:00:00 2001 From: David Martin Date: Thu, 7 Jun 2018 17:03:09 +1000 Subject: [PATCH] Use '--psm' instead of '-psm' as the option was deprecated. This recently changed in the official tesseract engine [0]. '-psm' is not allowed as an option anymore and '--psm' has to be used instead. [0] https://github.com/tesseract-ocr/tesseract/commit/ee201e1f4fa277a4b2ecd751a45d3bf1eba6dfdb --- src/pyocr/builders.py | 6 +++--- src/pyocr/tesseract.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pyocr/builders.py b/src/pyocr/builders.py index bf6c43f..5959a7c 100644 --- a/src/pyocr/builders.py +++ b/src/pyocr/builders.py @@ -305,7 +305,7 @@ class TextBuilder(BaseBuilder): def __init__(self, tesseract_layout=3, cuneiform_dotmatrix=False, cuneiform_fax=False, cuneiform_singlecolumn=False): file_ext = ["txt"] - tess_flags = ["-psm", str(tesseract_layout)] + tess_flags = ["--psm", str(tesseract_layout)] cun_args = ["-f", "text"] # Add custom cuneiform parameters if needed for par, arg in [(cuneiform_dotmatrix, "--dotmatrix"), @@ -562,7 +562,7 @@ class WordBoxBuilder(BaseBuilder): def __init__(self, tesseract_layout=1): file_ext = ["html", "hocr"] - tess_flags = ["-psm", str(tesseract_layout)] + tess_flags = ["--psm", str(tesseract_layout)] tess_conf = ["hocr"] cun_args = ["-f", "hocr"] super(WordBoxBuilder, self).__init__(file_ext, tess_flags, tess_conf, @@ -638,7 +638,7 @@ class LineBoxBuilder(BaseBuilder): def __init__(self, tesseract_layout=1): file_ext = ["html", "hocr"] - tess_flags = ["-psm", str(tesseract_layout)] + tess_flags = ["--psm", str(tesseract_layout)] tess_conf = ["hocr"] cun_args = ["-f", "hocr"] super(LineBoxBuilder, self).__init__(file_ext, tess_flags, tess_conf, diff --git a/src/pyocr/tesseract.py b/src/pyocr/tesseract.py index 22cc48d..5b8e002 100755 --- a/src/pyocr/tesseract.py +++ b/src/pyocr/tesseract.py @@ -178,7 +178,7 @@ def detect_orientation(image, lang=None): """ _set_environment() with temp_dir() as tmpdir: - command = [TESSERACT_CMD, "input.bmp", 'stdout', "-psm", "0"] + command = [TESSERACT_CMD, "input.bmp", 'stdout', "--psm", "0"] version = get_version() if version[0] >= 4: # XXX: temporary fix to remove once Tesseract 4 is stable