From 2109f34b4f3615004de1f2b2e635cfd61dae3cb7 Mon Sep 17 00:00:00 2001 From: Chad Phillips Date: Wed, 15 May 2024 10:22:33 -0400 Subject: [PATCH] support python3.12 from https://github.com/deanmalmgren/textract/pull/502 --- textract/parsers/pdf_parser.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/textract/parsers/pdf_parser.py b/textract/parsers/pdf_parser.py index 9fe74e13..10d488af 100644 --- a/textract/parsers/pdf_parser.py +++ b/textract/parsers/pdf_parser.py @@ -8,7 +8,10 @@ from .utils import ShellParser from .image import Parser as TesseractParser -from distutils.spawn import find_executable +try: + from shutil import which +except ImportError: + from distutils.spawn import find_executable as which class Parser(ShellParser): """Extract text from pdf files using either the ``pdftotext`` method @@ -49,7 +52,7 @@ def extract_pdfminer(self, filename, **kwargs): #Nested try/except loops? Not great #Try the normal pdf2txt, if that fails try the python3 # pdf2txt, if that fails try the python2 pdf2txt - pdf2txt_path = find_executable('pdf2txt.py') + pdf2txt_path = which("pdf2txt.py") try: stdout, _ = self.run(['pdf2txt.py', filename]) except OSError: