From 41589caf0265d829903d521a3afa2ba6f4c5e74b Mon Sep 17 00:00:00 2001 From: grantbuster <grant.buster@nrel.gov> Date: Mon, 20 Nov 2023 15:33:10 -0700 Subject: [PATCH] added README note on poppler install and better error handling for poppler not found --- elm/pdf.py | 17 +++++++++++++---- examples/energy_wizard/README.rst | 2 ++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/elm/pdf.py b/elm/pdf.py index 55fd03ff..f47be87d 100644 --- a/elm/pdf.py +++ b/elm/pdf.py @@ -254,10 +254,19 @@ def clean_poppler(self, layout=True): if not os.path.exists(os.path.dirname(fp_out)): os.makedirs(os.path.dirname(fp_out), exist_ok=True) - stdout = subprocess.run(args, check=True, stdout=subprocess.PIPE) - if stdout.returncode != 0: - msg = ('Poppler raised return code {}: {}' - .format(stdout.returncode, stdout)) + try: + stdout = subprocess.run(args, check=True, + stdout=subprocess.PIPE) + if stdout.returncode != 0: + msg = ('Poppler raised return code {}: {}' + .format(stdout.returncode, stdout)) + logger.exception(msg) + raise RuntimeError(msg) + except Exception as e: + msg = ('PDF cleaning with poppler failed! This usually ' + 'because you have not installed the poppler utility ' + '(see https://poppler.freedesktop.org/). ' + f'Full error: {e}') logger.exception(msg) raise RuntimeError(msg) diff --git a/examples/energy_wizard/README.rst b/examples/energy_wizard/README.rst index 05dc4ae3..4c0e8a1e 100644 --- a/examples/energy_wizard/README.rst +++ b/examples/energy_wizard/README.rst @@ -8,6 +8,8 @@ corpus. Notes: +- In this example, we use the optional `poppler <https://poppler.freedesktop.org/>`_ PDF utility which you will have to install separately. You can also use the python-native ``PyPDF2`` package when using ``elm.pdf.PDFtoTXT`` but we have found that poppler works better. 
+ - Streamlit is required to run this app, which is not an explicit requirement of this repo (``pip install streamlit``) - You need to set up your own OpenAI or Azure-OpenAI API keys to run the scripts.