diff --git a/elm/pdf.py b/elm/pdf.py index 55fd03ff..ac1fcfba 100644 --- a/elm/pdf.py +++ b/elm/pdf.py @@ -254,12 +254,22 @@ def clean_poppler(self, layout=True): if not os.path.exists(os.path.dirname(fp_out)): os.makedirs(os.path.dirname(fp_out), exist_ok=True) - stdout = subprocess.run(args, check=True, stdout=subprocess.PIPE) - if stdout.returncode != 0: - msg = ('Poppler raised return code {}: {}' - .format(stdout.returncode, stdout)) + try: + stdout = subprocess.run(args, check=True, + stdout=subprocess.PIPE) + except Exception as e: + msg = ('PDF cleaning with poppler failed! This usually ' + 'because you have not installed the poppler utility ' + '(see https://poppler.freedesktop.org/). ' + f'Full error: {e}') logger.exception(msg) - raise RuntimeError(msg) + raise RuntimeError(msg) from e + else: + if stdout.returncode != 0: + msg = ('Poppler raised return code {}: {}' + .format(stdout.returncode, stdout)) + logger.exception(msg) + raise RuntimeError(msg) with open(fp_out, 'r') as f: clean_txt = f.read() diff --git a/examples/energy_wizard/README.rst b/examples/energy_wizard/README.rst index 05dc4ae3..4c0e8a1e 100644 --- a/examples/energy_wizard/README.rst +++ b/examples/energy_wizard/README.rst @@ -8,6 +8,8 @@ corpus. Notes: +- In this example, we use the optional `poppler <https://poppler.freedesktop.org/>`_ PDF utility which you will have to install separately. You can also use the python-native ``PyPDF2`` package when using ``elm.pdf.PDFtoTXT`` but we have found that poppler works better. + - Streamlit is required to run this app, which is not an explicit requirement of this repo (``pip install streamlit``) - You need to set up your own OpenAI or Azure-OpenAI API keys to run the scripts.