From 41589caf0265d829903d521a3afa2ba6f4c5e74b Mon Sep 17 00:00:00 2001
From: grantbuster <grant.buster@nrel.gov>
Date: Mon, 20 Nov 2023 15:33:10 -0700
Subject: [PATCH] added README note on poppler install and better error
 handling for poppler not found

---
 elm/pdf.py                        | 17 +++++++++++++----
 examples/energy_wizard/README.rst |  2 ++
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/elm/pdf.py b/elm/pdf.py
index 55fd03ff..f47be87d 100644
--- a/elm/pdf.py
+++ b/elm/pdf.py
@@ -254,10 +254,19 @@ def clean_poppler(self, layout=True):
             if not os.path.exists(os.path.dirname(fp_out)):
                 os.makedirs(os.path.dirname(fp_out), exist_ok=True)
 
-            stdout = subprocess.run(args, check=True, stdout=subprocess.PIPE)
-            if stdout.returncode != 0:
-                msg = ('Poppler raised return code {}: {}'
-                       .format(stdout.returncode, stdout))
+            try:
+                stdout = subprocess.run(args, check=True,
+                                        stdout=subprocess.PIPE)
+                if stdout.returncode != 0:
+                    msg = ('Poppler raised return code {}: {}'
+                           .format(stdout.returncode, stdout))
+                    logger.exception(msg)
+                    raise RuntimeError(msg)
+            except Exception as e:
+                msg = ('PDF cleaning with poppler failed! This usually '
+                       'because you have not installed the poppler utility '
+                       '(see https://poppler.freedesktop.org/). '
+                       f'Full error: {e}')
                 logger.exception(msg)
                 raise RuntimeError(msg)
 
diff --git a/examples/energy_wizard/README.rst b/examples/energy_wizard/README.rst
index 05dc4ae3..4c0e8a1e 100644
--- a/examples/energy_wizard/README.rst
+++ b/examples/energy_wizard/README.rst
@@ -8,6 +8,8 @@ corpus.
 
 Notes:
 
+- In this example, we use the optional `poppler <https://poppler.freedesktop.org/>`_ PDF utility, which you will have to install separately. You can also use the python-native ``PyPDF2`` package when using ``elm.pdf.PDFtoTXT``, but we have found that poppler works better.
+
 - Streamlit is required to run this app, which is not an explicit requirement of this repo (``pip install streamlit``)
 
 - You need to set up your own OpenAI or Azure-OpenAI API keys to run the scripts.