diff --git a/pdfplumber/page.py b/pdfplumber/page.py index b36a3814..b2f3d42a 100644 --- a/pdfplumber/page.py +++ b/pdfplumber/page.py @@ -23,6 +23,7 @@ LTLine, LTPage, LTRect, + LTTextContainer, ) from pdfminer.pdfinterp import PDFPageInterpreter from pdfminer.pdfpage import PDFPage @@ -211,8 +212,10 @@ def process_attr(item: Tuple[str, Any]) -> Optional[Tuple[str, Any]]: attr["object_type"] = kind attr["page_number"] = self.page_number - if isinstance(obj, LTChar): + if isinstance(obj, (LTChar, LTTextContainer)): attr["text"] = obj.get_text() + + if isinstance(obj, LTChar): gs = obj.graphicstate attr["stroking_color"] = gs.scolor attr["non_stroking_color"] = gs.ncolor diff --git a/tests/test_laparams.py b/tests/test_laparams.py index 700ed451..1f11a31e 100644 --- a/tests/test_laparams.py +++ b/tests/test_laparams.py @@ -26,6 +26,8 @@ def test_with_laparams(self): page = pdf.pages[0] assert len(page.textboxhorizontals) == 27 assert len(page.textlinehorizontals) == 79 + assert "text" in page.textboxhorizontals[0] + assert "text" in page.textlinehorizontals[0] assert len(page.chars) == 4408 assert "anno" not in page.objects.keys() @@ -38,6 +40,8 @@ def test_vertical_texts(self): assert len(page.textboxhorizontals) == 74 assert len(page.textlineverticals) == 11 assert len(page.textboxverticals) == 6 + assert "text" in page.textboxverticals[0] + assert "text" in page.textlineverticals[0] def test_issue_383(self): with pdfplumber.open(self.path, laparams={}) as pdf: