diff --git a/hotpdf/hotpdf.py b/hotpdf/hotpdf.py index 51e8358..de85ae7 100644 --- a/hotpdf/hotpdf.py +++ b/hotpdf/hotpdf.py @@ -298,6 +298,5 @@ def extract_spans_text( extracted_text: list[str] = [] for span in spans: - # TODO: This will be span.to_text() in the next release extracted_text.append(span.to_text()) return "".join(extracted_text) diff --git a/hotpdf/memory_map.py b/hotpdf/memory_map.py index 5815df4..5165ee7 100644 --- a/hotpdf/memory_map.py +++ b/hotpdf/memory_map.py @@ -1,7 +1,6 @@ import math import xml.etree.ElementTree as ET from collections.abc import Generator -from functools import lru_cache from hashlib import md5 from .data.classes import HotCharacter, PageResult @@ -103,7 +102,6 @@ def load_memory_map(self, page: ET.Element, drop_duplicate_spans: bool = True) - self.width = self.memory_map.columns self.height = self.memory_map.rows - @lru_cache # noqa: B019 def extract_text_from_bbox(self, x0: int, x1: int, y0: int, y1: int) -> str: """Extract text within a specified bounding box. @@ -128,7 +126,6 @@ def extract_text_from_bbox(self, x0: int, x1: int, y0: int, y1: int) -> str: return extracted_text - @lru_cache # noqa: B019 def find_text(self, query: str) -> tuple[list[str], PageResult]: """Find text within the memory map. diff --git a/hotpdf/span_map.py b/hotpdf/span_map.py index 1c5a271..19ed397 100644 --- a/hotpdf/span_map.py +++ b/hotpdf/span_map.py @@ -24,7 +24,7 @@ def __setitem__(self, span_id: Union[str, None], hot_character: HotCharacter) -> self.insert(span_id, hot_character) def items(self) -> Iterable[tuple[str, Span]]: - return self.span_map.items() + yield from self.span_map.items() def insert(self, span_id: Union[str, None], hot_character: HotCharacter) -> None: if not span_id: