From 3772af60c72db6504695e4992aec7a19ed70ce62 Mon Sep 17 00:00:00 2001
From: Jeremy Singer-Vine <jsvine@gmail.com>
Date: Tue, 4 Jul 2023 09:41:19 -0400
Subject: [PATCH] Add Page.find_table(...) (#873)

Previously, `pdfplumber.Page` had these table-getting methods:

- `.find_tables(...)`
- `.extract_tables(...)`
- `.extract_table(...)`

For consistency/completeness's sake, this commit adds:

- `.find_table(...)`

... which, analogous to `.extract_table(...)`, returns the largest table
on the page.

Indeed, `.extract_table(...)` now uses `.find_table(...)` under the
hood.

Thanks to @pdille for the suggestion, here:
https://github.com/jsvine/pdfplumber/discussions/864#discussioncomment-5668209
---
 README.md          |  3 ++-
 pdfplumber/page.py | 30 ++++++++++++++++++++----------
 2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index ee34ff3b..d7684fba 100644
--- a/README.md
+++ b/README.md
@@ -346,8 +346,9 @@ If you're using `pdfplumber` on a Debian-based system and encounter a `PolicyErr
 | Method | Description |
 |--------|-------------|
 |`.find_tables(table_settings={})`|Returns a list of `Table` objects. The `Table` object provides access to the `.cells`, `.rows`, and `.bbox` properties, as well as the `.extract(x_tolerance=3, y_tolerance=3)` method.|
+|`.find_table(table_settings={})`|Similar to `.find_tables(...)`, but returns only the *largest* table on the page, as a `Table` object, or `None` if no table is found. If multiple tables have the same size — as measured by the number of cells — this method returns the table closest to the top of the page.|
 |`.extract_tables(table_settings={})`|Returns the text extracted from *all* tables found on the page, represented as a list of lists of lists, with the structure `table -> row -> cell`.|
-|`.extract_table(table_settings={})`|Returns the text extracted from the *largest* table on the page, represented as a list of lists, with the structure `row -> cell`. (If multiple tables have the same size — as measured by the number of cells — this method returns the table closest to the top of the page.)|
+|`.extract_table(table_settings={})`|Returns the text extracted from the *largest* table on the page (see `.find_table(...)` above), represented as a list of lists, with the structure `row -> cell`.|
 |`.debug_tablefinder(table_settings={})`|Returns an instance of the `TableFinder` class, with access to the `.edges`, `.intersections`, `.cells`, and `.tables` properties.|
 
 For example:
diff --git a/pdfplumber/page.py b/pdfplumber/page.py
index e6a13ff8..b317857a 100644
--- a/pdfplumber/page.py
+++ b/pdfplumber/page.py
@@ -307,16 +307,9 @@ def find_tables(
         tset = TableSettings.resolve(table_settings)
         return TableFinder(self, tset).tables
 
-    def extract_tables(
-        self, table_settings: Optional[T_table_settings] = None
-    ) -> List[List[List[Optional[str]]]]:
-        tset = TableSettings.resolve(table_settings)
-        tables = self.find_tables(tset)
-        return [table.extract(**(tset.text_settings or {})) for table in tables]
-
-    def extract_table(
+    def find_table(
         self, table_settings: Optional[T_table_settings] = None
-    ) -> Optional[List[List[Optional[str]]]]:
+    ) -> Optional[Table]:
         tset = TableSettings.resolve(table_settings)
         tables = self.find_tables(tset)
 
@@ -329,7 +322,24 @@ def sorter(x: Table) -> Tuple[int, T_num, T_num]:
 
         largest = list(sorted(tables, key=sorter))[0]
 
-        return largest.extract(**(tset.text_settings or {}))
+        return largest
+
+    def extract_tables(
+        self, table_settings: Optional[T_table_settings] = None
+    ) -> List[List[List[Optional[str]]]]:
+        tset = TableSettings.resolve(table_settings)
+        tables = self.find_tables(tset)
+        return [table.extract(**(tset.text_settings or {})) for table in tables]
+
+    def extract_table(
+        self, table_settings: Optional[T_table_settings] = None
+    ) -> Optional[List[List[Optional[str]]]]:
+        tset = TableSettings.resolve(table_settings)
+        table = self.find_table(tset)
+        if table is None:
+            return None
+        else:
+            return table.extract(**(tset.text_settings or {}))
 
     def _get_textmap(self, **kwargs: Any) -> TextMap:
         defaults = dict(x_shift=self.bbox[0], y_shift=self.bbox[1])