diff --git a/pdfplumber/table.py b/pdfplumber/table.py
index a6e3fcb5..a9ef3de5 100644
--- a/pdfplumber/table.py
+++ b/pdfplumber/table.py
@@ -450,6 +450,11 @@ def __init__(self, page, settings={}):
         )
         self.cells = intersections_to_cells(self.intersections)
         self.tables = [Table(self.page, t) for t in cells_to_tables(self.cells)]
+        # Keep self.tables in page order (see issue #336): the sort key
+        # (bbox[1], bbox[0]) runs top to bottom, breaking ties left to right.
+        self.tables = sorted(
+            self.tables, key=lambda table: (table.bbox[1], table.bbox[0])
+        )
 
     def get_edges(self):
         settings = self.settings
diff --git a/tests/pdfs/issue-336-example.pdf b/tests/pdfs/issue-336-example.pdf
new file mode 100644
index 00000000..244463b6
Binary files /dev/null and b/tests/pdfs/issue-336-example.pdf differ
diff --git a/tests/test_table.py b/tests/test_table.py
index 6a1850a4..6206a5c9 100644
--- a/tests/test_table.py
+++ b/tests/test_table.py
@@ -84,3 +84,15 @@ def test_explicit_desc_decimalization(self):
     def test_text_without_words(self):
         assert table.words_to_edges_h([]) == []
         assert table.words_to_edges_v([]) == []
+
+    def test_order(self):
+        """
+        Check the order of the extracted tables by their lengths (see issue #336).
+        """
+        path = os.path.join(HERE, "pdfs/issue-336-example.pdf")
+        with pdfplumber.open(path) as pdf:
+            tables = pdf.pages[0].extract_tables()
+            assert len(tables) == 3
+            assert len(tables[0]) == 8
+            assert len(tables[1]) == 11
+            assert len(tables[2]) == 2