Remove pandas from dev requirements and tests

jsvine · jsvine · commit a5e7d7fa5280 · 2020-08-15T12:03:34.000-04:00
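With pandas gone from the test suite, the DataFrame branch of `utils.to_list` is no longer exercised, so this change adds one more `# pragma: nocover` marker, on a single simple line of code. See PR #253 for details and motivation.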
diff --git a/pdfplumber/utils.py b/pdfplumber/utils.py
@@ -163,7 +163,7 @@ def is_dataframe(collection):
 
 def to_list(collection):
     if is_dataframe(collection):
-        return collection.to_dict("records")
+        return collection.to_dict("records")  # pragma: nocover
     else:
         return list(collection)
 
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -1,4 +1,3 @@
-pandas>=1.0.0
 pytest
 pytest-cov
 pytest-parallel
diff --git a/tests/pdfs/pdffill-demo.pdf b/tests/pdfs/pdffill-demo.pdf
diff --git a/tests/test_basics.py b/tests/test_basics.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 import unittest
 import pytest
-import pandas as pd
 import pdfplumber
 import sys, os
 
diff --git a/tests/test_ca_warn_report.py b/tests/test_ca_warn_report.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python
 import unittest
-import pandas as pd
 import pdfplumber
 from pdfplumber import utils
 from pdfplumber import table
@@ -37,7 +36,7 @@ def test_objects(self):
         assert len(self.pdf.figures)
         assert len(self.pdf.images)
 
-    def test_pandas(self):
+    def test_parse(self):
 
         rect_x0_clusters = utils.cluster_list([ r["x0"]
             for r in self.pdf.pages[1].rects ], tolerance=3)
diff --git a/tests/test_convert.py b/tests/test_convert.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 import unittest
 import pytest
-import pandas as pd
 import pdfplumber
 from subprocess import Popen, PIPE
 from io import StringIO
diff --git a/tests/test_display.py b/tests/test_display.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python
 import unittest
-import pandas as pd
 import pdfplumber
 import sys, os, io
 
diff --git a/tests/test_issues.py b/tests/test_issues.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python
 import unittest
-import pandas as pd
 import pdfplumber
 import sys, os
 import six
diff --git a/tests/test_la_precinct_bulletin.py b/tests/test_la_precinct_bulletin.py
diff --git a/tests/test_nics_report.py b/tests/test_nics_report.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python
 import unittest
-import pandas as pd
 import pdfplumber
 from operator import itemgetter
 from pdfplumber.utils import within_bbox, collate_chars
@@ -86,36 +85,6 @@ def parse_row(row):
         month_text = collate_chars(month_chars)
         assert(month_text == "November - 2015")
 
-    def test_pandas(self):
-        page = self.pdf.pages[0]
-        cropped = page.crop((0, 80, self.PDF_WIDTH, 485))
-        table = cropped.extract_table({
-            "horizontal_strategy": "text",
-            "explicit_vertical_lines": [
-                min(map(itemgetter("x0"), cropped.chars))
-            ],
-            "intersection_tolerance": 5
-        })
-
-        table = pd.DataFrame(table)
-
-        def parse_value(x):
-            if pd.isnull(x) or x == "": return None
-            return int(x.replace(",", ""))
-
-        table.columns = COLUMNS
-        table[table.columns[1:]] = table[table.columns[1:]].applymap(parse_value)
-
-        # [1:] because first column is state name
-        for c in COLUMNS[1:]:
-            total = table[c].iloc[-1]
-            colsum = table[c].sum()
-            assert(colsum == (total * 2))
-
-        month_chars = within_bbox(page.chars, (0, 35, self.PDF_WIDTH, 65))
-        month_text = collate_chars(month_chars)
-        assert(month_text == "November - 2015")
-
     def test_filter(self):
         page = self.pdf.pages[0]
         def test(obj):
diff --git a/tests/test_table.py b/tests/test_table.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 import unittest
 import pytest
-import pandas as pd
 import pdfplumber
 from pdfplumber import table
 import sys, os
diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 import unittest
 import pytest
-import pandas as pd
 import pdfplumber
 from pdfplumber import utils
 from pdfminer.pdfparser import PDFObjRef
@@ -98,6 +97,55 @@ def test_extract_text(self):
         assert text == goal
         assert self.pdf.pages[0].crop((0, 0, 1, 1)).extract_text() == None
 
+    def test_intersects_bbox(self):
+        objs = [
+            # Is same as bbox
+            { 
+                "x0": 0,
+                "top": 0,
+                "x1": 20,
+                "bottom": 20,
+            },
+            # Inside bbox
+            {
+                "x0": 10,
+                "top": 10,
+                "x1": 15,
+                "bottom": 15,
+            },
+            # Overlaps bbox
+            {
+                "x0": 10,
+                "top": 10,
+                "x1": 30,
+                "bottom": 30,
+            },
+            # Touching on one side
+            {
+                "x0": 20,
+                "top": 0,
+                "x1": 40,
+                "bottom": 20,
+            },
+            # Touching on one corner
+            {
+                "x0": 20,
+                "top": 20,
+                "x1": 40,
+                "bottom": 40,
+            },
+            # Fully outside
+            {
+                "x0": 21,
+                "top": 21,
+                "x1": 40,
+                "bottom": 40,
+            },
+        ]
+        bbox = utils.obj_to_bbox(objs[0])
+
+        assert utils.intersects_bbox(objs, bbox) == objs[:4]
+
     def test_resize_object(self):
         obj = {
             "x0": 5,

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,3 @@`
`1`		`-pandas>=1.0.0`
`2`	`1`	`pytest`
`3`	`2`	`pytest-cov`
`4`	`3`	`pytest-parallel`