|
1 | 1 | #!/usr/bin/env python |
2 | 2 | import unittest |
3 | | -import pandas as pd |
4 | 3 | import pdfplumber |
5 | 4 | from operator import itemgetter |
6 | 5 | from pdfplumber.utils import within_bbox, collate_chars |
@@ -86,36 +85,6 @@ def parse_row(row): |
86 | 85 | month_text = collate_chars(month_chars) |
87 | 86 | assert(month_text == "November - 2015") |
88 | 87 |
|
89 | | - def test_pandas(self): |
90 | | - page = self.pdf.pages[0] |
91 | | - cropped = page.crop((0, 80, self.PDF_WIDTH, 485)) |
92 | | - table = cropped.extract_table({ |
93 | | - "horizontal_strategy": "text", |
94 | | - "explicit_vertical_lines": [ |
95 | | - min(map(itemgetter("x0"), cropped.chars)) |
96 | | - ], |
97 | | - "intersection_tolerance": 5 |
98 | | - }) |
99 | | - |
100 | | - table = pd.DataFrame(table) |
101 | | - |
102 | | - def parse_value(x): |
103 | | - if pd.isnull(x) or x == "": return None |
104 | | - return int(x.replace(",", "")) |
105 | | - |
106 | | - table.columns = COLUMNS |
107 | | - table[table.columns[1:]] = table[table.columns[1:]].applymap(parse_value) |
108 | | - |
109 | | - # [1:] because first column is state name |
110 | | - for c in COLUMNS[1:]: |
111 | | - total = table[c].iloc[-1] |
112 | | - colsum = table[c].sum() |
113 | | - assert(colsum == (total * 2)) |
114 | | - |
115 | | - month_chars = within_bbox(page.chars, (0, 35, self.PDF_WIDTH, 65)) |
116 | | - month_text = collate_chars(month_chars) |
117 | | - assert(month_text == "November - 2015") |
118 | | - |
119 | 88 | def test_filter(self): |
120 | 89 | page = self.pdf.pages[0] |
121 | 90 | def test(obj): |
|
0 commit comments