Skip to content

Commit

Permalink
two headers on one page no longer intertied
Browse files: browse the repository at this point in the history.
  • Loading branch information
GarrettArm committed Feb 3, 2016
1 parent c7be81c commit 0f28bb2
Showing 1 changed file with 2 additions and 0 deletions.
2 changes: 2 additions & 0 deletions pdfScraper.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -148,6 +148,8 @@ def get_text_after_header(self, inventory_item, following_inventory_item=None):
elem_of_header = self.element_tree.xpath('//page[@number="{}"]/text/b[text()[contains(translate(., "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"), "{}")]]'.format(beginning_page, header.lower().strip()))
text_after_header = []
for i in self.get_first_page_siblings_and_children(elem_of_header):
if i.lower().strip() == following_header.lower().strip():
break
text_after_header.append(i.strip())
if following_inventory_item:
following_header, (following_beginning_page, following_end_page) = following_inventory_item
Expand Down

0 comments on commit 0f28bb2

Please sign in to comment.