Skip to content

Commit

Permalink
DOC: Black formatting and variable naming
Browse files: browse the repository at this point in the history
  • Loading branch information
MartinThoma committed Sep 27, 2022
1 parent a5f4f41 commit d9aa64c
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 9 deletions.
29 changes: 20 additions & 9 deletions docs/user/extract-text.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,19 @@ from PyPDF2 import PdfReader
reader = PdfReader("GeoBase_NHNC1_Data_Model_UML_EN.pdf")
page = reader.pages[3]

listParts = []
parts = []


def visitor_body(text, cm, tm, fontDict, fontSize):
y = tm[5]
if y > 50 and y < 720:
listParts.append(text)
parts.append(text)


page.extract_text(visitor_text=visitor_body)
textBody = ''.join([t for t in listParts])
text_body = "".join(parts)

print(textBody)
print(text_body)
```

### Example 2: Extract rectangles and text into an SVG file
Expand All @@ -75,15 +79,22 @@ reader = PdfReader("GeoBase_NHNC1_Data_Model_UML_EN.pdf")
page = reader.pages[2]

dwg = svgwrite.Drawing("GeoBase_test.svg", profile="tiny")


def visitor_svg_rect(op, args, cm, tm):
if op == b're':
(x, y, w, h) = (args[i].as_numeric() for i in range(4))
dwg.add(dwg.rect((x, y), (w, h), stroke="red", fill_opacity=0.05))
if op == b"re":
(x, y, w, h) = (args[i].as_numeric() for i in range(4))
dwg.add(dwg.rect((x, y), (w, h), stroke="red", fill_opacity=0.05))


def visitor_svg_text(text, cm, tm, fontDict, fontSize):
(x, y) = (tm[4], tm[5])
dwg.add(dwg.text(text, insert=(x, y), fill="blue"))
page.extract_text(visitor_operand_before=visitor_svg_rect,
visitor_text=visitor_svg_text)


page.extract_text(
visitor_operand_before=visitor_svg_rect, visitor_text=visitor_svg_text
)
dwg.save()
```

Expand Down
Binary file added docs/user/page-stamped.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/user/page.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit d9aa64c

Please sign in to comment.