-
Notifications
You must be signed in to change notification settings - Fork 0
/
extract_images.py
34 lines (30 loc) · 1.21 KB
/
extract_images.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# PyMuPDF
import fitz
import os
def extract_images_from_pdf(pdf_path):
    """Extract every image referenced by the pages of a PDF and save them.

    Each image is written under a relative filename of the form
    ``image<page.number>_<index>.<ext>``, so the files land in the current
    working directory. ``<index>`` is the position of the image in that
    page's image list and ``<ext>`` is the extension reported by PyMuPDF for
    the extracted image.

    Args:
        pdf_path: Path of the PDF file to open with ``fitz.open``.

    Returns:
        A list of the filenames that were written, in extraction order.

    Raises:
        Whatever ``fitz.open`` / ``extract_image`` or the file writes raise;
        the document is closed before the exception propagates.
    """
    doc = fitz.open(pdf_path)
    images = []
    try:
        for page in doc:
            # Query the page's image list once; the first item of each
            # entry is the xref used to pull the image out of the document.
            for img_index, img in enumerate(page.get_images(full=True)):
                xref = img[0]
                base_image = doc.extract_image(xref)
                image_ext = base_image["ext"]
                image_filename = f"image{page.number}_{img_index}.{image_ext}"
                with open(image_filename, "wb") as img_file:
                    img_file.write(base_image["image"])
                images.append(image_filename)
    finally:
        # Always release the document, even if extraction or a write fails.
        doc.close()
    return images
def resolve_pdf_path(pdf_filename):
    """Turn user input into the path of the PDF to open.

    An absolute path is returned unchanged. A relative name gets a ".pdf"
    suffix appended when it does not already end with one (case-insensitive
    check) and is then joined onto the current working directory.

    Args:
        pdf_filename: Filename or full path as typed by the user.

    Returns:
        The path string to pass to ``extract_images_from_pdf``.
    """
    if os.path.isabs(pdf_filename):
        return pdf_filename
    if not pdf_filename.lower().endswith(".pdf"):
        pdf_filename += ".pdf"
    # Prepend the current directory to construct the full path.
    return os.path.join(os.getcwd(), pdf_filename)


def main():
    """Prompt for a PDF, extract its images, and print the saved filenames."""
    pdf_filename = resolve_pdf_path(
        input("Enter the PDF filename or full path: ")
    )
    extracted_images = extract_images_from_pdf(pdf_filename)
    print("Extracted images:", extracted_images)


# Guard the interactive entry point so importing this module does not
# block on stdin or write files as a side effect.
if __name__ == "__main__":
    main()