|
1 |
| - |
2 |
| -### img2pdf #### |
3 |
| -import os |
| 1 | +import PyPDF2 |
| 2 | +from os import path |
4 | 3 | import sys
|
5 |
| -from fpdf import FPDF |
6 |
| -from PIL import Image |
7 |
| -import glob |
8 |
| - |
9 | 4 |
|
def File_existance_checker(filePath):
    """Return ``filePath`` unchanged if it names an existing regular file.

    Args:
        filePath: Path supplied by the user.

    Returns:
        The same ``filePath`` value, so the call can be used inline.

    Raises:
        SystemExit: With exit status 1 when ``filePath`` is not an existing
            regular file (missing path, directory, ...).
    """
    if not path.isfile(filePath):
        # Diagnostics go to stderr so they never mix with regular output.
        print("[-] Provide a valid File", file=sys.stderr)
        sys.exit(1)
    return filePath
# Script body: ask for a PDF, extract the text of every page and append it to
# a ".txt" file that sits next to the PDF and shares its base name.
pdf_stored_path = input("Enter the name of you pdf file (please use backslash when typing in directory path):")

# Validate before doing anything else; the checker exits the program with
# status 1 when the path is not an existing file.
pdf_stored_path = File_existance_checker(pdf_stored_path)

# Swap only the real extension. A plain str.replace(".pdf", ".txt") leaves
# names such as "REPORT.PDF" untouched, which would make the output path equal
# to the input path and append text onto the PDF itself.
textFile_stored_path = path.splitext(pdf_stored_path)[0] + ".txt"

print(textFile_stored_path)

with open(pdf_stored_path, 'rb') as pdf_object:
    # PdfReader / .pages are the current names of the legacy
    # PdfFileReader / numPages / getPage API, which newer PyPDF2 rejects.
    pdf_read = PyPDF2.PdfReader(pdf_object)

    # Open the output once instead of once per page. Append mode is kept, so
    # running the script again adds to an existing text file. UTF-8 avoids
    # encode errors for non-ASCII text on platforms with a narrow default.
    with open(textFile_stored_path, 'a', encoding='utf-8') as f:
        for page_object in pdf_read.pages:
            # Guard against a page yielding no text at all.
            f.write(page_object.extract_text() or "")

# Reported only after both files are closed, i.e. the text is on disk.
print(f"[+] Pdf Text has been extracted and written to {path.basename(textFile_stored_path)}")
0 commit comments