Skip to content

Commit

Permalink
skip single pixel images in PDF (#546)
Browse files Browse the repository at this point in the history
* skip pixels hopefully

* add comments and reorder

* add constant
  • Loading branch information
axu2 authored Aug 5, 2023
1 parent 154707a commit 9339abb
Showing 1 changed file with 11 additions and 1 deletion.
12 changes: 11 additions & 1 deletion kindlecomicconverter/pdfjpgextract.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@
from random import choice
from string import ascii_uppercase, digits

# Skip stray images that are only a few pixels in size, found in some PDFs.
# An extracted image whose data is shorter than this threshold is not
# written out; typical images are many thousands in length.
# https://github.com/ciromattia/kcc/pull/546
STRAY_IMAGE_LENGTH_THRESHOLD = 300


class PdfJpgExtract:
def __init__(self, fname):
Expand Down Expand Up @@ -60,10 +65,15 @@ def extract(self):
raise Exception("Didn't find end of JPG!")
istart += startfix
iend += endfix
i = iend

if iend - istart < STRAY_IMAGE_LENGTH_THRESHOLD:
continue

jpg = pdf[istart:iend]
jpgfile = open(self.path + "/jpg%d.jpg" % njpg, "wb")
jpgfile.write(jpg)
jpgfile.close()
njpg += 1
i = iend

return self.path, njpg

0 comments on commit 9339abb

Please sign in to comment.