Skip to content

Commit

Permalink
Fix issue #163, color and grayscale images JPEG compressed when not n…
Browse files Browse the repository at this point in the history
…eeded
  • Loading branch information
James R. Barlow committed May 7, 2017
1 parent 1464b90 commit 93e802f
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 2 deletions.
4 changes: 4 additions & 0 deletions ocrmypdf/exec/ghostscript.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ def generate_pdfa(pdf_pages, output_file, log, threads=1):
"-dAutoRotatePages=/None",
"-sColorConversionStrategy=/RGB",
"-sProcessColorModel=DeviceRGB",
"-dAutoFilterColorImages=false",
"-sColorImageFilter=",
"-dAutoFilterGrayImages=false",
"-sGrayImageFilter=",
"-dJPEGQ=95",
"-dPDFA=2",
"-dPDFACompatibilityPolicy=1",
Expand Down
2 changes: 2 additions & 0 deletions ocrmypdf/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,8 @@ def select_visible_page_image(
pageinfo = get_pageinfo(image, context)
if pageinfo['images'] and \
all(im['enc'] == 'jpeg' for im in pageinfo['images']):
log.debug('{:4d}: JPEG input -> JPEG output'.format(
page_number(page_pdf)))
# If all images were JPEGs originally, produce a JPEG as output
im = Image.open(image)

Expand Down
7 changes: 6 additions & 1 deletion tests/resources/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ In some cases they were converted from one image format to another without other
* - typewriter.png, 2400dpi.pdf
- `Wikimedia: Triumph typewrtier text Linzensoep`_
- Creative Commons BY-SA 2.5
* - baiona.png
- `Wikimedia: Baionako udalerri mugakideak`_
- Creative Commons BY-SA 4.0


Files generated for this project
Expand Down Expand Up @@ -118,4 +121,6 @@ These test resources are assemblies from other previously mentioned files, relea

.. _`Linux (Wikipedia Article)`: https://de.wikipedia.org/wiki/Linux

.. _`Wikimedia: Triumph typewrtier text Linzensoep`: https://commons.wikimedia.org/wiki/File:Triumph.typewriter_text_Linzensoep.gif
.. _`Wikimedia: Triumph typewrtier text Linzensoep`: https://commons.wikimedia.org/wiki/File:Triumph.typewriter_text_Linzensoep.gif

.. _`Wikimedia: Baionako udalerri mugakideak`: https://commons.wikimedia.org/wiki/File:Baionako_udalerri_mugakideak.png
Binary file added tests/resources/baiona.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/resources/baiona_gray.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
36 changes: 35 additions & 1 deletion tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -874,4 +874,38 @@ def test_gs_raster_failure(spoof_no_tess_gs_raster_fail, resources, outpdf):

def test_no_contents(spoof_tesseract_noop, resources, outpdf):
    """Run ocrmypdf with ``--force-ocr`` on ``no_contents.pdf``.

    The input is taken from the test resources directory and the result is
    written to ``outpdf``; Tesseract is replaced by the no-op spoof supplied
    through ``env``.  Pass/fail is decided by ``check_ocrmypdf``.
    """
    # NOTE(review): the file name suggests a PDF whose page has no content
    # stream -- confirm against the resource itself.
    check_ocrmypdf(resources / 'no_contents.pdf', outpdf, '--force-ocr',
                   env=spoof_tesseract_noop)


@pytest.mark.parametrize('image', [
    'baiona.png',
    'baiona_gray.png',
])
def test_lossless_to_lossless(spoof_tesseract_noop, ocrmypdf_exec,
                              resources, image, outpdf):
    """Check that a lossless input image is not re-encoded as JPEG.

    The PNG named by ``image`` is piped to ocrmypdf on stdin (with
    ``--image-dpi 150``) and the first image on the first page of the
    resulting PDF is inspected:

    * its encoding must not be ``'jpeg'``;
    * for RGB/BGR inputs its colorspace must be ``'rgb'``;
    * for grayscale (``L``) inputs its colorspace must be ``'gray'``.

    Inputs in any other PIL mode only get the encoding check.
    """
    from PIL import Image

    input_file = str(resources / image)
    output_file = str(outpdf)

    # Only the pixel mode is needed later, so read it and release the file
    # handle right away instead of keeping the image open for the whole test.
    with Image.open(input_file) as im:
        input_mode = im.mode

    # Runs: ocrmypdf - output.pdf < testfile
    with open(input_file, 'rb') as input_stream:
        p_args = ocrmypdf_exec + ['--image-dpi', '150', '-', output_file]
        p = Popen(
            p_args, close_fds=True, stdout=PIPE, stderr=PIPE,
            stdin=input_stream, env=spoof_tesseract_noop)
        _, err = p.communicate()

    # Surface the child's stderr so a non-zero exit is diagnosable.
    assert p.returncode == ExitCode.ok, err.decode(errors='replace')

    pdfinfo = pdf_get_all_pageinfo(output_file)
    first_image = pdfinfo[0]['images'][0]

    assert first_image['enc'] != 'jpeg', \
        "Lossless compression changed to lossy!"
    if input_mode.startswith(('RGB', 'BGR')):
        assert first_image['color'] == 'rgb', \
            "Colorspace changed"
    elif input_mode.startswith('L'):
        assert first_image['color'] == 'gray', \
            "Colorspace changed"

0 comments on commit 93e802f

Please sign in to comment.