diff --git a/container/Dockerfile b/container/Dockerfile
index 70fdbadf9..7adc73b1b 100644
--- a/container/Dockerfile
+++ b/container/Dockerfile
@@ -6,12 +6,9 @@ RUN apk -U upgrade && \
     ghostscript \
     graphicsmagick \
     libreoffice \
-    openjdk8 \
     poppler-utils \
     python3 \
     py3-magic \
-    py3-pillow \
-    sudo \
     tesseract-ocr \
     tesseract-ocr-data-afr \
     tesseract-ocr-data-ara \
@@ -78,15 +75,6 @@ RUN apk -U upgrade && \
     tesseract-ocr-data-ukr \
     tesseract-ocr-data-vie
 
-# Install pdftk
-RUN \
-    wget https://gitlab.com/pdftk-java/pdftk/-/jobs/924565145/artifacts/raw/build/libs/pdftk-all.jar && \
-    mv pdftk-all.jar /usr/local/bin && \
-    chmod +x /usr/local/bin/pdftk-all.jar && \
-    echo '#!/bin/sh' > /usr/local/bin/pdftk && \
-    echo '/usr/bin/java -jar "/usr/local/bin/pdftk-all.jar" "$@"' >> /usr/local/bin/pdftk && \
-    chmod +x /usr/local/bin/pdftk
-
 COPY dangerzone.py /usr/local/bin/
 RUN chmod +x /usr/local/bin/dangerzone.py
 
diff --git a/container/dangerzone.py b/container/dangerzone.py
index 97e926fde..f888ee55e 100644
--- a/container/dangerzone.py
+++ b/container/dangerzone.py
@@ -15,13 +15,14 @@
 import glob
 import json
 import os
+import re
 import shutil
 import subprocess
 import sys
-from typing import Dict, List, Optional
+import time
+from typing import Callable, Dict, List, Optional, Union
 
 import magic
-from PIL import Image
 
 # timeout in seconds for any single subprocess
 DEFAULT_TIMEOUT: float = 120
@@ -36,25 +37,63 @@ def run_command(
     error_message: str,
     timeout_message: str,
     timeout: float = DEFAULT_TIMEOUT,
-) -> subprocess.CompletedProcess:
+    stdout_callback: Callable = None,
+    stderr_callback: Callable = None,
+) -> None:
     """
-    Runs a command and returns the result.
+    Runs a command, optionally passing its output lines to callbacks.
 
     :raises RuntimeError: if the process returns a non-zero exit status
     :raises TimeoutError: if the process times out
     """
-    try:
-        return subprocess.run(
+    if stdout_callback is None and stderr_callback is None:
+        try:
+            subprocess.run(args, timeout=timeout, check=True)
+        except subprocess.CalledProcessError as e:
+            raise RuntimeError(error_message) from e
+        except subprocess.TimeoutExpired as e:
+            raise TimeoutError(timeout_message) from e
+
+    else:
+        p = subprocess.Popen(
             args,
-            stdout=subprocess.DEVNULL,
-            stderr=subprocess.DEVNULL,
-            timeout=timeout,
-            check=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            universal_newlines=True,
         )
-    except subprocess.CalledProcessError as e:
-        raise RuntimeError(error_message) from e
-    except subprocess.TimeoutExpired as e:
-        raise TimeoutError(timeout_message) from e
+
+        # Progress callback requires a manually implemented timeout
+        start_time = time.time()
+
+        # Make reading from stdout or stderr non-blocking
+        if p.stdout:
+            os.set_blocking(p.stdout.fileno(), False)
+        if p.stderr:
+            os.set_blocking(p.stderr.fileno(), False)
+
+        while True:
+            # Process has finished
+            if p.poll() is not None:
+                if p.returncode != 0:
+                    raise RuntimeError(error_message)
+                break
+
+            # Check whether the timeout has expired
+            if time.time() - start_time > timeout:
+                p.kill()
+                raise TimeoutError(timeout_message)
+
+            if p.stdout and stdout_callback is not None:
+                line = p.stdout.readline()
+                if len(line) > 0:
+                    line = line.rstrip()  # strip trailing "\n"
+                    stdout_callback(line)
+
+            if p.stderr and stderr_callback is not None:
+                line = p.stderr.readline()
+                if len(line) > 0:
+                    line = line.rstrip()  # strip trailing "\n"
+                    stderr_callback(line)
 
 
 class DangerzoneConverter:
@@ -181,65 +220,104 @@ def document_to_pixels(self) -> None:
             )
         self.percentage += 3
 
-        # Separate PDF into pages
-        self.update_progress("Separating document into pages")
-        args = ["pdftk", pdf_filename, "burst", "output", "/tmp/page-%d.pdf"]
-        run_command(
-            args,
-            error_message="Separating document into pages failed",
-            timeout_message=f"Error separating document into pages, pdftk timed out after {DEFAULT_TIMEOUT} seconds",
-        )
-
-        page_filenames = glob.glob("/tmp/page-*.pdf")
-
-        self.percentage += 2
-
-        # Convert to RGB pixel data
-        percentage_per_page = 45.0 / len(page_filenames)
-        for page in range(1, len(page_filenames) + 1):
-            pdf_filename = f"/tmp/page-{page}.pdf"
-            png_filename = f"/tmp/page-{page}.png"
-            rgb_filename = f"/tmp/page-{page}.rgb"
-            width_filename = f"/tmp/page-{page}.width"
-            height_filename = f"/tmp/page-{page}.height"
-            filename_base = f"/tmp/page-{page}"
-
-            self.update_progress(
-                f"Converting page {page}/{len(page_filenames)} to pixels"
-            )
-
-            # Convert to png
-            run_command(
-                ["pdftocairo", pdf_filename, "-png", "-singlefile", filename_base],
-                error_message="Conversion from PDF to PNG failed",
-                timeout_message=f"Error converting from PDF to PNG, pdftocairo timed out after {DEFAULT_TIMEOUT} seconds",
-            )
+        # Obtain number of pages
+        self.update_progress("Calculating number of pages")
+        self.num_pages: Union[None, int] = None
 
-            # Save the width and height
-            with Image.open(png_filename, "r") as im:
-                width, height = im.size
-            with open(width_filename, "w") as f:
-                f.write(str(width))
-            with open(height_filename, "w") as f:
-                f.write(str(height))
+        def get_num_pages(line: str) -> None:  # stdout callback: scans one pdfinfo output line
+            search = re.search(r"^Pages:\s+(\d+)", line)  # accept any amount of column padding
+            if search is not None:
+                self.num_pages = int(search.group(1))  # stays None if no "Pages:" line is seen
 
-            # Convert to RGB pixels
-            run_command(
-                [
-                    "gm",
-                    "convert",
-                    png_filename,
-                    "-depth",
-                    "8",
-                    f"rgb:{rgb_filename}",
-                ],
-                error_message="Conversion from PNG to RGB failed",
-                timeout_message=f"Error converting from PNG to pixels, convert timed out after {DEFAULT_TIMEOUT} seconds",
-            )
-
-            # Delete the png
-            os.remove(png_filename)
+        run_command(
+            ["pdfinfo", pdf_filename],
+            error_message="PDF file is corrupted",
+            timeout_message=f"Extracting metadata from PDF timed out after 1 second",
+            timeout=1,
+            stdout_callback=get_num_pages,
+        )
+        if self.num_pages == None:
+            raise ValueError("Number of pages could not be extraced from PDF")
+
+        def pdftoppm_progress_callback(line: str) -> None:
+            """Function called for every line the 'pdftoppm' command outputs
+
+            Sample pdftoppm output (this callback is registered for stderr lines):
+
+                $ pdftoppm sample.pdf  /tmp/safe -progress
+                1 4 /tmp/safe-1.ppm
+                2 4 /tmp/safe-2.ppm
+                3 4 /tmp/safe-3.ppm
+                4 4 /tmp/safe-4.ppm
+
+            Each successful line is in the format "{page} {num_pages} {ppm_filename}"
+            """
+            try:
+                (page_str, num_pages_str, _) = line.split()  # ValueError unless exactly 3 fields
+                num_pages = int(num_pages_str)
+                page = int(page_str)
+            except ValueError as e:
+                raise RuntimeError("Conversion from PDF to PPM failed") from e
+
+            percentage_per_page = 45.0 / num_pages
             self.percentage += percentage_per_page
+            self.update_progress(f"Converting page {page}/{num_pages} to pixels")
+
+            zero_padding = "0" * (len(num_pages_str) - len(page_str))  # pad page to the width of num_pages
+            ppm_filename = f"{page_base}-{zero_padding}{page}.ppm"
+            rgb_filename = f"{page_base}-{page}.rgb"
+            width_filename = f"{page_base}-{page}.width"
+            height_filename = f"{page_base}-{page}.height"
+            filename_base = f"{page_base}-{page}"  # NOTE(review): not used in the visible part of this callback
+
+            with open(ppm_filename, "rb") as f:
+                # NOTE: PPM files have multiple ways of writing headers.
+                # For our specific case we parse it expecting the header format that pdftoppm produces
+                # More info on PPM headers: https://people.uncw.edu/tompkinsj/112/texnh/assignments/imageFormat.html
+
+                # Read the header
+                header = f.readline().decode().strip()
+                if header != "P6":
+                    raise ValueError("Invalid PPM header")
+
+                # Save the width and height
+                dims = f.readline().decode().strip()
+                width, height = dims.split()
+                with open(width_filename, "w") as width_file:
+                    width_file.write(width)
+                with open(height_filename, "w") as height_file:
+                    height_file.write(height)
+
+                maxval = int(f.readline().decode().strip())
+                # Check that the depth is 8
+                if maxval != 255:
+                    raise ValueError("Invalid PPM depth")
+
+                data = f.read()  # everything after the three header lines is written out as pixel data
+
+            # Save pixel data
+            with open(rgb_filename, "wb") as f:
+                f.write(data)
+
+            # Delete the ppm file
+            os.remove(ppm_filename)
+
+        page_base = "/tmp/page"
+
+        # Convert to PPM, which is essentially an RGB format
+        pdftoppm_timeout = 1.0 * self.num_pages  # type: ignore [operator]
+        run_command(
+            [
+                "pdftoppm",
+                pdf_filename,
+                page_base,
+                "-progress",
+            ],
+            error_message="Conversion from PDF to PPM failed",
+            timeout_message=f"Error converting from PDF to PPM, pdftoppm timed out after {pdftoppm_timeout} seconds",
+            stderr_callback=pdftoppm_progress_callback,
+            timeout=pdftoppm_timeout,
+        )
 
         self.update_progress("Converted document to pixels")