diff --git a/dangerzone/conversion/doc_to_pixels.py b/dangerzone/conversion/doc_to_pixels.py index d8fa3482d..798687abf 100644 --- a/dangerzone/conversion/doc_to_pixels.py +++ b/dangerzone/conversion/doc_to_pixels.py @@ -14,11 +14,12 @@ import re import shutil import sys -from typing import Dict, Optional +from typing import Dict, List, Optional import magic from .common import DangerzoneConverter, running_on_qubes +from .errors import * class DocumentToPixels(DangerzoneConverter): @@ -139,7 +140,7 @@ async def convert(self) -> None: # Validate MIME type if mime_type not in conversions: - raise ValueError("The document format is not supported") + raise DocFormatUnsupported() # Temporary fix for the HWPX format # Should be removed after new release of `file' (current release 5.44) @@ -171,11 +172,9 @@ async def convert(self) -> None: "arm64", "aarch64", ): - raise ValueError( - "HWP / HWPX formats are not supported in ARM architectures" - ) + raise DocFormatUnsupportedHWPArm() if libreoffice_ext == "h2orestart.oxt" and running_on_qubes(): - raise ValueError("HWP / HWPX formats are not supported in Qubes") + raise DocFormatUnsupportedHWPQubes() if libreoffice_ext: await self.install_libreoffice_ext(libreoffice_ext) self.update_progress("Converting to PDF using LibreOffice") @@ -204,7 +203,7 @@ async def convert(self) -> None: # # https://github.com/freedomofpress/dangerzone/issues/494 if not os.path.exists(pdf_filename): - raise ValueError("Conversion to PDF with LibreOffice failed") + raise LibreofficeFailure() elif conversion["type"] == "convert": self.update_progress("Converting to PDF using GraphicsMagick") args = [ @@ -224,7 +223,7 @@ async def convert(self) -> None: ) pdf_filename = "/tmp/input_file.pdf" else: - raise ValueError( + raise InvalidGMConversion( f"Invalid conversion type {conversion['type']} for MIME type {mime_type}" ) self.percentage += 3 @@ -244,7 +243,7 @@ async def convert(self) -> None: if search is not None: num_pages: int = int(search.group(1)) else: - raise ValueError("Number of pages could not be extracted from PDF") + raise NoPageCountException() # Get a more precise timeout, based on the number of pages timeout = self.calculate_timeout(size, num_pages) @@ -291,7 +290,7 @@ def pdftoppm_progress_callback(line: bytes) -> None: # Read the header header = f.readline().decode().strip() if header != "P6": - raise ValueError("Invalid PPM header") + raise PDFtoPPMInvalidHeader() # Save the width and height dims = f.readline().decode().strip() @@ -304,7 +303,7 @@ def pdftoppm_progress_callback(line: bytes) -> None: maxval = int(f.readline().decode().strip()) # Check that the depth is 8 if maxval != 255: - raise ValueError("Invalid PPM depth") + raise PDFtoPPMInvalidDepth() data = f.read() @@ -378,10 +377,11 @@ async def main() -> int: try: await converter.convert() error_code = 0 # Success! - except (RuntimeError, TimeoutError, ValueError) as e: + except ConversionException as e: # Expected Errors + error_code = e.error_code + except (RuntimeError, TimeoutError, ValueError) as e: # Unexpected Errors converter.update_progress(str(e), error=True) error_code = 1 - if not running_on_qubes(): # Write debug information (containers version) with open("/tmp/dangerzone/captured_output.txt", "wb") as container_log: diff --git a/dangerzone/conversion/doc_to_pixels_qubes_wrapper.py b/dangerzone/conversion/doc_to_pixels_qubes_wrapper.py index 676b01835..6c13f58fa 100644 --- a/dangerzone/conversion/doc_to_pixels_qubes_wrapper.py +++ b/dangerzone/conversion/doc_to_pixels_qubes_wrapper.py @@ -7,6 +7,7 @@ from typing import Optional, TextIO from .doc_to_pixels import DocumentToPixels +from .errors import ConversionException def _read_bytes() -> bytes: @@ -67,8 +68,13 @@ async def main() -> None: with open("/tmp/input_file", "wb") as f: f.write(data) - converter = DocumentToPixels() - await converter.convert() + try: + converter = DocumentToPixels() + await converter.convert() + except ConversionException as e: + sys.exit(e.error_code) + except (RuntimeError, TimeoutError, ValueError) as e: + sys.exit(1) num_pages = len(list(out_dir.glob("*.rgb"))) await write_int(num_pages) diff --git a/dangerzone/conversion/errors.py b/dangerzone/conversion/errors.py new file mode 100644 index 000000000..60d70544b --- /dev/null +++ b/dangerzone/conversion/errors.py @@ -0,0 +1,78 @@ +from typing import List, Optional + + +class ConversionException(Exception): + error_message = "Unspecificed error" + error_code = -1 + + def __init__(self, error_message: Optional[str] = None) -> None: + if error_message: + self.error_message = error_message + super().__init__(self.error_message) + + @classmethod + def get_subclasses(cls) -> List[type["ConversionException"]]: + subclasses = [cls] + for subclass in cls.__subclasses__(): + subclasses += subclass.get_subclasses() + return subclasses + + +class DocFormatUnsupported(ConversionException): + error_code = 10 + error_message = "The document format is not supported" + + +class DocFormatUnsupportedHWPArm(DocFormatUnsupported): + error_code = 15 + error_message = "HWP / HWPX formats are not supported in ARM architectures" + + +class DocFormatUnsupportedHWPQubes(DocFormatUnsupported): + error_code = 16 + error_message = "HWP / HWPX formats are not supported in Qubes" + + +class LibreofficeFailure(ConversionException): + error_code = 20 + error_message = "Conversion to PDF with LibreOffice failed" + + +class InvalidGMConversion(ConversionException): + error_code = 30 + error_message = "Invalid conversion (Graphics Magic)" + + def __init__(self, error_message: str) -> None: + super(error_message) + + +class PagesException(ConversionException): + error_code = 40 + + +class NoPageCountException(PagesException): + error_code = 41 + error_message = "Number of pages could not be extracted from PDF" + + +class PDFtoPPMException(ConversionException): + error_code = 50 + error_message = f"Error converting PDF to Pixels (pdftoppm)" + + +class PDFtoPPMInvalidHeader(PDFtoPPMException): + error_code = 51 + error_message = "Error converting PDF to Pixels (Invalid PPM header)" + + +class PDFtoPPMInvalidDepth(PDFtoPPMException): + error_code = 52 + error_message = "Error converting PDF to Pixels (Invalid PPM depth)" + + +def exception_from_error_code(error_code: int) -> Optional[ConversionException]: + """returns the conversion exception corresponding to the error code""" + for cls in ConversionException.get_subclasses(): + if cls.error_code == error_code: + return cls() + raise ValueError(f"Unknown error code '{error_code}'") diff --git a/dangerzone/isolation_provider/base.py b/dangerzone/isolation_provider/base.py index 00552c9c4..c3484b34e 100644 --- a/dangerzone/isolation_provider/base.py +++ b/dangerzone/isolation_provider/base.py @@ -5,6 +5,7 @@ from colorama import Fore, Style +from ..conversion.errors import ConversionException from ..document import Document from ..util import replace_control_chars @@ -36,7 +37,10 @@ def convert( document.mark_as_converting() try: success = self._convert(document, ocr_lang) - except Exception: + except ConversionException as e: + success = False + self.print_progress_trusted(document, True, e.error_message, 0) + except Exception as e: success = False log.exception( f"An exception occurred while converting document '{document.id}'" diff --git a/dangerzone/isolation_provider/container.py b/dangerzone/isolation_provider/container.py index 9d00985a9..7ef602646 100644 --- a/dangerzone/isolation_provider/container.py +++ b/dangerzone/isolation_provider/container.py @@ -11,6 +11,7 @@ import tempfile from typing import Any, Callable, List, Optional, Tuple +from ..conversion.errors import exception_from_error_code from ..document import Document from ..util import ( get_resource_path, @@ -305,6 +306,9 @@ def _convert_with_tmpdirs( if ret != 0: log.error("documents-to-pixels failed") + + # XXX Reconstruct exception from error code + raise exception_from_error_code(ret) # type: ignore [misc] else: # TODO: validate convert to pixels output diff --git a/dangerzone/isolation_provider/qubes.py b/dangerzone/isolation_provider/qubes.py index 8b56ae588..75b7ed53b 100644 --- a/dangerzone/isolation_provider/qubes.py +++ b/dangerzone/isolation_provider/qubes.py @@ -20,6 +20,7 @@ log = logging.getLogger(__name__) from ..conversion.common import running_on_qubes +from ..conversion.errors import exception_from_error_code from ..conversion.pixels_to_pdf import PixelsToPDF from ..util import get_resource_path, get_subprocess_startupinfo, get_tmp_dir from .base import MAX_CONVERSION_LOG_CHARS @@ -38,6 +39,8 @@ def read_bytes(p: subprocess.Popen, buff_size: int) -> bytes: def read_int(p: subprocess.Popen) -> int: """Read 2 bytes from stdout, and decode them as int.""" untrusted_int = p.stdout.read(2) # type: ignore [union-attr] + if untrusted_int == b"": + raise ValueError("Nothing read from stdout (expected an integer)") return int.from_bytes(untrusted_int, signed=False) @@ -104,7 +107,12 @@ def _convert( stderr=subprocess.DEVNULL, ) - n_pages = read_int(p) + try: + n_pages = read_int(p) + except ValueError: + error_code = p.wait() + # XXX Reconstruct exception from error code + raise exception_from_error_code(error_code) # type: ignore [misc] if n_pages == 0: # FIXME: Fail loudly in that case return False