diff --git a/.circleci/config.yml b/.circleci/config.yml index 2e8af6c49..805f83b45 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -151,10 +151,14 @@ jobs: - run: name: Install poetry dependencies command: | - # Remove this pin once the upstream Poetry issue is fixed: + sudo pip3 install poetry + # This flag is important, due to an open upstream Poetry issue: # https://github.com/python-poetry/poetry/issues/7184 - sudo pip3 install poetry==1.2.2 - poetry install + poetry install --no-ansi + - run: + name: Install test dependencies + command: | + sudo apt-get install -y libqt5gui5 --no-install-recommends - run: name: Prepare cache directory command: | @@ -219,6 +223,32 @@ jobs: ./dev_scripts/env.py --distro ubuntu --version 22.04 run --dev \ bash -c 'cd dangerzone; poetry run make test' + ci-fedora-37: + machine: + image: ubuntu-2004:202111-01 + steps: + - checkout + - run: *install-podman + + - run: + name: Prepare cache directory + command: | + sudo mkdir -p /caches + sudo chown -R $USER:$USER /caches + - restore_cache: *restore-cache + - run: *copy-image + + - run: + name: Prepare Dangerzone environment + command: | + ./dev_scripts/env.py --distro fedora --version 37 build-dev + + - run: + name: Run CI tests + command: | + ./dev_scripts/env.py --distro fedora --version 37 run --dev \ + bash -c 'cd dangerzone; poetry run make test' + ci-fedora-36: machine: image: ubuntu-2004:202111-01 @@ -462,6 +492,9 @@ workflows: - ci-debian-bookworm: requires: - build-container-image + - ci-fedora-37: + requires: + - build-container-image - ci-fedora-36: requires: - build-container-image diff --git a/container/Dockerfile b/container/Dockerfile index 7adc73b1b..bb6581cdd 100644 --- a/container/Dockerfile +++ b/container/Dockerfile @@ -6,6 +6,7 @@ RUN apk -U upgrade && \ ghostscript \ graphicsmagick \ libreoffice \ + openjdk8 \ poppler-utils \ python3 \ py3-magic \ diff --git a/container/dangerzone.py b/container/dangerzone.py index f888ee55e..af5ce21e9 100644 --- a/container/dangerzone.py +++ b/container/dangerzone.py @@ -12,6 +12,7 @@ - 95%-100%: Compress the final PDF """ +import asyncio import glob import json import os @@ -20,7 +21,7 @@ import subprocess import sys import time -from typing import Callable, Dict, List, Optional, Union +from typing import Callable, Dict, List, Optional, Tuple, Union import magic @@ -31,7 +32,30 @@ COMPRESSION_TIMEOUT: float = 10 -def run_command( +async def read_stream(sr: asyncio.StreamReader, callback: Callable = None) -> bytes: + """Consume a byte stream line-by-line. + + Read all lines in a stream until EOF. If a user has passed a callback, call it for + each line. + + Note that the lines are in bytes, since we can't assume that all command output will + be UTF-8 encoded. Higher level commands are advised to decode the output to Unicode, + if they know its encoding. + """ + buf = b"" + while True: + line = await sr.readline() + if sr.at_eof(): + break + if callback is not None: + callback(line) + # TODO: This would be a good place to log the received line, mostly for debug + # logging. + buf += line + return buf + + +async def run_command( args: List[str], *, error_message: str, @@ -39,68 +63,52 @@ def run_command( timeout: float = DEFAULT_TIMEOUT, stdout_callback: Callable = None, stderr_callback: Callable = None, -) -> None: - """ - Runs a command and returns the result. +) -> Tuple[bytes, bytes]: + """Run a command and get its output. + + Run a command using asyncio.subprocess, consume its standard streams, and return its + output in bytes. :raises RuntimeError: if the process returns a non-zero exit status :raises TimeoutError: if the process times out """ - if stdout_callback is None and stderr_callback is None: - try: - subprocess.run(args, timeout=timeout, check=True) - except subprocess.CalledProcessError as e: - raise RuntimeError(error_message) from e - except subprocess.TimeoutExpired as e: - raise TimeoutError(timeout_message) from e - - else: - p = subprocess.Popen( - args, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True, - ) - - # Progress callback requires a manually implemented timeout - start_time = time.time() - - # Make reading from stdout or stderr non-blocking - if p.stdout: - os.set_blocking(p.stdout.fileno(), False) - if p.stderr: - os.set_blocking(p.stderr.fileno(), False) - - while True: - # Processes hasn't finished - if p.poll() is not None: - if p.returncode != 0: - raise RuntimeError(error_message) - break - - # Check if timeout hasn't expired - if time.time() - start_time > timeout: - p.kill() - raise TimeoutError(timeout_message) - - if p.stdout and stdout_callback is not None: - line = p.stdout.readline() - if len(line) > 0: - line = line.rstrip() # strip trailing "\n" - stdout_callback(line) + # Start the provided command, and return a handle. The command will run in the + # background. + proc = await asyncio.subprocess.create_subprocess_exec( + *args, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + + assert proc.stdout is not None + assert proc.stderr is not None + + # Create asynchronous tasks that will consume the standard streams of the command, + # and call callbacks if necessary. + stdout_task = asyncio.create_task(read_stream(proc.stdout, stdout_callback)) + stderr_task = asyncio.create_task(read_stream(proc.stderr, stderr_callback)) + + # Wait until the command has finished, for a specific timeout. Then, verify that the + # command has completed successfully. In any other case, raise an exception. + try: + ret = await asyncio.wait_for(proc.wait(), timeout=timeout) + except asyncio.exceptions.TimeoutError: + raise TimeoutError(timeout_message) + if ret != 0: + raise RuntimeError(error_message) - if p.stderr and stderr_callback is not None: - line = p.stderr.readline() - if len(line) > 0: - line = line.rstrip() # strip trailing "\n" - stderr_callback(line) + # Wait until the tasks that consume the command's standard streams have exited as + # well, and return their output. + stdout = await stdout_task + stderr = await stderr_task + return (stdout, stderr) class DangerzoneConverter: def __init__(self) -> None: self.percentage: float = 0.0 - def document_to_pixels(self) -> None: + async def document_to_pixels(self) -> None: conversions: Dict[str, Dict[str, Optional[str]]] = { # .pdf @@ -194,7 +202,7 @@ def document_to_pixels(self) -> None: "/tmp", "/tmp/input_file", ] - run_command( + await run_command( args, error_message="Conversion to PDF with LibreOffice failed", timeout_message=f"Error converting document to PDF, LibreOffice timed out after {DEFAULT_TIMEOUT} seconds", @@ -208,7 +216,7 @@ def document_to_pixels(self) -> None: "/tmp/input_file", "/tmp/input_file.pdf", ] - run_command( + await run_command( args, error_message="Conversion to PDF with GraphicsMagick failed", timeout_message=f"Error converting document to PDF, GraphicsMagick timed out after {DEFAULT_TIMEOUT} seconds", @@ -222,25 +230,21 @@ def document_to_pixels(self) -> None: # Obtain number of pages self.update_progress("Calculating number of pages") - self.num_pages: Union[None, int] = None - - def get_num_pages(line: str) -> None: - search = re.search(r"^Pages: (\d+)", line) - if search is not None: - self.num_pages = int(search.group(1)) - - run_command( + stdout, _ = await run_command( ["pdfinfo", pdf_filename], error_message="PDF file is corrupted", timeout_message=f"Extracting metadata from PDF timed out after 1 second", timeout=1, - stdout_callback=get_num_pages, ) - if self.num_pages == None: - raise ValueError("Number of pages could not be extraced from PDF") - def pdftoppm_progress_callback(line: str) -> None: - """Function called for every line the 'pdftoppm'command outputs + search = re.search(r"Pages:\s*(\d+)\s*\n", stdout.decode()) + if search is not None: + self.num_pages: int = int(search.group(1)) + else: + raise ValueError("Number of pages could not be extracted from PDF") + + def pdftoppm_progress_callback(line: bytes) -> None: + """Function called for every line the 'pdftoppm' command outputs Sample pdftoppm output: @@ -253,7 +257,7 @@ def pdftoppm_progress_callback(line: str) -> None: Each successful line is in the format "{page} {page_num} {ppm_filename}" """ try: - (page_str, num_pages_str, _) = line.split() + (page_str, num_pages_str, _) = line.decode().split() num_pages = int(num_pages_str) page = int(page_str) except ValueError as e: @@ -305,8 +309,8 @@ def pdftoppm_progress_callback(line: str) -> None: page_base = "/tmp/page" # Convert to PPM, which is essentially an RGB format - pdftoppm_timeout = 1.0 * self.num_pages # type: ignore [operator] - run_command( + pdftoppm_timeout = 1.0 * self.num_pages + await run_command( [ "pdftoppm", pdf_filename, @@ -329,7 +333,7 @@ def pdftoppm_progress_callback(line: str) -> None: ): shutil.move(filename, "/dangerzone") - def pixels_to_pdf(self) -> None: + async def pixels_to_pdf(self) -> None: self.percentage = 50.0 num_pages = len(glob.glob("/dangerzone/page-*.rgb")) @@ -354,7 +358,7 @@ def pixels_to_pdf(self) -> None: self.update_progress( f"Converting page {page}/{num_pages} from pixels to searchable PDF" ) - run_command( + await run_command( [ "gm", "convert", @@ -368,7 +372,7 @@ def pixels_to_pdf(self) -> None: error_message=f"Page {page}/{num_pages} conversion to PNG failed", timeout_message=f"Error converting pixels to PNG, convert timed out after {DEFAULT_TIMEOUT} seconds", ) - run_command( + await run_command( [ "tesseract", png_filename, @@ -387,7 +391,7 @@ def pixels_to_pdf(self) -> None: self.update_progress( f"Converting page {page}/{num_pages} from pixels to PDF" ) - run_command( + await run_command( [ "gm", "convert", @@ -410,7 +414,7 @@ def pixels_to_pdf(self) -> None: for page in range(1, num_pages + 1): args.append(f"/tmp/page-{page}.pdf") args.append(f"/tmp/safe-output.pdf") - run_command( + await run_command( args, error_message="Merging pages into a single PDF failed", timeout_message=f"Error merging pages into a single PDF, pdfunite timed out after {DEFAULT_TIMEOUT} seconds", @@ -421,7 +425,7 @@ def pixels_to_pdf(self) -> None: # Compress self.update_progress("Compressing PDF") compress_timeout = num_pages * COMPRESSION_TIMEOUT - run_command( + await run_command( ["ps2pdf", "/tmp/safe-output.pdf", "/tmp/safe-output-compressed.pdf"], timeout_message=f"Error compressing PDF, ps2pdf timed out after {compress_timeout} seconds", error_message="Compressing PDF failed", @@ -444,7 +448,7 @@ def update_progress(self, text: str, *, error: bool = False) -> None: sys.stdout.flush() -def main() -> int: +async def main() -> int: if len(sys.argv) != 2: print(f"Usage: {sys.argv[0]} [document-to-pixels]|[pixels-to-pdf]") return -1 @@ -453,9 +457,9 @@ def main() -> int: try: if sys.argv[1] == "document-to-pixels": - converter.document_to_pixels() + await converter.document_to_pixels() elif sys.argv[1] == "pixels-to-pdf": - converter.pixels_to_pdf() + await converter.pixels_to_pdf() except (RuntimeError, TimeoutError, ValueError) as e: converter.update_progress(str(e), error=True) return 1 @@ -464,4 +468,4 @@ def main() -> int: if __name__ == "__main__": - sys.exit(main()) + sys.exit(asyncio.run(main())) diff --git a/dev_scripts/env.py b/dev_scripts/env.py index baa24703d..80020843c 100755 --- a/dev_scripts/env.py +++ b/dev_scripts/env.py @@ -74,7 +74,7 @@ # FIXME: Drop this fix after it's resolved upstream. # See https://github.com/freedomofpress/dangerzone/issues/286#issuecomment-1347149783 -RUN dnf reinstall -y shadow-utils && dnf clean all +RUN rpm --restore shadow-utils RUN dnf install -y mupdf && dnf clean all """ @@ -120,7 +120,7 @@ # FIXME: Drop this fix after it's resolved upstream. # See https://github.com/freedomofpress/dangerzone/issues/286#issuecomment-1347149783 -RUN dnf reinstall -y shadow-utils && dnf clean all +RUN rpm --restore shadow-utils """ # The Dockerfile for building an environment with Dangerzone installed in it. Parts of diff --git a/dev_scripts/pytest-wrapper.py b/dev_scripts/pytest-wrapper.py index 88e0ae68f..d7aafc4ca 100755 --- a/dev_scripts/pytest-wrapper.py +++ b/dev_scripts/pytest-wrapper.py @@ -14,7 +14,6 @@ import subprocess import sys -import pytest from pkg_resources import parse_version from dangerzone.isolation_provider.container import Container @@ -30,14 +29,22 @@ def get_podman_version(): return version.split("-dev")[0] # exclude "-dev" suffix from version +def run_tests(pytest_args): + cmd = ["pytest"] + pytest_args + try: + subprocess.run(cmd, check=True) + except subprocess.CalledProcessError: + sys.exit(1) + + def run_tests_in_parallel(pytest_args): - args = pytest_args + ["-n", "4"] - exit_code = pytest.main(args) + print("running tests in parallel") + run_tests(pytest_args + ["-n", "4"]) def run_tests_in_sequence(pytest_args): print("running tests sequentially") - exit_code = pytest.main(pytest_args) + run_tests(pytest_args) if __name__ == "__main__": diff --git a/poetry.lock b/poetry.lock index a11833a36..7b994b195 100644 --- a/poetry.lock +++ b/poetry.lock @@ -582,25 +582,6 @@ files = [ [package.extras] diagrams = ["jinja2", "railroad-diagrams"] -[[package]] -name = "pyside2" -version = "5.15.2.1" -description = "Python bindings for the Qt cross-platform application and UI framework" -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <3.11" -files = [ - {file = "PySide2-5.15.2.1-5.15.2-cp27-cp27m-macosx_10_13_intel.whl", hash = "sha256:b5e1d92f26b0bbaefff67727ccbb2e1b577f2c0164b349b3d6e80febb4c5bde2"}, - {file = "PySide2-5.15.2.1-5.15.2-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:235240b6ec8206d9fdf0232472c6ef3241783d480425e5b54796f06e39ed23da"}, - {file = "PySide2-5.15.2.1-5.15.2-cp35.cp36.cp37.cp38.cp39.cp310-abi3-macosx_10_13_intel.whl", hash = "sha256:a9e2e6bbcb5d2ebb421e46e72244a0f4fe0943b2288115f80a863aacc1de1f06"}, - {file = "PySide2-5.15.2.1-5.15.2-cp35.cp36.cp37.cp38.cp39.cp310-abi3-manylinux1_x86_64.whl", hash = "sha256:23886c6391ebd916e835fa1b5ae66938048504fd3a2934ae3189a96cd5ac0b46"}, - {file = "PySide2-5.15.2.1-5.15.2-cp35.cp36.cp37.cp38.cp39.cp310-none-win32.whl", hash = "sha256:439509e53cfe05abbf9a99422a2cbad086408b0f9bf5e6f642ff1b13b1f8b055"}, - {file = "PySide2-5.15.2.1-5.15.2-cp35.cp36.cp37.cp38.cp39.cp310-none-win_amd64.whl", hash = "sha256:af6b263fe63ba6dea7eaebae80aa7b291491fe66f4f0057c0aafe780cc83da9d"}, -] - -[package.dependencies] -shiboken2 = "5.15.2.1" - [[package]] name = "pyside6" version = "6.4.2" @@ -814,22 +795,6 @@ docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-g testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mock", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] -[[package]] -name = "shiboken2" -version = "5.15.2.1" -description = "Python / C++ bindings helper module" -category = "main" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <3.11" -files = [ - {file = "shiboken2-5.15.2.1-5.15.2-cp27-cp27m-macosx_10_13_intel.whl", hash = "sha256:f890f5611ab8f48b88cfecb716da2ac55aef99e2923198cefcf781842888ea65"}, - {file = "shiboken2-5.15.2.1-5.15.2-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:87079c07587859a525b9800d60b1be971338ce9b371d6ead81f15ee5a46d448b"}, - {file = "shiboken2-5.15.2.1-5.15.2-cp35.cp36.cp37.cp38.cp39.cp310-abi3-macosx_10_13_intel.whl", hash = "sha256:ffd3d0ec3d508e592d7ee3885d27fee1f279a49989f734eb130f46d9501273a9"}, - {file = "shiboken2-5.15.2.1-5.15.2-cp35.cp36.cp37.cp38.cp39.cp310-abi3-manylinux1_x86_64.whl", hash = "sha256:63debfcc531b6a2b4985aa9b71433d2ad3bac542acffc729cc0ecaa3854390c0"}, - {file = "shiboken2-5.15.2.1-5.15.2-cp35.cp36.cp37.cp38.cp39.cp310-none-win32.whl", hash = "sha256:eb0da44b6fa60c6bd317b8f219e500595e94e0322b33ec5b4e9f406bedaee555"}, - {file = "shiboken2-5.15.2.1-5.15.2-cp35.cp36.cp37.cp38.cp39.cp310-none-win_amd64.whl", hash = "sha256:a0d0fdeb12b72c8af349b9642ccc67afd783dca449309f45e78cda50272fd6b7"}, -] - [[package]] name = "shiboken6" version = "6.4.2" @@ -945,5 +910,5 @@ testing = ["flake8 (<5)", "func-timeout", "jaraco.functools", "jaraco.itertools" [metadata] lock-version = "2.0" -python-versions = ">=3.7,<3.11" -content-hash = "0c6081bcb22cdd2dae101bb2eaf3d5128b230246b653ca2abb400ad1fad7dc54" +python-versions = ">=3.7,<3.12" +content-hash = "6f9d5cf06f7f00efbf05fe3531356e29796538a990ed43f2638541dd66003428" diff --git a/pyproject.toml b/pyproject.toml index b5fdd084f..77bcd9463 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,11 +6,10 @@ authors = ["Micah Lee "] license = "MIT" [tool.poetry.dependencies] -python = ">=3.7,<3.11" +python = ">=3.7,<3.12" click = "*" appdirs = "*" -PySide2 = {version = "5.15.2.1", platform = "linux"} -PySide6 = {version = "^6.4.1", markers = "sys_platform == 'win32' or sys_platform == 'darwin'"} +PySide6 = "^6.4.1" colorama = "*" pyxdg = {version = "*", platform = "linux"}