diff --git a/dangerzone/conversion/common.py b/dangerzone/conversion/common.py index b367bc062..6a07f34a4 100644 --- a/dangerzone/conversion/common.py +++ b/dangerzone/conversion/common.py @@ -192,5 +192,6 @@ def calculate_timeout( async def convert(self) -> None: pass - def update_progress(self, text: str, *, error: bool = False) -> None: + @abstractmethod + def update_progress(self, text: str) -> None: pass diff --git a/dangerzone/conversion/doc_to_pixels.py b/dangerzone/conversion/doc_to_pixels.py index 6228c07f0..28928dc05 100644 --- a/dangerzone/conversion/doc_to_pixels.py +++ b/dangerzone/conversion/doc_to_pixels.py @@ -35,6 +35,9 @@ async def write_page_height(self, height: int) -> None: async def write_page_data(self, data: bytes) -> None: return await self.write_bytes(data) + def update_progress(self, text: str, *, error: bool = False) -> None: + print(text, file=sys.stderr) + async def convert(self) -> None: conversions: Dict[str, Dict[str, Optional[str]]] = { # .pdf diff --git a/dangerzone/conversion/pixels_to_pdf.py b/dangerzone/conversion/pixels_to_pdf.py index 0ff492cf7..072e197d5 100644 --- a/dangerzone/conversion/pixels_to_pdf.py +++ b/dangerzone/conversion/pixels_to_pdf.py @@ -93,11 +93,11 @@ async def convert( def update_progress(self, text: str, *, error: bool = False) -> None: if running_on_qubes(): if self.progress_callback: - self.progress_callback(error, text, int(self.percentage)) + self.progress_callback(error, text, self.percentage) else: print( json.dumps( - {"error": error, "text": text, "percentage": int(self.percentage)} + {"error": error, "text": text, "percentage": self.percentage} ) ) sys.stdout.flush() diff --git a/dangerzone/isolation_provider/base.py b/dangerzone/isolation_provider/base.py index 5a19168e7..17f8e0017 100644 --- a/dangerzone/isolation_provider/base.py +++ b/dangerzone/isolation_provider/base.py @@ -83,13 +83,13 @@ def convert( exception = errors.exception_from_error_code(error_code) document.mark_as_failed() except errors.ConversionException as e: - self.print_progress_trusted(document, True, str(e), 0) + self.print_progress(document, True, str(e), 0) document.mark_as_failed() except Exception as e: log.exception( f"An exception occurred while converting document '{document.id}'" ) - self.print_progress_trusted(document, True, str(e), 0) + self.print_progress(document, True, str(e), 0) document.mark_as_failed() def doc_to_pixels(self, document: Document, tempdir: str) -> None: @@ -120,7 +120,7 @@ def doc_to_pixels(self, document: Document, tempdir: str) -> None: sw.start() for page in range(1, n_pages + 1): text = f"Converting page {page}/{n_pages} to pixels" - self.print_progress_trusted(document, False, text, self.percentage) + self.print_progress(document, False, text, self.percentage) width = read_int(self.proc.stdout, timeout=sw.remaining) height = read_int(self.proc.stdout, timeout=sw.remaining) @@ -151,7 +151,7 @@ def doc_to_pixels(self, document: Document, tempdir: str) -> None: # TODO handle leftover code input text = "Converted document to pixels" - self.print_progress_trusted(document, False, text, self.percentage) + self.print_progress(document, False, text, self.percentage) if getattr(sys, "dangerzone_dev", False): assert self.proc.stderr is not None @@ -168,11 +168,11 @@ def pixels_to_pdf( ) -> None: pass - def _print_progress( + def print_progress( self, document: Document, error: bool, text: str, percentage: float ) -> None: s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] " - s += Fore.CYAN + f"{percentage}% " + Style.RESET_ALL + s += Fore.CYAN + f"{int(percentage)}% " + Style.RESET_ALL if error: s += Fore.RED + text + Style.RESET_ALL log.error(s) @@ -183,19 +183,6 @@ def _print_progress( if self.progress_callback: self.progress_callback(error, text, percentage) - def print_progress_trusted( - self, document: Document, error: bool, text: str, percentage: float - ) -> None: - return self._print_progress(document, error, text, int(percentage)) - - def print_progress( - self, document: Document, error: bool, untrusted_text: str, percentage: float - ) -> None: - text = replace_control_chars(untrusted_text) - return self.print_progress_trusted( - document, error, "UNTRUSTED> " + text, percentage - ) - @abstractmethod def get_max_parallel_conversions(self) -> int: pass diff --git a/dangerzone/isolation_provider/container.py b/dangerzone/isolation_provider/container.py index 2a68b281d..acbccdd76 100644 --- a/dangerzone/isolation_provider/container.py +++ b/dangerzone/isolation_provider/container.py @@ -11,12 +11,7 @@ from ..conversion import errors from ..document import Document -from ..util import ( - get_resource_path, - get_subprocess_startupinfo, - get_tmp_dir, - replace_control_chars, -) +from ..util import get_resource_path, get_subprocess_startupinfo, get_tmp_dir from .base import IsolationProvider # Define startupinfo for subprocesses @@ -147,29 +142,22 @@ def assert_field_type(self, val: Any, _type: object) -> None: if not type(val) == _type: raise ValueError("Status field has incorrect type") - def parse_progress(self, document: Document, untrusted_line: str) -> None: + def parse_progress_trusted(self, document: Document, line: str) -> None: """ Parses a line returned by the container. """ try: - untrusted_status = json.loads(untrusted_line) - - text = untrusted_status["text"] + status = json.loads(line) + text = status["text"] self.assert_field_type(text, str) - - error = untrusted_status["error"] + error = status["error"] self.assert_field_type(error, bool) - - percentage = untrusted_status["percentage"] - self.assert_field_type(percentage, int) - + percentage = status["percentage"] + self.assert_field_type(percentage, float) self.print_progress(document, error, text, percentage) except Exception: - line = replace_control_chars(untrusted_line) - error_message = ( - f"Invalid JSON returned from container:\n\n\tUNTRUSTED> {line}" - ) - self.print_progress_trusted(document, True, error_message, -1) + error_message = f"Invalid JSON returned from container:\n\n\t {line}" + self.print_progress(document, True, error_message, -1) def exec( self, @@ -249,8 +237,9 @@ def pixels_to_pdf( ] pixels_to_pdf_proc = self.exec_container(command, extra_args) - for line in pixels_to_pdf_proc.stdout: - self.parse_progress(document, line) + if pixels_to_pdf_proc.stdout: + for line in pixels_to_pdf_proc.stdout: + self.parse_progress_trusted(document, line.decode()) error_code = pixels_to_pdf_proc.wait() if error_code != 0: log.error("pixels-to-pdf failed") diff --git a/dangerzone/isolation_provider/qubes.py b/dangerzone/isolation_provider/qubes.py index b64da4399..f778afebc 100644 --- a/dangerzone/isolation_provider/qubes.py +++ b/dangerzone/isolation_provider/qubes.py @@ -36,7 +36,7 @@ def pixels_to_pdf( self, document: Document, tempdir: str, ocr_lang: Optional[str] ) -> None: def print_progress_wrapper(error: bool, text: str, percentage: float) -> None: - self.print_progress_trusted(document, error, text, percentage) + self.print_progress(document, error, text, percentage) converter = PixelsToPDF(progress_callback=print_progress_wrapper) try: diff --git a/tests/isolation_provider/base.py b/tests/isolation_provider/base.py index f65effe88..a8d4a0a4d 100644 --- a/tests/isolation_provider/base.py +++ b/tests/isolation_provider/base.py @@ -18,46 +18,6 @@ ) @pytest.mark.skipif(not running_on_qubes(), reason="Not on a Qubes system") class IsolationProviderTest: - def test_print_progress( - self, - provider: base.IsolationProvider, - uncommon_text: str, - sanitized_text: str, - mocker: MockerFixture, - ) -> None: - """Test that the print_progress() method of our isolation providers sanitizes text. - - Iterate our isolation providers and make sure that their print_progress() methods - sanitizes the provided text, before passing it to the logging functions and other - callbacks. - """ - d = Document() - provider.progress_callback = mocker.MagicMock() - log_info_spy = mocker.spy(base.log, "info") - log_error_spy = mocker.spy(base.log, "error") - _print_progress_spy = mocker.spy(provider, "_print_progress") - - for error, untrusted_text, sanitized_text in [ - (True, "normal text", "UNTRUSTED> normal text"), - (False, "normal text", "UNTRUSTED> normal text"), - (True, uncommon_text, "UNTRUSTED> " + sanitized_text), - (False, uncommon_text, "UNTRUSTED> " + sanitized_text), - ]: - log_info_spy.reset_mock() - log_error_spy.reset_mock() - - provider.print_progress(d, error, untrusted_text, 0) - provider.progress_callback.assert_called_with(error, sanitized_text, 0) # type: ignore [union-attr] - _print_progress_spy.assert_called_with(d, error, sanitized_text, 0) - if error: - assert log_error_spy.call_args[0][0].endswith( - sanitized_text + Style.RESET_ALL - ) - log_info_spy.assert_not_called() - else: - assert log_info_spy.call_args[0][0].endswith(sanitized_text) - log_error_spy.assert_not_called() - def test_max_pages_received( self, pdf_11k_pages: str, diff --git a/tests/isolation_provider/test_container.py b/tests/isolation_provider/test_container.py index 4ad7831d1..8fedc85a9 100644 --- a/tests/isolation_provider/test_container.py +++ b/tests/isolation_provider/test_container.py @@ -19,77 +19,4 @@ def provider() -> Container: class TestContainer(IsolationProviderTest): - def test_parse_progress( - self, - provider: Container, - uncommon_text: str, - sanitized_text: str, - mocker: MockerFixture, - ) -> None: - """Test that the `parse_progress()` function handles escape sequences properly.""" - container = Container(enable_timeouts=False) - container.progress_callback = mocker.MagicMock() - print_progress_mock = mocker.patch.object(container, "_print_progress") - d = Document() - - # Test 1 - Check that normal JSON values are printed as is. - simple_json = json.dumps({"text": "text", "error": False, "percentage": 0}) - container.parse_progress(d, simple_json) - print_progress_mock.assert_called_with(d, False, "UNTRUSTED> text", 0) - - # Test 2 - Check that a totally invalid string is reported as a failure. If this - # string contains escape characters, they should be sanitized as well. - def assert_invalid_json(text: str) -> None: - print_progress_mock.assert_called_with( - d, - True, - f"Invalid JSON returned from container:\n\n\tUNTRUSTED> {text}", - -1, - ) - - # Try first with a trivially invalid string. - invalid_json = "()" - container.parse_progress(d, invalid_json) - assert_invalid_json(invalid_json) - - # Try next with an invalid string that contains uncommon text. - container.parse_progress(d, uncommon_text) - assert_invalid_json(sanitized_text) - - # Test 3 - Check that a structurally valid JSON value with escape characters in it - # is sanitized. - valid_json = json.dumps( - {"text": uncommon_text, "error": False, "percentage": 0} - ) - sanitized_json = json.dumps( - {"text": sanitized_text, "error": False, "percentage": 0} - ) - container.parse_progress(d, valid_json) - print_progress_mock.assert_called_with( - d, False, "UNTRUSTED> " + sanitized_text, 0 - ) - - # Test 4 - Check that a structurally valid JSON, that otherwise does not have the - # expected keys, or the expected value types, is reported as an error, and any - # escape sequences are sanitized. - - keys = ["text", "error", "percentage", uncommon_text] - values = [uncommon_text, False, 0, None] - possible_kvs = list(itertools.product(keys, values, repeat=1)) - - # Based on the above keys and values, create any combination possible, from 0 to 3 - # elements. Take extra care to: - # - # * Remove combinations with non-unique keys. - # * Ignore the sole valid combination (see `valid_json`), since we have already - # tested it above. - for i in range(len(keys)): - for combo in itertools.combinations(possible_kvs, i): - dict_combo: Dict[str, Any] = dict(combo) # type: ignore [arg-type] - if len(combo) == len(dict_combo.keys()): - bad_json = json.dumps(dict_combo) - sanitized_json = bad_json.replace(uncommon_text, sanitized_text) - if bad_json == valid_json: - continue - container.parse_progress(d, bad_json) - assert_invalid_json(sanitized_json) + pass