Merge pull request #347 from Witiko/fix/paragraphs-with-trailing-spaces

Correctly parse paragraphs with trailing spaces
Witiko · Sep 9, 2023 · d4c8e22 · d4c8e22
2 parents c1af406 + 4581126
commit d4c8e22
Show file tree

Hide file tree

Showing 14 changed files with 150 additions and 67 deletions.
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -281,7 +281,7 @@ jobs:
     runs-on: ubuntu-latest
     permissions:
       contents: write
-    if: github.event_name == 'pull_request'
+    if: github.event_name == 'pull_request_target'
     steps:
       - name: Automatically merge pull request
         uses: pascalgn/automerge-action@v0.15.6

diff --git a/CHANGES.md b/CHANGES.md
@@ -2,6 +2,15 @@
 
 ## 3.1.0
 
+Fixes:
+
+- Correctly parse paragraphs with trailing spaces.
+  (danopolan/istqb_latex#77, #345, #347)
+
+Unit Tests:
+
+- Add support for YAML metadata in testfiles. (#345, #347)
+
 ## 3.0.0 (2023-08-25)
 
 Development:

diff --git a/markdown.dtx b/markdown.dtx
@@ -27904,7 +27904,7 @@ end
                                     + V("Blank")^0 / writer.interblocksep
                                     )
                                   )
-                                + V("Paragraph")
+                                + ( V("Paragraph") + V("Plain") )
                                 * ( V("Blank")^0 * parsers.eof
                                   + ( V("Blank")^2 / writer.paragraphsep
                                     + V("Blank")^0 / writer.interblocksep
@@ -27916,7 +27916,7 @@ end
                                     + V("Blank")^0 / writer.interblocksep
                                     )
                                   )
-                                + V("Paragraph")
+                                + ( V("Paragraph") + V("Plain") )
                                 * ( V("Blank")^0 * parsers.eof
                                   + V("Blank")^0 / writer.paragraphsep
                                   )
@@ -33687,11 +33687,9 @@ end
 % \end{markdown}
 %  \begin{macrocode}
 \startluacode
-  document.markdown_preserve_trailing_spaces = true
-  document.markdown_is_buffering = false
+  document.markdown_buffering = false
   local function preserve_trailing_spaces(line)
-    if document.markdown_is_buffering and
-       document.markdown_preserve_trailing_spaces then
+    if document.markdown_buffering then
       line = line:gsub("[ \t][ \t]$", "\t\t")
     end
     return line
@@ -33702,11 +33700,11 @@ end
   \catcode`\|=0%
   \catcode`\\=12%
   |gdef|startmarkdown{%
-    |ctxlua{document.markdown_is_buffering = true}%
+    |ctxlua{document.markdown_buffering = true}%
     |markdownReadAndConvert{\stopmarkdown}%
                            {|stopmarkdown}}%
   |gdef|stopmarkdown{%
-    |ctxlua{document.markdown_is_buffering = false}%
+    |ctxlua{document.markdown_buffering = false}%
     |markdownEnd}%
 |endgroup
 %    \end{macrocode}

diff --git a/tests/requirements.txt b/tests/requirements.txt
@@ -1,3 +1,4 @@
 click~=8.1.6
 more-itertools~=10.0.0
+PyYAML~=6.0.1
 tqdm~=4.65.0
diff --git a/tests/templates/context-mkiv/verbatim/body.tex.m4 b/tests/templates/context-mkiv/verbatim/body.tex.m4
@@ -4,8 +4,6 @@
 % Prevent the folding of characters into a single space token in logs.
 \catcode"09=12%  Tabs (U+0009)
 \catcode"20=12%  Spaces (U+0020)
-% Do not preserve trailing spaces in input buffer for parity with other TeX formats.
-\ctxlua{document.markdown_preserve_trailing_spaces = false}
 % Disable active characters of the TeX engine.
 \catcode"7E=12%  Tildes (U+007E)
 % Perform the test.

diff --git a/tests/test.py b/tests/test.py
@@ -21,6 +21,7 @@
 import click
 from more_itertools import chunked, zip_equal
 from tqdm import tqdm
+import yaml
 
 
 # Global variables
@@ -66,6 +67,7 @@
 Template = Path
 Command = Tuple[str, ...]
 
+Metadata = str
 SetupText = str
 InputText = str
 OutputText = str
@@ -74,6 +76,7 @@
 
 
 class ReadTestFile(NamedTuple):
+    metadata: Metadata
     setup_text: SetupText
     input_text: InputText
     expected_output_text: OutputText
@@ -190,6 +193,10 @@ def first_subresult(self) -> TestSubResult:
     def testfile(self) -> TestFile:
         return self.first_subresult.testfile
 
+    @property
+    def metadata(self) -> Metadata:
+        return self.first_subresult.read_test_file.metadata
+
     @property
     def setup_text(self) -> SetupText:
         return self.first_subresult.read_test_file.setup_text
@@ -221,16 +228,19 @@ def try_to_update_testfile(self) -> None:
 
         actual_output_texts = set()
         for subresult in self:
-            self.updated_testfile = False
             actual_output_texts.add(subresult.actual_output_text)
 
         if len(actual_output_texts) > 1:
+            self.updated_testfile = False
             LOGGER.debug(f'Cannot update testfile {self.testfile}, different commands produced different outputs.')
             return
 
         actual_output_text, = list(actual_output_texts)
 
         with self.testfile.open('wt') as f:
+            if self.metadata:
+                print(self.metadata, file=f, end='')
+                print('---', file=f)
             print(self.setup_text, file=f, end='')
             print('<<<', file=f)
             print(self.input_text, file=f, end='')
@@ -433,6 +443,7 @@ def __bool__(self) -> bool:
 
     @classmethod
     def run_test_batch_with_parameters(cls, testfile_batch: TestFileBatch, test_parameters: TestParameters) -> 'BatchResult':
+        assert len(testfile_batch) >= 1
         read_testfile_results, tex_format, template, command = test_parameters
 
         # Create a temporary directory.
@@ -451,7 +462,7 @@ def run_test_batch_with_parameters(cls, testfile_batch: TestFileBatch, test_para
         test_texts.append(head_text)
 
         for testfile_number, (testfile, read_testfile_result) in enumerate(zip_equal(testfile_batch, read_testfile_results)):
-            setup_text, input_text, expected_output_text = read_testfile_result
+            _, setup_text, input_text, expected_output_text = read_testfile_result
 
             test_setup_filename = TEST_SETUP_FILENAME_FORMAT.format(testfile_number)
             test_input_filename = TEST_INPUT_FILENAME_FORMAT.format(testfile_number)
@@ -501,22 +512,28 @@ def run_test_batch_with_parameters(cls, testfile_batch: TestFileBatch, test_para
         return batch_result
 
     @classmethod
-    def run_test_batch(cls, args: Tuple[TestFileBatch, bool]) -> List[TestResult]:
-
+    def run_test_batch(cls, args: Tuple[TestFileBatch, bool]) -> List[Optional[TestResult]]:
         testfile_batch, fail_fast = args
 
         # Run the test for all different test parameters.
-        all_test_subresults_by_parameters: List[List[TestSubResult]] = []
-        for test_parameters in get_test_parameters(testfile_batch):
-            batch_result = cls.run_test_batch_with_parameters(testfile_batch, test_parameters)
-            subresults = list(batch_result.subresults)
-            all_test_subresults_by_parameters.append(subresults)
+        all_subresults: Dict[TestFile, List[TestSubResult]] = defaultdict(list)  # Subresults grouped by testfile.
+        for test_parameters, filtered_testfile_batch in get_test_parameters(testfile_batch):
+            assert len(filtered_testfile_batch) >= 1  # get_test_parameters() skips empty batches.
+            batch_result = cls.run_test_batch_with_parameters(filtered_testfile_batch, test_parameters)
+            for subresult in batch_result.subresults:
+                all_subresults[subresult.testfile].append(subresult)
             if fail_fast and not batch_result:  # If we want to fail fast, stop after the first failed command.
                 break
 
         # For each testfile in the batch, create a test result
-        all_test_subresults_by_testfiles = transpose_rectangle(all_test_subresults_by_parameters)
-        test_results = list(map(TestResult, all_test_subresults_by_testfiles))
+        test_results = list()  # One entry per testfile, in batch order; None marks a skipped testfile.
+        for testfile in testfile_batch:
+            if testfile not in all_subresults:  # NOTE(review): may also hold after an early fail-fast break.
+                LOGGER.warning(f'Skipping testfile {format_testfile(testfile)}, because it supports no test parameters.')
+                test_results.append(None)
+            else:
+                test_result = TestResult(all_subresults[testfile])
+                test_results.append(test_result)
         return test_results
 
 
@@ -568,12 +585,23 @@ def read_testfile(testfile: TestFile) -> ReadTestFile:
             else:
                 input_lines[input_part].append(line)
 
-    setup_text = ''.join(input_lines['setup'])
+    # Read optional YAML metadata.
+    stripped_setup_input_lines = [input_line.strip() for input_line in input_lines['setup']]
+    if '---' in stripped_setup_input_lines:
+        yaml_delimiter_index = stripped_setup_input_lines.index('---')
+        yaml_text = ''.join(input_lines['setup'][:yaml_delimiter_index])
+        setup_text = ''.join(input_lines['setup'][yaml_delimiter_index + 1:])
+    else:
+        yaml_text = ''
+        setup_text = ''.join(input_lines['setup'])
+
+    # Read mandatory parts of the testfile.
     input_text = ''.join(input_lines['input'])
     expected_output_text = ''.join(input_lines['expected_output'])
     if expected_output_text and not expected_output_text.endswith('\n'):
         expected_output_text = f'{expected_output_text}\n'
-    return ReadTestFile(setup_text, input_text, expected_output_text)
+
+    return ReadTestFile(yaml_text, setup_text, input_text, expected_output_text)
 
 
 def read_test_output_from_tex_log_file(tex_log_file: Path) -> OutputText:
@@ -652,15 +680,21 @@ def format_testfile(testfile: TestFile) -> str:
     return format_testfiles([testfile])
 
 
-def get_test_parameters(testfile_batch: TestFileBatch) -> Iterable[TestParameters]:
+def should_process_testfile(read_testfile_result: ReadTestFile, test_parameters: TestParameters) -> bool:
+    metadata = yaml.safe_load(read_testfile_result.metadata)  # Parsed YAML metadata of the testfile.
+    if not isinstance(metadata, dict) or 'if' not in metadata:
+        return True  # The testfile has no restrictions on the parameters.
+    variables = {'format': test_parameters.tex_format, 'template': test_parameters.template.name}
+    # NOTE(review): eval() executes arbitrary Python taken from the testfile; only run trusted testfiles.
+    return bool(eval(metadata['if'], variables))  # Coerce so that the declared bool return type holds.
+
+
+def get_test_parameters(testfile_batch: TestFileBatch) -> Iterable[Tuple[TestParameters, TestFileBatch]]:
     plural = 's' if len(testfile_batch) > 1 else ''
     LOGGER.debug(f'Testfile{plural} {format_testfiles(testfile_batch)}')
 
     # Read testfiles in the batch.
-    read_testfile_results: ReadTestFiles = []
-    for testfile in testfile_batch:
-        read_testfile_result = read_testfile(testfile)
-        read_testfile_results.append(read_testfile_result)
+    read_testfile_results = [read_testfile(testfile) for testfile in testfile_batch]
 
     # List TeX formats, templates, and commands.
     for tex_format in get_tex_formats():
@@ -669,18 +703,21 @@ def get_test_parameters(testfile_batch: TestFileBatch) -> Iterable[TestParameter
             LOGGER.debug(f'    Template {template.name}')
             for command in get_commands(tex_format):
                 LOGGER.debug(f'      Command {format_command(command)}')
-                yield TestParameters(read_testfile_results, tex_format, template, command)
-
-
-def transpose_rectangle(input_list: Iterable[Iterable[T]]) -> List[List[T]]:
-    columns: List[List[T]] = []
-    for column in zip_equal(*input_list):
-        column = list(column)
-        columns.append(column)
-    return columns
-
-
-def run_tests(testfiles: Iterable[TestFile], fail_fast: bool) -> Iterable[TestResult]:
+                # Filter out testfiles that do not support the current parameters.
+                test_parameters = TestParameters(read_testfile_results, tex_format, template, command)
+                filtered_testfile_batch, filtered_read_testfile_results = list(), list()
+                for testfile, read_testfile_result in zip_equal(testfile_batch, read_testfile_results):
+                    if should_process_testfile(read_testfile_result, test_parameters):
+                        filtered_testfile_batch.append(testfile)
+                        filtered_read_testfile_results.append(read_testfile_result)
+                if len(filtered_testfile_batch) == 0:
+                    LOGGER.debug('        Skipping, because no testfiles in the batch support these parameters.')
+                else:
+                    filtered_test_parameters = TestParameters(filtered_read_testfile_results, tex_format, template, command)
+                    yield filtered_test_parameters, filtered_testfile_batch
+
+
+def run_tests(testfiles: Iterable[TestFile], fail_fast: bool) -> Iterable[Optional[TestResult]]:
     testfiles: List[TestFile] = list(testfiles)
 
     def get_all_results() -> Iterable[Iterable[TestResult]]:
@@ -769,7 +806,11 @@ def main(testfiles: Iterable[str], update_tests: Optional[bool], fail_fast: Opti
     show_progress_bar = LOG_LEVEL >= logging.INFO
     progress_bar = tqdm(result_iter, total=len(testfiles), disable=not show_progress_bar)
     for result in progress_bar:
+        if result is None:
+            # A testfile was skipped.
+            continue
         if not result:
+            # A test failed.
             some_tests_failed = True
             if update_tests:
                 result.try_to_update_testfile()

diff --git a/tests/testfiles/CommonMark_0.30/indented_code_blocks/005.test b/tests/testfiles/CommonMark_0.30/indented_code_blocks/005.test
@@ -1,3 +1,6 @@
+if: not (format == 'context-mkiv' and template == 'verbatim')
+---
+
 %   ---RESULT--- "example": 111,
 %   
 %   <pre><code>chunk1

diff --git a/tests/testfiles/CommonMark_0.30/indented_code_blocks/006.test b/tests/testfiles/CommonMark_0.30/indented_code_blocks/006.test
@@ -1,3 +1,6 @@
+if: not (format == 'context-mkiv' and template == 'verbatim')
+---
+
 %   ---RESULT--- "example": 112,
 %   
 %   <pre><code>chunk1

diff --git a/tests/testfiles/README.md b/tests/testfiles/README.md
@@ -1,6 +1,8 @@
 This directory contains subdirectories with test files, which can be recognized
 by the `.test` suffix. A test file has the following syntax:
 
+    Optional YAML metadata
+    ---
     The test setup TeX source code
     <<<
     The test markdown source code
@@ -11,11 +13,17 @@ The test setup TeX source code can be used to configure the Markdown package
 through its plain TeX interface before the test markdown source code is
 processed.
 
+The optional YAML metadata may contain any useful information. Currently, only
+the `if` key is processed: a Python expression over the variables `format` and
+`template` that determines for which TeX formats and templates the testfile runs:
+
+``` yaml
+if: format == 'context-mkiv' or template == 'verbatim'
+```
+
+If no YAML metadata are specified, the `---` delimiter may also be omitted.
+
 The test markdown source code is the markdown code that will be processed
 during the test. The majority of markdown tokens are configured by the support
 files to produce output to the log file. This output will be compared against
 the expected test output.
-
-The `<<<` and `>>>` markers may be surrounded by optional whitespaces. If the
-last section beginning with `>>>` is not present, it will be automatically
-generated during the testing and appended to the test file.
diff --git a/tests/testfiles/lunamark-markdown/fenced-divs.test b/tests/testfiles/lunamark-markdown/fenced-divs.test
@@ -1,8 +1,7 @@
 \def\markdownOptionFencedDivs{true}
-\def\markdownOptionFencedCode{true}
 <<<
-This test ensures that the Lua `fencedDivs` and `fencedCode` options correctly
-propagates through the plain TeX interface.
+This test ensures that the Lua `fencedDivs` option correctly propagates through
+the plain TeX interface.
 
 :::
 This is not a div
@@ -115,7 +114,6 @@ This is not a div
 >>>
 documentBegin
 codeSpan: fencedDivs
-codeSpan: fencedCode
 softLineBreak
 paragraphSeparator
 softLineBreak

diff --git a/tests/testfiles/lunamark-markdown/hard-line-breaks.test b/tests/testfiles/lunamark-markdown/hard-line-breaks.test
@@ -0,0 +1,34 @@
+if: format == 'context-mkiv' or template == 'input'
+---
+<<<
+This test ensures that two or more trailing spaces or two or more trailing
+tabs produce a hard line break for all templates of the ConTeXt MkIV format
+and for the `input` templates of all other formats.
+
+Here is a line with no trailing spaces, producing a soft line break.
+Here is a line with a single trailing space, producing a soft line break. 
+Here is a line with two trailing spaces, producing a hard line break.  
+Here is a line with three trailing spaces, producing a hard line break.   
+Here is a line with four trailing spaces at the end of a paragraph.    
+
+Here is a line with no trailing tabs, producing a soft line break.
+Here is a line with a single trailing tab, producing a soft line break.	
+Here is a line with two trailing tabs, producing a hard line break.		
+Here is a line with three trailing tabs, producing a hard line break.			
+Here is a line with four trailing tabs at the end of the document.				
+>>>
+documentBegin
+softLineBreak
+softLineBreak
+codeSpan: input
+paragraphSeparator
+softLineBreak
+softLineBreak
+hardLineBreak
+hardLineBreak
+paragraphSeparator
+softLineBreak
+softLineBreak
+hardLineBreak
+hardLineBreak
+documentEnd