diff --git a/machine/corpora/update_usfm_parser_handler.py b/machine/corpora/update_usfm_parser_handler.py
index 71617bc..fafabcc 100644
--- a/machine/corpora/update_usfm_parser_handler.py
+++ b/machine/corpora/update_usfm_parser_handler.py
@@ -104,9 +104,10 @@ def end_usfm(self, state: UsfmParserState) -> None:
         super().end_usfm(state)
 
     def start_book(self, state: UsfmParserState, marker: str, code: str) -> None:
-        self._verse_rows_ref = state.verse_ref.copy()
-        self._update_verse_rows_map()
-        self._update_verse_rows()
+        if self._verse_rows_ref.book_num != state.verse_ref.book_num:
+            self._verse_rows_ref = state.verse_ref.copy()
+            self._update_verse_rows_map()
+            self._update_verse_rows()
 
         self._collect_readonly_tokens(state)
         self._update_block_stack.append(UsfmUpdateBlock())
diff --git a/tests/corpora/test_update_usfm_parser_handler.py b/tests/corpora/test_update_usfm_parser_handler.py
index a81920e..202f522 100644
--- a/tests/corpora/test_update_usfm_parser_handler.py
+++ b/tests/corpora/test_update_usfm_parser_handler.py
@@ -1294,6 +1294,45 @@ def test_duplicate_verses() -> None:
     assert_usfm_equals(target, result)
 
 
+def test_duplicate_id_tags() -> None:
+    rows = [
+        UpdateUsfmRow(scr_ref("MAT 1:0/1:s"), "new section header"),
+        UpdateUsfmRow(scr_ref("MAT 1:1"), "new verse 1"),
+        UpdateUsfmRow(scr_ref("MAT 1:2"), "new verse 2"),
+        UpdateUsfmRow(scr_ref("MAT 1:3"), "new verse 3"),
+        UpdateUsfmRow(scr_ref("MAT 1:4"), "new verse 4"),
+    ]
+    usfm = r"""\id MAT
+\s section header
+\c 1
+\s1 beginning-of-chapter header
+\p
+\v 1 verse 1
+\id
+\v 2 verse 2
+\id MAT
+\v 3 verse 3
+\id MRK
+\v 4 verse 4
+"""
+
+    target = update_usfm(rows, usfm, paragraph_behavior=UpdateUsfmMarkerBehavior.STRIP)
+    result = r"""\id MAT
+\s new section header
+\c 1
+\s1 beginning-of-chapter header
+\p
+\v 1 new verse 1
+\id
+\v 2 new verse 2
+\id MAT
+\v 3 new verse 3
+\id MRK
+\v 4 new verse 4
+"""
+    assert_usfm_equals(target, result)
+
+
 def test_pass_remark():
     rows = [
         UpdateUsfmRow(