diff --git a/machine/corpora/scripture_ref_usfm_parser_handler.py b/machine/corpora/scripture_ref_usfm_parser_handler.py index 5de5710..163b951 100644 --- a/machine/corpora/scripture_ref_usfm_parser_handler.py +++ b/machine/corpora/scripture_ref_usfm_parser_handler.py @@ -39,7 +39,7 @@ def verse( self, state: UsfmParserState, number: str, marker: str, alt_number: Optional[str], pub_number: Optional[str] ) -> None: if state.verse_ref == self._cur_verse_ref: - self._end_verse_text_wrapper(state) + self._end_verse_text(state, self._create_verse_refs()) # ignore duplicate verses self._duplicate_verse = True elif are_overlapping_verse_ranges(number, self._cur_verse_ref.verse): @@ -61,7 +61,7 @@ def start_para( ) -> None: if self._cur_verse_ref.is_default: self._update_verse_ref(state.verse_ref, marker) - if not state.is_verse_text or self._current_text_type == ScriptureTextType.NONVERSE: + if not state.is_verse_text or marker == "d": self._start_parent_element(marker) self._start_non_verse_text_wrapper(state) @@ -123,6 +123,7 @@ def _start_verse_text_wrapper(self, state: UsfmParserState) -> None: def _end_verse_text_wrapper(self, state: UsfmParserState) -> None: if not self._duplicate_verse and self._cur_verse_ref.verse_num > 0: self._end_verse_text(state, self._create_verse_refs()) + if self._cur_verse_ref.verse_num > 0: self._cur_text_type_stack.pop() def _start_non_verse_text_wrapper(self, state: UsfmParserState) -> None: diff --git a/tests/corpora/test_usfm_tokenizer.py b/tests/corpora/test_usfm_tokenizer.py index 0515de2..1e3ecb7 100644 --- a/tests/corpora/test_usfm_tokenizer.py +++ b/tests/corpora/test_usfm_tokenizer.py @@ -7,7 +7,7 @@ def test_tokenize() -> None: usfm = _read_usfm() usfm_tokenizer = UsfmTokenizer() tokens = usfm_tokenizer.tokenize(usfm) - assert len(tokens) == 202 + assert len(tokens) == 204 assert tokens[0].type is UsfmTokenType.BOOK assert tokens[0].marker == "id" diff --git a/tests/testutils/data/usfm/Tes/41MATTes.SFM b/tests/testutils/data/usfm/Tes/41MATTes.SFM index 9c8968a..3de6f69 100644 --- a/tests/testutils/data/usfm/Tes/41MATTes.SFM +++ b/tests/testutils/data/usfm/Tes/41MATTes.SFM @@ -32,7 +32,9 @@ \v 4b Chapter two, verse four. \p \v 6 Chapter two, verse \w six|strong="12345" \w*. +\p \v 6 Bad verse. +\p \v 5 Chapter two, verse five \rq (MAT 3:1)\rq*. \v 7a Chapter two, verse seven A, \s Section header \ts-s\*