diff --git a/.gitignore b/.gitignore
index a44bbfe..10b04fd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -50,6 +50,10 @@ coverage.xml
 *.py,cover
 .hypothesis/
 .pytest_cache/
+tests/testutils/data/usfm/source/*
+tests/testutils/data/usfm/target/*
+tests/testutils/data/project/*
+tests/testutils/data/pretranslations.json
 
 # Translations
 *.mo
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 6e4c2a3..5bbc1b5 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -9,5 +9,12 @@
     "editor.defaultFormatter": "ms-python.black-formatter",
     "editor.formatOnSave": true
   },
-  "black-formatter.path": ["poetry", "run", "black"]
+  "black-formatter.path": [
+    "poetry",
+    "run",
+    "black"
+  ],
+  "python.analysis.extraPaths": [
+    "./tests"
+  ]
 }
diff --git a/machine/corpora/__init__.py b/machine/corpora/__init__.py
index 553ed97..f653d17 100644
--- a/machine/corpora/__init__.py
+++ b/machine/corpora/__init__.py
@@ -7,13 +7,17 @@
 from .dbl_bundle_text_corpus import DblBundleTextCorpus
 from .dictionary_alignment_corpus import DictionaryAlignmentCorpus
 from .dictionary_text_corpus import DictionaryTextCorpus
+from .file_paratext_project_settings_parser import FileParatextProjectSettingsParser
 from .flatten import flatten
 from .memory_alignment_collection import MemoryAlignmentCollection
 from .memory_text import MemoryText
 from .multi_key_ref import MultiKeyRef
 from .parallel_text_corpus import ParallelTextCorpus
 from .parallel_text_row import ParallelTextRow
+from .paratext_backup_terms_corpus import ParatextBackupTermsCorpus
 from .paratext_backup_text_corpus import ParatextBackupTextCorpus
+from .paratext_project_settings import ParatextProjectSettings
+from .paratext_project_settings_parser_base import ParatextProjectSettingsParserBase
 from .paratext_text_corpus import ParatextTextCorpus
 from .scripture_element import ScriptureElement
 from .scripture_ref import EMPTY_SCRIPTURE_REF, ScriptureRef
@@ -57,6 +61,8 @@
 from .usx_file_text import UsxFileText
 from .usx_file_text_corpus import UsxFileTextCorpus
 from .usx_zip_text import UsxZipText
+from .zip_paratext_project_settings_parser import ZipParatextProjectSettingsParser
+from .zip_paratext_project_settings_parser_base import ZipParatextProjectSettingsParserBase
 
 __all__ = [
     "AlignedWordPair",
@@ -72,6 +78,7 @@
     "EMPTY_SCRIPTURE_REF",
     "escape_spaces",
     "extract_scripture_corpus",
+    "FileParatextProjectSettingsParser",
     "flatten",
     "is_scripture",
     "lowercase",
@@ -85,7 +92,10 @@
     "normalize",
     "ParallelTextCorpus",
     "ParallelTextRow",
+    "ParatextBackupTermsCorpus",
     "ParatextBackupTextCorpus",
+    "ParatextProjectSettings",
+    "ParatextProjectSettingsParserBase",
     "ParatextTextCorpus",
     "parse_usfm",
     "RtlReferenceOrder",
@@ -128,4 +138,6 @@
     "UsxFileText",
     "UsxFileTextCorpus",
     "UsxZipText",
+    "ZipParatextProjectSettingsParser",
+    "ZipParatextProjectSettingsParserBase",
 ]
diff --git a/machine/corpora/paratext_backup_text_corpus.py b/machine/corpora/paratext_backup_text_corpus.py
index 347ba08..77d7065 100644
--- a/machine/corpora/paratext_backup_text_corpus.py
+++ b/machine/corpora/paratext_backup_text_corpus.py
@@ -1,8 +1,6 @@
 from typing import List
 from zipfile import ZipFile
 
-import regex as re
-
 from ..utils.typeshed import StrPath
 from .scripture_text_corpus import ScriptureTextCorpus
 from .usfm_zip_text import UsfmZipText
@@ -16,20 +14,23 @@ def __init__(self, filename: StrPath, include_markers: bool = False, include_all
             settings = parser.parse()
 
             versification = settings.versification
-            regex = re.compile(f"^{re.escape(settings.file_name_prefix)}.*{re.escape(settings.file_name_suffix)}$")
 
             texts: List[UsfmZipText] = []
-            for sfm_entry in (zi for zi in archive.filelist if regex.match(zi.filename)):
-                texts.append(
-                    UsfmZipText(
-                        settings.stylesheet,
-                        settings.encoding,
-                        filename,
-                        sfm_entry.filename,
-                        versification,
-                        include_markers,
-                        include_all_text,
+            for sfm_entry in archive.filelist:
+                book_id = settings.get_book_id(sfm_entry.filename)
+                if book_id:
+                    texts.append(
+                        UsfmZipText(
+                            settings.stylesheet,
+                            settings.encoding,
+                            book_id,
+                            filename,
+                            sfm_entry.filename,
+                            versification,
+                            include_markers,
+                            include_all_text,
+                            settings.name,
+                        )
                     )
-                )
 
         super().__init__(versification, texts)
diff --git a/machine/corpora/paratext_project_settings.py b/machine/corpora/paratext_project_settings.py
index 41796c6..548ae6c 100644
--- a/machine/corpora/paratext_project_settings.py
+++ b/machine/corpora/paratext_project_settings.py
@@ -1,6 +1,7 @@
 from dataclasses import dataclass
+from typing import Optional
 
-from ..scripture.canon import book_id_to_number
+from ..scripture.canon import book_id_to_number, book_number_to_id
 from ..scripture.verse_ref import Versification
 from .usfm_stylesheet import UsfmStylesheet
 
@@ -19,6 +20,29 @@ class ParatextProjectSettings:
     biblical_terms_project_name: str
     biblical_terms_file_name: str
 
+    def get_book_id(self, file_name: str) -> Optional[str]:
+        """Returns the book id for a book file name, or None when it doesn't match the project's file name pattern."""
+        if not file_name.startswith(self.file_name_prefix) or not file_name.endswith(self.file_name_suffix):
+            return None
+
+        # use an explicit end index: a negative end of -0 would select nothing when the suffix is empty
+        book_part: str = file_name[len(self.file_name_prefix) : len(file_name) - len(self.file_name_suffix)]
+        if self.file_name_form == "MAT":
+            return book_part if len(book_part) == 3 else None
+        if self.file_name_form in ("40", "41"):
+            if book_part != "100" and len(book_part) != 2:
+                return None
+            try:
+                return book_number_to_id(_get_book_number(book_part))
+            except ValueError:
+                # the book part is not a number that _get_book_number can parse, so it is not a book file
+                return None
+        # any other form: the book id follows a two-character prefix, or a three-character one when it is "100"
+        digits_len = 3 if book_part.startswith("100") else 2
+        if len(book_part) != digits_len + 3:
+            return None
+        return book_part[digits_len:]
+
     def get_book_file_name(self, book_id: str) -> str:
         if self.file_name_form == "MAT":
             book_part = book_id
@@ -42,3 +66,17 @@ def _get_book_file_name_digits(book_id: str) -> str:
     if book_num < 120:
         return f"B{book_num - 110}"
     return f"C{book_num - 120}"
+
+
+def _get_book_number(book_file_name_digits: str) -> int:
+    """Converts the digits part of a book file name into a book number."""
+    # a leading letter stands for a block of ten book numbers starting at 100
+    for letter, base in (("A", 100), ("B", 110), ("C", 120)):
+        if book_file_name_digits.startswith(letter):
+            return base + int(book_file_name_digits[1:])
+
+    # purely numeric names: values from 40 upwards are one higher than the book number
+    number = int(book_file_name_digits)
+    if number >= 40:
+        number -= 1
+    return number
diff --git a/machine/corpora/paratext_text_corpus.py b/machine/corpora/paratext_text_corpus.py
index 53c883c..24c24dd 100644
--- a/machine/corpora/paratext_text_corpus.py
+++ b/machine/corpora/paratext_text_corpus.py
@@ -16,15 +16,19 @@ def __init__(self, project_dir: StrPath, include_markers: bool = False, include_
 
         texts: List[UsfmFileText] = []
         for sfm_filename in Path(project_dir).glob(f"{settings.file_name_prefix}*{settings.file_name_suffix}"):
-            texts.append(
-                UsfmFileText(
-                    settings.stylesheet,
-                    settings.encoding,
-                    sfm_filename,
-                    versification,
-                    include_markers,
-                    include_all_text,
+            book_id = settings.get_book_id(sfm_filename.name)
+            if book_id:
+                texts.append(
+                    UsfmFileText(
+                        settings.stylesheet,
+                        settings.encoding,
+                        book_id,
+                        sfm_filename,
+                        versification,
+                        include_markers,
+                        include_all_text,
+                        settings.name,
+                    )
                 )
-            )
 
         super().__init__(versification, texts)
diff --git a/machine/corpora/scripture_element.py b/machine/corpora/scripture_element.py
index 503630b..db98bff 100644
--- a/machine/corpora/scripture_element.py
+++ b/machine/corpora/scripture_element.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 from functools import total_ordering
-from typing import Optional
 
 from ..utils.comparable import Comparable
 
@@ -20,17 +19,27 @@ def position(self) -> int:
     def name(self) -> str:
         return self._name
 
-    def compare_to(self, other: object, strict: Optional[bool] = True) -> int:
+    def to_relaxed(self) -> ScriptureElement:
+        return ScriptureElement(0, self.name)  # presumably position=0, which compare_to matches by name only
+
+    def compare_to(self, other: object) -> int:
         if not isinstance(other, ScriptureElement):
             raise (TypeError("other is not a ScriptureElement object."))
         if self is other:
             return 0
 
-        if strict:
-            res = self.position - other.position
-            if res != 0:
-                return res
-
+        if self.position == 0 or other.position == 0:
+            if self.name == other.name:
+                return 0
+            # names differ: an element at position 0 sorts after any element with a non-zero position
+            if self.position == 0 and other.position != 0:
+                return 1
+            if other.position == 0 and self.position != 0:
+                return -1
+            return (self.name > other.name) - (self.name < other.name)  # both positions are 0: order by name
+        res = self.position - other.position
+        if res != 0:
+            return res
         return (self.name > other.name) - (self.name < other.name)
 
     def __eq__(self, other: ScriptureElement) -> bool:
diff --git a/machine/corpora/scripture_ref.py b/machine/corpora/scripture_ref.py
index b5fd75c..4bbefc2 100644
--- a/machine/corpora/scripture_ref.py
+++ b/machine/corpora/scripture_ref.py
@@ -4,7 +4,7 @@
 from typing import List, Optional
 
 from ..scripture.constants import ENGLISH_VERSIFICATION
-from ..scripture.verse_ref import VerseRef, Versification, are_overlapping_verse_ranges
+from ..scripture.verse_ref import VerseRef, Versification
 from ..utils.comparable import Comparable
 from .scripture_element import ScriptureElement
 
@@ -81,17 +81,15 @@ def is_empty(self) -> bool:
     def is_verse(self) -> bool:
         return VerseRef.verse_num != 0 and len(self.path) == 0
 
+    def to_relaxed(self) -> ScriptureRef:
+        return ScriptureRef(self.verse_ref, [pe.to_relaxed() for pe in self.path])  # same verse, relaxed path
+
     def change_versification(self, versification: Versification) -> ScriptureRef:
         vr: VerseRef = self.verse_ref.copy()
         vr.change_versification(versification)
         return ScriptureRef(vr, self.path)
 
-    def overlaps(self, other: ScriptureRef) -> bool:
-        if not are_overlapping_verse_ranges(self.verse_ref, other.verse_ref):
-            return False
-        return self.path == other.path
-
-    def compare_to(self, other: object, compare_segments: bool = True, strict: bool = True):
+    def compare_to(self, other: object, compare_segments: bool = True) -> int:
         if not isinstance(other, ScriptureRef):
             raise TypeError("other is not a ScriptureRef object.")
         if self is other:
@@ -102,11 +100,14 @@ def compare_to(self, other: object, compare_segments: bool = True, strict: bool
             return res
 
         for se1, se2 in zip(self.path, other.path):
-            res = se1.compare_to(se2, strict=strict)
+            res = se1.compare_to(se2)
             if res != 0:
                 return res
-
-        return len(self.path) - len(other.path)
+        if len(self.path) < len(other.path):
+            return -1
+        elif len(self.path) > len(other.path):
+            return 1
+        return 0
 
     def __eq__(self, other: object) -> bool:
         if not isinstance(other, ScriptureRef):
diff --git a/machine/corpora/scripture_ref_usfm_parser_handler.py b/machine/corpora/scripture_ref_usfm_parser_handler.py
index 65c26cf..a5e0766 100644
--- a/machine/corpora/scripture_ref_usfm_parser_handler.py
+++ b/machine/corpora/scripture_ref_usfm_parser_handler.py
@@ -12,6 +12,7 @@
 
 
 class ScriptureTextType(Enum):
+    NONE = auto()
     NONVERSE = auto()
     VERSE = auto()
     NOTE = auto()
@@ -26,7 +27,7 @@ def __init__(self) -> None:
 
     @property
     def _current_text_type(self) -> ScriptureTextType:
-        return ScriptureTextType.NONVERSE if len(self._cur_text_type_stack) == 0 else self._cur_text_type_stack[-1]
+        return ScriptureTextType.NONE if len(self._cur_text_type_stack) == 0 else self._cur_text_type_stack[-1]
 
     def end_usfm(self, state: UsfmParserState) -> None:
         self._end_verse_text_wrapper(state)
@@ -39,7 +40,7 @@ def verse(
         self, state: UsfmParserState, number: str, marker: str, alt_number: Optional[str], pub_number: Optional[str]
     ) -> None:
         if state.verse_ref == self._cur_verse_ref:
-            self._end_verse_text_wrapper(state)
+            self._end_verse_text(state, self._create_verse_refs())
             # ignore duplicate verses
             self._duplicate_verse = True
         elif are_overlapping_verse_ranges(number, self._cur_verse_ref.verse):
@@ -61,7 +62,7 @@ def start_para(
     ) -> None:
         if self._cur_verse_ref.is_default:
             self._update_verse_ref(state.verse_ref, marker)
-        if not state.is_verse_text:
+        if not state.is_verse_text or marker == "d":
             self._start_parent_element(marker)
             self._start_non_verse_text_wrapper(state)
 
@@ -69,17 +70,23 @@ def end_para(self, state: UsfmParserState, marker: str) -> None:
         if self._current_text_type == ScriptureTextType.NONVERSE:
             self._end_parent_element()
             self._end_non_verse_text_wrapper(state)
+        elif self._current_text_type == ScriptureTextType.NONE:
+            # empty verse paragraph
+            self._start_parent_element(marker)
+            self._start_non_verse_text_wrapper(state)
+            self._end_parent_element()
+            self._end_non_verse_text_wrapper(state)
 
     def start_row(self, state: UsfmParserState, marker: str) -> None:
-        if self._current_text_type == ScriptureTextType.NONVERSE:
+        if self._current_text_type == ScriptureTextType.NONVERSE or self._current_text_type == ScriptureTextType.NONE:
             self._start_parent_element(marker)
 
     def end_row(self, state: UsfmParserState, marker: str) -> None:
-        if self._current_text_type == ScriptureTextType.NONVERSE:
+        if self._current_text_type == ScriptureTextType.NONVERSE or self._current_text_type == ScriptureTextType.NONE:
             self._end_parent_element()
 
     def start_cell(self, state: UsfmParserState, marker: str, align: str, colspan: int) -> None:
-        if self._current_text_type == ScriptureTextType.NONVERSE:
+        if self._current_text_type == ScriptureTextType.NONVERSE or self._current_text_type == ScriptureTextType.NONE:
             self._start_parent_element(marker)
             self._start_non_verse_text_wrapper(state)
 
@@ -95,13 +102,27 @@ def end_sidebar(self, state: UsfmParserState, marker: str, closed: bool) -> None
         self._end_parent_element()
 
     def start_note(self, state: UsfmParserState, marker: str, caller: str, category: Optional[str]) -> None:
-        self._next_element(marker)
-        self._start_note_text_wrapper(state)
+        if self._current_text_type != ScriptureTextType.NONE:
+            self._next_element(marker)
+            self._start_note_text_wrapper(state)
 
     def end_note(self, state: UsfmParserState, marker: str, closed: bool) -> None:
-        self._end_note_text_wrapper(state)
-
-    def ref(self, state: UsfmParserState, marker: str, display: str, target: str) -> None: ...
+        if self._current_text_type == ScriptureTextType.NOTE:
+            self._end_note_text_wrapper(state)
+
+    def text(self, state: UsfmParserState, text: str) -> None:
+        # if we hit text in a verse paragraph and we aren't in a verse, then start a non-verse segment
+        para_tag = state.para_tag
+        if (
+            self._current_text_type == ScriptureTextType.NONE  # the text type stack is empty
+            and para_tag is not None
+            and para_tag.marker != "tr"
+            and state.is_verse_text
+            and self._cur_verse_ref.verse_num == 0  # not inside a numbered verse
+            and len(text.strip()) > 0  # ignore whitespace-only text
+        ):
+            self._start_parent_element(para_tag.marker)
+            self._start_non_verse_text_wrapper(state)
 
     def _start_verse_text(self, state: UsfmParserState, scripture_refs: Optional[List[ScriptureRef]]) -> None: ...
 
@@ -121,8 +142,9 @@ def _start_verse_text_wrapper(self, state: UsfmParserState) -> None:
         self._start_verse_text(state, self._create_verse_refs())
 
     def _end_verse_text_wrapper(self, state: UsfmParserState) -> None:
-        if not self._duplicate_verse and self._cur_verse_ref.verse_num != 0:
+        if not self._duplicate_verse and self._cur_verse_ref.verse_num > 0:
             self._end_verse_text(state, self._create_verse_refs())
+        if self._cur_verse_ref.verse_num > 0:  # pop even when the verse was flagged as a duplicate
             self._cur_text_type_stack.pop()
 
     def _start_non_verse_text_wrapper(self, state: UsfmParserState) -> None:
diff --git a/machine/corpora/usfm_file_text.py b/machine/corpora/usfm_file_text.py
index d4c2d68..ede53c4 100644
--- a/machine/corpora/usfm_file_text.py
+++ b/machine/corpora/usfm_file_text.py
@@ -14,29 +14,16 @@ def __init__(
         self,
         stylesheet: UsfmStylesheet,
         encoding: str,
+        id: str,
         filename: StrPath,
         versification: Optional[Versification] = None,
         include_markers: bool = False,
         include_all_text: bool = False,
+        project: Optional[str] = None,
     ) -> None:
-        super().__init__(
-            _get_id(filename, encoding), stylesheet, encoding, versification, include_markers, include_all_text
-        )
+        super().__init__(id, stylesheet, encoding, versification, include_markers, include_all_text, project)
 
         self._filename = Path(filename)
 
     def _create_stream_container(self) -> StreamContainer:
         return FileStreamContainer(self._filename)
-
-
-def _get_id(filename: StrPath, encoding: str) -> str:
-    with open(filename, "r", encoding=encoding) as file:
-        for line in file:
-            line = line.strip()
-            if line.startswith("\\id "):
-                id = line[4:]
-                index = id.find(" ")
-                if index != -1:
-                    id = id[:index]
-                return id.strip().upper()
-    raise RuntimeError(f"The USFM file '{filename}' does not contain an 'id' marker.")
diff --git a/machine/corpora/usfm_file_text_corpus.py b/machine/corpora/usfm_file_text_corpus.py
index 4033d02..dbc5f67 100644
--- a/machine/corpora/usfm_file_text_corpus.py
+++ b/machine/corpora/usfm_file_text_corpus.py
@@ -25,7 +25,24 @@ def __init__(
         stylesheet = UsfmStylesheet(stylesheet_filename)
         texts: List[UsfmFileText] = []
         for sfm_filename in Path(project_dir).glob(file_pattern):
-            texts.append(
-                UsfmFileText(stylesheet, encoding, sfm_filename, versification, include_markers, include_all_text)
-            )
+            id = _get_id(sfm_filename, encoding)
+            if id:
+                texts.append(
+                    UsfmFileText(
+                        stylesheet, encoding, id, sfm_filename, versification, include_markers, include_all_text
+                    )
+                )
         super().__init__(versification, texts)
+
+
+def _get_id(filename: StrPath, encoding: str) -> Optional[str]:
+    """Returns the upper-cased id from the first "\\id " line of the file, or None when there is no such line."""
+    with open(filename, "r", encoding=encoding) as file:
+        for line in file:
+            line = line.strip()
+            if not line.startswith("\\id "):
+                continue
+            # keep only what precedes the first space after the marker
+            book_id = line[4:].partition(" ")[0]
+            return book_id.strip().upper()
+    return None
diff --git a/machine/corpora/usfm_text_base.py b/machine/corpora/usfm_text_base.py
index 481f047..bae08fe 100644
--- a/machine/corpora/usfm_text_base.py
+++ b/machine/corpora/usfm_text_base.py
@@ -11,7 +11,7 @@
 from .scripture_text import ScriptureText
 from .stream_container import StreamContainer
 from .text_row import TextRow
-from .usfm_parser import parse_usfm
+from .usfm_parser import UsfmParser
 from .usfm_parser_state import UsfmParserState
 from .usfm_stylesheet import UsfmStylesheet
 from .usfm_token import UsfmAttribute, UsfmToken, UsfmTokenType
@@ -26,6 +26,7 @@ def __init__(
         versification: Optional[Versification],
         include_markers: bool,
         include_all_text: bool,
+        project: Optional[str] = None,
     ) -> None:
         super().__init__(id, versification)
 
@@ -33,6 +34,7 @@ def __init__(
         self._encoding = encoding
         self._include_markers = include_markers
         self._include_all_text = include_all_text
+        self.project = project
 
     @abstractmethod
     def _create_stream_container(self) -> StreamContainer: ...
@@ -40,13 +42,16 @@ def _create_stream_container(self) -> StreamContainer: ...
     def _get_rows(self) -> Generator[TextRow, None, None]:
         usfm = self._read_usfm()
         row_collector = _TextRowCollector(self)
-        parse_usfm(
-            usfm,
-            row_collector,
-            self._stylesheet,
-            self.versification,
-            preserve_whitespace=self._include_markers,
-        )
+        parser = UsfmParser(usfm, row_collector, self._stylesheet, self._versification, self._include_markers)
+        try:
+            parser.process_tokens()
+        except Exception as e:  # re-raised below with the text id, project, and parser position added
+            error_message = (
+                f"An error occurred while parsing the text '{self.id}'"
+                f"{f' in project {self.project}' if self.project else ''}"
+                f". Verse: {parser.state.verse_ref}, offset: {parser.state.verse_offset}, error: '{e}'"
+            )
+            raise RuntimeError(error_message) from e
         return gen(row_collector.rows)
 
     def _read_usfm(self) -> str:
diff --git a/machine/corpora/usfm_text_updater.py b/machine/corpora/usfm_text_updater.py
index ba62eea..f178dfe 100644
--- a/machine/corpora/usfm_text_updater.py
+++ b/machine/corpora/usfm_text_updater.py
@@ -14,37 +14,34 @@ def __init__(
         rows: Optional[List[Tuple[List[ScriptureRef], str]]] = None,
         id_text: Optional[str] = None,
         strip_all_text: bool = False,
-        strict_comparison: bool = True,
+        prefer_existing_text: bool = False,
     ) -> None:
         super().__init__()
         self._rows = rows or []
         self._tokens: List[UsfmToken] = []
+        self._new_tokens: List[UsfmToken] = []
         self._id_text = id_text
         self._strip_all_text = strip_all_text
-        self._strict_comparison = strict_comparison
+        self._prefer_existing_text = prefer_existing_text
         self._replace_stack: List[bool] = []
         self._row_index: int = 0
         self._token_index: int = 0
-        self._replace_text: bool = False
 
     @property
     def tokens(self) -> List[UsfmToken]:
         return self._tokens
 
-    @property
-    def replace_text(self) -> bool:
-        return self._strip_all_text or (len(self._replace_stack) > 0 and self._replace_stack[-1])
-
     def start_book(self, state: UsfmParserState, marker: str, code: str) -> None:
         self._collect_tokens(state)
+        start_book_tokens: List[UsfmToken] = []
         if self._id_text is not None:
-            self._tokens.append(UsfmToken(UsfmTokenType.TEXT, text=self._id_text + " "))
-        self._replace_stack.append(self._id_text is not None)
+            start_book_tokens.append(UsfmToken(UsfmTokenType.TEXT, text=self._id_text + " "))
+        self._push_new_tokens(start_book_tokens)
 
         super().start_book(state, marker, code)
 
     def end_book(self, state: UsfmParserState, marker: str) -> None:
-        self._replace_stack.pop()
+        self._pop_new_tokens()
 
         super().end_book(state, marker)
 
@@ -127,7 +124,7 @@ def start_char(
         unknown: bool,
         attributes: List[UsfmAttribute],
     ) -> None:
-        if self.replace_text:
+        if self._replace_with_new_tokens(state):
             self._skip_tokens(state)
         else:
             self._collect_tokens(state)
@@ -141,7 +138,7 @@ def end_char(
         attributes: List[UsfmAttribute],
         closed: bool,
     ) -> None:
-        if closed and self.replace_text:
+        if closed and self._replace_with_new_tokens(state):
             self._skip_tokens(state)
 
         super().end_char(state, marker, attributes, closed)
@@ -153,7 +150,7 @@ def start_note(
         caller: str,
         category: str,
     ) -> None:
-        if self.replace_text:
+        if self._replace_with_new_tokens(state):
             self._skip_tokens(state)
         else:
             self._collect_tokens(state)
@@ -161,13 +158,13 @@ def start_note(
         super().start_note(state, marker, caller, category)
 
     def end_note(self, state: UsfmParserState, marker: str, closed: bool) -> None:
-        if closed and self.replace_text:
+        if closed and self._replace_with_new_tokens(state):
             self._skip_tokens(state)
 
         super().end_note(state, marker, closed)
 
     def ref(self, state: UsfmParserState, marker: str, display: str, target: str) -> None:
-        if self.replace_text:
+        if self._replace_with_new_tokens(state):
             self._skip_tokens(state)
         else:
             self._collect_tokens(state)
@@ -175,7 +172,7 @@ def ref(self, state: UsfmParserState, marker: str, display: str, target: str) ->
         super().ref(state, marker, display, target)
 
     def text(self, state: UsfmParserState, text: str) -> None:
-        if self.replace_text:
+        if self._replace_with_new_tokens(state):
             self._skip_tokens(state)
         else:
             self._collect_tokens(state)
@@ -183,7 +180,7 @@ def text(self, state: UsfmParserState, text: str) -> None:
         super().text(state, text)
 
     def opt_break(self, state: UsfmParserState) -> None:
-        if self.replace_text:
+        if self._replace_with_new_tokens(state):
             self._skip_tokens(state)
         else:
             self._collect_tokens(state)
@@ -191,7 +188,7 @@ def opt_break(self, state: UsfmParserState) -> None:
         super().opt_break(state)
 
     def unmatched(self, state: UsfmParserState, marker: str) -> None:
-        if self.replace_text:
+        if self._replace_with_new_tokens(state):
             self._skip_tokens(state)
         else:
             self._collect_tokens(state)
@@ -200,38 +197,37 @@ def unmatched(self, state: UsfmParserState, marker: str) -> None:
 
     def _start_verse_text(self, state: UsfmParserState, scripture_refs: List[ScriptureRef]) -> None:
         row_texts: List[str] = self._advance_rows(scripture_refs)
-        self._tokens.extend(UsfmToken(UsfmTokenType.TEXT, text=t + " ") for t in row_texts)
-        self._replace_stack.append(len(row_texts) > 0)
+        self._push_new_tokens([UsfmToken(UsfmTokenType.TEXT, text=t + " ") for t in row_texts])
 
     def _end_verse_text(self, state: UsfmParserState, scripture_refs: List[ScriptureRef]) -> None:
-        self._replace_stack.pop()
+        self._pop_new_tokens()
 
     def _start_non_verse_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None:
         row_texts = self._advance_rows([scripture_ref])
-        self._tokens.extend(UsfmToken(UsfmTokenType.TEXT, text=t + " ") for t in row_texts)
-        self._replace_stack.append(len(row_texts) > 0)
+        self._push_new_tokens([UsfmToken(UsfmTokenType.TEXT, text=t + " ") for t in row_texts])
 
     def _end_non_verse_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None:
-        self._replace_stack.pop()
+        self._pop_new_tokens()
 
     def _start_note_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None:
         row_texts = self._advance_rows([scripture_ref])
+        new_tokens: List[UsfmToken] = []
         if len(row_texts) > 0:
             if state.token is None:
                 raise ValueError("Invalid parser state.")
-            self._tokens.append(state.token)
-            self._tokens.append(UsfmToken(UsfmTokenType.CHARACTER, "ft", None, "ft*"))
+            new_tokens.append(state.token)
+            new_tokens.append(UsfmToken(UsfmTokenType.CHARACTER, "ft", None, "ft*"))
             for i, text in enumerate(row_texts):
                 if i < len(row_texts) - 1:
                     text += " "
-                self._tokens.append(UsfmToken(UsfmTokenType.TEXT, text=text))
-            self._tokens.append(UsfmToken(UsfmTokenType.END, state.token.end_marker, None, None))
-            self._replace_stack.append(True)
+                new_tokens.append(UsfmToken(UsfmTokenType.TEXT, text=text))
+            new_tokens.append(UsfmToken(UsfmTokenType.END, state.token.end_marker, None, None))
+            self._push_new_tokens(new_tokens)
         else:
-            self._replace_stack.append(self._replace_stack[-1])
+            self._push_token_as_previous()
 
     def _end_note_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None:
-        self._replace_stack.pop()
+        self._pop_new_tokens()
 
     def get_usfm(self, stylesheet: Union[str, UsfmStylesheet] = "usfm.sty") -> str:
         if isinstance(stylesheet, str):
@@ -241,36 +237,61 @@ def get_usfm(self, stylesheet: Union[str, UsfmStylesheet] = "usfm.sty") -> str:
 
     def _advance_rows(self, seg_scr_refs: List[ScriptureRef]) -> List[str]:
         row_texts: List[str] = []
-        i = 0
-        while self._row_index < len(self._rows) and i < len(seg_scr_refs):
+        source_index: int = 0
+        while self._row_index < len(self._rows) and source_index < len(seg_scr_refs):
+            compare: int = 0
             row_scr_refs, text = self._rows[self._row_index]
-            stop = False
             for row_scr_ref in row_scr_refs:
-                found = False
-                for seg_scr_ref in seg_scr_refs[i:]:
-                    compare = row_scr_ref.compare_to(
-                        seg_scr_refs[i], compare_segments=False, strict=self._strict_comparison
-                    )
-                    if compare == 0:
-                        row_texts.append(text)
-                        i += 1
-                        found = True
+                while source_index < len(seg_scr_refs):
+                    compare = row_scr_ref.compare_to(seg_scr_refs[source_index], compare_segments=False)
+                    if compare > 0:
+                        # row is ahead of source, increment source
+                        source_index += 1
+                    else:
                         break
-                    elif compare > 0:
-                        stop = True
-                        break
-                if stop or found:
+                if compare == 0:
+                    # source and row match
+                    # grab the text and increment both
+                    row_texts.append(text)
+                    source_index += 1
                     break
-            if stop:
-                break
-            else:
+            if compare <= 0:
+                # source is ahead of (or level with) row, increment row
                 self._row_index += 1
         return row_texts
 
     def _collect_tokens(self, state: UsfmParserState) -> None:
+        self._tokens.extend(self._new_tokens)
+        self._new_tokens.clear()
         while self._token_index <= state.index + state.special_token_count:
             self._tokens.append(state.tokens[self._token_index])
             self._token_index += 1
 
     def _skip_tokens(self, state: UsfmParserState) -> None:
         self._token_index = state.index + 1 + state.special_token_count
+
+    def _replace_with_new_tokens(self, state: UsfmParserState) -> bool:
+        new_text: bool = len(self._replace_stack) > 0 and self._replace_stack[-1]
+        token_end: int = state.index + state.special_token_count + 1
+        existing_text: bool = False
+        for index in range(self._token_index, token_end + 1):
+            if state.tokens[index].type == UsfmTokenType.TEXT and state.tokens[index].text:
+                existing_text = True
+                break
+        use_new_tokens: bool = (
+            self._strip_all_text or (new_text and not existing_text) or (new_text and not self._prefer_existing_text)
+        )
+        if use_new_tokens:
+            self._tokens.extend(self._new_tokens)
+        self._new_tokens.clear()
+        return use_new_tokens
+
+    def _push_new_tokens(self, tokens: List[UsfmToken]) -> None:
+        self._replace_stack.append(any(tokens))
+        self._new_tokens.extend(tokens)
+
+    def _push_token_as_previous(self) -> None:
+        self._replace_stack.append(self._replace_stack[-1])
+
+    def _pop_new_tokens(self) -> None:
+        self._replace_stack.pop()
diff --git a/machine/corpora/usfm_tokenizer.py b/machine/corpora/usfm_tokenizer.py
index 54a5e69..0a25b43 100644
--- a/machine/corpora/usfm_tokenizer.py
+++ b/machine/corpora/usfm_tokenizer.py
@@ -212,6 +212,7 @@ def tokenize(self, usfm: str, preserve_whitespace: bool = False) -> Sequence[Usf
     def detokenize(self, tokens: Iterable[UsfmToken], tokens_have_whitespace: bool = False) -> str:
         prev_token: Optional[UsfmToken] = None
         usfm = ""
+        in_book = False
         for token in tokens:
             token_usfm = ""
             if token.type in {UsfmTokenType.BOOK, UsfmTokenType.CHAPTER, UsfmTokenType.PARAGRAPH}:
@@ -224,6 +225,7 @@ def detokenize(self, tokens: Iterable[UsfmToken], tokens_have_whitespace: bool =
                     if not tokens_have_whitespace:
                         usfm += "\r\n"
                 token_usfm = token.to_usfm()
+                in_book = token.type == UsfmTokenType.BOOK
             elif token.type is UsfmTokenType.VERSE:
                 # Add newline if after anything other than [ or (
                 if len(usfm) > 0 and usfm[-1] != "[" and usfm[-1] != "(":
@@ -242,7 +244,7 @@ def detokenize(self, tokens: Iterable[UsfmToken], tokens_have_whitespace: bool =
                         "\u200e" if self.rtl_reference_order is RtlReferenceOrder.BOOK_VERSE_CHAPTER else "\u200f"
                     )
                     token_usfm = _RTL_VERSE_REGEX.sub(token_usfm, f"$1{direction_marker}$2")
-
+                in_book = False
             elif token.type is UsfmTokenType.TEXT:
                 # Ensure spaces are preserved
                 token_usfm = token.to_usfm()
@@ -257,7 +259,15 @@ def detokenize(self, tokens: Iterable[UsfmToken], tokens_have_whitespace: bool =
                     else:
                         token_usfm = token_usfm.lstrip(" ")
             else:
+                if in_book:
+                    if usfm[-1] == " " and (
+                        (prev_token is not None and prev_token.to_usfm().strip() != "") or not tokens_have_whitespace
+                    ):
+                        usfm = usfm[:-1]
+                    if not tokens_have_whitespace:
+                        usfm += "\r\n"
                 token_usfm = token.to_usfm()
+                in_book = False
 
             usfm += token_usfm
             prev_token = token
diff --git a/machine/corpora/usfm_zip_text.py b/machine/corpora/usfm_zip_text.py
index 8e85570..0b4f44b 100644
--- a/machine/corpora/usfm_zip_text.py
+++ b/machine/corpora/usfm_zip_text.py
@@ -1,6 +1,4 @@
-from io import TextIOWrapper
 from typing import Optional
-from zipfile import ZipFile
 
 from ..scripture.verse_ref import Versification
 from ..utils.typeshed import StrPath
@@ -15,38 +13,17 @@ def __init__(
         self,
         stylesheet: UsfmStylesheet,
         encoding: str,
+        id: str,
         archive_filename: StrPath,
         path: str,
         versification: Optional[Versification] = None,
         include_markers: bool = False,
         include_all_text: bool = False,
+        project: Optional[str] = None,
     ) -> None:
-        super().__init__(
-            _get_id(archive_filename, path, encoding),
-            stylesheet,
-            encoding,
-            versification,
-            include_markers,
-            include_all_text,
-        )
+        super().__init__(id, stylesheet, encoding, versification, include_markers, include_all_text, project)
         self._archive_filename = archive_filename
         self._path = path
 
     def _create_stream_container(self) -> StreamContainer:
         return ZipEntryStreamContainer(self._archive_filename, self._path)
-
-
-def _get_id(archive_filename: StrPath, path: str, encoding: str) -> str:
-    with ZipFile(archive_filename, "r") as archive:
-        entry = next((zi for zi in archive.filelist if zi.filename == path))
-        with archive.open(entry, "r") as file:
-            stream = TextIOWrapper(file, encoding=encoding)
-            for line in stream:
-                line = line.strip()
-                if line.startswith("\\id "):
-                    id = line[4:]
-                    index = id.find(" ")
-                    if index != -1:
-                        id = id[:index]
-                    return id.strip().upper()
-    raise RuntimeError("The USFM does not contain an 'id' marker.")
diff --git a/machine/scripture/verse_ref.py b/machine/scripture/verse_ref.py
index 2bfa081..cf077b2 100644
--- a/machine/scripture/verse_ref.py
+++ b/machine/scripture/verse_ref.py
@@ -407,7 +407,11 @@ def _compare_verses(self, other: VerseRef, compare_segments: bool) -> int:
             result = verse.compare_to(other_verse, compare_all_verses=False, compare_segments=compare_segments)
             if result != 0:
                 return result
-        return len(verse_list) - len(other_verse_list)
+        if len(verse_list) < len(other_verse_list):
+            return -1
+        elif len(verse_list) > len(other_verse_list):
+            return 1
+        return 0
 
     def _validate_single_verse(self) -> ValidStatus:
         # Unknown versification is always invalid
diff --git a/tests/corpora/test_paratext_backup_text_corpus.py b/tests/corpora/test_paratext_backup_text_corpus.py
index 61d067a..57907c3 100644
--- a/tests/corpora/test_paratext_backup_text_corpus.py
+++ b/tests/corpora/test_paratext_backup_text_corpus.py
@@ -11,7 +11,7 @@
 
 def test_texts() -> None:
     with _TestEnvironment() as env:
-        assert [t.id for t in env.corpus.texts] == ["LEV", "1CH", "MAT", "MRK"]
+        assert [t.id for t in env.corpus.texts] == ["LEV", "1CH", "MAT", "MRK", "JHN"]
 
 
 def test_get_text() -> None:
@@ -23,6 +23,10 @@ def test_get_text() -> None:
         luk = env.corpus.get_text("LUK")
         assert luk is None
 
+        jhn = env.corpus.get_text("JHN")
+        assert jhn is not None
+        assert not any(jhn.get_rows())
+
 
 class _TestEnvironment(ContextManager["_TestEnvironment"]):
     def __init__(self) -> None:
diff --git a/tests/corpora/test_paratext_project_settings.py b/tests/corpora/test_paratext_project_settings.py
index f30f836..039dde8 100644
--- a/tests/corpora/test_paratext_project_settings.py
+++ b/tests/corpora/test_paratext_project_settings.py
@@ -43,6 +43,77 @@ def test_get_book_file_name_book_num_prefix_c() -> None:
     assert settings.get_book_file_name("3MQ") == "PROJC0.SFM"
 
 
+def test_get_book_id_book_num() -> None:
+    settings = _create_settings("41")
+    assert settings.get_book_id("PROJ42.SFM") == "MRK"
+
+
+def test_get_book_id_book_num_book_id() -> None:
+    settings = _create_settings("41MAT")
+    assert settings.get_book_id("PROJ42MRK.SFM") == "MRK"
+
+
+def test_get_book_id_book_id() -> None:
+    settings = _create_settings("MAT")
+    assert settings.get_book_id("PROJMRK.SFM") == "MRK"
+
+
+def test_get_book_id_book_num_double_digit() -> None:
+    settings = _create_settings("41")
+    assert settings.get_book_id("PROJ01.SFM") == "GEN"
+
+
+def test_get_book_id_book_num_xxg_book_num() -> None:
+    settings = _create_settings("41")
+    assert settings.get_book_id("PROJ100.SFM") == "XXG"
+
+
+def test_get_book_id_book_num_xxg_book_num_book_id() -> None:
+    settings = _create_settings("41MAT")
+    assert settings.get_book_id("PROJ100XXG.SFM") == "XXG"
+
+
+def test_get_book_id_book_num_prefix_a() -> None:
+    settings = _create_settings("41")
+    assert settings.get_book_id("PROJA0.SFM") == "FRT"
+
+
+def test_get_book_id_book_num_prefix_b() -> None:
+    settings = _create_settings("41")
+    assert settings.get_book_id("PROJB0.SFM") == "TDX"
+
+
+def test_get_book_id_book_num_prefix_c() -> None:
+    settings = _create_settings("41")
+    assert settings.get_book_id("PROJC0.SFM") == "3MQ"
+
+
+def test_get_book_id_wrong_prefix() -> None:
+    settings = _create_settings("41")
+    assert settings.get_book_id("WRONG42.SFM") is None
+
+
+def test_get_book_id_wrong_suffix() -> None:
+    settings = _create_settings("41")
+    assert settings.get_book_id("PROJ42.WRONG") is None
+
+
+def test_get_book_id_wrong_book_part_book_num() -> None:
+    settings = _create_settings("41")
+    assert settings.get_book_id("PROJ42MRK.SFM") is None
+
+
+def test_get_book_id_wrong_book_part_book_id() -> None:
+    settings = _create_settings("MAT")
+    assert settings.get_book_id("PROJ42.SFM") is None
+
+
+def test_get_book_id_wrong_book_part_book_num_book_id() -> None:
+    settings = _create_settings("41MAT")
+    assert settings.get_book_id("PROJMRK.SFM") is None
+    assert settings.get_book_id("PROJ100.SFM") is None
+
+
 def _create_settings(file_name_form: str) -> ParatextProjectSettings:
     return ParatextProjectSettings(
         "Name",
diff --git a/tests/corpora/test_scripture_ref.py b/tests/corpora/test_scripture_ref.py
index 3247d39..8674082 100644
--- a/tests/corpora/test_scripture_ref.py
+++ b/tests/corpora/test_scripture_ref.py
@@ -1,45 +1,48 @@
+from pytest import raises
+
 from machine.corpora import ScriptureRef
 
 
-def test_compare_to_strict():
-    assert compare_to_strict("MAT 1:1", "MAT 1:2") == -1, "VerseLessThan"
-    assert compare_to_strict("MAT 1:1", "MAT 1:1") == 0, "VerseEqualTo"
-    assert compare_to_strict("MAT 1:2", "MAT 1:1") == 1, "VerseGreaterThan"
-    assert compare_to_strict("MAT 1:0/1:p", "MAT 1:0/2:p") == -1, "NonVerseLessThan"
-    assert compare_to_strict("MAT 1:0/1:p", "MAT 1:0/1:p") == 0, "NonVerseEqualTo"
-    assert compare_to_strict("MAT 1:0/2:p", "MAT 1:0/1:p") == 1, "NonVerseGreaterThan"
-    assert compare_to_strict("MAT 1:0/1:esb", "MAT 1:0/1:esb/1:p") == -1, "NonVerseParentChild"
+def test_compare_to():
+    assert compare_to("MAT 1:1", "MAT 1:2") == -1, "VerseLessThan"
+    assert compare_to("MAT 1:1", "MAT 1:1") == 0, "VerseEqualTo"
+    assert compare_to("MAT 1:2", "MAT 1:1") == 1, "VerseGreaterThan"
+    assert compare_to("MAT 1:1-3", "MAT 1:1") == 1, "MultiVerseExtensionGreaterThan"
+    assert compare_to("MAT 1:1", "MAT 1:1-3") == -1, "MultiVerseExtensionLessThan"
+    assert compare_to("MAT 1:1-3", "MAT 1:2") == -1, "MultiVerseStartLessThan"
+    assert compare_to("MAT 1:2", "MAT 1:1-3") == 1, "MultiVerseEndGreaterThan"
+    assert compare_to("MAT 1:0/1:p", "MAT 1:0/2:p") == -1, "NonVerseLessThan"
+    assert compare_to("MAT 1:0/1:p", "MAT 1:0/1:p") == 0, "NonVerseEqualTo"
+    assert compare_to("MAT 1:0/2:p", "MAT 1:0/1:p") == 1, "NonVerseGreaterThan"
+    assert compare_to("MAT 1:0/1:esb", "MAT 1:0/1:esb/1:p") == -1, "NonVerseParentChild"
+    assert compare_to("MAT 1:0/2:esb", "MAT 1:0/1:esb/1:p") == 1, "NonVerseParentOtherChild"
+    assert compare_to("MAT 1:0/p", "MAT 1:0/2:p") == 0, "RelaxedSameMarker"
+    assert compare_to("MAT 1:0/p", "MAT 1:0/2:esb") == 1, "RelaxedSameLevel"
+    assert compare_to("MAT 1:0/esb", "MAT 1:0/1:esb/1:p") == -1, "RelaxedParentChild"
+    assert compare_to("MAT 1:0/2:esb", "MAT 1:0/esb/p") == -1, "ParentRelaxedChild"
 
 
-def test_compare_to_relaxed():
-    assert compare_to_relaxed("MAT 1:1", "MAT 1:2") == -1, "VerseLessThan"
-    assert compare_to_relaxed("MAT 1:1", "MAT 1:1") == 0, "VerseEqualTo"
-    assert compare_to_relaxed("MAT 1:2", "MAT 1:1") == 1, "VerseGreaterThan"
-    assert compare_to_relaxed("MAT 1:0/1:p", "MAT 1:0/2:p") == 0, "NonVerseSameMarkerDifferentPosition"
-    assert compare_to_relaxed("MAT 1:0/2:esb", "MAT 1:0/1:esb/1:p") == -1, "NonVerseParentChild"
+def test_is_equal_to():
+    ref1 = ScriptureRef.parse("MAT 1:1/1:p")
+    ref1dup = ScriptureRef.parse("MAT 1:1/1:p")
+    ref2 = ScriptureRef.parse("MAT 1:2/1:p")
+    obj1 = "A different type"
 
+    assert ref1 == ref1dup
+    assert ref1 != ref2
+    assert ref1 != obj1
 
-def compare_to_strict(ref1_str, ref2_str):
-    ref1 = ScriptureRef.parse(ref1_str)
-    ref2 = ScriptureRef.parse(ref2_str)
 
-    result = ref1.compare_to(ref2)
+def test_is_equal_to_throws_argument_exception():
+    ref1 = ScriptureRef.parse("MAT 1:1/1:p")
+    obj1 = "A different type"
 
-    if result < 0:
-        result = -1
-    elif result > 0:
-        result = 1
-    return result
+    with raises(TypeError):
+        ref1.compare_to(obj1)
 
 
-def compare_to_relaxed(ref1_str, ref2_str):
+def compare_to(ref1_str, ref2_str):
     ref1 = ScriptureRef.parse(ref1_str)
     ref2 = ScriptureRef.parse(ref2_str)
 
-    result = ref1.compare_to(ref2, strict=False)
-
-    if result < 0:
-        result = -1
-    elif result > 0:
-        result = 1
-    return result
+    return ref1.compare_to(ref2)
diff --git a/tests/corpora/test_usfm_file_text.py b/tests/corpora/test_usfm_file_text.py
index 33679ae..9bf3afc 100644
--- a/tests/corpora/test_usfm_file_text.py
+++ b/tests/corpora/test_usfm_file_text.py
@@ -10,7 +10,7 @@ def test_get_rows_nonempty_text() -> None:
     assert text is not None
     rows = list(text)
 
-    assert len(rows) == 19
+    assert len(rows) == 23
 
     assert scripture_ref(rows[0]) == ScriptureRef.parse("MAT 1:1", corpus.versification)
     assert rows[0].text == "Chapter one, verse one."
@@ -21,41 +21,41 @@ def test_get_rows_nonempty_text() -> None:
     assert scripture_ref(rows[4]) == ScriptureRef.parse("MAT 1:5", corpus.versification)
     assert rows[4].text == "Chapter one, verse five."
 
-    assert scripture_ref(rows[5]) == ScriptureRef.parse("MAT 2:1", corpus.versification)
-    assert rows[5].text == "Chapter two, verse one."
+    assert scripture_ref(rows[8]) == ScriptureRef.parse("MAT 2:1", corpus.versification)
+    assert rows[8].text == "Chapter two, verse one."
 
-    assert scripture_ref(rows[6]) == ScriptureRef.parse("MAT 2:2", corpus.versification)
-    assert rows[6].text == "Chapter two, verse two. Chapter two, verse three."
-    assert rows[6].is_in_range
-    assert rows[6].is_range_start
+    assert scripture_ref(rows[9]) == ScriptureRef.parse("MAT 2:2", corpus.versification)
+    assert rows[9].text == "Chapter two, verse two. Chapter two, verse three."
+    assert rows[9].is_in_range
+    assert rows[9].is_range_start
 
-    assert scripture_ref(rows[7]) == ScriptureRef.parse("MAT 2:3", corpus.versification)
-    assert len(rows[7].segment) == 0
-    assert rows[7].is_in_range
-    assert not rows[7].is_range_start
+    assert scripture_ref(rows[10]) == ScriptureRef.parse("MAT 2:3", corpus.versification)
+    assert len(rows[10].segment) == 0
+    assert rows[10].is_in_range
+    assert not rows[10].is_range_start
 
-    assert scripture_ref(rows[8]) == ScriptureRef.parse("MAT 2:4a", corpus.versification)
-    assert len(rows[8].segment) == 0
-    assert rows[8].is_in_range
-    assert not rows[8].is_range_start
+    assert scripture_ref(rows[11]) == ScriptureRef.parse("MAT 2:4a", corpus.versification)
+    assert len(rows[11].segment) == 0
+    assert rows[11].is_in_range
+    assert not rows[11].is_range_start
 
-    assert scripture_ref(rows[9]) == ScriptureRef.parse("MAT 2:4b", corpus.versification)
-    assert rows[9].text == "Chapter two, verse four."
+    assert scripture_ref(rows[12]) == ScriptureRef.parse("MAT 2:4b", corpus.versification)
+    assert rows[12].text == "Chapter two, verse four."
 
-    assert scripture_ref(rows[10]) == ScriptureRef.parse("MAT 2:5", corpus.versification)
-    assert rows[10].text == "Chapter two, verse five."
+    assert scripture_ref(rows[13]) == ScriptureRef.parse("MAT 2:5", corpus.versification)
+    assert rows[13].text == "Chapter two, verse five."
 
-    assert scripture_ref(rows[11]) == ScriptureRef.parse("MAT 2:6", corpus.versification)
-    assert rows[11].text == "Chapter two, verse six."
+    assert scripture_ref(rows[14]) == ScriptureRef.parse("MAT 2:6", corpus.versification)
+    assert rows[14].text == "Chapter two, verse six."
 
-    assert scripture_ref(rows[15]) == ScriptureRef.parse("MAT 2:9", corpus.versification)
-    assert rows[15].text == "Chapter 2 verse 9"
+    assert scripture_ref(rows[18]) == ScriptureRef.parse("MAT 2:9", corpus.versification)
+    assert rows[18].text == "Chapter 2 verse 9"
 
-    assert scripture_ref(rows[16]) == ScriptureRef.parse("MAT 2:10", corpus.versification)
-    assert rows[16].text == "Chapter 2 verse 10"
+    assert scripture_ref(rows[19]) == ScriptureRef.parse("MAT 2:10", corpus.versification)
+    assert rows[19].text == "Chapter 2 verse 10"
 
-    assert scripture_ref(rows[17]) == ScriptureRef.parse("MAT 2:11", corpus.versification)
-    assert not rows[17].text
+    assert scripture_ref(rows[20]) == ScriptureRef.parse("MAT 2:11", corpus.versification)
+    assert not rows[20].text
 
 
 def test_get_rows_nonempty_text_all_text() -> None:
@@ -65,7 +65,7 @@ def test_get_rows_nonempty_text_all_text() -> None:
     assert text is not None
     rows = list(text)
 
-    assert len(rows) == 36
+    assert len(rows) == 49
 
     assert scripture_ref(rows[0]) == ScriptureRef.parse("MAT 1:0/1:h", corpus.versification)
     assert rows[0].text == "Matthew"
@@ -79,44 +79,53 @@ def test_get_rows_nonempty_text_all_text() -> None:
     assert scripture_ref(rows[3]) == ScriptureRef.parse("MAT 1:0/3:ip/1:fe", corpus.versification)
     assert rows[3].text == "This is an endnote."
 
-    assert scripture_ref(rows[4]) == ScriptureRef.parse("Mat 1:0/4:s", corpus.versification)
-    assert rows[4].text == "Chapter One"
+    assert scripture_ref(rows[4]) == ScriptureRef.parse("Mat 1:0/4:p", corpus.versification)
+    assert rows[4].text == "Here is another paragraph."
 
-    assert scripture_ref(rows[6]) == ScriptureRef.parse("MAT 1:1/1:f", corpus.versification)
-    assert rows[6].text == "1:1: This is a footnote."
+    assert scripture_ref(rows[7]) == ScriptureRef.parse("MAT 1:0/7:weirdtaglookingthing", corpus.versification)
+    assert rows[7].text == "that is not an actual tag."
 
-    assert scripture_ref(rows[8]) == ScriptureRef.parse("MAT 1:2/1:f", corpus.versification)
-    assert rows[8].text == "1:2: This is a footnote."
+    assert scripture_ref(rows[8]) == ScriptureRef.parse("MAT 1:0/8:s", corpus.versification)
+    assert rows[8].text == "Chapter One"
 
-    assert scripture_ref(rows[12]) == ScriptureRef.parse("MAT 2:0/1:tr/1:tc1", corpus.versification)
-    assert rows[12].text == "Row one, column one."
+    assert scripture_ref(rows[10]) == ScriptureRef.parse("MAT 1:1/1:f", corpus.versification)
+    assert rows[10].text == "1:1: This is a footnote."
 
-    assert scripture_ref(rows[13]) == ScriptureRef.parse("MAT 2:0/1:tr/2:tc2", corpus.versification)
-    assert rows[13].text == "Row one, column two."
+    assert scripture_ref(rows[12]) == ScriptureRef.parse("MAT 1:2/1:f", corpus.versification)
+    assert rows[12].text == "1:2: This is a footnote."
 
-    assert scripture_ref(rows[14]) == ScriptureRef.parse("MAT 2:0/2:tr/1:tc1", corpus.versification)
-    assert rows[14].text == "Row two, column one."
+    assert scripture_ref(rows[19]) == ScriptureRef.parse("MAT 2:0/1:tr/1:tc1", corpus.versification)
+    assert rows[19].text == "Row one, column one."
 
-    assert scripture_ref(rows[15]) == ScriptureRef.parse("MAT 2:0/2:tr/2:tc2", corpus.versification)
-    assert rows[15].text == "Row two, column two."
+    assert scripture_ref(rows[20]) == ScriptureRef.parse("MAT 2:0/1:tr/2:tc2", corpus.versification)
+    assert rows[20].text == "Row one, column two."
 
-    assert scripture_ref(rows[16]) == ScriptureRef.parse("MAT 2:0/3:s1", corpus.versification)
-    assert rows[16].text == "Chapter Two"
+    assert scripture_ref(rows[21]) == ScriptureRef.parse("MAT 2:0/2:tr/1:tc1", corpus.versification)
+    assert rows[21].text == "Row two, column one."
 
-    assert scripture_ref(rows[18]) == ScriptureRef.parse("MAT 2:1/1:f", corpus.versification)
-    assert rows[18].text == "2:1: This is a footnote."
+    assert scripture_ref(rows[22]) == ScriptureRef.parse("MAT 2:0/2:tr/2:tc2", corpus.versification)
+    assert rows[22].text == "Row two, column two."
 
-    assert scripture_ref(rows[21]) == ScriptureRef.parse("MAT 2:3/1:esb/1:ms", corpus.versification)
-    assert rows[21].text == "This is a sidebar"
+    assert scripture_ref(rows[23]) == ScriptureRef.parse("MAT 2:0/3:s1", corpus.versification)
+    assert rows[23].text == "Chapter Two"
 
-    assert scripture_ref(rows[22]) == ScriptureRef.parse("MAT 2:3/1:esb/2:p", corpus.versification)
-    assert rows[22].text == "Here is some sidebar content."
+    assert scripture_ref(rows[24]) == ScriptureRef.parse("MAT 2:0/4:p", corpus.versification)
+    assert not rows[24].text
 
-    assert scripture_ref(rows[28]) == ScriptureRef.parse("MAT 2:7a/1:s", corpus.versification)
-    assert rows[28].text == "Section header"
+    assert scripture_ref(rows[26]) == ScriptureRef.parse("MAT 2:1/1:f", corpus.versification)
+    assert rows[26].text == "2:1: This is a footnote."
 
-    assert scripture_ref(rows[35]) == ScriptureRef.parse("MAT 2:12/1:restore", corpus.versification)
-    assert rows[35].text == "restore information"
+    assert scripture_ref(rows[29]) == ScriptureRef.parse("MAT 2:3/1:esb/1:ms", corpus.versification)
+    assert rows[29].text == "This is a sidebar"
+
+    assert scripture_ref(rows[30]) == ScriptureRef.parse("MAT 2:3/1:esb/2:p", corpus.versification)
+    assert rows[30].text == "Here is some sidebar content."
+
+    assert scripture_ref(rows[36]) == ScriptureRef.parse("MAT 2:7a/1:s", corpus.versification)
+    assert rows[36].text == "Section header"
+
+    assert scripture_ref(rows[43]) == ScriptureRef.parse("MAT 2:12/1:restore", corpus.versification)
+    assert rows[43].text == "restore information"
 
 
 def test_get_rows_sentence_start() -> None:
@@ -126,7 +135,7 @@ def test_get_rows_sentence_start() -> None:
     assert text is not None
     rows = list(text)
 
-    assert len(rows) == 19
+    assert len(rows) == 23
 
     assert scripture_ref(rows[3]) == ScriptureRef.parse("MAT 1:4", corpus.versification)
     assert rows[3].text == "Chapter one, verse four,"
@@ -154,7 +163,7 @@ def test_get_rows_include_markers() -> None:
     assert text is not None
     rows = list(text)
 
-    assert len(rows) == 19
+    assert len(rows) == 23
 
     assert scripture_ref(rows[0]) == ScriptureRef.parse("MAT 1:1", corpus.versification)
     assert (
@@ -167,38 +176,38 @@ def test_get_rows_include_markers() -> None:
     assert scripture_ref(rows[4]) == ScriptureRef.parse("MAT 1:5", corpus.versification)
     assert rows[4].text == 'Chapter one, \\li2 verse \\fig Figure 1|src="image1.png" size="col" ref="1:5"\\fig* five.'
 
-    assert scripture_ref(rows[5]) == ScriptureRef.parse("MAT 2:1", corpus.versification)
-    assert rows[5].text == "Chapter \\add two\\add*, verse \\f + \\fr 2:1: \\ft This is a footnote.\\f*one."
+    assert scripture_ref(rows[8]) == ScriptureRef.parse("MAT 2:1", corpus.versification)
+    assert rows[8].text == "Chapter \\add two\\add*, verse \\f + \\fr 2:1: \\ft This is a footnote.\\f*one."
 
-    assert scripture_ref(rows[6]) == ScriptureRef.parse("MAT 2:2", corpus.versification)
-    assert rows[6].text == "Chapter two, // verse \\fm ∆\\fm*two. Chapter two, verse \\w three|lemma\\w*."
-    assert rows[6].is_in_range
-    assert rows[6].is_range_start
+    assert scripture_ref(rows[9]) == ScriptureRef.parse("MAT 2:2", corpus.versification)
+    assert rows[9].text == "Chapter two, // verse \\fm ∆\\fm*two. Chapter two, verse \\w three|lemma\\w*."
+    assert rows[9].is_in_range
+    assert rows[9].is_range_start
 
-    assert scripture_ref(rows[7]) == ScriptureRef.parse("MAT 2:3", corpus.versification)
-    assert len(rows[7].segment) == 0
-    assert rows[7].is_in_range
-    assert not rows[7].is_range_start
+    assert scripture_ref(rows[10]) == ScriptureRef.parse("MAT 2:3", corpus.versification)
+    assert len(rows[10].segment) == 0
+    assert rows[10].is_in_range
+    assert not rows[10].is_range_start
 
-    assert scripture_ref(rows[8]) == ScriptureRef.parse("MAT 2:4a", corpus.versification)
-    assert len(rows[8].segment) == 0
-    assert rows[8].is_in_range
-    assert not rows[8].is_range_start
+    assert scripture_ref(rows[11]) == ScriptureRef.parse("MAT 2:4a", corpus.versification)
+    assert len(rows[11].segment) == 0
+    assert rows[11].is_in_range
+    assert not rows[11].is_range_start
 
-    assert scripture_ref(rows[9]) == ScriptureRef.parse("MAT 2:4b", corpus.versification)
-    assert rows[9].text == "Chapter two, verse four."
+    assert scripture_ref(rows[12]) == ScriptureRef.parse("MAT 2:4b", corpus.versification)
+    assert rows[12].text == "Chapter two, verse four."
 
-    assert scripture_ref(rows[10]) == ScriptureRef.parse("MAT 2:5", corpus.versification)
-    assert rows[10].text == "Chapter two, verse five \\rq (MAT 3:1)\\rq*."
+    assert scripture_ref(rows[13]) == ScriptureRef.parse("MAT 2:5", corpus.versification)
+    assert rows[13].text == "Chapter two, verse five \\rq (MAT 3:1)\\rq*."
 
-    assert scripture_ref(rows[11]) == ScriptureRef.parse("MAT 2:6", corpus.versification)
-    assert rows[11].text == 'Chapter two, verse \\w six|strong="12345" \\w*.'
+    assert scripture_ref(rows[14]) == ScriptureRef.parse("MAT 2:6", corpus.versification)
+    assert rows[14].text == 'Chapter two, verse \\w six|strong="12345" \\w*.'
 
-    assert scripture_ref(rows[15]) == ScriptureRef.parse("MAT 2:9", corpus.versification)
-    assert rows[15].text == "Chapter\\tcr2 2\\tc3 verse\\tcr4 9"
+    assert scripture_ref(rows[18]) == ScriptureRef.parse("MAT 2:9", corpus.versification)
+    assert rows[18].text == "Chapter\\tcr2 2\\tc3 verse\\tcr4 9"
 
-    assert scripture_ref(rows[16]) == ScriptureRef.parse("MAT 2:10", corpus.versification)
-    assert rows[16].text == "\\tc3-4 Chapter 2 verse 10"
+    assert scripture_ref(rows[19]) == ScriptureRef.parse("MAT 2:10", corpus.versification)
+    assert rows[19].text == "\\tc3-4 Chapter 2 verse 10"
 
 
 def test_get_rows_include_markers_all_text() -> None:
@@ -209,30 +218,30 @@ def test_get_rows_include_markers_all_text() -> None:
     assert text is not None
     rows = list(text)
 
-    assert len(rows) == 32
+    assert len(rows) == 45
 
     assert scripture_ref(rows[2]) == ScriptureRef.parse("MAT 1:0/3:ip", corpus.versification)
     assert rows[2].text == "An introduction to Matthew\\fe + \\ft This is an endnote.\\fe*"
 
-    assert scripture_ref(rows[4]) == ScriptureRef.parse("MAT 1:1", corpus.versification)
+    assert scripture_ref(rows[8]) == ScriptureRef.parse("MAT 1:1", corpus.versification)
     assert (
-        rows[4].text == "Chapter \\pn one\\+pro WON\\+pro*\\pn*, verse one.\\f + \\fr 1:1: \\ft This is a footnote.\\f*"
+        rows[8].text == "Chapter \\pn one\\+pro WON\\+pro*\\pn*, verse one.\\f + \\fr 1:1: \\ft This is a footnote.\\f*"
     )
 
-    assert scripture_ref(rows[5]) == ScriptureRef.parse("MAT 1:2", corpus.versification)
-    assert rows[5].text == "\\bd C\\bd*hapter one, \\li2 verse\\f + \\fr 1:2: \\ft This is a footnote.\\f* two."
+    assert scripture_ref(rows[9]) == ScriptureRef.parse("MAT 1:2", corpus.versification)
+    assert rows[9].text == "\\bd C\\bd*hapter one, \\li2 verse\\f + \\fr 1:2: \\ft This is a footnote.\\f* two."
 
-    assert scripture_ref(rows[8]) == ScriptureRef.parse("MAT 1:5", corpus.versification)
-    assert rows[8].text == 'Chapter one, \\li2 verse \\fig Figure 1|src="image1.png" size="col" ref="1:5"\\fig* five.'
+    assert scripture_ref(rows[12]) == ScriptureRef.parse("MAT 1:5", corpus.versification)
+    assert rows[12].text == 'Chapter one, \\li2 verse \\fig Figure 1|src="image1.png" size="col" ref="1:5"\\fig* five.'
 
-    assert scripture_ref(rows[13]) == ScriptureRef.parse("MAT 2:0/3:s1", corpus.versification)
-    assert rows[13].text == "Chapter \\it Two \\it*"
+    assert scripture_ref(rows[20]) == ScriptureRef.parse("MAT 2:0/3:s1", corpus.versification)
+    assert rows[20].text == "Chapter \\it Two \\it*"
 
-    assert scripture_ref(rows[14]) == ScriptureRef.parse("MAT 2:1", corpus.versification)
-    assert rows[14].text == "Chapter \\add two\\add*, verse \\f + \\fr 2:1: \\ft This is a footnote.\\f*one."
+    assert scripture_ref(rows[22]) == ScriptureRef.parse("MAT 2:1", corpus.versification)
+    assert rows[22].text == "Chapter \\add two\\add*, verse \\f + \\fr 2:1: \\ft This is a footnote.\\f*one."
 
-    assert scripture_ref(rows[18]) == ScriptureRef.parse("MAT 2:3/1:esb/2:p", corpus.versification)
-    assert rows[18].text == "Here is some sidebar // content."
+    assert scripture_ref(rows[26]) == ScriptureRef.parse("MAT 2:3/1:esb/2:p", corpus.versification)
+    assert rows[26].text == "Here is some sidebar // content."
 
 
 def test_usfm_file_text_corpus_lowercase_usfm_id() -> None:
diff --git a/tests/corpora/test_usfm_manual.py b/tests/corpora/test_usfm_manual.py
new file mode 100644
index 0000000..90aa4f9
--- /dev/null
+++ b/tests/corpora/test_usfm_manual.py
@@ -0,0 +1,77 @@
+import json
+from dataclasses import dataclass
+from pathlib import Path
+from typing import List, Tuple
+
+import pytest
+from testutils.corpora_test_helpers import TEST_DATA_PATH, USFM_SOURCE_PROJECT_PATH, USFM_TARGET_PROJECT_PATH
+
+from machine.corpora import (
+    FileParatextProjectSettingsParser,
+    ParatextProjectSettings,
+    ParatextTextCorpus,
+    ScriptureRef,
+    StandardParallelTextCorpus,
+    UsfmTextUpdater,
+    parse_usfm,
+)
+
+
+@pytest.mark.skip(reason="This is for manual testing only. Remove this decorator to run the test.")
+def test_parse_parallel_corpus():
+    """Manual check: the local source and target Paratext projects yield at least one parallel row."""
+    target = ParatextTextCorpus(USFM_TARGET_PROJECT_PATH, include_all_text=True, include_markers=True)
+    source = ParatextTextCorpus(USFM_SOURCE_PROJECT_PATH, include_all_text=True, include_markers=True)
+    parallel = StandardParallelTextCorpus(source, target, all_source_rows=True, all_target_rows=False)
+    # list() consumes the whole row iterator before the emptiness check.
+    assert list(parallel.get_rows())
+
+
+@dataclass
+class PretranslationDto:
+    """One pretranslation record; every field must be non-None."""
+
+    text_id: str
+    refs: List[str]
+    translation: str
+
+    def __post_init__(self):
+        # Fields are checked in declaration order, so the first None field is the one reported.
+        for field_name in ("text_id", "refs", "translation"):
+            if getattr(self, field_name) is None:
+                raise ValueError(f"{field_name} is a required field")
+
+
+PRETRANSLATION_PATH = TEST_DATA_PATH / "pretranslations.json"  # git-ignored input; supply a local copy
+PARATEXT_PROJECT_PATH = TEST_DATA_PATH / "project"  # contents are git-ignored; supply a local project
+
+
+@pytest.mark.skip(reason="This is for manual testing only. Remove this decorator to run the test.")
+def test_create_usfm_file():
+    """Manual check: run the pretranslations through every USFM file of the local Paratext project.
+
+    Files are opened with an explicit encoding so decoding does not depend on the platform locale.
+    """
+    settings: ParatextProjectSettings = FileParatextProjectSettingsParser(PARATEXT_PROJECT_PATH).parse()
+
+    # JSON interchange text is UTF-8; "utf-8-sig" additionally tolerates a leading byte-order mark.
+    with open(PRETRANSLATION_PATH, mode="r", encoding="utf-8-sig") as pretranslation_stream:
+        pretranslations_dto: List[PretranslationDto] = [
+            PretranslationDto(text_id=item["textId"], refs=item["refs"], translation=item["translation"])
+            for item in json.load(pretranslation_stream)
+        ]
+
+    # PretranslationDto rejects None fields, so no fallback values are needed when pairing refs with text.
+    pretranslations: List[Tuple[List[ScriptureRef], str]] = [
+        ([ScriptureRef.parse(ref, settings.versification).to_relaxed() for ref in p.refs], p.translation)
+        for p in pretranslations_dto
+    ]
+
+    for sfm_file_name in Path(PARATEXT_PROJECT_PATH).rglob(f"{settings.file_name_prefix}*{settings.file_name_suffix}"):
+        # Each file gets its own updater instance.
+        updater = UsfmTextUpdater(pretranslations, strip_all_text=True, prefer_existing_text=True)
+        # NOTE(review): assumes the project's USFM is UTF-8 -- confirm against the project settings.
+        with open(sfm_file_name, mode="r", encoding="utf-8-sig") as sfm_file:
+            usfm: str = sfm_file.read()
+        parse_usfm(usfm, updater, settings.stylesheet, settings.versification)
+        assert updater.get_usfm(settings.stylesheet) is not None
diff --git a/tests/corpora/test_usfm_text_updater.py b/tests/corpora/test_usfm_text_updater.py
index 80693ab..5545a69 100644
--- a/tests/corpora/test_usfm_text_updater.py
+++ b/tests/corpora/test_usfm_text_updater.py
@@ -30,6 +30,40 @@ def test_get_usfm_strip_all_text() -> None:
     assert "\\s\r\n" in target
 
 
+def test_get_usfm_prefer_existing():
+    """With prefer_existing_text=True, text already present in the USFM is kept over the row.
+
+    The expectations below rely on 1:6 having content and 1:7 being empty in the test USFM.
+    """
+    verse_6 = (scr_ref("MAT 1:6"), str("Text 6"))
+    verse_7 = (scr_ref("MAT 1:7"), str("Text 7"))
+
+    target = update_usfm([verse_6, verse_7], prefer_existing_text=True)
+
+    assert "\\id MAT - Test\r\n" in target
+    # 1:6 is expected to keep its existing text rather than the row text.
+    assert "\\v 6 Verse 6 content.\r\n" in target
+    # 1:7 is expected to take the row text.
+    assert "\\v 7 Text 7\r\n" in target
+
+
+def test_get_usfm_prefer_rows():
+    """With prefer_existing_text=False, the supplied rows replace verse text.
+
+    This is asserted for 1:6 as well as 1:7.
+    """
+    replacements = (("MAT 1:6", "Text 6"), ("MAT 1:7", "Text 7"))
+    rows = [(scr_ref(ref), str(text)) for ref, text in replacements]
+
+    target = update_usfm(rows, prefer_existing_text=False)
+
+    assert "\\id MAT - Test\r\n" in target
+    # Both verses are expected to carry the row text,
+    # since existing text is not preferred here.
+    assert "\\v 6 Text 6\r\n" in target
+    assert "\\v 7 Text 7\r\n" in target
+
+
 def test_get_usfm_verse_skip_note() -> None:
     rows = [
         (
@@ -185,7 +219,7 @@ def test_get_usfm_nonverse_char_style() -> None:
 def test_get_usfm_nonverse_paragraph() -> None:
     rows = [
         (
-            scr_ref("MAT 1:0/4:s"),
+            scr_ref("MAT 1:0/8:s"),
             str("The first chapter."),
         )
     ]
@@ -216,7 +250,7 @@ def test_get_usfm_nonverse_relaxed() -> None:
             str("The third cell of the table."),
         ),
     ]
-    target = update_usfm(rows, strict_comparison=False)
+    target = update_usfm(rows)
     assert "\\s The first chapter.\r\n" in target
     assert "\\v 1 First verse of the first chapter.\r\n" in target
     assert "\\tr \\tc1 The first cell of the table. \\tc2 The second cell of the table.\r\n" in target
@@ -297,6 +331,50 @@ def test_get_usfm_nonverse_replace_note() -> None:
     assert "\\ip The introductory paragraph. \\fe + \\ft This is a new endnote.\\fe*\r\n" in target
 
 
+def test_get_usfm_verse_double_va_vp() -> None:
+    """Replacing the text of MAT 3:1 keeps the verse's \\va and \\vp markers in the output."""
+    new_text = str("Updating later in the book to start.")
+
+    target = update_usfm([(scr_ref("MAT 3:1"), new_text)])
+
+    assert "\\id MAT - Test\r\n" in target
+    # The \va and \vp markers are expected directly in front of the new text.
+    expected = "\\v 1 \\va 2\\va*\\vp 1 (2)\\vp*Updating later in the book to start.\r\n"
+    assert expected in target
+
+
+def test_get_usfm_verse_pretranslations_before_text() -> None:
+    """Check that a MAT row is still applied when GEN rows precede it in the row list.
+
+    The row list starts with five GEN verses and ends with one MAT introduction row;
+    the assertion checks that the MAT row's text appears in the output.
+    """
+    # Every GEN row carries the same placeholder text.
+    earlier_book_text = str("Pretranslations before the start")
+    earlier_book_refs = (
+        "GEN 1:1",
+        "GEN 1:2",
+        "GEN 1:3",
+        "GEN 1:4",
+        "GEN 1:5",
+    )
+
+    rows = [(scr_ref(ref), earlier_book_text) for ref in earlier_book_refs]
+    # The MAT row comes last, after all GEN rows.
+    rows.append(
+        (
+            scr_ref("MAT 1:0/3:ip"),
+            str("The introductory paragraph."),
+        )
+    )
+
+    target = update_usfm(rows)
+
+    # The GEN rows are not asserted on; only the MAT introduction paragraph is checked.
+    expected_intro = "\\ip The introductory paragraph.\r\n"
+    assert expected_intro in target
+
+
 def scr_ref(*refs: str) -> List[ScriptureRef]:
     return [ScriptureRef.parse(ref) for ref in refs]
 
@@ -305,10 +383,10 @@ def update_usfm(
     rows: Optional[List[Tuple[List[ScriptureRef], str]]] = None,
     id_text: Optional[str] = None,
     strip_all_text: bool = False,
-    strict_comparison: bool = True,
+    prefer_existing_text: bool = False,
 ) -> str:
     source = read_usfm()
-    updater = UsfmTextUpdater(rows, id_text, strip_all_text, strict_comparison)
+    updater = UsfmTextUpdater(rows, id_text, strip_all_text, prefer_existing_text)
     parse_usfm(source, updater)
     return updater.get_usfm()
 
diff --git a/tests/corpora/test_usfm_tokenizer.py b/tests/corpora/test_usfm_tokenizer.py
index 74c30ac..d9b07be 100644
--- a/tests/corpora/test_usfm_tokenizer.py
+++ b/tests/corpora/test_usfm_tokenizer.py
@@ -7,22 +7,22 @@ def test_tokenize() -> None:
     usfm = _read_usfm()
     usfm_tokenizer = UsfmTokenizer()
     tokens = usfm_tokenizer.tokenize(usfm)
-    assert len(tokens) == 170
+    assert len(tokens) == 224
 
     assert tokens[0].type is UsfmTokenType.BOOK
     assert tokens[0].marker == "id"
     assert tokens[0].data == "MAT"
 
-    assert tokens[15].type is UsfmTokenType.TEXT
-    assert tokens[15].text == "Chapter One "
+    assert tokens[34].type is UsfmTokenType.TEXT
+    assert tokens[34].text == "Chapter One "
 
-    assert tokens[16].type is UsfmTokenType.VERSE
-    assert tokens[16].marker == "v"
-    assert tokens[16].data == "1"
+    assert tokens[35].type is UsfmTokenType.VERSE
+    assert tokens[35].marker == "v"
+    assert tokens[35].data == "1"
 
-    assert tokens[25].type is UsfmTokenType.NOTE
-    assert tokens[25].marker == "f"
-    assert tokens[25].data == "+"
+    assert tokens[44].type is UsfmTokenType.NOTE
+    assert tokens[44].marker == "f"
+    assert tokens[44].data == "+"
 
 
 def test_detokenize() -> None:
diff --git a/tests/testutils/corpora_test_helpers.py b/tests/testutils/corpora_test_helpers.py
index 38c0429..11edfdb 100644
--- a/tests/testutils/corpora_test_helpers.py
+++ b/tests/testutils/corpora_test_helpers.py
@@ -7,6 +7,8 @@
 from . import TEST_DATA_PATH
 
 USFM_TEST_PROJECT_PATH = TEST_DATA_PATH / "usfm" / "Tes"
+USFM_TARGET_PROJECT_PATH = TEST_DATA_PATH / "usfm" / "target"  # contents are git-ignored; used by manual tests
+USFM_SOURCE_PROJECT_PATH = TEST_DATA_PATH / "usfm" / "source"  # contents are git-ignored; used by manual tests
 USX_TEST_PROJECT_PATH = TEST_DATA_PATH / "usx" / "Tes"
 TEXT_TEST_PROJECT_PATH = TEST_DATA_PATH / "txt"
 CUSTOM_VERS_PATH = TEST_DATA_PATH / "custom.vrs"
diff --git a/tests/testutils/data/usfm/Tes/04LEVTes.SFM b/tests/testutils/data/usfm/Tes/03LEVTes.SFM
similarity index 100%
rename from tests/testutils/data/usfm/Tes/04LEVTes.SFM
rename to tests/testutils/data/usfm/Tes/03LEVTes.SFM
diff --git a/tests/testutils/data/usfm/Tes/41MATTes.SFM b/tests/testutils/data/usfm/Tes/41MATTes.SFM
index af634ba..43b2665 100644
--- a/tests/testutils/data/usfm/Tes/41MATTes.SFM
+++ b/tests/testutils/data/usfm/Tes/41MATTes.SFM
@@ -1,7 +1,11 @@
 \id MAT - Test
+\f + \fr 1.0 \ft \f*
 \h Matthew
 \mt Matthew
 \ip An introduction to Matthew\fe + \ft This is an endnote.\fe*
+\p Here is another paragraph.
+\p and with a \w keyword|a special concept\w* in it.
+\p and a \weirdtaglookingthing that is not an actual tag.
 \c 1
 \s Chapter One
 \v 1 Chapter \pn one\+pro WON\+pro*\pn*, verse one.\f + \fr 1:1: \ft This is a footnote.\f*
@@ -14,11 +18,15 @@
 \li2 verse four,
 \v 5 Chapter one,
 \li2 verse \fig Figure 1|src="image1.png" size="col" ref="1:5"\fig* five.
+\v 6 Verse 6 content.
+\v 7
+\v 8
 \c 2
 \tr \tc1 Row one, column one. \tc2 Row one, column two.
 \tr \tc1 Row two, column one. \tc2 Row two, column two.
 \s1 Chapter \it Two \it*
 \p
+\p
 \v 1 Chapter \add two\add*, verse \f + \fr 2:1: \ft This is a footnote.\f*one.
 \v 2-3 Chapter two, // verse \fm ∆\fm*two.
 \esb
@@ -29,7 +37,9 @@
 \v 4b Chapter two, verse four.
 \p
 \v 6 Chapter two, verse \w six|strong="12345" \w*.
+\p
 \v 6 Bad verse.
+\p
 \v 5 Chapter two, verse five \rq (MAT 3:1)\rq*.
 \v 7a Chapter two, verse seven A,
 \s Section header \ts-s\*
@@ -44,3 +54,14 @@
 \v 10 \tc3-4 \qt-s |Jesus\*Chapter 2 verse 10\qt-e\*
 \v 11-12
 \restore restore information
+\c 3
+\cl PSALM 3
+\s1 Section 1
+\mt1 Major Title 1
+\d \va (1)\va* Description
+\q1
+\v 1 \va 2\va*\vp 1 (2)\vp* Chapter 3 verse 1.
+\q1 3.1 part 2
+\b
+\q1 3.1 part 3
+\q1 3.1 part 4
diff --git a/tests/testutils/data/usfm/Tes/44JHNTes.SFM b/tests/testutils/data/usfm/Tes/44JHNTes.SFM
new file mode 100644
index 0000000..e69de29