From d3af847c6505a45472d1d9a4cd22095885f5a6be Mon Sep 17 00:00:00 2001
From: mshannon-sil <matthew_shannon@sil.org>
Date: Sat, 13 Jul 2024 18:13:47 -0400
Subject: [PATCH] port commit 436a67d that moved logic to parallel text corpus

---
 .../corpora/standard_parallel_text_corpus.py  | 28 +++++++++++++++++--
 tests/corpora/test_scripture_text_corpus.py   |  2 --
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/machine/corpora/standard_parallel_text_corpus.py b/machine/corpora/standard_parallel_text_corpus.py
index 303d4016..2a4ff7ab 100644
--- a/machine/corpora/standard_parallel_text_corpus.py
+++ b/machine/corpora/standard_parallel_text_corpus.py
@@ -14,6 +14,7 @@
 from .dictionary_alignment_corpus import DictionaryAlignmentCorpus
 from .parallel_text_corpus import ParallelTextCorpus
 from .parallel_text_row import ParallelTextRow
+from .scripture_text_corpus import ScriptureTextCorpus
 from .text_corpus import TextCorpus
 from .text_row import TextRow, TextRowFlags
 
@@ -81,6 +82,12 @@ def _get_rows(self) -> Generator[ParallelTextRow, None, None]:
             alignment_iterator = stack.enter_context(self._alignment_corpus.get_rows(text_ids))
 
             range_info = _RangeInfo()
+            if isinstance(self._target_corpus, ScriptureTextCorpus) and isinstance(
+                self._source_corpus, ScriptureTextCorpus
+            ):
+                range_info.versification = self._target_corpus.versification
+            else:
+                range_info.versification = None
             source_same_ref_rows: List[TextRow] = []
             target_same_ref_rows: List[TextRow] = []
 
@@ -241,6 +248,16 @@ def _create_rows(
         else:
             raise ValueError("Either a source or target must be specified.")
 
+        src_refs = [] if src_row is None else [src_row.ref]
+        trg_refs = [] if trg_row is None else [trg_row.ref]
+
+        if len(trg_refs) == 0 and isinstance(self._target_corpus, ScriptureTextCorpus):
+            for r in src_refs:
+                r: VerseRef
+                t = r.copy()
+                t.change_versification(self._target_corpus.versification)
+                trg_refs.append(t)
+
         if src_row is None:
             source_flags = TextRowFlags.IN_RANGE if force_source_in_range else TextRowFlags.NONE
         else:
@@ -253,8 +270,8 @@ def _create_rows(
 
         yield ParallelTextRow(
             text_id,
-            [] if src_row is None else [src_row.ref],
-            [] if trg_row is None else [trg_row.ref],
+            src_refs,
+            trg_refs,
             [] if src_row is None else src_row.segment,
             [] if trg_row is None else trg_row.segment,
             aligned_word_pairs,
@@ -300,12 +317,19 @@ class _RangeInfo:
     is_target_sentence_start: bool = field(default=False, init=False)
     is_source_empty: bool = field(default=True, init=False)
     is_target_empty: bool = field(default=True, init=False)
+    versification: Optional[Versification] = field(default=None, init=False)
 
     @property
     def is_in_range(self) -> bool:
         return len(self.source_refs) > 0 and len(self.target_refs) > 0
 
     def create_row(self) -> ParallelTextRow:
+        if len(self.target_refs) == 0 and self.versification is not None:
+            for r in self.source_refs:
+                r: VerseRef
+                t = r.copy()
+                t.change_versification(self.versification)
+                self.target_refs.append(t)
         row = ParallelTextRow(
             self.text_id,
             self.source_refs.copy(),
diff --git a/tests/corpora/test_scripture_text_corpus.py b/tests/corpora/test_scripture_text_corpus.py
index 486967c8..5afb7dd0 100644
--- a/tests/corpora/test_scripture_text_corpus.py
+++ b/tests/corpora/test_scripture_text_corpus.py
@@ -13,7 +13,6 @@ def test_extract_scripture_corpus() -> None:
     text, orig_vref, corpus_vref = lines[0]
     assert text == ""
     assert orig_vref.exact_equals(VerseRef.from_string("GEN 1:1", ORIGINAL_VERSIFICATION))
-    assert corpus_vref is None
 
     text, orig_vref, corpus_vref = lines[3167]
     assert text == "Chapter fourteen, verse fifty-five. Segment b."
@@ -28,7 +27,6 @@ def test_extract_scripture_corpus() -> None:
     text, orig_vref, corpus_vref = lines[10727]
     assert text == "<range>"
     assert orig_vref.exact_equals(VerseRef.from_string("1CH 12:4", ORIGINAL_VERSIFICATION))
-    assert corpus_vref is None
 
     text, orig_vref, corpus_vref = lines[10731]
     assert text == "<range>"