From d307a1bde16094927ee7fafca3a588241c94560f Mon Sep 17 00:00:00 2001 From: mshannon-sil <131058912+mshannon-sil@users.noreply.github.com> Date: Fri, 17 Nov 2023 10:55:19 -0500 Subject: [PATCH] convert book abbreviation in \id marker to uppercase (#60) * convert book id to uppercase * add unit test, fix typo in error message --- machine/corpora/usfm_file_text.py | 4 ++-- machine/corpora/usfm_zip_text.py | 4 ++-- tests/corpora/test_usfm_file_text.py | 16 ++++++++++++++++ tests/testutils/data/usfm/Tes/04LEVTes.SFM | 2 +- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/machine/corpora/usfm_file_text.py b/machine/corpora/usfm_file_text.py index 5a81153..4ba9d05 100644 --- a/machine/corpora/usfm_file_text.py +++ b/machine/corpora/usfm_file_text.py @@ -35,5 +35,5 @@ def _get_id(filename: StrPath, encoding: str) -> str: index = id.find(" ") if index != -1: id = id[:index] - return id.strip() - raise RuntimeError("The USFM does not contain and 'id' marker.") + return id.strip().upper() + raise RuntimeError("The USFM does not contain an 'id' marker.") diff --git a/machine/corpora/usfm_zip_text.py b/machine/corpora/usfm_zip_text.py index 753d15d..42b42b9 100644 --- a/machine/corpora/usfm_zip_text.py +++ b/machine/corpora/usfm_zip_text.py @@ -42,5 +42,5 @@ def _get_id(archive_filename: StrPath, path: str, encoding: str) -> str: index = id.find(" ") if index != -1: id = id[:index] - return id.strip() - raise RuntimeError("The USFM does not contain and 'id' marker.") + return id.strip().upper() + raise RuntimeError("The USFM does not contain an 'id' marker.") diff --git a/tests/corpora/test_usfm_file_text.py b/tests/corpora/test_usfm_file_text.py index e769e2d..eb34169 100644 --- a/tests/corpora/test_usfm_file_text.py +++ b/tests/corpora/test_usfm_file_text.py @@ -136,3 +136,19 @@ def test_get_rows_include_markers() -> None: assert verse_ref(rows[16]).exact_equals(VerseRef.from_string("MAT 2:10", corpus.versification)) assert rows[16].text == "\\tc3-4 Chapter 2 verse 10" 
+ + +def test_usfm_file_text_corpus_lowercase_usfm_id() -> None: + corpus = UsfmFileTextCorpus(USFM_TEST_PROJECT_PATH) + + text = corpus.get_text("LEV") + assert text is not None + rows = list(text) + + assert len(rows) == 2 + + assert verse_ref(rows[0]).exact_equals(VerseRef.from_string("LEV 14:55", corpus.versification)) + assert rows[0].text == "Chapter fourteen, verse fifty-five. Segment b." + + assert verse_ref(rows[1]).exact_equals(VerseRef.from_string("LEV 14:56", corpus.versification)) + assert rows[1].text == "Chapter fourteen, verse fifty-six." diff --git a/tests/testutils/data/usfm/Tes/04LEVTes.SFM b/tests/testutils/data/usfm/Tes/04LEVTes.SFM index 6d57d14..6fc8cd9 100644 --- a/tests/testutils/data/usfm/Tes/04LEVTes.SFM +++ b/tests/testutils/data/usfm/Tes/04LEVTes.SFM @@ -1,4 +1,4 @@ -\id LEV - Test +\id lev - Test \h Leviticus \mt Leviticus \c 14