From bc41e28e27b17e9512a1a99f367cd943eaafffa5 Mon Sep 17 00:00:00 2001 From: mshannon-sil Date: Tue, 12 Nov 2024 15:42:33 -0500 Subject: [PATCH] Revert "raise error when id tag doesn't match filename book id" This reverts commit 8679b785cdecb2a427726baf55b023a80496498e. --- .../corpora/paratext_backup_text_corpus.py | 32 +++++++---------- machine/corpora/paratext_text_corpus.py | 29 ++++++---------- .../test_paratext_backup_text_corpus.py | 30 +++------------- tests/corpora/test_paratext_text_corpus.py | 14 -------- tests/testutils/corpora_test_helpers.py | 12 ------- .../testutils/data/usfm/invalid_id/07JDG.SFM | 5 --- .../data/usfm/invalid_id/Settings.xml | 34 ------------------- .../testutils/data/usfm/invalid_id/custom.vrs | 31 ----------------- .../testutils/data/usfm/mismatch_id/07JDG.SFM | 5 --- .../data/usfm/mismatch_id/Settings.xml | 34 ------------------- .../data/usfm/mismatch_id/custom.vrs | 31 ----------------- 11 files changed, 27 insertions(+), 230 deletions(-) delete mode 100644 tests/corpora/test_paratext_text_corpus.py delete mode 100644 tests/testutils/data/usfm/invalid_id/07JDG.SFM delete mode 100644 tests/testutils/data/usfm/invalid_id/Settings.xml delete mode 100644 tests/testutils/data/usfm/invalid_id/custom.vrs delete mode 100644 tests/testutils/data/usfm/mismatch_id/07JDG.SFM delete mode 100644 tests/testutils/data/usfm/mismatch_id/Settings.xml delete mode 100644 tests/testutils/data/usfm/mismatch_id/custom.vrs diff --git a/machine/corpora/paratext_backup_text_corpus.py b/machine/corpora/paratext_backup_text_corpus.py index 34bf8f9f..77d70654 100644 --- a/machine/corpora/paratext_backup_text_corpus.py +++ b/machine/corpora/paratext_backup_text_corpus.py @@ -19,26 +19,18 @@ def __init__(self, filename: StrPath, include_markers: bool = False, include_all for sfm_entry in archive.filelist: book_id = settings.get_book_id(sfm_entry.filename) if book_id: - text = UsfmZipText( - settings.stylesheet, - settings.encoding, - book_id, - filename, - sfm_entry.filename, - versification, - include_markers, - include_all_text, - settings.name, + texts.append( + UsfmZipText( + settings.stylesheet, + settings.encoding, + book_id, + filename, + sfm_entry.filename, + versification, + include_markers, + include_all_text, + settings.name, + ) ) - with text.get_rows() as rows: - row = next(rows, None) - if row and row.ref.book != book_id: - if row.ref.book == "": - raise ValueError(f"The \\id tag in {sfm_entry.filename} is invalid.") - raise ValueError( - f"The \\id tag {row.ref.book} in {sfm_entry.filename}" - f" does not match filename book id {book_id}." - ) - texts.append(text) super().__init__(versification, texts) diff --git a/machine/corpora/paratext_text_corpus.py b/machine/corpora/paratext_text_corpus.py index 0831ae6a..24c24dd3 100644 --- a/machine/corpora/paratext_text_corpus.py +++ b/machine/corpora/paratext_text_corpus.py @@ -18,24 +18,17 @@ def __init__(self, project_dir: StrPath, include_markers: bool = False, include_ for sfm_filename in Path(project_dir).glob(f"{settings.file_name_prefix}*{settings.file_name_suffix}"): book_id = settings.get_book_id(sfm_filename.name) if book_id: - text = UsfmFileText( - settings.stylesheet, - settings.encoding, - book_id, - sfm_filename, - versification, - include_markers, - include_all_text, - settings.name, + texts.append( + UsfmFileText( + settings.stylesheet, + settings.encoding, + book_id, + sfm_filename, + versification, + include_markers, + include_all_text, + settings.name, + ) ) - with text.get_rows() as rows: - row = next(rows, None) - if row and row.ref.book != book_id: - if row.ref.book == "": - raise ValueError(f"The \\id tag in {sfm_filename} is invalid.") - raise ValueError( - f"The \\id tag {row.ref.book} in {sfm_filename} does not match filename book id {book_id}." - ) - texts.append(text) super().__init__(versification, texts) diff --git a/tests/corpora/test_paratext_backup_text_corpus.py b/tests/corpora/test_paratext_backup_text_corpus.py index 31d85bff..57907c33 100644 --- a/tests/corpora/test_paratext_backup_text_corpus.py +++ b/tests/corpora/test_paratext_backup_text_corpus.py @@ -2,14 +2,9 @@ from pathlib import Path from tempfile import TemporaryDirectory -from typing import Any, ContextManager, Optional +from typing import Any, ContextManager -from pytest import raises -from testutils.corpora_test_helpers import ( - create_test_paratext_backup, - create_test_paratext_backup_invalid_id, - create_test_paratext_backup_mismatch_id, -) +from testutils.corpora_test_helpers import create_test_paratext_backup from machine.corpora import ParatextBackupTextCorpus @@ -33,27 +28,10 @@ def test_get_text() -> None: assert not any(jhn.get_rows()) -def test_invalid_id() -> None: - with raises(ValueError, match=r"The \\id tag in .* is invalid."): - with _TestEnvironment("invalid_id") as env: - env.corpus.get_text("JDG") - - -def test_mismatch_id() -> None: - with raises(ValueError, match=r"The \\id tag .* in .* does not match filename book id .*"): - with _TestEnvironment("mismatch_id") as env: - env.corpus.get_text("JDG") - - class _TestEnvironment(ContextManager["_TestEnvironment"]): - def __init__(self, project_folder_name: Optional[str] = None) -> None: + def __init__(self) -> None: self._temp_dir = TemporaryDirectory() - if project_folder_name == "invalid_id": - archive_filename = create_test_paratext_backup_invalid_id(Path(self._temp_dir.name)) - elif project_folder_name == "mismatch_id": - archive_filename = create_test_paratext_backup_mismatch_id(Path(self._temp_dir.name)) - else: - archive_filename = create_test_paratext_backup(Path(self._temp_dir.name)) + archive_filename = create_test_paratext_backup(Path(self._temp_dir.name)) self._corpus = ParatextBackupTextCorpus(archive_filename) @property diff --git a/tests/corpora/test_paratext_text_corpus.py b/tests/corpora/test_paratext_text_corpus.py deleted file mode 100644 index ee3906cb..00000000 --- a/tests/corpora/test_paratext_text_corpus.py +++ /dev/null @@ -1,14 +0,0 @@ -from pytest import raises -from testutils.corpora_test_helpers import USFM_INVALID_ID_PROJECT_PATH, USFM_MISMATCH_ID_PROJECT_PATH - -from machine.corpora import ParatextTextCorpus - - -def test_paratext_text_corpus_invalid_id() -> None: - with raises(ValueError, match=r"The \\id tag in .* is invalid."): - ParatextTextCorpus(USFM_INVALID_ID_PROJECT_PATH, include_all_text=True) - - -def test_paratext_text_corpus_mismatch_id() -> None: - with raises(ValueError, match=r"The \\id tag .* in .* does not match filename book id .*"): - ParatextTextCorpus(USFM_MISMATCH_ID_PROJECT_PATH, include_all_text=True) diff --git a/tests/testutils/corpora_test_helpers.py b/tests/testutils/corpora_test_helpers.py index 2a2fc502..4fd93416 100644 --- a/tests/testutils/corpora_test_helpers.py +++ b/tests/testutils/corpora_test_helpers.py @@ -9,8 +9,6 @@ USFM_TEST_PROJECT_PATH = TEST_DATA_PATH / "usfm" / "Tes" USFM_TARGET_PROJECT_PATH = TEST_DATA_PATH / "usfm" / "target" USFM_SOURCE_PROJECT_PATH = TEST_DATA_PATH / "usfm" / "source" -USFM_MISMATCH_ID_PROJECT_PATH = TEST_DATA_PATH / "usfm" / "mismatch_id" -USFM_INVALID_ID_PROJECT_PATH = TEST_DATA_PATH / "usfm" / "invalid_id" USX_TEST_PROJECT_PATH = TEST_DATA_PATH / "usx" / "Tes" TEXT_TEST_PROJECT_PATH = TEST_DATA_PATH / "txt" CUSTOM_VERS_PATH = TEST_DATA_PATH / "custom.vrs" @@ -26,16 +24,6 @@ def create_test_paratext_backup(temp_dir: Path) -> Path: return temp_dir / "Tes.zip" -def create_test_paratext_backup_invalid_id(temp_dir: Path) -> Path: - shutil.make_archive(str(temp_dir / "invalid_id"), "zip", USFM_INVALID_ID_PROJECT_PATH) - return temp_dir / "invalid_id.zip" - - -def create_test_paratext_backup_mismatch_id(temp_dir: Path) -> Path: - shutil.make_archive(str(temp_dir / "mismatch_id"), "zip", USFM_MISMATCH_ID_PROJECT_PATH) - return temp_dir / "mismatch_id.zip" - - def verse_ref(segment: TextRow) -> VerseRef: assert isinstance(segment.ref, VerseRef) return segment.ref diff --git a/tests/testutils/data/usfm/invalid_id/07JDG.SFM b/tests/testutils/data/usfm/invalid_id/07JDG.SFM deleted file mode 100644 index 40d866f3..00000000 --- a/tests/testutils/data/usfm/invalid_id/07JDG.SFM +++ /dev/null @@ -1,5 +0,0 @@ -\id JGS - Test -\h Judges -\mt Judges -\c 1 -\v 1 Chapter one, verse one. diff --git a/tests/testutils/data/usfm/invalid_id/Settings.xml b/tests/testutils/data/usfm/invalid_id/Settings.xml deleted file mode 100644 index 45cf3eab..00000000 --- a/tests/testutils/data/usfm/invalid_id/Settings.xml +++ /dev/null @@ -1,34 +0,0 @@ - - usfm.sty - 4 - en::: - English - 8.0.100.76 - Test - 65001 - T - - NFC - invalid_id - a7e0b3ce0200736062f9f810a444dbfbe64aca35 - Charis SIL - 12 - - - - 41MAT - - .SFM - Major::BiblicalTerms.xml - F - F - F - Public - Standard:: - - 3 - 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 - 000000000000000000000000000000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000 - - - \ No newline at end of file diff --git a/tests/testutils/data/usfm/invalid_id/custom.vrs b/tests/testutils/data/usfm/invalid_id/custom.vrs deleted file mode 100644 index 9c1cd387..00000000 --- a/tests/testutils/data/usfm/invalid_id/custom.vrs +++ /dev/null @@ -1,31 +0,0 @@ -# custom.vrs - -LEV 14:56 -ROM 14:26 -REV 12:17 -TOB 5:22 -TOB 10:12 -SIR 23:28 -ESG 1:22 -ESG 3:15 -ESG 5:14 -ESG 8:17 -ESG 10:14 -SIR 33:33 -SIR 41:24 -BAR 1:22 -4MA 7:25 -4MA 12:20 - -# deliberately missing verses --ROM 16:26 --ROM 16:27 --3JN 1:15 --S3Y 1:49 --ESG 4:6 --ESG 9:5 --ESG 9:30 - -LEV 14:55 = LEV 14:55 -LEV 14:55 = LEV 14:56 -LEV 14:56 = LEV 14:57 diff --git a/tests/testutils/data/usfm/mismatch_id/07JDG.SFM b/tests/testutils/data/usfm/mismatch_id/07JDG.SFM deleted file mode 100644 index bc7c876f..00000000 --- a/tests/testutils/data/usfm/mismatch_id/07JDG.SFM +++ /dev/null @@ -1,5 +0,0 @@ -\id JUD - Test -\h Judges -\mt Judges -\c 1 -\v 1 Chapter one, verse one. diff --git a/tests/testutils/data/usfm/mismatch_id/Settings.xml b/tests/testutils/data/usfm/mismatch_id/Settings.xml deleted file mode 100644 index a068c35c..00000000 --- a/tests/testutils/data/usfm/mismatch_id/Settings.xml +++ /dev/null @@ -1,34 +0,0 @@ - - usfm.sty - 4 - en::: - English - 8.0.100.76 - Test - 65001 - T - - NFC - mismatch_id - a7e0b3ce0200736062f9f810a444dbfbe64aca35 - Charis SIL - 12 - - - - 41MAT - - .SFM - Major::BiblicalTerms.xml - F - F - F - Public - Standard:: - - 3 - 000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 - 000000000000000000000000000000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000 - - - \ No newline at end of file diff --git a/tests/testutils/data/usfm/mismatch_id/custom.vrs b/tests/testutils/data/usfm/mismatch_id/custom.vrs deleted file mode 100644 index 9c1cd387..00000000 --- a/tests/testutils/data/usfm/mismatch_id/custom.vrs +++ /dev/null @@ -1,31 +0,0 @@ -# custom.vrs - -LEV 14:56 -ROM 14:26 -REV 12:17 -TOB 5:22 -TOB 10:12 -SIR 23:28 -ESG 1:22 -ESG 3:15 -ESG 5:14 -ESG 8:17 -ESG 10:14 -SIR 33:33 -SIR 41:24 -BAR 1:22 -4MA 7:25 -4MA 12:20 - -# deliberately missing verses --ROM 16:26 --ROM 16:27 --3JN 1:15 --S3Y 1:49 --ESG 4:6 --ESG 9:5 --ESG 9:30 - -LEV 14:55 = LEV 14:55 -LEV 14:55 = LEV 14:56 -LEV 14:56 = LEV 14:57