From d5a5b54ef8fed6367be8323045fb92c371a554b8 Mon Sep 17 00:00:00 2001 From: Isaac Schifferer Date: Fri, 10 Nov 2023 09:53:43 -0600 Subject: [PATCH] Move get_books to parse.py and add get_chapters --- machine/scripture/__init__.py | 3 +- machine/scripture/canon.py | 32 +------- machine/scripture/parse.py | 150 ++++++++++++++++++++++++++++++++++ tests/scripture/test_canon.py | 33 -------- tests/scripture/test_parse.py | 79 ++++++++++++++++++ 5 files changed, 232 insertions(+), 65 deletions(-) create mode 100644 machine/scripture/parse.py delete mode 100644 tests/scripture/test_canon.py create mode 100644 tests/scripture/test_parse.py diff --git a/machine/scripture/__init__.py b/machine/scripture/__init__.py index 7e93de3..6435f43 100644 --- a/machine/scripture/__init__.py +++ b/machine/scripture/__init__.py @@ -8,13 +8,13 @@ NON_CANONICAL_IDS, book_id_to_number, book_number_to_id, - get_books, is_book_id_valid, is_canonical, is_nt, is_ot, is_ot_nt, ) +from .parse import get_books, get_chapters from .verse_ref import ( NULL_VERSIFICATION, VERSE_RANGE_SEPARATOR, @@ -52,6 +52,7 @@ def __getattr__(name: str) -> Any: "FIRST_BOOK", "get_bbbcccvvv", "get_books", + "get_chapters", "is_book_id_valid", "is_canonical", "is_nt", diff --git a/machine/scripture/canon.py b/machine/scripture/canon.py index ae7166e..f986c49 100644 --- a/machine/scripture/canon.py +++ b/machine/scripture/canon.py @@ -1,4 +1,4 @@ -from typing import List, Set, Union +from typing import Union ALL_BOOK_IDS = [ "GEN", @@ -161,36 +161,6 @@ def book_id_to_number(id: str) -> int: return BOOK_NUMBERS.get(id.upper(), 0) -def get_books(books: Union[str, List[str]]) -> Set[int]: - if isinstance(books, str): - books = books.split(",") - book_set: Set[int] = set() - for book_id in books: - book_id = book_id.strip().strip("*").upper() - if book_id == "NT": - book_set.update(range(40, 67)) - elif book_id == "OT": - book_set.update(range(1, 40)) - elif book_id.startswith("-"): - # remove the book from the set - book_id = book_id[1:] - book_num = book_id_to_number(book_id) - if book_num == 0: - raise RuntimeError(f"{book_id} is an invalid book ID.") - elif book_num not in book_set: - raise RuntimeError( - f"{book_id}:{book_num} cannot be removed as it is not in the existing book set of {book_set}" - ) - else: - book_set.remove(book_num) - else: - book_num = book_id_to_number(book_id) - if book_num == 0: - raise RuntimeError(f"{book_id} is an invalid book ID.") - book_set.add(book_num) - return book_set - - def is_nt(book_num: int) -> bool: return book_num >= 40 and book_num < 67 diff --git a/machine/scripture/parse.py b/machine/scripture/parse.py new file mode 100644 index 0000000..87a18f4 --- /dev/null +++ b/machine/scripture/parse.py @@ -0,0 +1,150 @@ +from typing import List, Set, Union +import re + +from .verse_ref import Versification +from .canon import book_id_to_number + + +USFM_FILE_PATTERN = re.compile(r"(?<=[A-Z]{3})\d+\.usfm") +BOOK_SPAN = re.compile(r"[A-Z]{3}-[A-Z]{3}") + + +def get_books(books: Union[str, List[str]]) -> Set[int]: + if isinstance(books, str): + books = books.split(",") + book_set: Set[int] = set() + for book_id in books: + book_id = book_id.strip().strip("*").upper() + if book_id == "NT": + book_set.update(range(40, 67)) + elif book_id == "OT": + book_set.update(range(1, 40)) + elif book_id.startswith("-"): + # remove the book from the set + book_id = book_id[1:] + book_num = book_id_to_number(book_id) + if book_num == 0: + raise RuntimeError(f"{book_id} is an invalid book ID.") + elif book_num not in book_set: + raise RuntimeError( + f"{book_id}:{book_num} cannot be removed as it is not in the existing book set of {book_set}" + ) + else: + book_set.remove(book_num) + else: + book_num = book_id_to_number(book_id) + if book_num == 0: + raise RuntimeError(f"{book_id} is an invalid book ID.") + book_set.add(book_num) + return book_set + + +# Output format: { book_num: [chapters] } +# An empty list, i.e. book_num: [] signifies the inclusion of all chapters, while the absence of an entry means the book is not included +def get_chapters(chapter_selections: str) -> dict: + versification = Versification.create("Original") + chapters = {} + spans = [] + subtractions = [] + + # Normalize books written as "MAT01.usfm" to "MAT" + chapter_selections = re.sub(USFM_FILE_PATTERN, "", chapter_selections) + + if ";" not in chapter_selections and not any( + s.isdigit() and (i == len(chapter_selections) - 1 or not chapter_selections[i + 1].isalpha()) + for i, s in enumerate(chapter_selections) + ): # Backwards compatibility with get_books syntax: + sections = chapter_selections.split(",") + else: + sections = chapter_selections.split(";") + + for section in sections: + if section == "": + continue + elif section.startswith("-"): + subtractions.append(section[1:]) + elif any( + s.isdigit() and (i == len(section) - 1 or not section[i + 1].isalpha()) for i, s in enumerate(section) + ): # Specific chapters from one book + book = book_id_to_number(section[:3]) + + if book == 0: + raise RuntimeError(f"{section[:3]} is an invalid book ID.") + + chapter_nums = section[3:].split(",") + chapters[book] = set() + last_chapter = versification.get_last_chapter(book) + for chapter_num in chapter_nums: + if "-" in chapter_num: + start, end = chapter_num.split("-") + for i in range(int(start), min(int(end), last_chapter) + 1): + chapters[book].add(i) + elif int(chapter_num) <= last_chapter: + chapters[book].add(int(chapter_num)) + + # Delete entry if no chapter numbers were valid + if len(chapters[book]) == 0: + del chapters[book] + elif "-" in section: # Spans of books + spans.append(section) + elif section == "OT": + for i in range(1, 40): + if i not in chapters: + chapters[i] = set() + elif section == "NT": + for i in range(40, 67): + if i not in chapters: + chapters[i] = set() + else: # Single whole book + book = book_id_to_number(section) + if book == 0: + raise RuntimeError(f"{section} is an invalid book ID.") + + if book not in chapters: + chapters[book] = set() + + for span in spans: + ends = span.split("-") + if len(ends) != 2 or book_id_to_number(ends[0]) == 0 or book_id_to_number(ends[1]) == 0: + raise RuntimeError(f"{span} is an invalid book range.") + + for i in range(book_id_to_number(ends[0]), book_id_to_number(ends[1]) + 1): + if not i in chapters: + chapters[i] = set() + + for subtraction in subtractions: + if re.match(BOOK_SPAN, subtraction) is not None: + raise RuntimeError("Cannot subtract spans of books.") + + book = book_id_to_number(subtraction[:3]) + if book == 0: + raise RuntimeError(f"{subtraction[:3]} is an invalid book ID.") + if book not in chapters: + raise RuntimeError(f"{subtraction[:3]} cannot be removed as it is not in the existing book selection.") + + # Subtract entire book + if len(subtraction) == 3: + del chapters[book] + continue + + if len(chapters[book]) == 0: + chapters[book] = {i + 1 for i in range(versification.get_last_chapter(book))} + chapter_nums = subtraction[3:].split(",") + for chapter_num in chapter_nums: + if "-" in chapter_num: + start, end = chapter_num.split("-") + for i in range(int(start), int(end) + 1): + chapters[book].discard(i) + else: + chapters[book].discard(int(chapter_num)) + + # Delete entry if no chapter numbers are left or make entry the empty set again if all chapters are still present + if len(chapters[book]) == 0: + del chapters[book] + elif len(chapters[book]) == versification.get_last_chapter(book): + chapters[book] = set() + + for k, v in chapters.items(): + chapters[k] = sorted(list(v)) + + return chapters diff --git a/tests/scripture/test_canon.py b/tests/scripture/test_canon.py deleted file mode 100644 index 0b2f50c..0000000 --- a/tests/scripture/test_canon.py +++ /dev/null @@ -1,33 +0,0 @@ -from pytest import raises - -from machine.scripture import get_books - - -def test_get_books() -> None: - assert get_books("MAL") == {39} - assert get_books("GEN,EXO") == {1, 2} - assert get_books("GEN,EXO") == get_books(["GEN", "EXO"]) - assert get_books("OT") == {i for i in range(1, 40)} - assert get_books("NT") == {i for i in range(40, 67)} - whole_bible = {i for i in range(1, 67)} - assert get_books("NT,OT") == whole_bible - - whole_bible.remove(2) # EXO - whole_bible.remove(41) # MRK - assert get_books("NT,OT,-MRK,-EXO") == whole_bible - - with raises(RuntimeError): - # invalid name - get_books("HELLO_WORLD") - - with raises(RuntimeError): - # subtracting book from nothing - get_books("-MRK") - - with raises(RuntimeError): - # invalid subtracting name - get_books("NT,OT,-HELLO_WORLD") - - with raises(RuntimeError): - # subtracting book from wrong set - get_books("OT,-MRK,NT") diff --git a/tests/scripture/test_parse.py b/tests/scripture/test_parse.py new file mode 100644 index 0000000..4454383 --- /dev/null +++ b/tests/scripture/test_parse.py @@ -0,0 +1,79 @@ +from pytest import raises + +from machine.scripture import get_books, get_chapters + + +def test_get_books() -> None: + assert get_books("MAL") == {39} + assert get_books("GEN,EXO") == {1, 2} + assert get_books("GEN,EXO") == get_books(["GEN", "EXO"]) + assert get_books("OT") == {i for i in range(1, 40)} + assert get_books("NT") == {i for i in range(40, 67)} + whole_bible = {i for i in range(1, 67)} + assert get_books("NT,OT") == whole_bible + + whole_bible.remove(2) # EXO + whole_bible.remove(41) # MRK + assert get_books("NT,OT,-MRK,-EXO") == whole_bible + + with raises(RuntimeError): + # invalid name + get_books("HELLO_WORLD") + + with raises(RuntimeError): + # subtracting book from nothing + get_books("-MRK") + + with raises(RuntimeError): + # invalid subtracting name + get_books("NT,OT,-HELLO_WORLD") + + with raises(RuntimeError): + # subtracting book from wrong set + get_books("OT,-MRK,NT") + + +def test_get_chapters() -> None: + assert get_chapters("MAL") == {39: []} + assert get_chapters("GEN,EXO") == {1: [], 2: []} + assert get_chapters("OT") == {i: [] for i in range(1, 40)} + assert get_chapters("NT") == {i: [] for i in range(40, 67)} + whole_bible = {i: [] for i in range(1, 67)} + assert get_chapters("NT,OT") == whole_bible + + del whole_bible[2] # EXO + del whole_bible[41] # MRK + assert get_chapters("NT,OT,-MRK,-EXO") == whole_bible + + assert get_chapters("MAT;MRK") == {40: [], 41: []} + assert get_chapters("MAT1,2,3") == {40: [1, 2, 3]} + assert get_chapters("MAT400-500") == {} + assert get_chapters("MAT1-4,12,9,100") == {40: [1, 2, 3, 4, 9, 12]} + assert get_chapters("MAT-LUK") == {40: [], 41: [], 42: []} + assert get_chapters("2JN-3JN;EXO1,8,3-5;GEN") == {1: [], 2: [1, 3, 4, 5, 8], 63: [], 64: []} + + assert get_chapters("NT;OT;-MRK;-EXO") == whole_bible + test_bible = {i: [] for i in range(40, 67)} + test_chapters_mat = [1, 2] + [i for i in range(6, 17)] + [i for i in range(18, 29)] + test_bible[40] = test_chapters_mat + test_chapters_rev = [i for i in range(1, 21)] + test_bible[66] = test_chapters_rev + assert get_chapters("NT;-MAT3-5,17;-REV21,22") == test_bible + + assert get_chapters("MAT40.usfm;MRK41.usfm1,2,3,4-6;LUK") == {40: [], 41: [1, 2, 3, 4, 5, 6], 42: []} + + with raises(RuntimeError): + # invalid name + get_chapters("HELLO_WORLD") + + with raises(RuntimeError): + # subtracting book from nothing + get_chapters("-MRK") + + with raises(RuntimeError): + # invalid subtracting name + get_chapters("NT;OT;-HELLO_WORLD") + + with raises(RuntimeError): + # subtracting range + get_chapters("OT;NT;-MAT-LUK")