diff --git a/machine/scripture/parse.py b/machine/scripture/parse.py index 06e66e5..1ed76a5 100644 --- a/machine/scripture/parse.py +++ b/machine/scripture/parse.py @@ -1,12 +1,9 @@ import re -from typing import List, Set, Union +from typing import Dict, List, Set, Union from .canon import book_id_to_number from .verse_ref import Versification -USFM_FILE_PATTERN = re.compile(r"(?<=[A-Z]{3})\d+\.usfm") -BOOK_SPAN = re.compile(r"[A-Z]{3}-[A-Z]{3}") - def get_books(books: Union[str, List[str]]) -> Set[int]: if isinstance(books, str): @@ -40,14 +37,10 @@ def get_books(books: Union[str, List[str]]) -> Set[int]: # Output format: { book_num: [chapters] } # An empty list, i.e. book_num: [] signifies the inclusion of all chapters -def get_chapters(chapter_selections: str) -> dict: - versification = Versification.create("Original") +def get_chapters( + chapter_selections: str, versification: Versification = Versification.create("Original") +) -> Dict[int, List[int]]: chapters = {} - spans = [] - subtractions = [] - - # Normalize books written as "MAT01.usfm" to "MAT" - chapter_selections = re.sub(USFM_FILE_PATTERN, "", chapter_selections) if ";" not in chapter_selections and not any( s.isdigit() and (i == len(chapter_selections) - 1 or not chapter_selections[i + 1].isalpha()) @@ -58,93 +51,120 @@ def get_chapters(chapter_selections: str) -> dict: sections = chapter_selections.split(";") for section in sections: - if section == "": - continue - elif section.startswith("-"): - subtractions.append(section[1:]) + section = section.strip() + + if section.startswith("-"): # Subtraction + section = section[1:] + if any( + s.isdigit() and (i == len(section) - 1 or not section[i + 1].isalpha()) for i, s in enumerate(section) + ): # Specific chapters from one book + book = book_id_to_number(section[:3]) + if book == 0: + raise ValueError(f"{section[:3]} is an invalid book ID.") + if book not in chapters: + raise ValueError(f"{section[:3]} cannot be removed as it is not in the existing book selection.") + + if chapters[book] == []: + chapters[book] = [i + 1 for i in range(versification.get_last_chapter(book))] + + last_chapter = versification.get_last_chapter(book) + chapter_nums = section[3:].split(",") + for chapter_num in chapter_nums: + chapter_num = chapter_num.strip() + if "-" in chapter_num: + start, end = chapter_num.split("-") + if int(start) > last_chapter or int(end) > last_chapter: + raise ValueError(f"{chapter_num} is an invalid chapter range.") + + for i in range(int(start), int(end) + 1): + if i not in chapters[book]: + raise ValueError( + f"{i} cannot be removed as it is not in the existing chapter selection." + ) + chapters[book].remove(i) + else: + if int(chapter_num) > last_chapter: + raise ValueError(f"{int(chapter_num)} is an invalid chapter number.") + if int(chapter_num) not in chapters[book]: + raise ValueError( + f"{chapter_num} cannot be removed as it is not in the existing chapter selection." + ) + chapters[book].remove(int(chapter_num)) + + if len(chapters[book]) == 0: + del chapters[book] + elif "-" in section: # Spans of books + ends = section.split("-") + if len(ends) != 2 or book_id_to_number(ends[0]) == 0 or book_id_to_number(ends[1]) == 0: + raise ValueError(f"{section} is an invalid book range.") + + for i in range(book_id_to_number(ends[0]), book_id_to_number(ends[1]) + 1): + if i not in chapters: + raise ValueError( + f"{section[:3]} cannot be removed as it is not in the existing book selection." + ) + + del chapters[i] + else: # Single whole book + book = book_id_to_number(section) + if book == 0: + raise ValueError(f"{section} is an invalid book ID.") + if book not in chapters: + raise ValueError(f"{section[:3]} cannot be removed as it is not in the existing book selection.") + + del chapters[book] elif any( s.isdigit() and (i == len(section) - 1 or not section[i + 1].isalpha()) for i, s in enumerate(section) ): # Specific chapters from one book book = book_id_to_number(section[:3]) - if book == 0: - raise RuntimeError(f"{section[:3]} is an invalid book ID.") + raise ValueError(f"{section[:3]} is an invalid book ID.") + + if book in chapters: + if chapters[book] == []: + continue + book_chapters = set(chapters[book]) + else: + book_chapters = set() - chapter_nums = section[3:].split(",") - chapters[book] = set() last_chapter = versification.get_last_chapter(book) + chapter_nums = section[3:].split(",") for chapter_num in chapter_nums: + chapter_num = chapter_num.strip() if "-" in chapter_num: start, end = chapter_num.split("-") - for i in range(int(start), min(int(end), last_chapter) + 1): - chapters[book].add(i) - elif int(chapter_num) <= last_chapter: - chapters[book].add(int(chapter_num)) + if int(start) > last_chapter or int(end) > last_chapter: + raise ValueError(f"{chapter_num} is an invalid chapter range.") - # Delete entry if no chapter numbers were valid - if len(chapters[book]) == 0: - del chapters[book] + for i in range(int(start), int(end) + 1): + book_chapters.add(i) + else: + if int(chapter_num) > last_chapter: + raise ValueError(f"{chapter_num} is an invalid chapter number.") + + book_chapters.add(int(chapter_num)) + + if len(book_chapters) == last_chapter: + chapters[book] = [] + else: + chapters[book] = sorted(list(book_chapters)) elif "-" in section: # Spans of books - spans.append(section) + ends = section.split("-") + if len(ends) != 2 or book_id_to_number(ends[0]) == 0 or book_id_to_number(ends[1]) == 0: + raise ValueError(f"{section} is an invalid book range.") + + for i in range(book_id_to_number(ends[0]), book_id_to_number(ends[1]) + 1): + chapters[i] = [] elif section == "OT": for i in range(1, 40): - if i not in chapters: - chapters[i] = set() + chapters[i] = [] elif section == "NT": for i in range(40, 67): - if i not in chapters: - chapters[i] = set() + chapters[i] = [] else: # Single whole book book = book_id_to_number(section) if book == 0: - raise RuntimeError(f"{section} is an invalid book ID.") - - if book not in chapters: - chapters[book] = set() - - for span in spans: - ends = span.split("-") - if len(ends) != 2 or book_id_to_number(ends[0]) == 0 or book_id_to_number(ends[1]) == 0: - raise RuntimeError(f"{span} is an invalid book range.") - - for i in range(book_id_to_number(ends[0]), book_id_to_number(ends[1]) + 1): - if i not in chapters: - chapters[i] = set() - - for subtraction in subtractions: - if re.match(BOOK_SPAN, subtraction) is not None: - raise RuntimeError("Cannot subtract spans of books.") - - book = book_id_to_number(subtraction[:3]) - if book == 0: - raise RuntimeError(f"{subtraction[:3]} is an invalid book ID.") - if book not in chapters: - raise RuntimeError(f"{subtraction[:3]} cannot be removed as it is not in the existing book selection.") - - # Subtract entire book - if len(subtraction) == 3: - del chapters[book] - continue - - if len(chapters[book]) == 0: - chapters[book] = {i + 1 for i in range(versification.get_last_chapter(book))} - chapter_nums = subtraction[3:].split(",") - for chapter_num in chapter_nums: - if "-" in chapter_num: - start, end = chapter_num.split("-") - for i in range(int(start), int(end) + 1): - chapters[book].discard(i) - else: - chapters[book].discard(int(chapter_num)) - - # Delete entry if no chapter numbers are left - if len(chapters[book]) == 0: - del chapters[book] - # Make entry the empty set again if all chapters are still present - elif len(chapters[book]) == versification.get_last_chapter(book): - chapters[book] = set() - - for k, v in chapters.items(): - chapters[k] = sorted(list(v)) + raise ValueError(f"{section} is an invalid book ID.") + chapters[book] = [] return chapters diff --git a/tests/scripture/test_parse.py b/tests/scripture/test_parse.py index a10aba3..65c68d3 100644 --- a/tests/scripture/test_parse.py +++ b/tests/scripture/test_parse.py @@ -36,6 +36,7 @@ def test_get_books() -> None: def test_get_chapters() -> None: assert get_chapters("MAL") == {39: []} assert get_chapters("GEN,EXO") == {1: [], 2: []} + assert get_chapters("1JN,2JN") == {62: [], 63: []} assert get_chapters("OT") == {i: [] for i in range(1, 40)} assert get_chapters("NT") == {i: [] for i in range(40, 67)} whole_bible = {i: [] for i in range(1, 67)} @@ -46,12 +47,15 @@ def test_get_chapters() -> None: assert get_chapters("NT,OT,-MRK,-EXO") == whole_bible assert get_chapters("MAT;MRK") == {40: [], 41: []} + assert get_chapters("MAT; MRK") == {40: [], 41: []} assert get_chapters("MAT1,2,3") == {40: [1, 2, 3]} - assert get_chapters("MAT400-500") == {} - assert get_chapters("MAT1-4,12,9,100") == {40: [1, 2, 3, 4, 9, 12]} + assert get_chapters("MAT 1, 2, 3") == {40: [1, 2, 3]} + assert get_chapters("MAT1-4,12,9") == {40: [1, 2, 3, 4, 9, 12]} assert get_chapters("MAT-LUK") == {40: [], 41: [], 42: []} - assert get_chapters("MAT1,2,3;MAT-LUK") == {40: [1, 2, 3], 41: [], 42: []} + assert get_chapters("MAT1,2,3;MAT-LUK") == {40: [], 41: [], 42: []} assert get_chapters("2JN-3JN;EXO1,8,3-5;GEN") == {1: [], 2: [1, 3, 4, 5, 8], 63: [], 64: []} + assert get_chapters("1JN 1;1JN 2;1JN 3-5") == {62: []} + assert get_chapters("MAT-ROM;-ACT4-28") == {40: [], 41: [], 42: [], 43: [], 44: [1, 2, 3], 45: []} assert get_chapters("NT;OT;-MRK;-EXO") == whole_bible test_bible = {i: [] for i in range(40, 67)} @@ -60,21 +64,48 @@ def test_get_chapters() -> None: test_chapters_rev = [i for i in range(1, 21)] test_bible[66] = test_chapters_rev assert get_chapters("NT;-MAT3-5,17;-REV21,22") == test_bible + assert get_chapters("MAT-JHN;-MAT-LUK") == {43: []} - assert get_chapters("MAT40.usfm;MRK41.usfm1,2,3,4-6;LUK") == {40: [], 41: [1, 2, 3, 4, 5, 6], 42: []} + with raises(ValueError): + # empty string + get_chapters("") - with raises(RuntimeError): + with raises(ValueError): # invalid name - get_chapters("HELLO_WORLD") - - with raises(RuntimeError): - # subtracting book from nothing + get_chapters("ABC") + + with raises(ValueError): + # invalid range book + get_chapters("MAT-ABC") + with raises(ValueError): + # subtract invalid range book + get_chapters("NT;-ABC-LUK") + + with raises(ValueError): + # invalid chapter + get_chapters("MAT 500") + + with raises(ValueError): + # invalid range number + get_chapters("MAT 1-500") + with raises(ValueError): + # subtract invalid range number + get_chapters("MAT;-MAT 300-500") + + # subtracting from nothing + with raises(ValueError): get_chapters("-MRK") - - with raises(RuntimeError): - # invalid subtracting name - get_chapters("NT;OT;-HELLO_WORLD") - - with raises(RuntimeError): - # subtracting range - get_chapters("OT;NT;-MAT-LUK") + with raises(ValueError): + get_chapters("-MRK 1") + with raises(ValueError): + get_chapters("MRK 2-5;-MRK 1-4") + with raises(ValueError): + get_chapters("MRK 2-5;-MRK 6") + with raises(ValueError): + get_chapters("OT;-MRK-LUK") + + # invalid subtracting name + with raises(ValueError): + get_chapters("NT;OT;-ABC") + with raises(ValueError): + get_chapters("MAT;-ABC 1")