Skip to content

Commit

Permalink
Apply all changes in order regardless of type and other small fixes. …
Browse files Browse the repository at this point in the history
…Add more tests.
  • Loading branch information
isaac091 committed Nov 10, 2023
1 parent 55c982d commit 2bd953a
Show file tree
Hide file tree
Showing 2 changed files with 148 additions and 97 deletions.
180 changes: 100 additions & 80 deletions machine/scripture/parse.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
import re
from typing import List, Set, Union
from typing import Dict, List, Set, Union

from .canon import book_id_to_number
from .verse_ref import Versification

USFM_FILE_PATTERN = re.compile(r"(?<=[A-Z]{3})\d+\.usfm")
BOOK_SPAN = re.compile(r"[A-Z]{3}-[A-Z]{3}")


def get_books(books: Union[str, List[str]]) -> Set[int]:
if isinstance(books, str):
Expand Down Expand Up @@ -40,14 +37,10 @@ def get_books(books: Union[str, List[str]]) -> Set[int]:

# Output format: { book_num: [chapters] }
# An empty list, i.e. book_num: [], signifies the inclusion of all chapters
def get_chapters(chapter_selections: str) -> dict:
versification = Versification.create("Original")
def get_chapters(
chapter_selections: str, versification: Versification = Versification.create("Original")
) -> Dict[int, List[int]]:
chapters = {}
spans = []
subtractions = []

# Normalize books written as "MAT01.usfm" to "MAT"
chapter_selections = re.sub(USFM_FILE_PATTERN, "", chapter_selections)

if ";" not in chapter_selections and not any(
s.isdigit() and (i == len(chapter_selections) - 1 or not chapter_selections[i + 1].isalpha())
Expand All @@ -58,93 +51,120 @@ def get_chapters(chapter_selections: str) -> dict:
sections = chapter_selections.split(";")

for section in sections:
if section == "":
continue
elif section.startswith("-"):
subtractions.append(section[1:])
section = section.strip()

if section.startswith("-"): # Subtraction
section = section[1:]
if any(
s.isdigit() and (i == len(section) - 1 or not section[i + 1].isalpha()) for i, s in enumerate(section)
): # Specific chapters from one book
book = book_id_to_number(section[:3])
if book == 0:
raise ValueError(f"{section[:3]} is an invalid book ID.")
if book not in chapters:
raise ValueError(f"{section[:3]} cannot be removed as it is not in the existing book selection.")

if chapters[book] == []:
chapters[book] = [i + 1 for i in range(versification.get_last_chapter(book))]

last_chapter = versification.get_last_chapter(book)
chapter_nums = section[3:].split(",")
for chapter_num in chapter_nums:
chapter_num = chapter_num.strip()
if "-" in chapter_num:
start, end = chapter_num.split("-")
if int(start) > last_chapter or int(end) > last_chapter:
raise ValueError(f"{chapter_num} is an invalid chapter range.")

for i in range(int(start), int(end) + 1):
if i not in chapters[book]:
raise ValueError(
f"{i} cannot be removed as it is not in the existing chapter selection."
)
chapters[book].remove(i)
else:
if int(chapter_num) > last_chapter:
raise ValueError(f"{int(chapter_num)} is an invalid chapter number.")
if int(chapter_num) not in chapters[book]:
raise ValueError(
f"{chapter_num} cannot be removed as it is not in the existing chapter selection."
)
chapters[book].remove(int(chapter_num))

if len(chapters[book]) == 0:
del chapters[book]
elif "-" in section: # Spans of books
ends = section.split("-")
if len(ends) != 2 or book_id_to_number(ends[0]) == 0 or book_id_to_number(ends[1]) == 0:
raise ValueError(f"{section} is an invalid book range.")

for i in range(book_id_to_number(ends[0]), book_id_to_number(ends[1]) + 1):
if i not in chapters:
raise ValueError(
f"{section[:3]} cannot be removed as it is not in the existing book selection."
)

del chapters[i]
else: # Single whole book
book = book_id_to_number(section)
if book == 0:
raise ValueError(f"{section} is an invalid book ID.")
if book not in chapters:
raise ValueError(f"{section[:3]} cannot be removed as it is not in the existing book selection.")

del chapters[book]
elif any(
s.isdigit() and (i == len(section) - 1 or not section[i + 1].isalpha()) for i, s in enumerate(section)
): # Specific chapters from one book
book = book_id_to_number(section[:3])

if book == 0:
raise RuntimeError(f"{section[:3]} is an invalid book ID.")
raise ValueError(f"{section[:3]} is an invalid book ID.")

if book in chapters:
if chapters[book] == []:
continue
book_chapters = set(chapters[book])
else:
book_chapters = set()

chapter_nums = section[3:].split(",")
chapters[book] = set()
last_chapter = versification.get_last_chapter(book)
chapter_nums = section[3:].split(",")
for chapter_num in chapter_nums:
chapter_num = chapter_num.strip()
if "-" in chapter_num:
start, end = chapter_num.split("-")
for i in range(int(start), min(int(end), last_chapter) + 1):
chapters[book].add(i)
elif int(chapter_num) <= last_chapter:
chapters[book].add(int(chapter_num))
if int(start) > last_chapter or int(end) > last_chapter:
raise ValueError(f"{chapter_num} is an invalid chapter range.")

# Delete entry if no chapter numbers were valid
if len(chapters[book]) == 0:
del chapters[book]
for i in range(int(start), int(end) + 1):
book_chapters.add(i)
else:
if int(chapter_num) > last_chapter:
raise ValueError(f"{chapter_num} is an invalid chapter number.")

book_chapters.add(int(chapter_num))

if len(book_chapters) == last_chapter:
chapters[book] = []
else:
chapters[book] = sorted(list(book_chapters))
elif "-" in section: # Spans of books
spans.append(section)
ends = section.split("-")
if len(ends) != 2 or book_id_to_number(ends[0]) == 0 or book_id_to_number(ends[1]) == 0:
raise ValueError(f"{section} is an invalid book range.")

for i in range(book_id_to_number(ends[0]), book_id_to_number(ends[1]) + 1):
chapters[i] = []
elif section == "OT":
for i in range(1, 40):
if i not in chapters:
chapters[i] = set()
chapters[i] = []
elif section == "NT":
for i in range(40, 67):
if i not in chapters:
chapters[i] = set()
chapters[i] = []
else: # Single whole book
book = book_id_to_number(section)
if book == 0:
raise RuntimeError(f"{section} is an invalid book ID.")

if book not in chapters:
chapters[book] = set()

for span in spans:
ends = span.split("-")
if len(ends) != 2 or book_id_to_number(ends[0]) == 0 or book_id_to_number(ends[1]) == 0:
raise RuntimeError(f"{span} is an invalid book range.")

for i in range(book_id_to_number(ends[0]), book_id_to_number(ends[1]) + 1):
if i not in chapters:
chapters[i] = set()

for subtraction in subtractions:
if re.match(BOOK_SPAN, subtraction) is not None:
raise RuntimeError("Cannot subtract spans of books.")

book = book_id_to_number(subtraction[:3])
if book == 0:
raise RuntimeError(f"{subtraction[:3]} is an invalid book ID.")
if book not in chapters:
raise RuntimeError(f"{subtraction[:3]} cannot be removed as it is not in the existing book selection.")

# Subtract entire book
if len(subtraction) == 3:
del chapters[book]
continue

if len(chapters[book]) == 0:
chapters[book] = {i + 1 for i in range(versification.get_last_chapter(book))}
chapter_nums = subtraction[3:].split(",")
for chapter_num in chapter_nums:
if "-" in chapter_num:
start, end = chapter_num.split("-")
for i in range(int(start), int(end) + 1):
chapters[book].discard(i)
else:
chapters[book].discard(int(chapter_num))

# Delete entry if no chapter numbers are left
if len(chapters[book]) == 0:
del chapters[book]
# Make entry the empty set again if all chapters are still present
elif len(chapters[book]) == versification.get_last_chapter(book):
chapters[book] = set()

for k, v in chapters.items():
chapters[k] = sorted(list(v))
raise ValueError(f"{section} is an invalid book ID.")
chapters[book] = []

return chapters
65 changes: 48 additions & 17 deletions tests/scripture/test_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def test_get_books() -> None:
def test_get_chapters() -> None:
assert get_chapters("MAL") == {39: []}
assert get_chapters("GEN,EXO") == {1: [], 2: []}
assert get_chapters("1JN,2JN") == {62: [], 63: []}
assert get_chapters("OT") == {i: [] for i in range(1, 40)}
assert get_chapters("NT") == {i: [] for i in range(40, 67)}
whole_bible = {i: [] for i in range(1, 67)}
Expand All @@ -46,12 +47,15 @@ def test_get_chapters() -> None:
assert get_chapters("NT,OT,-MRK,-EXO") == whole_bible

assert get_chapters("MAT;MRK") == {40: [], 41: []}
assert get_chapters("MAT; MRK") == {40: [], 41: []}
assert get_chapters("MAT1,2,3") == {40: [1, 2, 3]}
assert get_chapters("MAT400-500") == {}
assert get_chapters("MAT1-4,12,9,100") == {40: [1, 2, 3, 4, 9, 12]}
assert get_chapters("MAT 1, 2, 3") == {40: [1, 2, 3]}
assert get_chapters("MAT1-4,12,9") == {40: [1, 2, 3, 4, 9, 12]}
assert get_chapters("MAT-LUK") == {40: [], 41: [], 42: []}
assert get_chapters("MAT1,2,3;MAT-LUK") == {40: [1, 2, 3], 41: [], 42: []}
assert get_chapters("MAT1,2,3;MAT-LUK") == {40: [], 41: [], 42: []}
assert get_chapters("2JN-3JN;EXO1,8,3-5;GEN") == {1: [], 2: [1, 3, 4, 5, 8], 63: [], 64: []}
assert get_chapters("1JN 1;1JN 2;1JN 3-5") == {62: []}
assert get_chapters("MAT-ROM;-ACT4-28") == {40: [], 41: [], 42: [], 43: [], 44: [1, 2, 3], 45: []}

assert get_chapters("NT;OT;-MRK;-EXO") == whole_bible
test_bible = {i: [] for i in range(40, 67)}
Expand All @@ -60,21 +64,48 @@ def test_get_chapters() -> None:
test_chapters_rev = [i for i in range(1, 21)]
test_bible[66] = test_chapters_rev
assert get_chapters("NT;-MAT3-5,17;-REV21,22") == test_bible
assert get_chapters("MAT-JHN;-MAT-LUK") == {43: []}

assert get_chapters("MAT40.usfm;MRK41.usfm1,2,3,4-6;LUK") == {40: [], 41: [1, 2, 3, 4, 5, 6], 42: []}
with raises(ValueError):
# empty string
get_chapters("")

with raises(RuntimeError):
with raises(ValueError):
# invalid name
get_chapters("HELLO_WORLD")

with raises(RuntimeError):
# subtracting book from nothing
get_chapters("ABC")

with raises(ValueError):
# invalid range book
get_chapters("MAT-ABC")
with raises(ValueError):
# subtract invalid range book
get_chapters("NT;-ABC-LUK")

with raises(ValueError):
# invalid chapter
get_chapters("MAT 500")

with raises(ValueError):
# invalid range number
get_chapters("MAT 1-500")
with raises(ValueError):
# subtract invalid range number
get_chapters("MAT;-MAT 300-500")

# subtracting from nothing
with raises(ValueError):
get_chapters("-MRK")

with raises(RuntimeError):
# invalid subtracting name
get_chapters("NT;OT;-HELLO_WORLD")

with raises(RuntimeError):
# subtracting range
get_chapters("OT;NT;-MAT-LUK")
with raises(ValueError):
get_chapters("-MRK 1")
with raises(ValueError):
get_chapters("MRK 2-5;-MRK 1-4")
with raises(ValueError):
get_chapters("MRK 2-5;-MRK 6")
with raises(ValueError):
get_chapters("OT;-MRK-LUK")

# invalid subtracting name
with raises(ValueError):
get_chapters("NT;OT;-ABC")
with raises(ValueError):
get_chapters("MAT;-ABC 1")

0 comments on commit 2bd953a

Please sign in to comment.