Skip to content

Commit

Permalink
Move get_books to parse.py and add get_chapters
Browse files Browse the repository at this point in the history
  • Loading branch information
isaac091 committed Nov 10, 2023
1 parent daa1b8e commit d5a5b54
Show file tree
Hide file tree
Showing 5 changed files with 232 additions and 65 deletions.
3 changes: 2 additions & 1 deletion machine/scripture/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@
NON_CANONICAL_IDS,
book_id_to_number,
book_number_to_id,
get_books,
is_book_id_valid,
is_canonical,
is_nt,
is_ot,
is_ot_nt,
)
from .parse import get_books, get_chapters
from .verse_ref import (
NULL_VERSIFICATION,
VERSE_RANGE_SEPARATOR,
Expand Down Expand Up @@ -52,6 +52,7 @@ def __getattr__(name: str) -> Any:
"FIRST_BOOK",
"get_bbbcccvvv",
"get_books",
"get_chapters",
"is_book_id_valid",
"is_canonical",
"is_nt",
Expand Down
32 changes: 1 addition & 31 deletions machine/scripture/canon.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List, Set, Union
from typing import Union

ALL_BOOK_IDS = [
"GEN",
Expand Down Expand Up @@ -161,36 +161,6 @@ def book_id_to_number(id: str) -> int:
return BOOK_NUMBERS.get(id.upper(), 0)


def get_books(books: Union[str, List[str]]) -> Set[int]:
    """Resolve a book selection into the set of selected book numbers.

    ``books`` is either a comma-separated string or a list of entries, handled in order. Each entry is
    a book ID, "OT" (books 1-39), "NT" (books 40-66), or a book ID prefixed with "-" to remove a book
    that an earlier entry selected. Surrounding whitespace and "*" characters are ignored and entries
    are upper-cased before being interpreted.

    Raises:
        RuntimeError: if a book ID is unknown, or a book is removed that is not currently selected.
    """
    entries = books.split(",") if isinstance(books, str) else books
    selected: Set[int] = set()
    for entry in entries:
        token = entry.strip().strip("*").upper()
        if token == "NT":
            selected |= set(range(40, 67))
            continue
        if token == "OT":
            selected |= set(range(1, 40))
            continue

        # Additions and removals share the lookup and validation of the book ID.
        removing = token.startswith("-")
        name = token[1:] if removing else token
        number = book_id_to_number(name)
        if number == 0:
            raise RuntimeError(f"{name} is an invalid book ID.")

        if not removing:
            selected.add(number)
        elif number in selected:
            selected.remove(number)
        else:
            raise RuntimeError(
                f"{name}:{number} cannot be removed as it is not in the existing book set of {selected}"
            )
    return selected


def is_nt(book_num: int) -> bool:
    """Return True when ``book_num`` is a New Testament book number (40 through 66)."""
    return 40 <= book_num < 67

Expand Down
150 changes: 150 additions & 0 deletions machine/scripture/parse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
import re
from typing import Dict, List, Set, Union

from .canon import book_id_to_number
from .verse_ref import Versification


# Matches the numeric part and extension of a USFM file name (the "40.usfm" in "MAT40.usfm") so that it can
# be removed, leaving only the book ID. Book IDs may contain digits (e.g. "1JN"), so the lookbehind has to
# accept digits as well as letters.
USFM_FILE_PATTERN = re.compile(r"(?<=[A-Z0-9]{3})\d+\.usfm")
# Matches a span of two three-character book IDs, e.g. "MAT-LUK" or "2JN-3JN".
BOOK_SPAN = re.compile(r"[A-Z0-9]{3}-[A-Z0-9]{3}")


def get_books(books: Union[str, List[str]]) -> Set[int]:
    """Build the set of book numbers described by ``books``.

    ``books`` may be a comma-separated string or a list of entries. Entries are applied from left to
    right; each one is stripped of whitespace and "*" characters and upper-cased, and is then read as:

    * "OT" or "NT": add books 1-39 or 40-66 respectively;
    * "-XXX": remove book XXX, which must already be in the set;
    * "XXX": add book XXX.

    Raises:
        RuntimeError: if a book ID is not recognised, or the book to remove is not in the set.
    """
    testaments = {"OT": range(1, 40), "NT": range(40, 67)}
    if isinstance(books, str):
        books = books.split(",")

    result: Set[int] = set()
    for raw_id in books:
        cleaned = raw_id.strip().strip("*").upper()
        if cleaned in testaments:
            result.update(testaments[cleaned])
        elif cleaned[:1] == "-":
            # remove the book from the set
            target = cleaned[1:]
            target_num = book_id_to_number(target)
            if target_num == 0:
                raise RuntimeError(f"{target} is an invalid book ID.")
            if target_num not in result:
                raise RuntimeError(
                    f"{target}:{target_num} cannot be removed as it is not in the existing book set of {result}"
                )
            result.remove(target_num)
        else:
            added_num = book_id_to_number(cleaned)
            if added_num == 0:
                raise RuntimeError(f"{cleaned} is an invalid book ID.")
            result.add(added_num)
    return result


def _has_chapter_numbers(selection: str) -> bool:
    """Return True if ``selection`` contains a digit that is not immediately followed by a letter.

    This is how chapter numbers are told apart from digits inside book IDs such as "1JN", whose digit is
    followed by a letter.
    NOTE(review): a book ID that ends in a digit would be read as a chapter selection - confirm that no
    such ID is expected here.
    """
    last_index = len(selection) - 1
    return any(
        char.isdigit() and (index == last_index or not selection[index + 1].isalpha())
        for index, char in enumerate(selection)
    )


def _parse_chapter_numbers(chapter_list: str, last_chapter: int) -> Set[int]:
    """Expand a chapter list such as "1-4,12,9" into a set, ignoring numbers greater than ``last_chapter``.

    Raises:
        RuntimeError: if an entry is neither a number nor a "start-end" range of numbers.
    """
    chapter_nums: Set[int] = set()
    for entry in chapter_list.split(","):
        try:
            if "-" in entry:
                start, end = (int(bound) for bound in entry.split("-"))
                # Clamp the end before expanding so an oversized range cannot produce a huge loop.
                chapter_nums.update(range(start, min(end, last_chapter) + 1))
            elif int(entry) <= last_chapter:
                chapter_nums.add(int(entry))
        except ValueError as err:
            # Report malformed input the same way as every other invalid selection.
            raise RuntimeError(f"{entry} is an invalid chapter number or chapter range.") from err
    return chapter_nums


def get_chapters(chapter_selections: str) -> Dict[int, List[int]]:
    """Parse a chapter selection string into a mapping of book number to selected chapters.

    Sections are separated by ";". For backwards compatibility with the get_books syntax, a string that
    contains no ";" and no chapter numbers is split on "," instead. Whitespace around a section is
    ignored and sections are upper-cased. Books written as USFM file names ("MAT40.usfm") are treated
    as the bare book ID. Each section is one of:

    * "OT" or "NT": all books numbered 1-39 or 40-66;
    * a book ID ("MAT"): the whole book;
    * a span of books ("MAT-LUK"): every book from the first to the last, inclusive;
    * a book ID followed by chapter numbers and ranges ("MAT1-4,12"): those chapters only. Chapters
      beyond the last chapter of the book in the "Original" versification are ignored, and this
      replaces any chapters selected for the same book by an earlier section;
    * a whole book or chapter selection prefixed with "-" ("-MRK", "-MAT3-5,17"): removed from the
      selection. Subtractions are applied after all other sections, wherever they appear.

    Returns:
        A dict of the form ``{book_num: [chapters]}`` with the chapters sorted. An empty list means
        that all chapters of the book are included; a book without an entry is not included.

    Raises:
        RuntimeError: for an invalid book ID, an invalid or reversed book span, a malformed chapter
            number, a subtracted span of books, or a subtraction from a book that is not selected.
    """
    versification = Versification.create("Original")
    chapters: Dict[int, Set[int]] = {}
    spans: List[str] = []
    subtractions: List[str] = []

    # Normalize books written as "MAT01.usfm" to "MAT"
    chapter_selections = USFM_FILE_PATTERN.sub("", chapter_selections)

    # Backwards compatibility with get_books syntax: commas separate books only when the selection
    # has no ";" and no chapter numbers (where commas separate chapters instead).
    if ";" in chapter_selections or _has_chapter_numbers(chapter_selections):
        separator = ";"
    else:
        separator = ","

    for section in chapter_selections.split(separator):
        # Tolerate whitespace and lower case in the same way as get_books does.
        section = section.strip().upper()
        if section == "":
            continue
        elif section.startswith("-"):
            subtractions.append(section[1:].strip())
        elif _has_chapter_numbers(section):  # Specific chapters from one book
            book_id = section[:3]
            book = book_id_to_number(book_id)
            if book == 0:
                raise RuntimeError(f"{book_id} is an invalid book ID.")

            selected = _parse_chapter_numbers(section[3:], versification.get_last_chapter(book))
            if selected:
                chapters[book] = selected
            else:
                # Drop the entry if no chapter numbers were valid
                chapters.pop(book, None)
        elif "-" in section:  # Spans of books
            spans.append(section)
        elif section == "OT":
            for book in range(1, 40):
                chapters.setdefault(book, set())
        elif section == "NT":
            for book in range(40, 67):
                chapters.setdefault(book, set())
        else:  # Single whole book
            book = book_id_to_number(section)
            if book == 0:
                raise RuntimeError(f"{section} is an invalid book ID.")
            chapters.setdefault(book, set())

    for span in spans:
        ends = [book_id_to_number(end.strip()) for end in span.split("-")]
        # A reversed span would otherwise silently select nothing.
        if len(ends) != 2 or 0 in ends or ends[0] > ends[1]:
            raise RuntimeError(f"{span} is an invalid book range.")

        for book in range(ends[0], ends[1] + 1):
            chapters.setdefault(book, set())

    for subtraction in subtractions:
        if BOOK_SPAN.match(subtraction) is not None:
            raise RuntimeError("Cannot subtract spans of books.")

        book_id = subtraction[:3]
        book = book_id_to_number(book_id)
        if book == 0:
            raise RuntimeError(f"{book_id} is an invalid book ID.")
        if book not in chapters:
            raise RuntimeError(f"{book_id} cannot be removed as it is not in the existing book selection.")

        # Subtract entire book
        if len(subtraction) == 3:
            del chapters[book]
            continue

        last_chapter = versification.get_last_chapter(book)
        # An empty set stands for "all chapters", so expand it before removing individual chapters.
        remaining = chapters[book] or set(range(1, last_chapter + 1))
        remaining -= _parse_chapter_numbers(subtraction[3:], last_chapter)

        # Delete entry if no chapter numbers are left or make entry the empty set again if all chapters
        # are still present
        if not remaining:
            del chapters[book]
        elif len(remaining) == last_chapter:
            chapters[book] = set()
        else:
            chapters[book] = remaining

    return {book: sorted(chapter_nums) for book, chapter_nums in chapters.items()}
33 changes: 0 additions & 33 deletions tests/scripture/test_canon.py

This file was deleted.

79 changes: 79 additions & 0 deletions tests/scripture/test_parse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from pytest import raises

from machine.scripture import get_books, get_chapters


def test_get_books() -> None:
    """Check get_books for single books, lists, testaments, removals and invalid selections."""
    old_testament = set(range(1, 40))
    new_testament = set(range(40, 67))
    whole_bible = old_testament | new_testament

    assert get_books("MAL") == {39}
    assert get_books("GEN,EXO") == {1, 2}
    assert get_books("GEN,EXO") == get_books(["GEN", "EXO"])
    assert get_books("OT") == old_testament
    assert get_books("NT") == new_testament
    assert get_books("NT,OT") == whole_bible

    # 2 is EXO and 41 is MRK
    assert get_books("NT,OT,-MRK,-EXO") == whole_bible - {2, 41}

    invalid_selections = [
        "HELLO_WORLD",  # invalid name
        "-MRK",  # subtracting book from nothing
        "NT,OT,-HELLO_WORLD",  # invalid subtracting name
        "OT,-MRK,NT",  # subtracting book from wrong set
    ]
    for selection in invalid_selections:
        with raises(RuntimeError):
            get_books(selection)


def test_get_chapters() -> None:
    """Check get_chapters for get_books-style input, chapter lists, spans, subtractions and errors."""
    # get_books-style (comma-separated) selections
    assert get_chapters("MAL") == {39: []}
    assert get_chapters("GEN,EXO") == {1: [], 2: []}
    assert get_chapters("OT") == {book: [] for book in range(1, 40)}
    assert get_chapters("NT") == {book: [] for book in range(40, 67)}
    assert get_chapters("NT,OT") == {book: [] for book in range(1, 67)}

    # 2 is EXO and 41 is MRK
    bible_without_exo_mrk = {book: [] for book in range(1, 67) if book not in (2, 41)}
    assert get_chapters("NT,OT,-MRK,-EXO") == bible_without_exo_mrk

    # Semicolon-separated selections with chapter numbers and book spans
    assert get_chapters("MAT;MRK") == {40: [], 41: []}
    assert get_chapters("MAT1,2,3") == {40: [1, 2, 3]}
    assert get_chapters("MAT400-500") == {}
    assert get_chapters("MAT1-4,12,9,100") == {40: [1, 2, 3, 4, 9, 12]}
    assert get_chapters("MAT-LUK") == {40: [], 41: [], 42: []}
    assert get_chapters("2JN-3JN;EXO1,8,3-5;GEN") == {1: [], 2: [1, 3, 4, 5, 8], 63: [], 64: []}

    # Subtractions of whole books and of individual chapters
    assert get_chapters("NT;OT;-MRK;-EXO") == bible_without_exo_mrk
    expected_nt = {book: [] for book in range(40, 67)}
    expected_nt[40] = [1, 2, *range(6, 17), *range(18, 29)]
    expected_nt[66] = list(range(1, 21))
    assert get_chapters("NT;-MAT3-5,17;-REV21,22") == expected_nt

    # Books written as USFM file names
    assert get_chapters("MAT40.usfm;MRK41.usfm1,2,3,4-6;LUK") == {40: [], 41: [1, 2, 3, 4, 5, 6], 42: []}

    invalid_selections = [
        "HELLO_WORLD",  # invalid name
        "-MRK",  # subtracting book from nothing
        "NT;OT;-HELLO_WORLD",  # invalid subtracting name
        "OT;NT;-MAT-LUK",  # subtracting range
    ]
    for selection in invalid_selections:
        with raises(RuntimeError):
            get_chapters(selection)

0 comments on commit d5a5b54

Please sign in to comment.