Skip to content

Commit

Permalink
port commit ae53a3c, add test for \d marker and manual test for paratext projects
Browse files Browse the repository at this point in the history
  • Loading branch information
mshannon-sil committed Aug 17, 2024
1 parent 714db87 commit f3873bb
Show file tree
Hide file tree
Showing 8 changed files with 45 additions and 9 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ coverage.xml
*.py,cover
.hypothesis/
.pytest_cache/
tests/testutils/data/usfm/source/*
tests/testutils/data/usfm/target/*

# Translations
*.mo
Expand Down
9 changes: 8 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,12 @@
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true
},
"black-formatter.path": ["poetry", "run", "black"]
"black-formatter.path": [
"poetry",
"run",
"black"
],
"python.analysis.extraPaths": [
"./tests"
]
}
4 changes: 2 additions & 2 deletions machine/corpora/scripture_ref_usfm_parser_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def start_para(
) -> None:
if self._cur_verse_ref.is_default:
self._update_verse_ref(state.verse_ref, marker)
if not state.is_verse_text:
if not state.is_verse_text or self._current_text_type == ScriptureTextType.NONVERSE:
self._start_parent_element(marker)
self._start_non_verse_text_wrapper(state)

Expand Down Expand Up @@ -121,7 +121,7 @@ def _start_verse_text_wrapper(self, state: UsfmParserState) -> None:
self._start_verse_text(state, self._create_verse_refs())

def _end_verse_text_wrapper(self, state: UsfmParserState) -> None:
if not self._duplicate_verse and self._cur_verse_ref.verse_num != 0:
if not self._duplicate_verse and self._cur_verse_ref.verse_num > 0:
self._end_verse_text(state, self._create_verse_refs())
self._cur_text_type_stack.pop()

Expand Down
10 changes: 5 additions & 5 deletions tests/corpora/test_usfm_file_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def test_get_rows_nonempty_text() -> None:
assert text is not None
rows = list(text)

assert len(rows) == 22
assert len(rows) == 23

assert scripture_ref(rows[0]) == ScriptureRef.parse("MAT 1:1", corpus.versification)
assert rows[0].text == "Chapter one, verse one."
Expand Down Expand Up @@ -65,7 +65,7 @@ def test_get_rows_nonempty_text_all_text() -> None:
assert text is not None
rows = list(text)

assert len(rows) == 39
assert len(rows) == 44

assert scripture_ref(rows[0]) == ScriptureRef.parse("MAT 1:0/1:h", corpus.versification)
assert rows[0].text == "Matthew"
Expand Down Expand Up @@ -126,7 +126,7 @@ def test_get_rows_sentence_start() -> None:
assert text is not None
rows = list(text)

assert len(rows) == 22
assert len(rows) == 23

assert scripture_ref(rows[3]) == ScriptureRef.parse("MAT 1:4", corpus.versification)
assert rows[3].text == "Chapter one, verse four,"
Expand Down Expand Up @@ -154,7 +154,7 @@ def test_get_rows_include_markers() -> None:
assert text is not None
rows = list(text)

assert len(rows) == 22
assert len(rows) == 23

assert scripture_ref(rows[0]) == ScriptureRef.parse("MAT 1:1", corpus.versification)
assert (
Expand Down Expand Up @@ -209,7 +209,7 @@ def test_get_rows_include_markers_all_text() -> None:
assert text is not None
rows = list(text)

assert len(rows) == 35
assert len(rows) == 40

assert scripture_ref(rows[2]) == ScriptureRef.parse("MAT 1:0/3:ip", corpus.versification)
assert rows[2].text == "An introduction to Matthew\\fe + \\ft This is an endnote.\\fe*"
Expand Down
14 changes: 14 additions & 0 deletions tests/corpora/test_usfm_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import pytest
from testutils.corpora_test_helpers import USFM_SOURCE_PROJECT_PATH, USFM_TARGET_PROJECT_PATH

from machine.corpora import ParatextTextCorpus, StandardParallelTextCorpus


@pytest.mark.skip(reason="This is for manual testing only. Remove this decorator to run the test.")
def test_parse_parallel_corpus():
    """Manually check that a source/target Paratext project pair can be parsed.

    Builds a Paratext corpus for the target project and for the source
    project (both with ``include_all_text`` and ``include_markers`` enabled),
    pairs them in a parallel corpus that requests all source rows, and
    asserts that iterating the parallel corpus produces at least one row.
    """
    corpus_options = {"include_all_text": True, "include_markers": True}

    # The target corpus is built before the source corpus, as in the original.
    target_corpus = ParatextTextCorpus(USFM_TARGET_PROJECT_PATH, **corpus_options)
    source_corpus = ParatextTextCorpus(USFM_SOURCE_PROJECT_PATH, **corpus_options)

    parallel_corpus = StandardParallelTextCorpus(
        source_corpus,
        target_corpus,
        all_source_rows=True,
        all_target_rows=False,
    )

    # Materialise every row so the whole parse is exercised, then require
    # that the result is non-empty.
    assert list(parallel_corpus.get_rows())
2 changes: 1 addition & 1 deletion tests/corpora/test_usfm_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ def test_tokenize() -> None:
usfm = _read_usfm()
usfm_tokenizer = UsfmTokenizer()
tokens = usfm_tokenizer.tokenize(usfm)
assert len(tokens) == 174
assert len(tokens) == 203

assert tokens[0].type is UsfmTokenType.BOOK
assert tokens[0].marker == "id"
Expand Down
2 changes: 2 additions & 0 deletions tests/testutils/corpora_test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from . import TEST_DATA_PATH

USFM_TEST_PROJECT_PATH = TEST_DATA_PATH / "usfm" / "Tes"
USFM_TARGET_PROJECT_PATH = TEST_DATA_PATH / "usfm" / "Target"
USFM_SOURCE_PROJECT_PATH = TEST_DATA_PATH / "usfm" / "Source"
USX_TEST_PROJECT_PATH = TEST_DATA_PATH / "usx" / "Tes"
TEXT_TEST_PROJECT_PATH = TEST_DATA_PATH / "txt"
CUSTOM_VERS_PATH = TEST_DATA_PATH / "custom.vrs"
Expand Down
11 changes: 11 additions & 0 deletions tests/testutils/data/usfm/Tes/41MATTes.SFM
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,14 @@
\v 10 \tc3-4 \qt-s |Jesus\*Chapter 2 verse 10\qt-e\*
\v 11-12
\restore restore information
\c 3
\cl PSALM 3
\s1 Section 1
\mt1 Major Title 1
\d \va (1)\va* Description
\q1
\v 1 \va 2\va* \vp 1 (2)\vp* Chapter 3 verse 1.
\q1 3.1 part 2
\b
\q1 3.1 part 3
\q1 3.1 part 4

0 comments on commit f3873bb

Please sign in to comment.