diff --git a/dapitains/app/ingest.py b/dapitains/app/ingest.py index 91151b8..c46a2fb 100644 --- a/dapitains/app/ingest.py +++ b/dapitains/app/ingest.py @@ -80,17 +80,24 @@ def recurse(items, current_path): def get_nav( refs: List[Dict[str, Any]], paths: Dict[str, List[int]], - start_or_ref: Optional[str], - end: Optional[str], + start_or_ref: Optional[str] = None, + end: Optional[str] = None, down: Optional[int] = 1 ) -> Tuple[List[Dict[str, Any]], Optional[Dict[str, Any]], Optional[Dict[str, Any]]]: + """ Given a references set and a path set, provide the CitableUnit from start to end at down level. + + """ paths_index = list(paths.keys()) start_index, end_index = None, None - if start_or_ref: - start_index = paths_index.index(start_or_ref) if end: end_index = paths_index.index(end) + 1 + if start_or_ref: + start_index = paths_index.index(start_or_ref) + if not end: + for index, reference in enumerate(paths_index[start_index+1:]): + if len(paths[start_or_ref]) == len(paths[reference]): + end_index = index + start_index + 1 paths = dict(list(paths.items())[start_index:end_index]) diff --git a/dapitains/tei/citeStructure.py b/dapitains/tei/citeStructure.py index 5767544..bdf2338 100644 --- a/dapitains/tei/citeStructure.py +++ b/dapitains/tei/citeStructure.py @@ -47,11 +47,15 @@ class CitableUnit: node: Optional[saxonlib.PyXdmNode] = None dublinCore: Dict[str, List[str]] = field(default_factory=lambda: defaultdict(list)) extension: Dict[str, List[str]] = field(default_factory=lambda: defaultdict(list)) + level: int = 1 + parent: Optional[str] = None def json(self): out = { "citeType": self.citeType, - "ref": self.ref + "ref": self.ref, + "level": self.level, + "parent": self.parent } if self.children: out["members"] = [ @@ -189,26 +193,30 @@ def _dispatch( child_xpath: str, structure: CitableStructure, xpath_processor: saxonlib.PyXPathProcessor, - unit: CitableUnit): + unit: CitableUnit, + level: int): # target = self.generate_xpath(child.ref) if len(structure.children) == 1: self.find_refs( root=xpath_processor.evaluate_single(child_xpath), structure=structure.children[0], - unit=unit + unit=unit, + level=level ) else: self.find_refs_from_branches( root=xpath_processor.evaluate_single(child_xpath), structure=structure.children, - unit=unit + unit=unit, + level=level ) def find_refs( self, root: saxonlib.PyXdmNode, structure: CitableStructure = None, - unit: Optional[CitableUnit] = None + unit: Optional[CitableUnit] = None, + level: int = 1 ) -> List[CitableUnit]: xpath_proc = get_xpath_proc(elem=root) prefix = (unit.ref + structure.delim) if unit else "" @@ -218,7 +226,9 @@ def find_refs( for value in xpath_proc.evaluate(f"{xpath_prefix}{structure.xpath}"): child = CitableUnit( citeType=structure.citeType, - ref=f"{prefix}{value.string_value}" + ref=f"{prefix}{value.string_value}", + parent=unit.ref if unit else None, + level=level ) if structure.metadata: @@ -238,7 +248,8 @@ def find_refs( child_xpath=self.generate_xpath(child.ref), structure=structure, xpath_processor=xpath_proc, - unit=child + unit=child, + level=level+1 ) return units @@ -246,7 +257,8 @@ def find_refs_from_branches( self, root: saxonlib.PyXdmNode, structure: List[CitableStructure], - unit: Optional[CitableUnit] = None + unit: Optional[CitableUnit] = None, + level: int = 1 ) -> List[CitableUnit]: xpath_proc = get_xpath_proc(elem=root) prefix = (unit.ref) if unit else "" # ToDo: Reinject delim @@ -281,7 +293,9 @@ def compare_nodes_by_doc_order(node1, node2): for elem in unsorted: child_unit = CitableUnit( citeType=elem.struct.citeType, - ref=elem.citation + ref=elem.citation, + level=level, + parent=unit.ref if unit else None ) if unit: @@ -294,7 +308,8 @@ def compare_nodes_by_doc_order(node1, node2): child_xpath=self.generate_xpath(child_unit.ref), structure=elem.struct, xpath_processor=xpath_proc, - unit=child_unit + unit=child_unit, + level=level+1 ) return units diff --git a/tests/test_citeStructure.py b/tests/test_citeStructure.py index e73edd0..0dca40d 100644 --- a/tests/test_citeStructure.py +++ b/tests/test_citeStructure.py @@ -59,19 +59,20 @@ def test_parsing(): assert parser.generate_xpath("Luke") == "//body/div[@n='Luke']" assert [root.json() for root in parser.find_refs(root=TEI, structure=parser.units)] == [ - {'citeType': 'book', 'ref': 'Luke', 'members': [ - {'citeType': 'chapter', 'ref': 'Luke 1', 'members': [ - {'citeType': 'verse', 'ref': 'Luke 1:1'}, - {'citeType': 'verse', 'ref': 'Luke 1:2'}, - {'citeType': 'bloup', 'ref': 'Luke 1#1'} - ]} + {'citeType': 'book', 'ref': 'Luke', 'parent': None, 'level': 1, 'members': [ + {'citeType': 'chapter', 'ref': 'Luke 1', 'parent': 'Luke', 'level': 2, 'members': [ + {'citeType': 'verse', 'ref': 'Luke 1:1', 'parent': 'Luke 1', 'level': 3}, + {'citeType': 'verse', 'ref': 'Luke 1:2', 'parent': 'Luke 1', 'level': 3}, + {'citeType': 'bloup', 'ref': 'Luke 1#1', 'parent': 'Luke 1', 'level': 3} + ] + } ]}, - {'citeType': 'book', 'ref': 'Mark', 'members': [ - {'citeType': 'chapter', 'ref': 'Mark 1', 'members': [ - {'citeType': 'verse', 'ref': 'Mark 1:1'}, - {'citeType': 'verse', 'ref': 'Mark 1:2'}, - {'citeType': 'bloup', 'ref': 'Mark 1#1'}, - {'citeType': 'verse', 'ref': 'Mark 1:3'} + {'citeType': 'book', 'ref': 'Mark', 'parent': None, 'level': 1, 'members': [ + {'citeType': 'chapter', 'ref': 'Mark 1', 'parent': 'Mark', 'level': 2, 'members': [ + {'citeType': 'verse', 'ref': 'Mark 1:1', 'parent': 'Mark 1', 'level': 3}, + {'citeType': 'verse', 'ref': 'Mark 1:2', 'parent': 'Mark 1', 'level': 3}, + {'citeType': 'bloup', 'ref': 'Mark 1#1', 'parent': 'Mark 1', 'level': 3}, + {'citeType': 'verse', 'ref': 'Mark 1:3', 'parent': 'Mark 1', 'level': 3} ]} ]} ] @@ -84,17 +85,17 @@ def test_cite_data(): refs = parser.find_refs(root=TEI, structure=parser.units) refs = [ref.json() for ref in refs] assert refs == [ - {'citeType': 'book', 'ref': '1', 'dublinCore': { + {'citeType': 'book', 'ref': '1', 'parent': None, 'level': 1, 'dublinCore': { 'http://purl.org/dc/terms/title': ['Introduction', 'Introduction'], 'http://purl.org/dc/terms/creator': ['John Doe']}}, - {'citeType': 'book', 'ref': '2', 'dublinCore': {'http://purl.org/dc/terms/title': ["Background", 'Contexte']}}, - {'citeType': 'book', 'ref': '3', 'dublinCore': { + {'citeType': 'book', 'ref': '2', 'parent': None, 'level': 1, 'dublinCore': {'http://purl.org/dc/terms/title': ["Background", 'Contexte']}}, + {'citeType': 'book', 'ref': '3', 'parent': None, 'level': 1, 'dublinCore': { 'http://purl.org/dc/terms/title': ['Methodology', 'Méthodologie'], 'http://purl.org/dc/terms/creator': ['Albert Einstein']}}, - {'citeType': 'book', 'ref': '4', 'dublinCore': { + {'citeType': 'book', 'ref': '4', 'parent': None, 'level': 1, 'dublinCore': { 'http://purl.org/dc/terms/title': ['Results', 'Résultats'], 'http://purl.org/dc/terms/creator': ['Isaac Newton']}}, - {'citeType': 'book', 'ref': '5', 'dublinCore': { + {'citeType': 'book', 'ref': '5', 'parent': None, 'level': 1, 'dublinCore': { 'http://purl.org/dc/terms/title': ['Conclusion', 'Conclusion'], 'http://purl.org/dc/terms/creator': ['Marie Curie'] }}] @@ -108,24 +109,24 @@ def test_advanced_cite_data(): refs = parser.find_refs(root=TEI, structure=parser.units) refs = [ref.json() for ref in refs] assert refs == [ - {'citeType': 'part', 'ref': 'part-1', 'members': [ - {'citeType': 'book', 'ref': 'part-1.1', 'dublinCore': { + {'citeType': 'part', 'ref': 'part-1', 'parent': None, 'level': 1, 'members': [ + {'citeType': 'book', 'ref': 'part-1.1', 'parent': 'part-1', 'level': 2, 'dublinCore': { 'http://purl.org/dc/terms/title': ['Introduction', 'Introduction'], 'http://purl.org/dc/terms/creator': ['John Doe']}}, - {'citeType': 'book', 'ref': 'part-1.2', 'dublinCore': { + {'citeType': 'book', 'ref': 'part-1.2', 'parent': 'part-1', 'level': 2, 'dublinCore': { 'http://purl.org/dc/terms/title': ["Background", 'Contexte'] }} ], 'extension': {"http://foo.bar/part": ["1"]}}, - {'citeType': 'part', 'ref': 'part-2', 'members': [ - {'citeType': 'book', 'ref': 'part-2.3', 'dublinCore': { + {'citeType': 'part', 'ref': 'part-2', 'parent': None, 'level': 1, 'members': [ + {'citeType': 'book', 'ref': 'part-2.3', 'parent': 'part-2', 'level': 2, 'dublinCore': { 'http://purl.org/dc/terms/title': ['Methodology', 'Méthodologie'], 'http://purl.org/dc/terms/creator': ['Albert Einstein']}}, - {'citeType': 'book', 'ref': 'part-2.4', 'dublinCore': { + {'citeType': 'book', 'ref': 'part-2.4', 'parent': 'part-2', 'level': 2, 'dublinCore': { 'http://purl.org/dc/terms/title': ['Results', 'Résultats'], 'http://purl.org/dc/terms/creator': ['Isaac Newton']}} ], 'extension': {"http://foo.bar/part": ["2"]}}, - {'citeType': 'part', 'ref': 'part-3', 'members': [ - {'citeType': 'book', 'ref': 'part-3.5', 'dublinCore': { + {'citeType': 'part', 'ref': 'part-3', 'parent': None, 'level': 1, 'members': [ + {'citeType': 'book', 'ref': 'part-3.5', 'parent': 'part-3', 'level': 2, 'dublinCore': { 'http://purl.org/dc/terms/title': ['Conclusion', 'Conclusion'], 'http://purl.org/dc/terms/creator': ['Marie Curie'] }} diff --git a/tests/test_db_create.py b/tests/test_db_create.py index e8dcdf4..edf4080 100644 --- a/tests/test_db_create.py +++ b/tests/test_db_create.py @@ -48,8 +48,12 @@ def test_simple_path(): "Mark 1:3": [1, 0, 3] } } - assert strip_members(get_member_by_path(refs[None], paths[None]["Luke"])) == {'citeType': 'book', 'ref': 'Luke'} - assert get_member_by_path(refs[None], paths[None]["Mark 1:3"]) == {'citeType': 'verse', 'ref': 'Mark 1:3'} + assert strip_members( + get_member_by_path(refs[None], paths[None]["Luke"]) + ) == {'citeType': 'book', 'ref': 'Luke', "level": 1, "parent": None}, "Check that members are stripped" + assert get_member_by_path( + refs[None], paths[None]["Mark 1:3"] + ) == {'citeType': 'verse', 'ref': 'Mark 1:3', "level": 3, "parent": "Mark 1"} def test_navigation(): @@ -59,19 +63,59 @@ def test_navigation(): for tree, obj in doc.citeStructure.items() } paths = {tree: generate_paths(ref) for tree, ref in refs.items()} + assert get_nav(refs[None], paths[None], start_or_ref=None, end=None, down=1) == ([ - {'citeType': 'book', 'ref': 'Luke'}, - {'citeType': 'book', 'ref': 'Mark'} - ], None, None) - assert get_nav(refs[None], paths[None], start_or_ref="Luke 1:1", end="Luke 1#1", down=0) == ([ - {'citeType': 'verse', 'ref': 'Luke 1:1'}, - {'citeType': 'verse', 'ref': 'Luke 1:2'}, - {'citeType': 'bloup', 'ref': 'Luke 1#1'} - ], {'citeType': 'verse', 'ref': 'Luke 1:1'}, {'citeType': 'bloup', 'ref': 'Luke 1#1'}) - assert get_nav(refs[None], paths[None], start_or_ref="Luke 1:1", end="Mark 1:2", down=0) == ([ - {'citeType': 'verse', 'ref': 'Luke 1:1'}, - {'citeType': 'verse', 'ref': 'Luke 1:2'}, - {'citeType': 'bloup', 'ref': 'Luke 1#1'}, - {'citeType': 'verse', 'ref': 'Mark 1:1'}, - {'citeType': 'verse', 'ref': 'Mark 1:2'} - ], {'citeType': 'verse', 'ref': 'Luke 1:1'}, {'citeType': 'verse', 'ref': 'Mark 1:2'}) \ No newline at end of file + {'citeType': 'book', 'ref': 'Luke', "level": 1, "parent": None}, + {'citeType': 'book', 'ref': 'Mark', "level": 1, "parent": None} + ], None, None), "Check that base function works" + + assert get_nav(refs[None], paths[None], start_or_ref="Luke 1:1", end="Luke 1#1", down=0) == ( + [ + {'citeType': 'verse', 'ref': 'Luke 1:1', "level": 3, "parent": "Luke 1"}, + {'citeType': 'verse', 'ref': 'Luke 1:2', "level": 3, "parent": "Luke 1"}, + {'citeType': 'bloup', 'ref': 'Luke 1#1', "level": 3, "parent": "Luke 1"} + ], + {'citeType': 'verse', 'ref': 'Luke 1:1', "level": 3, "parent": "Luke 1"}, + {'citeType': 'bloup', 'ref': 'Luke 1#1', "level": 3, "parent": "Luke 1"} + ), "Check that ?start/end works" + + assert get_nav(refs[None], paths[None], start_or_ref="Luke 1:1", end="Mark 1:2", down=0) == ( + [ + {'citeType': 'verse', 'ref': 'Luke 1:1', "level": 3, "parent": "Luke 1"}, + {'citeType': 'verse', 'ref': 'Luke 1:2', "level": 3, "parent": "Luke 1"}, + {'citeType': 'bloup', 'ref': 'Luke 1#1', "level": 3, "parent": "Luke 1"}, + {'citeType': 'verse', 'ref': 'Mark 1:1', "level": 3, "parent": "Mark 1"}, + {'citeType': 'verse', 'ref': 'Mark 1:2', "level": 3, "parent": "Mark 1"} + ], + {'citeType': 'verse', 'ref': 'Luke 1:1', "level": 3, "parent": "Luke 1"}, + {'citeType': 'verse', 'ref': 'Mark 1:2', "level": 3, "parent": "Mark 1"} + ), "Check that ?start/end works across parents" + + assert get_nav(refs[None], paths[None], start_or_ref="Luke 1", down=1) == ( + [ + {'citeType': 'verse', 'ref': 'Luke 1:1', "level": 3, "parent": "Luke 1"}, + {'citeType': 'verse', 'ref': 'Luke 1:2', "level": 3, "parent": "Luke 1"}, + {'citeType': 'bloup', 'ref': 'Luke 1#1', "level": 3, "parent": "Luke 1"} + ], + {'citeType': 'chapter', 'ref': 'Luke 1', "level": 2, "parent": "Luke"}, + None + ), "Check that ?ref works" + + assert get_nav(refs[None], paths[None], start_or_ref="Luke", down=1) == ( + [ + {'citeType': 'chapter', 'ref': 'Luke 1', "level": 2, "parent": "Luke"}, + ], + {'citeType': 'book', 'ref': 'Luke', "level": 1, "parent": None}, + None + ), "Check that ?ref works" + + assert get_nav(refs[None], paths[None], start_or_ref=None, end=None, down=2) == ( + [ + {'citeType': 'book', 'ref': 'Luke', "level": 1, "parent": None}, + {'citeType': 'chapter', 'ref': 'Luke 1', "level": 2, "parent": "Luke"}, + {'citeType': 'book', 'ref': 'Mark', "level": 1, "parent": None}, + {'citeType': 'chapter', 'ref': 'Mark 1', "level": 2, "parent": "Mark"} + ], + None, + None + ), "Check that down=2 works" \ No newline at end of file