From 733858645695f43eb812a0d6e950041c895468a7 Mon Sep 17 00:00:00 2001 From: john681611 Date: Fri, 13 Oct 2023 17:27:10 +0100 Subject: [PATCH] hack get root_cres working using Neo4J --- application/database/db.py | 67 ++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/application/database/db.py b/application/database/db.py index 3e4c0b293..bb1603d93 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -5,6 +5,7 @@ UniqueIdProperty, Relationship, RelationshipTo, + RelationshipFrom, ArrayProperty, StructuredRel, db, @@ -193,9 +194,16 @@ class NeoDocument(StructuredNode): related = Relationship("NeoDocument", "RELATED", model=RelatedRel) @classmethod - def to_cre_def(self, node): + def to_cre_def(self, node, parse_links=True): raise Exception(f"Shouldn't be parsing a NeoDocument") + @classmethod + def get_links(self, links_dict): + links = [] + for key in links_dict: + links.extend([cre_defs.Link(c.to_cre_def(c, parse_links=False), key) for c in links_dict[key]]) + return links + class NeoNode(NeoDocument): doctype = StringProperty() @@ -203,7 +211,7 @@ class NeoNode(NeoDocument): hyperlink = StringProperty() @classmethod - def to_cre_def(self, node): + def to_cre_def(self, node, parse_links=True): raise Exception(f"Shouldn't be parsing a NeoNode") @@ -213,7 +221,7 @@ class NeoStandard(NeoNode): section_id = StringProperty() @classmethod - def to_cre_def(self, node) -> cre_defs.Standard: + def to_cre_def(self, node, parse_links=True) -> cre_defs.Standard: return cre_defs.Standard( name=node.name, id=node.document_id, @@ -231,7 +239,7 @@ class NeoTool(NeoStandard): tooltype = StringProperty(required=True) @classmethod - def to_cre_def(self, node) -> cre_defs.Tool: + def to_cre_def(self, node, parse_links=True) -> cre_defs.Tool: return cre_defs.Tool( name=node.name, id=node.document_id, @@ -247,7 +255,7 @@ def to_cre_def(self, node) -> cre_defs.Tool: class NeoCode(NeoNode): @classmethod - def to_cre_def(self, node) -> cre_defs.Code: + def to_cre_def(self, node, parse_links=True) -> cre_defs.Code: return cre_defs.Code( name=node.name, id=node.document_id, @@ -260,17 +268,24 @@ def to_cre_def(self, node) -> cre_defs.Code: class NeoCRE(NeoDocument): # type: ignore external_id = StringProperty() + contained_in = RelationshipFrom('NeoCRE', 'CONTAINS', model=ContainsRel) contains = RelationshipTo("NeoCRE", "CONTAINS", model=ContainsRel) linked = RelationshipTo("NeoStandard", "LINKED_TO", model=LinkedToRel) same_as = RelationshipTo("NeoStandard", "SAME", model=SameRel) @classmethod - def to_cre_def(self, node) -> cre_defs.CRE: + def to_cre_def(self, node, parse_links=True) -> cre_defs.CRE: return cre_defs.CRE( name=node.name, id=node.document_id, description=node.description, tags=node.tags, + links=self.get_links({ + 'Contains': [*node.contains, *node.contained_in], + 'Linked To': node.linked, + 'Same as': node.same_as, + 'Related': node.related + }) if parse_links else [] ) @@ -464,19 +479,19 @@ def format_segment(seg: StructuredRel, nodes): ][0] return { - "start": NEO_DB.parse_node(start_node), - "end": NEO_DB.parse_node(end_node), + "start": NEO_DB.parse_node_no_links(start_node), + "end": NEO_DB.parse_node_no_links(end_node), "relationship": relation_map[type(seg)], } def format_path_record(rec): return { - "start": NEO_DB.parse_node(rec.start_node), - "end": NEO_DB.parse_node(rec.end_node), + "start": NEO_DB.parse_node_no_links(rec.start_node), + "end": NEO_DB.parse_node_no_links(rec.end_node), "path": [format_segment(seg, rec.nodes) for seg in rec.relationships], } - return [NEO_DB.parse_node(rec) for rec in base_standard], [ + return [NEO_DB.parse_node_no_links(rec) for rec in base_standard], [ format_path_record(rec[0]) for rec in (path_records + path_records_all) ] @@ -490,6 +505,10 @@ def standards(self) -> List[str]: @staticmethod def parse_node(node: NeoDocument) -> cre_defs.Document: return node.to_cre_def(node) + + @staticmethod + def parse_node_no_links(node: NeoDocument) -> cre_defs.Document: + return node.to_cre_def(node, parse_links=False) class CRE_Graph: @@ -1514,30 +1533,8 @@ def text_search(self, text: str) -> List[Optional[cre_defs.Document]]: def get_root_cres(self): """Returns CRES that only have "Contains" links""" - linked_groups = aliased(InternalLinks) - linked_cres = aliased(InternalLinks) - cres = ( - self.session.query(CRE) - .filter( - ~CRE.id.in_( - self.session.query(InternalLinks.cre).filter( - InternalLinks.type == cre_defs.LinkTypes.Contains, - ) - ) - ) - .filter( - ~CRE.id.in_( - self.session.query(InternalLinks.group).filter( - InternalLinks.type == cre_defs.LinkTypes.PartOf, - ) - ) - ) - .all() - ) - result = [] - for c in cres: - result.extend(self.get_CREs(external_id=c.external_id)) - return result + neo_cres = NeoCRE.nodes.has(contained_in=False) + return [c.to_cre_def(c) for c in neo_cres] def get_embeddings_by_doc_type(self, doc_type: str) -> Dict[str, List[float]]: res = {}