From 495bf8bbce6b6a969072e0627cfaace5d820e718 Mon Sep 17 00:00:00 2001 From: John Harvey <10814889+john681611@users.noreply.github.com> Date: Wed, 11 Oct 2023 18:14:01 +0100 Subject: [PATCH] Neo4j ORM (#408) * Make Direct Dark Green * Update GapAnalysis.tsx Signed-off-by: John Harvey <10814889+john681611@users.noreply.github.com> * Trial implementation of Neo4J ORM * Populate Neo4j DB using model * More progress in conversion * get ORM working * Fix tests * Remove old NeoDB driver connection * Fix: id issue * Fix: Tags strings splitting * linting fixes * Remove NeomodelPath reference * Revert rebase bug --------- Signed-off-by: John Harvey <10814889+john681611@users.noreply.github.com> --- .gitignore | 1 + README.md | 4 +- application/database/db.py | 427 +++++++++++++++++++---------------- application/tests/db_test.py | 127 +++++------ requirements.txt | 1 + 5 files changed, 296 insertions(+), 264 deletions(-) diff --git a/.gitignore b/.gitignore index d6db6dd2b..a1f5606a8 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ Vagrantfile ## act secrets .secrets/ +.env ### conventions ### venv/ diff --git a/README.md b/README.md index 6e1925678..7ca1ec1c3 100644 --- a/README.md +++ b/README.md @@ -68,9 +68,7 @@ Alternatively, you can use the dockerfile with Some features like Gap Analysis require a neo4j DB running you can start this with
make docker-neo4jenviroment varaibles for app to connect to neo4jDB (default): -- NEO4J_URI (localhost) -- NEO4J_USR (neo4j) -- NEO4J_PASS (password) +- NEO4J_BOLT_URL (bolt://neo4j:password@localhost:7687) To run the web application for production you need gunicorn and you can run from within the cre_sync dir
make prod-rundiff --git a/application/database/db.py b/application/database/db.py index 4a0fa40c9..f9ca75d8b 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -1,5 +1,14 @@ -from neo4j import GraphDatabase -import neo4j +from neomodel import ( + config, + StructuredNode, + StringProperty, + UniqueIdProperty, + Relationship, + RelationshipTo, + ArrayProperty, + StructuredRel, + db, +) from sqlalchemy.orm import aliased import os import logging @@ -14,7 +23,6 @@ from application.utils import file from flask_sqlalchemy.model import DefaultMeta from sqlalchemy import func -from sqlalchemy.sql.expression import desc # type: ignore import uuid from application.utils.gap_analysis import get_path_score @@ -160,6 +168,112 @@ class Embeddings(BaseModel): # type: ignore ) +class RelatedRel(StructuredRel): + pass + + +class ContainsRel(StructuredRel): + pass + + +class LinkedToRel(StructuredRel): + pass + + +class SameRel(StructuredRel): + pass + + +class NeoDocument(StructuredNode): + document_id = UniqueIdProperty() + name = StringProperty(required=True) + description = StringProperty(required=True) + tags = ArrayProperty(StringProperty()) + doctype = StringProperty(required=True) + related = Relationship("NeoDocument", "RELATED", model=RelatedRel) + + @classmethod + def to_cre_def(self, node): + raise Exception(f"Shouldn't be parsing a NeoDocument") + + +class NeoNode(NeoDocument): + doctype = StringProperty() + version = StringProperty(required=True) + hyperlink = StringProperty() + + @classmethod + def to_cre_def(self, node): + raise Exception(f"Shouldn't be parsing a NeoNode") + + +class NeoStandard(NeoNode): + section = StringProperty() + subsection = StringProperty(required=True) + section_id = StringProperty() + + @classmethod + def to_cre_def(self, node) -> cre_defs.Standard: + return cre_defs.Standard( + name=node.name, + id=node.document_id, + description=node.description, + tags=node.tags, + hyperlink=node.hyperlink, + version=node.version, + section=node.section, + sectionID=node.section_id, + subsection=node.subsection, + ) + + +class NeoTool(NeoStandard): + tooltype = StringProperty(required=True) + + @classmethod + def to_cre_def(self, node) -> cre_defs.Tool: + return cre_defs.Tool( + name=node.name, + id=node.document_id, + description=node.description, + tags=node.tags, + hyperlink=node.hyperlink, + version=node.version, + section=node.section, + sectionID=node.section_id, + subsection=node.subsection, + ) + + +class NeoCode(NeoNode): + @classmethod + def to_cre_def(self, node) -> cre_defs.Code: + return cre_defs.Code( + name=node.name, + id=node.document_id, + description=node.description, + tags=node.tags, + hyperlink=node.hyperlink, + version=node.version, + ) + + +class NeoCRE(NeoDocument): # type: ignore + external_id = StringProperty() + contains = RelationshipTo("NeoCRE", "CONTAINS", model=ContainsRel) + linked = RelationshipTo("NeoStandard", "LINKED_TO", model=LinkedToRel) + same_as = RelationshipTo("NeoStandard", "SAME", model=SameRel) + + @classmethod + def to_cre_def(self, node) -> cre_defs.CRE: + return cre_defs.CRE( + name=node.name, + id=node.document_id, + description=node.description, + tags=node.tags, + ) + + class NEO_DB: __instance = None @@ -171,21 +285,9 @@ def instance(self): if self.__instance is None: self.__instance = self.__new__(self) - URI = os.getenv("NEO4J_URI") or "neo4j://localhost:7687" - AUTH = ( - os.getenv("NEO4J_USR") or "neo4j", - os.getenv("NEO4J_PASS") or "password", + config.DATABASE_URL = ( + os.getenv("NEO4J_BOLT_URL") or "bolt://neo4j:password@localhost:7687" ) - self.driver = GraphDatabase.driver(URI, auth=AUTH) - - try: - self.driver.verify_connectivity() - self.connected = True - except neo4j.exceptions.ServiceUnavailable: - logger.error( - "NEO4J ServiceUnavailable error - disabling neo4j related features" - ) - return self.__instance def __init__(sel): @@ -221,238 +323,173 @@ def populate_DB(self, session) -> nx.Graph: @classmethod def add_cre(self, dbcre: CRE): - if not self.connected: - return - self.driver.execute_query( - "MERGE (n:CRE {id: $nid, name: $name, description: $description, doctype: $doctype, links: $links, metadata: $metadata, tags: $tags})", - name=dbcre.name, - doctype="CRE", # dbcre.ntype, - nid=dbcre.id, - description=dbcre.description, - links=[], # dbcre.links, - tags=dbcre.tags, - metadata="{}", # dbcre.metadata, - database_="neo4j", + NeoCRE.create_or_update( + { + "name": dbcre.name, + "doctype": "CRE", # dbcre.ntype, + "document_id": dbcre.id, + "description": dbcre.description, + "links": [], # dbcre.links, + "tags": [dbcre.tags] if isinstance(dbcre.tags, str) else dbcre.tags, + } ) @classmethod def add_dbnode(self, dbnode: Node): - if not self.connected: - return if dbnode.ntype == "Standard": - self.driver.execute_query( - "MERGE (n:Standard {id: $nid, name: $name, section: $section, sectionID: $sectionID, subsection: $subsection, tags: $tags, version: $version, description: $description, doctype: $doctype, links: $links, metadata: $metadata, hyperlink: $hyperlink})", - name=dbnode.name, - doctype=dbnode.ntype, - nid=dbnode.id, - description=dbnode.description, - links=[], # dbnode.links, - tags=dbnode.tags, - metadata="{}", # dbnode.metadata, - hyperlink="", # dbnode.hyperlink or "", - version=dbnode.version or "", - section=dbnode.section, - sectionID=dbnode.section_id, # dbnode.sectionID, - subsection=dbnode.subsection or "", - database_="neo4j", + NeoStandard.create_or_update( + { + "name": dbnode.name, + "doctype": dbnode.ntype, + "document_id": dbnode.id, + "description": dbnode.description or "", + "tags": [dbnode.tags] + if isinstance(dbnode.tags, str) + else dbnode.tags, + "hyperlink": "", # dbnode.hyperlink or "", + "version": dbnode.version or "", + "section": dbnode.section or "", + "section_id": dbnode.section_id or "", + "subsection": dbnode.subsection or "", + } ) return if dbnode.ntype == "Tool": - self.driver.execute_query( - "MERGE (n:Tool {id: $nid, name: $name, section: $section, sectionID: $sectionID, subsection: $subsection, tags: $tags, version: $version, description: $description, doctype: $doctype, links: $links, metadata: $metadata, hyperlink: $hyperlink, tooltype: $tooltype})", - name=dbnode.name, - doctype=dbnode.ntype, - nid=dbnode.id, - description=dbnode.description, - links=[], # dbnode.links, - tags=dbnode.tags, - metadata="{}", # dbnode.metadata, - hyperlink="", # dbnode.hyperlink or "", - version=dbnode.version or "", - section=dbnode.section, - sectionID=dbnode.section_id, # dbnode.sectionID, - subsection=dbnode.subsection or "", - tooltype="", # dbnode.tooltype, - database_="neo4j", + NeoTool.create_or_update( + { + "name": dbnode.name, + "doctype": dbnode.ntype, + "document_id": dbnode.id, + "description": dbnode.description, + "links": [], # dbnode.links, + "tags": [dbnode.tags] + if isinstance(dbnode.tags, str) + else dbnode.tags, + "metadata": "{}", # dbnode.metadata, + "hyperlink": "", # dbnode.hyperlink or "", + "version": dbnode.version or "", + "section": dbnode.section, + "section_id": dbnode.section_id, # dbnode.sectionID, + "subsection": dbnode.subsection or "", + "tooltype": "", # dbnode.tooltype, + } ) return if dbnode.ntype == "Code": - self.driver.execute_query( - "MERGE (n:Code {id: $nid, name: $name, section: $section, sectionID: $sectionID, subsection: $subsection, tags: $tags, version: $version, description: $description, doctype: $doctype, links: $links, metadata: $metadata, hyperlink: $hyperlink})", - name=dbnode.name, - doctype=dbnode.ntype, - nid=dbnode.id, - description=dbnode.description, - links=[], # dbnode.links, - tags=dbnode.tags, - metadata="{}", # dbnode.metadata, - hyperlink="", # dbnode.hyperlink or "", - version=dbnode.version or "", + NeoCode.create_or_update( + { + "name": dbnode.name, + "doctype": dbnode.ntype, + "document_id": dbnode.id, + "description": dbnode.description, + "links": [], # dbnode.links, + "tags": [dbnode.tags] + if isinstance(dbnode.tags, str) + else dbnode.tags, + "metadata": "{}", # dbnode.metadata, + "hyperlink": "", # dbnode.hyperlink or "", + "version": dbnode.version or "", + } ) return raise Exception(f"Unknown DB type: {dbnode.ntype}") @classmethod def link_CRE_to_CRE(self, id1, id2, link_type): - if not self.connected: + cre1 = NeoCRE.nodes.get(document_id=id1) + cre2 = NeoCRE.nodes.get(document_id=id2) + + if link_type == "Contains": + cre1.contains.connect(cre2) return - self.driver.execute_query( - "MATCH (a:CRE), (b:CRE) " - "WHERE a.id = $aID AND b.id = $bID " - "CALL apoc.create.relationship(a,$relType, {},b) " - "YIELD rel " - "RETURN rel", - aID=id1, - bID=id2, - relType=str.upper(link_type).replace(" ", "_"), - database_="neo4j", - ) + if link_type == "Related": + cre1.related.connect(cre2) + return + raise Exception(f"Unknown relation type {link_type}") @classmethod def link_CRE_to_Node(self, CRE_id, node_id, link_type): - if not self.connected: + cre = NeoCRE.nodes.get(document_id=CRE_id) + node = NeoNode.nodes.get(document_id=node_id) + if link_type == "Linked To": + cre.linked.connect(node) return - self.driver.execute_query( - "MATCH (a:CRE), (b:Standard|Tool) " - "WHERE a.id = $aID AND b.id = $bID " - "CALL apoc.create.relationship(a,$relType, {},b) " - "YIELD rel " - "RETURN rel", - aID=CRE_id, - bID=node_id, - relType=str.upper(link_type).replace(" ", "_"), - database_="neo4j", - ) + if link_type == "SAME": + cre.same_as.connect(node) + return + raise Exception(f"Unknown relation type {link_type}") @classmethod def gap_analysis(self, name_1, name_2): - if not self.connected: - return None, None - base_standard, _, _ = self.driver.execute_query( - """ - MATCH (BaseStandard:Standard|Tool {name: $name1}) - RETURN BaseStandard - """, - name1=name_1, - database_="neo4j", - ) + base_standard = NeoStandard.nodes.filter(name=name_1) - path_records_all, _, _ = self.driver.execute_query( + path_records_all, _ = db.cypher_query( """ - OPTIONAL MATCH (BaseStandard:Standard|Tool {name: $name1}) - OPTIONAL MATCH (CompareStandard:Standard|Tool {name: $name2}) + OPTIONAL MATCH (BaseStandard:NeoStandard {name: $name1}) + OPTIONAL MATCH (CompareStandard:NeoStandard {name: $name2}) OPTIONAL MATCH p = shortestPath((BaseStandard)-[*..20]-(CompareStandard)) WITH p - WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:CRE or n = BaseStandard or n = CompareStandard) + WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:NeoCRE or n = BaseStandard or n = CompareStandard) RETURN p """, - name1=name_1, - name2=name_2, - database_="neo4j", + {"name1": name_1, "name2": name_2}, + resolve_objects=True, ) - path_records, _, _ = self.driver.execute_query( + + path_records, _ = db.cypher_query( """ - OPTIONAL MATCH (BaseStandard:Standard|Tool {name: $name1}) - OPTIONAL MATCH (CompareStandard:Standard|Tool {name: $name2}) + OPTIONAL MATCH (BaseStandard:NeoStandard {name: $name1}) + OPTIONAL MATCH (CompareStandard:NeoStandard {name: $name2}) OPTIONAL MATCH p = shortestPath((BaseStandard)-[:(LINKED_TO|CONTAINS)*..20]-(CompareStandard)) WITH p - WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:CRE or n = BaseStandard or n = CompareStandard) + WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE n:NeoCRE or n = BaseStandard or n = CompareStandard) RETURN p """, - name1=name_1, - name2=name_2, - database_="neo4j", + {"name1": name_1, "name2": name_2}, + resolve_objects=True, ) - def format_segment(seg): + def format_segment(seg: StructuredRel, nodes): + relation_map = { + RelatedRel: "RELATED", + ContainsRel: "CONTAINS", + LinkedToRel: "LINKED_TO", + SameRel: "SAME", + } + start_node = [ + node for node in nodes if node.element_id == seg._start_node_element_id + ][0] + end_node = [ + node for node in nodes if node.element_id == seg._end_node_element_id + ][0] + return { - "start": NEO_DB.parse_node(seg.start_node), - "end": NEO_DB.parse_node(seg.end_node), - "relationship": seg.type, + "start": NEO_DB.parse_node(start_node), + "end": NEO_DB.parse_node(end_node), + "relationship": relation_map[type(seg)], } def format_path_record(rec): return { "start": NEO_DB.parse_node(rec.start_node), "end": NEO_DB.parse_node(rec.end_node), - "path": [format_segment(seg) for seg in rec.relationships], + "path": [format_segment(seg, rec.nodes) for seg in rec.relationships], } - return [NEO_DB.parse_node(rec["BaseStandard"]) for rec in base_standard], [ - format_path_record(rec["p"]) for rec in (path_records + path_records_all) + return [NEO_DB.parse_node(rec) for rec in base_standard], [ + format_path_record(rec[0]) for rec in (path_records + path_records_all) ] @classmethod def standards(self) -> List[str]: - if not self.connected: - return - records, _, _ = self.driver.execute_query( - "MATCH (n:Standard|Tool) " "RETURN collect(distinct n.name)", - database_="neo4j", - ) - return records[0][0] + tools = NeoTool.nodes.all() + standards = NeoStandard.nodes.all() - @staticmethod - def parse_node(node: neo4j.graph.Node) -> cre_defs.Document: - name = node["name"] - id = node["id"] if "id" in node else None - description = node["description"] if "description" in node else None - # links = [self.parse_link(link) for link in node["links"]] - tags = node["tags"] - # metadata = node["metadata"] - if cre_defs.Credoctypes.Code.value in node.labels: - return cre_defs.Code( - name=name, - id=id, - description=description, - # links=links, - tags=tags, - # metadata=metadata, - # hyperlink=(node["hyperlink"] if "hyperlink" in node else None), - version=(node["version"] if "version" in node else None), - ) - if cre_defs.Credoctypes.Standard.value in node.labels: - return cre_defs.Standard( - name=name, - id=id, - description=description, - # links=links, - tags=tags, - # metadata=metadata, - # hyperlink=(node["hyperlink"] if "hyperlink" in node else None), - version=(node["version"] if "version" in node else None), - section=node["section"], - sectionID=node["sectionID"], - subsection=(node["subsection"] if "subsection" in node else None), - ) - if cre_defs.Credoctypes.Tool.value in node.labels: - return cre_defs.Tool( - name=name, - id=id, - description=description, - # links=links, - tags=tags, - # metadata=metadata, - # hyperlink=(node["hyperlink"] if "hyperlink" in node else None), - version=(node["version"] if "version" in node else None), - section=node["section"], - sectionID=node["sectionID"], - subsection=(node["subsection"] if "subsection" in node else None), - ) - if cre_defs.Credoctypes.CRE.value in node.labels: - return cre_defs.CRE( - name=name, - id=id, - description=description, - # links=links, - tags=tags, - # metadata=metadata, - ) - raise Exception(f"Unknown node {node.labels}") + return list(set([x.name for x in tools] + [x.name for x in standards])) - # @classmethod - # def parse_link(self, link): - # return cre_defs.Link(ltype=link["ltype"], tags=link["tags"]) + @staticmethod + def parse_node(node: NeoDocument) -> cre_defs.Document: + return node.to_cre_def(node) class CRE_Graph: diff --git a/application/tests/db_test.py b/application/tests/db_test.py index d79671bc2..6fafe162d 100644 --- a/application/tests/db_test.py +++ b/application/tests/db_test.py @@ -1315,25 +1315,22 @@ def test_neo_db_parse_node_code(self): description = "description" tags = "tags" version = "version" + hyperlink = "version" expected = defs.Code( name=name, id=id, description=description, tags=tags, version=version, + hyperlink=hyperlink, ) - graph_node = neo4j.graph.Node( - None, - "123", - "id", - n_labels=[defs.Credoctypes.Code.value], - properties={ - "name": name, - "id": id, - "description": description, - "tags": tags, - "version": version, - }, + graph_node = db.NeoCode( + name=name, + document_id=id, + description=description, + tags=tags, + version=version, + hyperlink=hyperlink, ) self.assertEqual(db.NEO_DB.parse_node(graph_node), expected) @@ -1346,6 +1343,7 @@ def test_neo_db_parse_node_standard(self): section = "section" sectionID = "sectionID" subsection = "subsection" + hyperlink = "version" expected = defs.Standard( name=name, id=id, @@ -1355,22 +1353,18 @@ def test_neo_db_parse_node_standard(self): section=section, sectionID=sectionID, subsection=subsection, + hyperlink=hyperlink, ) - graph_node = neo4j.graph.Node( - None, - "123", - "id", - n_labels=[defs.Credoctypes.Standard.value], - properties={ - "name": name, - "id": id, - "description": description, - "tags": tags, - "version": version, - "section": section, - "sectionID": sectionID, - "subsection": subsection, - }, + graph_node = db.NeoStandard( + name=name, + document_id=id, + description=description, + tags=tags, + version=version, + section=section, + section_id=sectionID, + subsection=subsection, + hyperlink=hyperlink, ) self.assertEqual(db.NEO_DB.parse_node(graph_node), expected) @@ -1383,6 +1377,7 @@ def test_neo_db_parse_node_tool(self): section = "section" sectionID = "sectionID" subsection = "subsection" + hyperlink = "version" expected = defs.Tool( name=name, id=id, @@ -1392,22 +1387,18 @@ def test_neo_db_parse_node_tool(self): section=section, sectionID=sectionID, subsection=subsection, + hyperlink=hyperlink, ) - graph_node = neo4j.graph.Node( - None, - "123", - "id", - n_labels=[defs.Credoctypes.Tool.value], - properties={ - "name": name, - "id": id, - "description": description, - "tags": tags, - "version": version, - "section": section, - "sectionID": sectionID, - "subsection": subsection, - }, + graph_node = db.NeoTool( + name=name, + document_id=id, + description=description, + tags=tags, + version=version, + section=section, + section_id=sectionID, + subsection=subsection, + hyperlink=hyperlink, ) self.assertEqual(db.NEO_DB.parse_node(graph_node), expected) @@ -1422,41 +1413,45 @@ def test_neo_db_parse_node_cre(self): description=description, tags=tags, ) - graph_node = neo4j.graph.Node( - None, - "123", - "id", - n_labels=[defs.Credoctypes.CRE.value], - properties={ - "name": name, - "id": id, - "description": description, - "tags": tags, - }, + graph_node = db.NeoCRE( + name=name, + document_id=id, + description=description, + tags=tags, ) self.assertEqual(db.NEO_DB.parse_node(graph_node), expected) - def test_neo_db_parse_node_unknown(self): + def test_neo_db_parse_node_Document(self): name = "name" id = "id" description = "description" tags = "tags" - graph_node = neo4j.graph.Node( - None, - "123", - "id", - n_labels=["ABC"], - properties={ - "name": name, - "id": id, - "description": description, - "tags": tags, - }, + graph_node = db.NeoDocument( + name=name, + document_id=id, + description=description, + tags=tags, + ) + with self.assertRaises(Exception) as cm: + db.NEO_DB.parse_node(graph_node) + + self.assertEqual(str(cm.exception), "Shouldn't be parsing a NeoDocument") + + def test_neo_db_parse_node_Node(self): + name = "name" + id = "id" + description = "description" + tags = "tags" + graph_node = db.NeoNode( + name=name, + document_id=id, + description=description, + tags=tags, ) with self.assertRaises(Exception) as cm: db.NEO_DB.parse_node(graph_node) - self.assertEqual(str(cm.exception), "Unknown node frozenset({'ABC'})") + self.assertEqual(str(cm.exception), "Shouldn't be parsing a NeoNode") def test_get_embeddings_by_doc_type_paginated(self): """Given: a range of embedding for Nodes and a range of embeddings for CREs diff --git a/requirements.txt b/requirements.txt index 6659a57bf..4fcc0a936 100644 --- a/requirements.txt +++ b/requirements.txt @@ -34,6 +34,7 @@ compliance-trestle nose==1.3.7 numpy==1.23.0 neo4j +neomodel openapi-schema-validator==0.3.4 openapi-spec-validator==0.5.1 openpyxl==3.1.0