fix bug 552 by ensuring that ALL db cre links are from Higher->Lower … (

#553) * fix bug 552 by ensuring that ALL db CRE links go from Higher->Lower and are of type either Contains or Related; no more Lower->Higher links of type PartOf, since these are prone to bugs; PartOf is now an API construct * nit: replace variable with constant enum in register_cre, code quality only * bugfix/552-existing-graph-contains-cycle-the-graph-changes * nit: rename the add_link method parameter 'type' so that it no longer shadows the built-in 'type' * feature 555: remove obsolete SAME/SAM link type * make the in-memory graph a singleton, add comments in the CRE retrieval tests, and ensure all graph relationships are Contains or Equal and use the CRE id and Node id instead of the db ids * add tests for the in-memory graph * move cycle detection to the graph 'add_edge' method to fix databases with existing cycles
OWASP · Sep 29, 2024 · bff9a2a · bff9a2a
1 parent 77ae860
commit bff9a2a
Show file tree

Hide file tree

Showing 21 changed files with 985 additions and 425 deletions.
diff --git a/application/cmd/cre_main.py b/application/cmd/cre_main.py
@@ -39,8 +39,8 @@ def register_node(node: defs.Node, collection: db.Node_collection) -> db.Node:
     then map the one who doesn't to the CRE
     if both don't map to anything, just add them in the db as unlinked nodes
     """
-    if not node:
-        raise ValueError("node is None")
+    if not node or not issubclass(node.__class__, defs.Node):
+        raise ValueError(f"node is None or not of type Node, node: {node}")
 
     linked_node = collection.add_node(node)
     if node.embeddings:
@@ -68,23 +68,23 @@ def register_node(node: defs.Node, collection: db.Node_collection) -> db.Node:
             db_link = collection.add_node(link.document)
             if cres:
                 for cre in cres:
-                    collection.add_link(cre=cre, node=linked_node, type=link.ltype)
+                    collection.add_link(cre=cre, node=linked_node, ltype=link.ltype)
                     for unlinked_standard in cre_less_nodes:  # if anything in this
                         collection.add_link(
                             cre=cre,
                             node=db.dbNodeFromNode(unlinked_standard),
-                            type=link.ltype,
+                            ltype=link.ltype,
                         )
             else:
                 cres = collection.find_cres_of_node(linked_node)
                 if cres:
                     for cre in cres:
-                        collection.add_link(cre=cre, node=db_link, type=link.ltype)
+                        collection.add_link(cre=cre, node=db_link, ltype=link.ltype)
                         for unlinked_node in cre_less_nodes:
                             collection.add_link(
                                 cre=cre,
                                 node=db.dbNodeFromNode(unlinked_node),
-                                type=link.ltype,
+                                ltype=link.ltype,
                             )
                 else:  # if neither the root nor a linked node has a CRE, add both as unlinked nodes
                     cre_less_nodes.append(link.document)
@@ -96,13 +96,13 @@ def register_node(node: defs.Node, collection: db.Node_collection) -> db.Node:
             # dbcre,_ = register_cre(link.document, collection) # CREs are idempotent
             c = collection.get_CREs(name=link.document.name)[0]
             dbcre = db.dbCREfromCRE(c)
-            collection.add_link(dbcre, linked_node, type=link.ltype)
+            collection.add_link(dbcre, linked_node, ltype=link.ltype)
             cres_added.append(dbcre)
             for unlinked_standard in cre_less_nodes:  # if anything in this
                 collection.add_link(
                     cre=dbcre,
                     node=db.dbNodeFromNode(unlinked_standard),
-                    type=link.ltype,
+                    ltype=link.ltype,
                 )
             cre_less_nodes = []
 
@@ -117,18 +117,34 @@ def register_cre(cre: defs.CRE, collection: db.Node_collection) -> Tuple[db.CRE,
     dbcre: db.CRE = collection.add_cre(cre)
     for link in cre.links:
         if type(link.document) == defs.CRE:
-            logger.info(f"{link.document.id} {link.ltype} {cre.id}")
-            lower_cre, _ = register_cre(link.document, collection)
-            collection.add_internal_link(
-                higher=dbcre,
-                lower=lower_cre,
-                type=link.ltype,
-            )
+            other_cre, _ = register_cre(link.document, collection)
+
+            # the following flips the PartOf relationship so that we only have contains relationship in the database
+            if link.ltype == defs.LinkTypes.Contains:
+                collection.add_internal_link(
+                    higher=dbcre,
+                    lower=other_cre,
+                    ltype=defs.LinkTypes.Contains,
+                )
+            elif link.ltype == defs.LinkTypes.PartOf:
+                collection.add_internal_link(
+                    higher=other_cre,
+                    lower=dbcre,
+                    ltype=defs.LinkTypes.Contains,
+                )
+            elif link.ltype == defs.LinkTypes.Related:
+                collection.add_internal_link(
+                    higher=other_cre,
+                    lower=dbcre,
+                    ltype=defs.LinkTypes.Related,
+                )
+            else:
+                raise ValueError(f"Unknown link type {link.ltype}")
         else:
             collection.add_link(
                 cre=dbcre,
                 node=register_node(node=link.document, collection=collection),
-                type=link.ltype,
+                ltype=link.ltype,
             )
     return dbcre, existing
 
@@ -444,6 +460,7 @@ def download_cre_from_upstream(creid: str):
         cre = defs.Document.from_dict(credict)
         if cre.id in imported_cres:
             return
+
         register_cre(cre, collection)
         imported_cres[cre.id] = ""
         for link in cre.links:
@@ -520,8 +537,6 @@ def run(args: argparse.Namespace) -> None:  # pragma: no cover
         )
     elif args.add and args.cre_loc and not args.from_spreadsheet:
         add_from_disk(cache_loc=args.cache_file, cre_loc=args.cre_loc)
-    elif args.print_graph:
-        print_graph()
     # elif args.review and args.osib_in:
     #     review_osib_from_file(
     #         file_loc=args.osib_in, cache=args.cache_file, cre_loc=args.cre_loc
@@ -623,8 +638,6 @@ def run(args: argparse.Namespace) -> None:  # pragma: no cover
 
     if args.generate_embeddings:
         generate_embeddings(args.cache_file)
-    if args.owasp_proj_meta:
-        owasp_metadata_to_cre(args.owasp_proj_meta)
     if args.populate_neo4j_db:
         populate_neo4j_db(args.cache_file)
     if args.start_worker: