Skip to content

Commit

Permalink
fix bug 552 by ensuring that ALL db cre links are from Higher->Lower … (#553)
Browse files Browse the repository at this point in the history

* fix bug 552 by ensuring that ALL DB CRE links point from Higher->Lower and are of type either Contains or Related; no more Lower->Higher links of type PartOf, since those are prone to bugs. PartOf is now an API construct

* nit: replace variable with constant enum in register_cre, code quality only

* bugfix/552-existing-graph-contains-cycle-the-graph-changes

* nit: rename add_link method parameter 'type' to 'ltype' so it no longer shadows the built-in 'type'

* feature 555 remove obsolete SAME/SAM linktype

* make the in-memory graph a singleton, add comments in the CRE retrieval tests, and ensure all graph relationships are Contains or Equal and use the CRE-id and Node-id instead of the db ids

* add tests for the inmemory graph

* move cycle detection to the graph 'add_edge' method to fix databases with existing cycles
  • Loading branch information
northdpole authored Sep 29, 2024
1 parent 77ae860 commit bff9a2a
Show file tree
Hide file tree
Showing 21 changed files with 985 additions and 425 deletions.
53 changes: 33 additions & 20 deletions application/cmd/cre_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ def register_node(node: defs.Node, collection: db.Node_collection) -> db.Node:
then map the one who doesn't to the CRE
if both don't map to anything, just add them in the db as unlinked nodes
"""
if not node:
raise ValueError("node is None")
if not node or not issubclass(node.__class__, defs.Node):
raise ValueError(f"node is None or not of type Node, node: {node}")

linked_node = collection.add_node(node)
if node.embeddings:
Expand Down Expand Up @@ -68,23 +68,23 @@ def register_node(node: defs.Node, collection: db.Node_collection) -> db.Node:
db_link = collection.add_node(link.document)
if cres:
for cre in cres:
collection.add_link(cre=cre, node=linked_node, type=link.ltype)
collection.add_link(cre=cre, node=linked_node, ltype=link.ltype)
for unlinked_standard in cre_less_nodes: # if anything in this
collection.add_link(
cre=cre,
node=db.dbNodeFromNode(unlinked_standard),
type=link.ltype,
ltype=link.ltype,
)
else:
cres = collection.find_cres_of_node(linked_node)
if cres:
for cre in cres:
collection.add_link(cre=cre, node=db_link, type=link.ltype)
collection.add_link(cre=cre, node=db_link, ltype=link.ltype)
for unlinked_node in cre_less_nodes:
collection.add_link(
cre=cre,
node=db.dbNodeFromNode(unlinked_node),
type=link.ltype,
ltype=link.ltype,
)
else: # if neither the root nor a linked node has a CRE, add both as unlinked nodes
cre_less_nodes.append(link.document)
Expand All @@ -96,13 +96,13 @@ def register_node(node: defs.Node, collection: db.Node_collection) -> db.Node:
# dbcre,_ = register_cre(link.document, collection) # CREs are idempotent
c = collection.get_CREs(name=link.document.name)[0]
dbcre = db.dbCREfromCRE(c)
collection.add_link(dbcre, linked_node, type=link.ltype)
collection.add_link(dbcre, linked_node, ltype=link.ltype)
cres_added.append(dbcre)
for unlinked_standard in cre_less_nodes: # if anything in this
collection.add_link(
cre=dbcre,
node=db.dbNodeFromNode(unlinked_standard),
type=link.ltype,
ltype=link.ltype,
)
cre_less_nodes = []

Expand All @@ -117,18 +117,34 @@ def register_cre(cre: defs.CRE, collection: db.Node_collection) -> Tuple[db.CRE,
dbcre: db.CRE = collection.add_cre(cre)
for link in cre.links:
if type(link.document) == defs.CRE:
logger.info(f"{link.document.id} {link.ltype} {cre.id}")
lower_cre, _ = register_cre(link.document, collection)
collection.add_internal_link(
higher=dbcre,
lower=lower_cre,
type=link.ltype,
)
other_cre, _ = register_cre(link.document, collection)

# the following flips the PartOf relationship so that we only have contains relationship in the database
if link.ltype == defs.LinkTypes.Contains:
collection.add_internal_link(
higher=dbcre,
lower=other_cre,
ltype=defs.LinkTypes.Contains,
)
elif link.ltype == defs.LinkTypes.PartOf:
collection.add_internal_link(
higher=other_cre,
lower=dbcre,
ltype=defs.LinkTypes.Contains,
)
elif link.ltype == defs.LinkTypes.Related:
collection.add_internal_link(
higher=other_cre,
lower=dbcre,
ltype=defs.LinkTypes.Related,
)
else:
raise ValueError(f"Unknown link type {link.ltype}")
else:
collection.add_link(
cre=dbcre,
node=register_node(node=link.document, collection=collection),
type=link.ltype,
ltype=link.ltype,
)
return dbcre, existing

Expand Down Expand Up @@ -444,6 +460,7 @@ def download_cre_from_upstream(creid: str):
cre = defs.Document.from_dict(credict)
if cre.id in imported_cres:
return

register_cre(cre, collection)
imported_cres[cre.id] = ""
for link in cre.links:
Expand Down Expand Up @@ -520,8 +537,6 @@ def run(args: argparse.Namespace) -> None: # pragma: no cover
)
elif args.add and args.cre_loc and not args.from_spreadsheet:
add_from_disk(cache_loc=args.cache_file, cre_loc=args.cre_loc)
elif args.print_graph:
print_graph()
# elif args.review and args.osib_in:
# review_osib_from_file(
# file_loc=args.osib_in, cache=args.cache_file, cre_loc=args.cre_loc
Expand Down Expand Up @@ -623,8 +638,6 @@ def run(args: argparse.Namespace) -> None: # pragma: no cover

if args.generate_embeddings:
generate_embeddings(args.cache_file)
if args.owasp_proj_meta:
owasp_metadata_to_cre(args.owasp_proj_meta)
if args.populate_neo4j_db:
populate_neo4j_db(args.cache_file)
if args.start_worker:
Expand Down
Loading

0 comments on commit bff9a2a

Please sign in to comment.