diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 7b0cbda8c..4d167a4ed 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -7,6 +7,13 @@ jobs: steps: - name: Check out code uses: actions/checkout@v2 + - uses: actions/setup-python@v4 + with: + python-version: '3.11.4' + cache: 'pip' + - uses: actions/setup-node@v3 + with: + cache: 'yarn' - name: Install python dependencies run: sudo apt-get update && sudo apt-get install -y python3-setuptools python3-pip chromium-browser libgbm1 && make install-deps - name: Test-e2e diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 94bb1ca15..531b9b35b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,9 +7,14 @@ jobs: steps: - name: Check out code uses: actions/checkout@v2 + - uses: actions/setup-python@v4 + with: + python-version: '3.11.4' + cache: 'pip' + - uses: actions/setup-node@v3 + with: + cache: 'yarn' - name: Install python dependencies run: sudo apt-get update && sudo apt-get install -y python3-setuptools python3-pip && make install-deps - name: Test run: make test - - name: Test-e2e - run: make e2e \ No newline at end of file diff --git a/.gitignore b/.gitignore index cbf1dd8c1..a1f5606a8 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ Vagrantfile ## act secrets .secrets/ +.env ### conventions ### venv/ @@ -30,4 +31,7 @@ yarn-error.log coverage/ ### Dev db -standards_cache.sqlite \ No newline at end of file +standards_cache.sqlite + +### Neo4j +neo4j/ \ No newline at end of file diff --git a/Makefile b/Makefile index 1e9f86ac7..f78d2dce6 100644 --- a/Makefile +++ b/Makefile @@ -1,23 +1,38 @@ .ONESHELL: -.PHONY: dev-run run test covers install-deps dev docker lint frontend clean all +.PHONY: run test covers install-deps dev docker lint frontend clean all prod-run: cp cres/db.sqlite standards_cache.sqlite; gunicorn cre:app --log-file=- -dev-run: - . 
./venv/bin/activate && FLASK_APP=cre.py FLASK_CONFIG=development flask run +docker-neo4j: + docker start cre-neo4j 2>/dev/null || docker run -d --name cre-neo4j --env NEO4J_PLUGINS='["apoc"]' --env NEO4J_AUTH=neo4j/password --volume=`pwd`/.neo4j/data:/data --volume=`pwd`/.neo4j/logs:/logs --workdir=/var/lib/neo4j -p 7474:7474 -p 7687:7687 neo4j + +docker-redis: + docker start redis-stack 2>/dev/null || docker run -d --name redis-stack -p 6379:6379 -p 8001:8001 redis/redis-stack:latest + +start-containers: docker-neo4j docker-redis + +start-worker: + . ./venv/bin/activate + FLASK_APP=`pwd`/cre.py python cre.py --start_worker + +dev-flask: + . ./venv/bin/activate + FLASK_APP=`pwd`/cre.py FLASK_CONFIG=development flask run + e2e: yarn build [ -d "./venv" ] && . ./venv/bin/activate export FLASK_APP=$(CURDIR)/cre.py export FLASK_CONFIG=development - fFLASK_CONFIG=development flask run& - + flask run& + yarn test:e2e killall yarn killall flask + test: [ -d "./venv" ] && . ./venv/bin/activate export FLASK_APP=$(CURDIR)/cre.py @@ -79,4 +94,8 @@ import-all: [ -d "./venv" ] && . ./venv/bin/activate rm -rf standards_cache.sqlite && make migrate-upgrade && export FLASK_APP=$(CURDIR)/cre.py && python cre.py --add --from_spreadsheet https://docs.google.com/spreadsheets/d/1eZOEYgts7d_-Dr-1oAbogPfzBLh6511b58pX3b59kvg && python cre.py --generate_embeddings && python cre.py --zap_in --cheatsheets_in --github_tools_in --capec_in --owasp_secure_headers_in --pci_dss_4_in --juiceshop_in && python cre.py --generate_embeddings +import-neo4j: + [ -d "./venv" ] && . 
./venv/bin/activate + export FLASK_APP=$(CURDIR)/cre.py && python cre.py --populate_neo4j_db + all: clean lint test dev dev-run diff --git a/Procfile b/Procfile index 52d23bfd2..8537c1d95 100644 --- a/Procfile +++ b/Procfile @@ -1 +1,2 @@ -web: gunicorn cre:app --log-file=- \ No newline at end of file +web: gunicorn cre:app --log-file=-g +worker: FLASK_APP=`pwd`/cre.py python cre.py --start_worker \ No newline at end of file diff --git a/README.md b/README.md index e46c683a8..2b7dfd04a 100644 --- a/README.md +++ b/README.md @@ -60,11 +60,22 @@ To add a remote spreadsheet to your local database you can run
python cre.py --add --from_spreadsheet < google sheets url>
To run the web application for development you can run -
make dev-run
+
+$ make start-containers
+$ make start-worker 
+
+# in a separate shell
+$ make dev-flask
+
Alternatively, you can use the dockerfile with
make docker && make docker-run
+Some features like Gap Analysis require a neo4j DB running, you can start this with +
make docker-neo4j
+environment variables for app to connect to neo4jDB (default): +- NEO4J_URL (neo4j://neo4j:password@localhost:7687) + To run the web application for production you need gunicorn and you can run from within the cre_sync dir
make prod-run
@@ -84,4 +95,4 @@ Please see [Contributing](CONTRIBUTING.md) for contributing instructions Roadmap --- -For a roadmap of what we would like to be done please see the [issues](https://github.com/OWASP/common-requirement-enumeration/issues). +For a roadmap of what we would like to be done please see the [issues](https://github.com/OWASP/common-requirement-enumeration/issues). \ No newline at end of file diff --git a/application/cmd/cre_main.py b/application/cmd/cre_main.py index d0bca16d8..d257389ec 100644 --- a/application/cmd/cre_main.py +++ b/application/cmd/cre_main.py @@ -17,7 +17,6 @@ from application.utils.external_project_parsers import ( capec_parser, cwe, - ccmv3, ccmv4, cheatsheets_parser, misc_tools_parser, @@ -375,14 +374,6 @@ def run(args: argparse.Namespace) -> None: # pragma: no cover if args.export: cache = db_connect(args.cache_file) cache.export(args.export) - if args.csa_ccm_v3_in: - ccmv3.parse_ccm( - ccmFile=sheet_utils.readSpreadsheet( - alias="", - url="https://docs.google.com/spreadsheets/d/1b5i8OV919aiqW2KcYWOQvkLorL1bRPqjthJxLH0QpD8", - ), - cache=db_connect(args.cache_file), - ) if args.csa_ccm_v4_in: ccmv4.parse_ccm( ccmFile=sheet_utils.readSpreadsheet( @@ -426,6 +417,12 @@ def run(args: argparse.Namespace) -> None: # pragma: no cover generate_embeddings(args.cache_file) if args.owasp_proj_meta: owasp_metadata_to_cre(args.owasp_proj_meta) + if args.populate_neo4j_db: + populate_neo4j_db(args.cache_file) + if args.start_worker: + from application.worker import start_worker + + start_worker(args.cache_file) def db_connect(path: str): @@ -530,3 +527,11 @@ def owasp_metadata_to_cre(meta_file: str): }, """ raise NotImplementedError("someone needs to work on this") + + +def populate_neo4j_db(cache: str): + logger.info(f"Populating neo4j DB: Connecting to SQL DB") + database = db_connect(path=cache) + logger.info(f"Populating neo4j DB: Populating") + database.neo_db.populate_DB(database.session) + logger.info(f"Populating neo4j DB: Complete") 
diff --git a/application/database/db.py b/application/database/db.py index 1c3c1a3af..74f35f128 100644 --- a/application/database/db.py +++ b/application/database/db.py @@ -1,3 +1,14 @@ +from neomodel import ( + config, + StructuredNode, + StringProperty, + UniqueIdProperty, + Relationship, + RelationshipTo, + ArrayProperty, + StructuredRel, + db, +) from sqlalchemy.orm import aliased import os import logging @@ -5,16 +16,17 @@ from collections import Counter from itertools import permutations from typing import Any, Dict, List, Optional, Tuple, cast - +from itertools import chain import networkx as nx import yaml from application.defs import cre_defs from application.utils import file from flask_sqlalchemy.model import DefaultMeta from sqlalchemy import func -from sqlalchemy.sql.expression import desc # type: ignore import uuid +from application.utils.gap_analysis import get_path_score + from .. import sqla # type: ignore logging.basicConfig() @@ -156,6 +168,350 @@ class Embeddings(BaseModel): # type: ignore ) +class RelatedRel(StructuredRel): + pass + + +class ContainsRel(StructuredRel): + pass + + +class LinkedToRel(StructuredRel): + pass + + +class SameRel(StructuredRel): + pass + + +class NeoDocument(StructuredNode): + document_id = UniqueIdProperty() + name = StringProperty(required=True) + description = StringProperty(required=True) + tags = ArrayProperty(StringProperty()) + doctype = StringProperty(required=True) + related = Relationship("NeoDocument", "RELATED", model=RelatedRel) + + @classmethod + def to_cre_def(self, node): + raise Exception(f"Shouldn't be parsing a NeoDocument") + + +class NeoNode(NeoDocument): + doctype = StringProperty() + version = StringProperty(required=True) + hyperlink = StringProperty() + + @classmethod + def to_cre_def(self, node): + raise Exception(f"Shouldn't be parsing a NeoNode") + + +class NeoStandard(NeoNode): + section = StringProperty() + subsection = StringProperty(required=True) + section_id = StringProperty() + + 
@classmethod + def to_cre_def(self, node) -> cre_defs.Standard: + return cre_defs.Standard( + name=node.name, + id=node.document_id, + description=node.description, + tags=node.tags, + hyperlink=node.hyperlink, + version=node.version, + section=node.section, + sectionID=node.section_id, + subsection=node.subsection, + ) + + +class NeoTool(NeoStandard): + tooltype = StringProperty(required=True) + + @classmethod + def to_cre_def(self, node) -> cre_defs.Tool: + return cre_defs.Tool( + name=node.name, + id=node.document_id, + description=node.description, + tags=node.tags, + hyperlink=node.hyperlink, + version=node.version, + section=node.section, + sectionID=node.section_id, + subsection=node.subsection, + ) + + +class NeoCode(NeoNode): + @classmethod + def to_cre_def(self, node) -> cre_defs.Code: + return cre_defs.Code( + name=node.name, + id=node.document_id, + description=node.description, + tags=node.tags, + hyperlink=node.hyperlink, + version=node.version, + ) + + +class NeoCRE(NeoDocument): # type: ignore + external_id = StringProperty() + contains = RelationshipTo("NeoCRE", "CONTAINS", model=ContainsRel) + linked = RelationshipTo("NeoStandard", "LINKED_TO", model=LinkedToRel) + same_as = RelationshipTo("NeoStandard", "SAME", model=SameRel) + + @classmethod + def to_cre_def(self, node) -> cre_defs.CRE: + return cre_defs.CRE( + name=node.name, + id=node.document_id, + description=node.description, + tags=node.tags, + ) + + +class NEO_DB: + __instance = None + + driver = None + connected = False + + @classmethod + def instance(self): + if self.__instance is None: + self.__instance = self.__new__(self) + + config.DATABASE_URL = ( + os.getenv("NEO4J_URL") or "neo4j://neo4j:password@localhost:7687" + ) + return self.__instance + + def __init__(sel): + raise ValueError("NEO_DB is a singleton, please call instance() instead") + + @classmethod + def add_gap_analysis(self, standard1: NeoNode, standard2: NeoNode): + """ + Populates the DB with a precompute of the gap 
analysis between the two specific standards + """ + + @classmethod + def populate_DB(self, session): + for il in session.query(InternalLinks).all(): + group = session.query(CRE).filter(CRE.id == il.group).first() + if not group: + logger.error(f"CRE {il.group} does not exist?") + self.add_cre(group) + + cre = session.query(CRE).filter(CRE.id == il.cre).first() + if not cre: + logger.error(f"CRE {il.cre} does not exist?") + self.add_cre(cre) + + self.link_CRE_to_CRE(il.group, il.cre, il.type) + + for lnk in session.query(Links).all(): + node = session.query(Node).filter(Node.id == lnk.node).first() + if not node: + logger.error(f"Node {lnk.node} does not exist?") + self.add_dbnode(node) + + cre = session.query(CRE).filter(CRE.id == lnk.cre).first() + self.add_cre(cre) + + self.link_CRE_to_Node(lnk.cre, lnk.node, lnk.type) + + @classmethod + def add_cre(self, dbcre: CRE): + NeoCRE.create_or_update( + { + "name": dbcre.name, + "doctype": "CRE", # dbcre.ntype, + "document_id": dbcre.id, + "description": dbcre.description, + "links": [], # dbcre.links, + "tags": [dbcre.tags] if isinstance(dbcre.tags, str) else dbcre.tags, + } + ) + + @classmethod + def add_dbnode(self, dbnode: Node): + if dbnode.ntype == "Standard": + NeoStandard.create_or_update( + { + "name": dbnode.name, + "doctype": dbnode.ntype, + "document_id": dbnode.id, + "description": dbnode.description or "", + "tags": [dbnode.tags] + if isinstance(dbnode.tags, str) + else dbnode.tags, + "hyperlink": "", # dbnode.hyperlink or "", + "version": dbnode.version or "", + "section": dbnode.section or "", + "section_id": dbnode.section_id or "", + "subsection": dbnode.subsection or "", + } + ) + return + if dbnode.ntype == "Tool": + NeoTool.create_or_update( + { + "name": dbnode.name, + "doctype": dbnode.ntype, + "document_id": dbnode.id, + "description": dbnode.description, + "links": [], # dbnode.links, + "tags": [dbnode.tags] + if isinstance(dbnode.tags, str) + else dbnode.tags, + "metadata": "{}", # 
dbnode.metadata, + "hyperlink": "", # dbnode.hyperlink or "", + "version": dbnode.version or "", + "section": dbnode.section, + "section_id": dbnode.section_id, # dbnode.sectionID, + "subsection": dbnode.subsection or "", + "tooltype": "", # dbnode.tooltype, + } + ) + return + if dbnode.ntype == "Code": + NeoCode.create_or_update( + { + "name": dbnode.name, + "doctype": dbnode.ntype, + "document_id": dbnode.id, + "description": dbnode.description, + "links": [], # dbnode.links, + "tags": [dbnode.tags] + if isinstance(dbnode.tags, str) + else dbnode.tags, + "metadata": "{}", # dbnode.metadata, + "hyperlink": "", # dbnode.hyperlink or "", + "version": dbnode.version or "", + } + ) + return + raise Exception(f"Unknown DB type: {dbnode.ntype}") + + @classmethod + def link_CRE_to_CRE(self, id1, id2, link_type): + cre1 = NeoCRE.nodes.get(document_id=id1) + cre2 = NeoCRE.nodes.get(document_id=id2) + + if link_type == "Contains": + cre1.contains.connect(cre2) + return + if link_type == "Related": + cre1.related.connect(cre2) + return + raise Exception(f"Unknown relation type {link_type}") + + @classmethod + def link_CRE_to_Node(self, CRE_id, node_id, link_type): + cre = NeoCRE.nodes.get(document_id=CRE_id) + node = NeoNode.nodes.get(document_id=node_id) + if link_type == "Linked To": + cre.linked.connect(node) + return + if link_type == "SAME": + cre.same_as.connect(node) + return + raise Exception(f"Unknown relation type {link_type}") + + @classmethod + def gap_analysis(self, name_1, name_2): + base_standard = NeoStandard.nodes.filter(name=name_1) + denylist = ["Cross-cutting concerns"] + from pprint import pprint + from datetime import datetime + + t1 = datetime.now() + path_records_all, _ = db.cypher_query( + """ + OPTIONAL MATCH (BaseStandard:NeoStandard {name: $name1}) + OPTIONAL MATCH (CompareStandard:NeoStandard {name: $name2}) + OPTIONAL MATCH p = allShortestPaths((BaseStandard)-[*..20]-(CompareStandard)) + WITH p + WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE 
(n:NeoCRE or n = BaseStandard or n = CompareStandard) AND NOT n.name in $denylist) + RETURN p + """, + {"name1": name_1, "name2": name_2, "denylist": denylist}, + resolve_objects=True, + ) + t2 = datetime.now() + pprint(f"path records all took {t2-t1}") + pprint(path_records_all.__len__()) + # [ end= size=4>]] + path_records, _ = db.cypher_query( + """ + OPTIONAL MATCH (BaseStandard:NeoStandard {name: $name1}) + OPTIONAL MATCH (CompareStandard:NeoStandard {name: $name2}) + OPTIONAL MATCH p = allShortestPaths((BaseStandard)-[:(LINKED_TO|CONTAINS)*..20]-(CompareStandard)) + WITH p + WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE (n:NeoCRE or n = BaseStandard or n = CompareStandard) AND NOT n.name in $denylist) + RETURN p + """, + {"name1": name_1, "name2": name_2, "denylist": denylist}, + resolve_objects=True, + ) + t3 = datetime.now() + + def format_segment(seg: StructuredRel, nodes): + relation_map = { + RelatedRel: "RELATED", + ContainsRel: "CONTAINS", + LinkedToRel: "LINKED_TO", + SameRel: "SAME", + } + start_node = [ + node for node in nodes if node.element_id == seg._start_node_element_id + ][0] + end_node = [ + node for node in nodes if node.element_id == seg._end_node_element_id + ][0] + + return { + "start": NEO_DB.parse_node(start_node), + "end": NEO_DB.parse_node(end_node), + "relationship": relation_map[type(seg)], + } + + def format_path_record(rec): + return { + "start": NEO_DB.parse_node(rec.start_node), + "end": NEO_DB.parse_node(rec.end_node), + "path": [format_segment(seg, rec.nodes) for seg in rec.relationships], + } + + pprint( + f"path records all took {t2-t1} path records took {t3 - t2}, total: {t3 - t1}" + ) + return [NEO_DB.parse_node(rec) for rec in base_standard], [ + format_path_record(rec[0]) for rec in (path_records + path_records_all) + ] + + @classmethod + def standards(self) -> List[str]: + tools = [] + for x in db.cypher_query("""MATCH (n:NeoTool) RETURN DISTINCT n.name""")[0]: + tools.extend(x) + standards = [] + for x in 
db.cypher_query("""MATCH (n:NeoStandard) RETURN DISTINCT n.name""")[ + 0 + ]: # 0 is the results, 1 is the "n.name" param + standards.extend(x) + return list(set([x for x in tools] + [x for x in standards])) + + @staticmethod + def parse_node(node: NeoDocument) -> cre_defs.Document: + return node.to_cre_def(node) + + class CRE_Graph: graph: nx.Graph = None __instance = None @@ -189,6 +545,8 @@ def add_cre(cls, dbcre: CRE, graph: nx.DiGraph) -> nx.DiGraph: @classmethod def add_dbnode(cls, dbnode: Node, graph: nx.DiGraph) -> nx.DiGraph: if dbnode: + # coma separated tags + graph.add_node( "Node: " + str(dbnode.id), internal_id=dbnode.id, @@ -231,11 +589,13 @@ def load_cre_graph(cls, session) -> nx.Graph: class Node_collection: graph: nx.Graph = None + neo_db: NEO_DB = None session = sqla.session def __init__(self) -> None: if not os.environ.get("NO_LOAD_GRAPH"): self.graph = CRE_Graph.instance(sqla.session) + self.neo_db = NEO_DB.instance() self.session = sqla.session def __get_external_links(self) -> List[Tuple[CRE, Node, str]]: @@ -1059,30 +1419,8 @@ def find_path_between_nodes( return res - def gap_analysis(self, node_names: List[str]) -> List[cre_defs.Node]: - """Since the CRE structure is a tree-like graph with - leaves being nodes we can find the paths between nodes - find_path_between_nodes() is a graph-path-finding method - """ - processed_nodes = [] - dbnodes: List[Node] = [] - for name in node_names: - dbnodes.extend(self.session.query(Node).filter(Node.name == name).all()) - - for node in dbnodes: - working_node = nodeFromDB(node) - for other_node in dbnodes: - if node.id == other_node.id: - continue - if self.find_path_between_nodes(node.id, other_node.id): - working_node.add_link( - cre_defs.Link( - ltype=cre_defs.LinkTypes.LinkedTo, - document=nodeFromDB(other_node), - ) - ) - processed_nodes.append(working_node) - return processed_nodes + def standards(self) -> List[str]: + return self.neo_db.standards() def text_search(self, text: str) -> 
List[Optional[cre_defs.Document]]: """Given a piece of text, tries to find the best match @@ -1427,3 +1765,26 @@ def dbCREfromCRE(cre: cre_defs.CRE) -> CRE: external_id=cre.id, tags=",".join(tags), ) + + +def gap_analysis(neo_db, node_names: List[str]): + base_standard, paths = neo_db.gap_analysis(node_names[0], node_names[1]) + if base_standard is None: + return None + grouped_paths = {} + for node in base_standard: + key = node.id + if key not in grouped_paths: + grouped_paths[key] = {"start": node, "paths": {}} + + for path in paths: + key = path["start"].id + end_key = path["end"].id + path["score"] = get_path_score(path) + del path["start"] + if end_key in grouped_paths[key]["paths"]: + if grouped_paths[key]["paths"][end_key]["score"] > path["score"]: + grouped_paths[key]["paths"][end_key] = path + else: + grouped_paths[key]["paths"][end_key] = path + return (node_names, grouped_paths) diff --git a/application/frontend/src/const.ts b/application/frontend/src/const.ts index 231f78447..6f176723f 100644 --- a/application/frontend/src/const.ts +++ b/application/frontend/src/const.ts @@ -36,3 +36,4 @@ export const CRE = '/cre'; export const GRAPH = '/graph'; export const DEEPLINK = '/deeplink'; export const BROWSEROOT = '/root_cres'; +export const GAP_ANALYSIS = '/map_analysis'; diff --git a/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx new file mode 100644 index 000000000..6da81b358 --- /dev/null +++ b/application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx @@ -0,0 +1,337 @@ +import axios from 'axios'; +import React, { useEffect, useRef, useState } from 'react'; +import { useLocation } from 'react-router-dom'; +import { + Accordion, + Button, + Container, + Dropdown, + DropdownItemProps, + Grid, + Icon, + Label, + Popup, + Table, +} from 'semantic-ui-react'; + +import { LoadingAndErrorIndicator } from '../../components/LoadingAndErrorIndicator'; +import { useEnvironment } from 
'../../hooks'; +import { GapAnalysisPathStart } from '../../types'; +import { getDocumentDisplayName } from '../../utils'; +import { getInternalUrl } from '../../utils/document'; + +const GetSegmentText = (segment, segmentID) => { + let textPart = segment.end; + let nextID = segment.end.id; + let arrow = ; + if (segmentID !== segment.start.id) { + textPart = segment.start; + nextID = segment.start.id; + arrow = ; + } + const text = ( + <> +
+ {arrow}{' '} + + {segment.relationship.replace('_', ' ').toLowerCase()} {segment.score > 0 && <> (+{segment.score})} + +
{getDocumentDisplayName(textPart, true)} {textPart.section ?? ''} {textPart.subsection ?? ''}{' '} + {textPart.description ?? ''} + + ); + return { text, nextID }; +}; + +function useQuery() { + const { search } = useLocation(); + + return React.useMemo(() => new URLSearchParams(search), [search]); +} + +const GetStrength = (score) => { + if (score == 0) return 'Direct'; + if (score <= 2) return 'Strong'; + if (score >= 20) return 'Weak'; + return 'Average'; +}; + +const GetStrengthColor = (score) => { + if (score === 0) return 'darkgreen'; + if (score <= 2) return '#93C54B'; + if (score >= 20) return 'Red'; + return 'Orange'; +}; + +const GetResultLine = (path, gapAnalysis, key) => { + let segmentID = gapAnalysis[key].start.id; + return ( +
+ + {getDocumentDisplayName(path.end, true)} } + > + + {getDocumentDisplayName(gapAnalysis[key].start, true)} + {path.path.map((segment) => { + const { text, nextID } = GetSegmentText(segment, segmentID); + segmentID = nextID; + return text; + })} + + + + ({GetStrength(path.score)}:{path.score}) + + } + > + + Generally: lower is better +
+ {GetStrength(0)}: Directly Linked +
+ {GetStrength(2)}: Closely connected likely to have + majority overlap +
+ {GetStrength(6)}: Connected likely to have partial + overlap +
+ {GetStrength(22)}: Weakly connected likely to have + small or no overlap +
+
+
+
+ ); +}; + +export const GapAnalysis = () => { + const standardOptionsDefault = [{ key: '', text: '', value: undefined }]; + const searchParams = useQuery(); + const [standardOptions, setStandardOptions] = useState( + standardOptionsDefault + ); + const [BaseStandard, setBaseStandard] = useState(searchParams.get('base') ?? ''); + const [CompareStandard, setCompareStandard] = useState( + searchParams.get('compare') ?? '' + ); + const [gaJob, setgaJob] = useState(""); + const [gapAnalysis, setGapAnalysis] = useState>(); + const [activeIndex, SetActiveIndex] = useState(); + const [loadingStandards, setLoadingStandards] = useState(false); + const [loadingGA, setLoadingGA] = useState(false); + const [error, setError] = useState(null); + const { apiUrl } = useEnvironment(); + const timerIdRef = useRef(); + + const GetStrongPathsCount = (paths) => + Math.max( + Object.values(paths).filter( + (x) => GetStrength(x.score) === 'Strong' || GetStrength(x.score) === 'Direct' + ).length, + 3 + ); + + useEffect(() => { + const fetchData = async () => { + const result = await axios.get(`${apiUrl}/standards`); + setLoadingStandards(false); + setStandardOptions( + standardOptionsDefault.concat(result.data.sort().map((x) => ({ key: x, text: x, value: x }))) + ); + }; + + setLoadingStandards(true); + fetchData().catch((e) => { + setLoadingStandards(false); + setError(e.response.data.message ?? 
e.message); + }); + }, [setStandardOptions, setLoadingStandards, setError]); + + useEffect(() => { + console.log("gajob changed, polling") + const pollingCallback = () => { + const fetchData = async () => { + const result = await axios.get( + `${apiUrl}/ma_job_results?id=` + gaJob, + { + headers: { + 'Cache-Control': 'no-cache', + 'Pragma': 'no-cache', + 'Expires': '0', + } + }, + ); + if (result.data.result) { + setLoadingGA(false); + setGapAnalysis(result.data.result); + setgaJob("") + } + } + if (!gaJob) return; + fetchData().catch((e) => { + setLoadingGA(false); + setError(e.response.data.message ?? e.message); + }); + } + + const startPolling = () => { + // Polling every 10 seconds + timerIdRef.current = setInterval(pollingCallback, 10000); + }; + const stopPolling = () => { + clearInterval(timerIdRef.current); + }; + + if (gaJob) { + console.log("started polling") + startPolling(); + } else { + console.log("stoped polling") + stopPolling(); + } + + return () => { + stopPolling(); + }; + }, [gaJob]) + + useEffect(() => { + const fetchData = async () => { + const result = await axios.get( + `${apiUrl}/map_analysis?standard=${BaseStandard}&standard=${CompareStandard}` + ); + if (result.data.result) { + setLoadingGA(false); + setGapAnalysis(result.data.result); + } else if (result.data.job_id) { + setgaJob(result.data.job_id) + } + }; + + if (!BaseStandard || !CompareStandard || BaseStandard === CompareStandard) return; + setGapAnalysis(undefined); + setLoadingGA(true); + fetchData().catch((e) => { + setLoadingGA(false); + setError(e.response.data.message ?? e.message); + }); + }, [BaseStandard, CompareStandard, setGapAnalysis, setLoadingGA, setError]); + + const handleAccordionClick = (e, titleProps) => { + const { index } = titleProps; + const newIndex = activeIndex === index ? -1 : index; + SetActiveIndex(newIndex); + }; + + return ( +
+ + + + + {' '} + Base:{' '} + setBaseStandard(value?.toString())} + value={BaseStandard} + /> + + + Compare:{' '} + setCompareStandard(value?.toString())} + value={CompareStandard} + /> + {gapAnalysis && ( +
+ +
+ )} +
+
+
+ + + {gapAnalysis && ( + <> + {Object.keys(gapAnalysis) + .sort((a, b) => + getDocumentDisplayName(gapAnalysis[a].start, true).localeCompare( + getDocumentDisplayName(gapAnalysis[b].start, true) + ) + ) + .map((key) => ( + + + +

+ {getDocumentDisplayName(gapAnalysis[key].start, true)} +

+
+
+ + {Object.values(gapAnalysis[key].paths) + .sort((a, b) => a.score - b.score) + .slice(0, GetStrongPathsCount(gapAnalysis[key].paths)) + .map((path) => GetResultLine(path, gapAnalysis, key))} + {Object.keys(gapAnalysis[key].paths).length > 3 && ( + + + + + + {Object.values(gapAnalysis[key].paths) + .sort((a, b) => a.score - b.score) + .slice( + GetStrongPathsCount(gapAnalysis[key].paths), + Object.keys(gapAnalysis[key].paths).length + ) + .map((path) => GetResultLine(path, gapAnalysis, key))} + + + )} + {Object.keys(gapAnalysis[key].paths).length === 0 && No links Found} + +
+ ))} + + )} +
+
+
+ ); +}; diff --git a/application/frontend/src/routes.tsx b/application/frontend/src/routes.tsx index 876462503..548c2d7a3 100644 --- a/application/frontend/src/routes.tsx +++ b/application/frontend/src/routes.tsx @@ -1,10 +1,22 @@ import { ReactNode } from 'react'; -import { BROWSEROOT, CRE, DEEPLINK, GRAPH, INDEX, SEARCH, SECTION, SECTION_ID, STANDARD } from './const'; +import { + BROWSEROOT, + CRE, + DEEPLINK, + GAP_ANALYSIS, + GRAPH, + INDEX, + SEARCH, + SECTION, + SECTION_ID, + STANDARD, +} from './const'; import { CommonRequirementEnumeration, Graph, Search, Standard } from './pages'; import { BrowseRootCres } from './pages/BrowseRootCres/browseRootCres'; import { Chatbot } from './pages/chatbot/chatbot'; import { Deeplink } from './pages/Deeplink/Deeplink'; +import { GapAnalysis } from './pages/GapAnalysis/GapAnalysis'; import { MembershipRequired } from './pages/MembershipRequired/MembershipRequired'; import { SearchName } from './pages/Search/SearchName'; import { StandardSection } from './pages/Standard/StandardSection'; @@ -23,6 +35,12 @@ export const ROUTES: IRoute[] = [ showFilter: false, showHeader: false, }, + { + path: GAP_ANALYSIS, + component: GapAnalysis, + showHeader: true, + showFilter: false, + }, { path: `/node${STANDARD}/:id${SECTION}/:section`, component: StandardSection, diff --git a/application/frontend/src/scaffolding/Header/Header.tsx b/application/frontend/src/scaffolding/Header/Header.tsx index aa872fb43..f1d09813c 100644 --- a/application/frontend/src/scaffolding/Header/Header.tsx +++ b/application/frontend/src/scaffolding/Header/Header.tsx @@ -13,6 +13,10 @@ const getLinks = (): { to: string; name: string }[] => [ to: `/`, name: 'Open CRE', }, + { + to: `/map_analysis`, + name: 'Map analysis', + }, ]; export const Header = () => { diff --git a/application/frontend/src/scaffolding/Header/header.scss b/application/frontend/src/scaffolding/Header/header.scss index e01e85568..faec51d53 100644 --- 
a/application/frontend/src/scaffolding/Header/header.scss +++ b/application/frontend/src/scaffolding/Header/header.scss @@ -20,6 +20,7 @@ padding-top: 10px; padding-bottom: 10px; text-align: center; + margin: 0 2px; .item { color: white !important; diff --git a/application/frontend/src/types.ts b/application/frontend/src/types.ts index c8b7cec72..60760f837 100644 --- a/application/frontend/src/types.ts +++ b/application/frontend/src/types.ts @@ -19,3 +19,20 @@ export interface LinkedDocument { document: Document; ltype: string; } + +interface GapAnalysisPathSegment { + start: Document; + end: Document; + relationship: string; + score: number; +} + +interface GapAnalysisPath { + end: Document; + path: GapAnalysisPathSegment[]; +} + +export interface GapAnalysisPathStart { + start: Document; + paths: Record; +} diff --git a/application/frontend/src/utils/document.ts b/application/frontend/src/utils/document.ts index 07b5a784d..efafd4c2e 100644 --- a/application/frontend/src/utils/document.ts +++ b/application/frontend/src/utils/document.ts @@ -7,14 +7,14 @@ import { } from '../const'; import { Document, LinkedDocument } from '../types'; -export const getDocumentDisplayName = (document: Document) => { +export const getDocumentDisplayName = (document: Document, noID = false) => { // [document.doctype, document.id, document.name, document.section, document.subsection].filter(Boolean).join(' - '); // format: Standard - ASVS - V1.1 if (!document) { return ''; } return [ document.doctype, - document.id, + noID ? 
'' : document.id, document.name, document.version, document.sectionID, @@ -47,7 +47,7 @@ export const groupBy = (list: T[], getKey: (item: T) => return previous; }, {} as Record); -export const getInternalUrl = (doc: Document): String => { +export const getInternalUrl = (doc: Document): string => { if (doc.doctype.toLowerCase() != 'cre') { var standardAPIPath = `/node/${doc.doctype.toLowerCase()}/${doc.name}/`; if (doc) { diff --git a/application/tests/cheatsheets_parser_test.py b/application/tests/cheatsheets_parser_test.py index 0c5a9bcb1..a543a00c0 100644 --- a/application/tests/cheatsheets_parser_test.py +++ b/application/tests/cheatsheets_parser_test.py @@ -40,7 +40,7 @@ class Repo: repo_path="https://github.com/foo/bar.git", ) expected = defs.Standard( - name="Cheat_sheets", + name="OWASP Cheat Sheets", hyperlink="https://github.com/foo/bar/tree/master/cs.md", section="Secrets Management Cheat Sheet", ) diff --git a/application/tests/db_test.py b/application/tests/db_test.py index 936e240c7..6ab6402c9 100644 --- a/application/tests/db_test.py +++ b/application/tests/db_test.py @@ -3,11 +3,13 @@ import os import tempfile import unittest +from unittest.mock import patch import uuid from copy import copy, deepcopy from pprint import pprint from pydoc import doc from typing import Any, Dict, List, Union +import neo4j import yaml from application import create_app, sqla # type: ignore @@ -763,153 +765,6 @@ def test_get_nodes_with_pagination(self) -> None: (None, None, None), ) - def test_gap_analysis(self) -> None: - """Given - the following standards SA1, SA2, SA3 SAA1 , SB1, SD1, SDD1, SW1, SX1 - the following CREs CA, CB, CC, CD, CDD , CW, CX - the following links - CC -> CA, CB,CD - CD -> CDD - CA-> SA1, SAA1 - CB -> SB1 - CD -> SD1 - CDD -> SDD1 - CW -> SW1 - CX -> SA3, SX1 - NoCRE -> SA2 - - Then: - gap_analysis(SA) returns SA1, SA2, SA3 - gap_analysis(SA,SAA) returns SA1 <-> SAA1, SA2, SA3 - gap_analysis(SA,SDD) returns SA1 <-> SDD1, SA2, SA3 - 
gap_analysis(SA, SW) returns SA1,SA2,SA3, SW1 # no connection - gap_analysis(SA, SB, SD, SW) returns SA1 <->(SB1,SD1), SA2 , SW1, SA3 - gap_analysis(SA, SX) returns SA1, SA2, SA3->SX1 - - give me a single standard - give me two standards connected by same cre - give me two standards connected by cres who are children of the same cre - give me two standards connected by completely different cres - give me two standards with sections on different trees. - - give me two standards without connections - give me 3 or more standards - - """ - - collection = db.Node_collection() - collection.graph.graph = db.CRE_Graph.load_cre_graph(sqla.session) - - cres = { - "dbca": collection.add_cre(defs.CRE(id="1", description="CA", name="CA")), - "dbcb": collection.add_cre(defs.CRE(id="2", description="CB", name="CB")), - "dbcc": collection.add_cre(defs.CRE(id="3", description="CC", name="CC")), - "dbcd": collection.add_cre(defs.CRE(id="4", description="CD", name="CD")), - "dbcdd": collection.add_cre( - defs.CRE(id="5", description="CDD", name="CDD") - ), - "dbcw": collection.add_cre(defs.CRE(id="6", description="CW", name="CW")), - "dbcx": collection.add_cre(defs.CRE(id="7", description="CX", name="CX")), - } - def_standards = { - "sa1": defs.Standard(name="SA", section="SA1"), - "sa2": defs.Standard(name="SA", section="SA2"), - "sa3": defs.Standard(name="SA", section="SA3"), - "saa1": defs.Standard(name="SAA", section="SAA1"), - "sb1": defs.Standard(name="SB", section="SB1"), - "sd1": defs.Standard(name="SD", section="SD1"), - "sdd1": defs.Standard(name="SDD", section="SDD1"), - "sw1": defs.Standard(name="SW", section="SW1"), - "sx1": defs.Standard(name="SX", section="SX1"), - } - standards = {} - for k, s in def_standards.items(): - standards["db" + k] = collection.add_node(s) - ltype = defs.LinkTypes.LinkedTo - collection.add_link(cre=cres["dbca"], node=standards["dbsa1"]) - collection.add_link(cre=cres["dbca"], node=standards["dbsaa1"]) - collection.add_link(cre=cres["dbcb"], 
node=standards["dbsb1"]) - collection.add_link(cre=cres["dbcd"], node=standards["dbsd1"]) - collection.add_link(cre=cres["dbcdd"], node=standards["dbsdd1"]) - collection.add_link(cre=cres["dbcw"], node=standards["dbsw1"]) - collection.add_link(cre=cres["dbcx"], node=standards["dbsa3"]) - collection.add_link(cre=cres["dbcx"], node=standards["dbsx1"]) - - collection.add_internal_link(group=cres["dbcc"], cre=cres["dbca"]) - collection.add_internal_link(group=cres["dbcc"], cre=cres["dbcb"]) - collection.add_internal_link(group=cres["dbcc"], cre=cres["dbcd"]) - collection.add_internal_link(group=cres["dbcd"], cre=cres["dbcdd"]) - - expected = { - "SA": [def_standards["sa1"], def_standards["sa2"], def_standards["sa3"]], - "SA,SAA": [ - copy(def_standards["sa1"]).add_link( - defs.Link(ltype=ltype, document=def_standards["saa1"]) - ), - copy(def_standards["saa1"]).add_link( - defs.Link(ltype=ltype, document=def_standards["sa1"]) - ), - def_standards["sa2"], - def_standards["sa3"], - ], - "SAA,SA": [ - copy(def_standards["sa1"]).add_link( - defs.Link(ltype=ltype, document=def_standards["saa1"]) - ), - copy(def_standards["saa1"]).add_link( - defs.Link(ltype=ltype, document=def_standards["sa1"]) - ), - def_standards["sa2"], - def_standards["sa3"], - ], - "SA,SDD": [ - copy(def_standards["sa1"]).add_link( - defs.Link(ltype=ltype, document=def_standards["sdd1"]) - ), - copy(def_standards["sdd1"]).add_link( - defs.Link(ltype=ltype, document=def_standards["sa1"]) - ), - def_standards["sa2"], - def_standards["sa3"], - ], - "SA,SW": [ - def_standards["sa1"], - def_standards["sa2"], - def_standards["sa3"], - def_standards["sw1"], - ], - "SA,SB,SD,SW": [ - copy(def_standards["sa1"]) - .add_link(defs.Link(ltype=ltype, document=def_standards["sb1"])) - .add_link(defs.Link(ltype=ltype, document=def_standards["sd1"])), - copy(def_standards["sb1"]) - .add_link(defs.Link(ltype=ltype, document=def_standards["sa1"])) - .add_link(defs.Link(ltype=ltype, document=def_standards["sd1"])), - 
copy(def_standards["sd1"]) - .add_link(defs.Link(ltype=ltype, document=def_standards["sa1"])) - .add_link(defs.Link(ltype=ltype, document=def_standards["sb1"])), - def_standards["sa2"], - def_standards["sa3"], - def_standards["sw1"], - ], - "SA,SX": [ - def_standards["sa1"], - def_standards["sa2"], - copy(def_standards["sa3"]).add_link( - defs.Link(ltype=ltype, document=def_standards["sx1"]) - ), - copy(def_standards["sx1"]).add_link( - defs.Link(ltype=ltype, document=def_standards["sa3"]) - ), - ], - } - - self.maxDiff = None - for args, expected_vals in expected.items(): - stands = args.split(",") - res = collection.gap_analysis(stands) - self.assertCountEqual(res, expected_vals) - def test_add_internal_link(self) -> None: """test that internal links are added successfully, edge cases: @@ -1285,6 +1140,330 @@ def test_get_root_cres(self): self.maxDiff = None self.assertEqual(root_cres, [cres[0], cres[1], cres[7]]) + @patch.object(db.NEO_DB, "gap_analysis") + def test_gap_analysis_disconnected(self, gap_mock): + collection = db.Node_collection() + collection.neo_db.connected = False + gap_mock.return_value = (None, None) + + self.assertEqual(db.gap_analysis(collection.neo_db, ["a", "b"]), None) + + @patch.object(db.NEO_DB, "gap_analysis") + def test_gap_analysis_no_nodes(self, gap_mock): + collection = db.Node_collection() + collection.neo_db.connected = True + + gap_mock.return_value = ([], []) + self.assertEqual( + db.gap_analysis(collection.neo_db, ["a", "b"]), (["a", "b"], {}) + ) + + @patch.object(db.NEO_DB, "gap_analysis") + def test_gap_analysis_no_links(self, gap_mock): + collection = db.Node_collection() + collection.neo_db.connected = True + + gap_mock.return_value = ([defs.CRE(name="bob", id=1)], []) + self.assertEqual( + db.gap_analysis(collection.neo_db, ["a", "b"]), + (["a", "b"], {1: {"start": defs.CRE(name="bob", id=1), "paths": {}}}), + ) + + @patch.object(db.NEO_DB, "gap_analysis") + def test_gap_analysis_one_link(self, gap_mock): + collection = 
db.Node_collection() + collection.neo_db.connected = True + path = [ + { + "end": defs.CRE(name="bob", id=1), + "relationship": "LINKED_TO", + "start": defs.CRE(name="bob", id="a"), + }, + { + "end": defs.CRE(name="bob", id=2), + "relationship": "LINKED_TO", + "start": defs.CRE(name="bob", id="a"), + }, + ] + gap_mock.return_value = ( + [defs.CRE(name="bob", id=1)], + [ + { + "start": defs.CRE(name="bob", id=1), + "end": defs.CRE(name="bob", id=2), + "path": path, + } + ], + ) + expected = ( + ["a", "b"], + { + 1: { + "start": defs.CRE(name="bob", id=1), + "paths": { + 2: {"end": defs.CRE(name="bob", id=2), "path": path, "score": 0} + }, + } + }, + ) + self.assertEqual(db.gap_analysis(collection.neo_db, ["a", "b"]), expected) + + @patch.object(db.NEO_DB, "gap_analysis") + def test_gap_analysis_duplicate_link_path_existing_lower(self, gap_mock): + collection = db.Node_collection() + collection.neo_db.connected = True + path = [ + { + "end": defs.CRE(name="bob", id=1), + "relationship": "LINKED_TO", + "start": defs.CRE(name="bob", id="a"), + }, + { + "end": defs.CRE(name="bob", id=2), + "relationship": "LINKED_TO", + "start": defs.CRE(name="bob", id="a"), + }, + ] + path2 = [ + { + "end": defs.CRE(name="bob", id=1), + "relationship": "LINKED_TO", + "start": defs.CRE(name="bob", id="a"), + }, + { + "end": defs.CRE(name="bob", id=2), + "relationship": "RELATED", + "start": defs.CRE(name="bob", id="a"), + }, + ] + gap_mock.return_value = ( + [defs.CRE(name="bob", id=1)], + [ + { + "start": defs.CRE(name="bob", id=1), + "end": defs.CRE(name="bob", id=2), + "path": path, + }, + { + "start": defs.CRE(name="bob", id=1), + "end": defs.CRE(name="bob", id=2), + "path": path2, + }, + ], + ) + expected = ( + ["a", "b"], + { + 1: { + "start": defs.CRE(name="bob", id=1), + "paths": { + 2: {"end": defs.CRE(name="bob", id=2), "path": path, "score": 0} + }, + } + }, + ) + self.assertEqual(db.gap_analysis(collection.neo_db, ["a", "b"]), expected) + + @patch.object(db.NEO_DB, 
"gap_analysis") + def test_gap_analysis_duplicate_link_path_existing_higher(self, gap_mock): + collection = db.Node_collection() + collection.neo_db.connected = True + path = [ + { + "end": defs.CRE(name="bob", id=1), + "relationship": "LINKED_TO", + "start": defs.CRE(name="bob", id="a"), + }, + { + "end": defs.CRE(name="bob", id=2), + "relationship": "LINKED_TO", + "start": defs.CRE(name="bob", id="a"), + }, + ] + path2 = [ + { + "end": defs.CRE(name="bob", id=1), + "relationship": "LINKED_TO", + "start": defs.CRE(name="bob", id="a"), + }, + { + "end": defs.CRE(name="bob", id=2), + "relationship": "RELATED", + "start": defs.CRE(name="bob", id="a"), + }, + ] + gap_mock.return_value = ( + [defs.CRE(name="bob", id=1)], + [ + { + "start": defs.CRE(name="bob", id=1), + "end": defs.CRE(name="bob", id=2), + "path": path2, + }, + { + "start": defs.CRE(name="bob", id=1), + "end": defs.CRE(name="bob", id=2), + "path": path, + }, + ], + ) + expected = ( + ["a", "b"], + { + 1: { + "start": defs.CRE(name="bob", id=1), + "paths": { + 2: {"end": defs.CRE(name="bob", id=2), "path": path, "score": 0} + }, + } + }, + ) + self.assertEqual(db.gap_analysis(collection.neo_db, ["a", "b"]), expected) + + def test_neo_db_parse_node_code(self): + name = "name" + id = "id" + description = "description" + tags = "tags" + version = "version" + hyperlink = "version" + expected = defs.Code( + name=name, + id=id, + description=description, + tags=tags, + version=version, + hyperlink=hyperlink, + ) + graph_node = db.NeoCode( + name=name, + document_id=id, + description=description, + tags=tags, + version=version, + hyperlink=hyperlink, + ) + self.assertEqual(db.NEO_DB.parse_node(graph_node), expected) + + def test_neo_db_parse_node_standard(self): + name = "name" + id = "id" + description = "description" + tags = "tags" + version = "version" + section = "section" + sectionID = "sectionID" + subsection = "subsection" + hyperlink = "version" + expected = defs.Standard( + name=name, + id=id, + 
description=description, + tags=tags, + version=version, + section=section, + sectionID=sectionID, + subsection=subsection, + hyperlink=hyperlink, + ) + graph_node = db.NeoStandard( + name=name, + document_id=id, + description=description, + tags=tags, + version=version, + section=section, + section_id=sectionID, + subsection=subsection, + hyperlink=hyperlink, + ) + self.assertEqual(db.NEO_DB.parse_node(graph_node), expected) + + def test_neo_db_parse_node_tool(self): + name = "name" + id = "id" + description = "description" + tags = "tags" + version = "version" + section = "section" + sectionID = "sectionID" + subsection = "subsection" + hyperlink = "version" + expected = defs.Tool( + name=name, + id=id, + description=description, + tags=tags, + version=version, + section=section, + sectionID=sectionID, + subsection=subsection, + hyperlink=hyperlink, + ) + graph_node = db.NeoTool( + name=name, + document_id=id, + description=description, + tags=tags, + version=version, + section=section, + section_id=sectionID, + subsection=subsection, + hyperlink=hyperlink, + ) + self.assertEqual(db.NEO_DB.parse_node(graph_node), expected) + + def test_neo_db_parse_node_cre(self): + name = "name" + id = "id" + description = "description" + tags = "tags" + expected = defs.CRE( + name=name, + id=id, + description=description, + tags=tags, + ) + graph_node = db.NeoCRE( + name=name, + document_id=id, + description=description, + tags=tags, + ) + self.assertEqual(db.NEO_DB.parse_node(graph_node), expected) + + def test_neo_db_parse_node_Document(self): + name = "name" + id = "id" + description = "description" + tags = "tags" + graph_node = db.NeoDocument( + name=name, + document_id=id, + description=description, + tags=tags, + ) + with self.assertRaises(Exception) as cm: + db.NEO_DB.parse_node(graph_node) + + self.assertEqual(str(cm.exception), "Shouldn't be parsing a NeoDocument") + + def test_neo_db_parse_node_Node(self): + name = "name" + id = "id" + description = "description" + 
tags = "tags" + graph_node = db.NeoNode( + name=name, + document_id=id, + description=description, + tags=tags, + ) + with self.assertRaises(Exception) as cm: + db.NEO_DB.parse_node(graph_node) + + self.assertEqual(str(cm.exception), "Shouldn't be parsing a NeoNode") + def test_get_embeddings_by_doc_type_paginated(self): """Given: a range of embedding for Nodes and a range of embeddings for CREs when called with doc_type CRE return the cre embeddings diff --git a/application/tests/gap_analysis_test.py b/application/tests/gap_analysis_test.py new file mode 100644 index 000000000..52e812a3f --- /dev/null +++ b/application/tests/gap_analysis_test.py @@ -0,0 +1,233 @@ +import unittest +from application.defs import cre_defs + +from application.utils.gap_analysis import ( + get_path_score, + get_relation_direction, + get_next_id, + PENALTIES, +) + + +class TestGapAnalysis(unittest.TestCase): + def tearDown(self) -> None: + return None + + def setUp(self) -> None: + return None + + def test_get_relation_direction_UP(self): + step = { + "start": cre_defs.CRE(name="bob", id="123"), + "end": cre_defs.CRE(name="bob", id="234"), + } + self.assertEqual(get_relation_direction(step, "123"), "UP") + + def test_get_relation_direction_DOWN(self): + step = { + "start": cre_defs.CRE(name="bob", id="123"), + "end": cre_defs.CRE(name="bob", id="234"), + } + self.assertEqual(get_relation_direction(step, "234"), "DOWN") + + def test_get_next_id_start(self): + step = { + "start": cre_defs.CRE(name="bob", id="123"), + "end": cre_defs.CRE(name="bob", id="234"), + } + self.assertEqual(get_next_id(step, "234"), "123") + + def test_get_next_id_end(self): + step = { + "start": cre_defs.CRE(name="bob", id="123"), + "end": cre_defs.CRE(name="bob", id="234"), + } + self.assertEqual(get_next_id(step, "123"), "234") + + def test_get_path_score_direct_siblings_returns_zero(self): + path = { + "start": cre_defs.CRE( + name="bob", id="029f7cd7-ef2f-4f25-b0d2-3227cde4b34b" + ), + "end": 
cre_defs.CRE(name="bob", id="7d030730-14cc-4c43-8927-f2d0f5fbcf5d"), + "path": [ + { + "end": cre_defs.CRE( + name="bob", id="029f7cd7-ef2f-4f25-b0d2-3227cde4b34b" + ), + "relationship": "LINKED_TO", + "start": cre_defs.CRE( + name="bob", id="07bc9f6f-5387-4dc6-b277-0022ed76049f" + ), + }, + { + "end": cre_defs.CRE( + name="bob", id="7d030730-14cc-4c43-8927-f2d0f5fbcf5d" + ), + "relationship": "LINKED_TO", + "start": cre_defs.CRE( + name="bob", id="e2ac59b2-c1d8-4525-a6b3-155d480aecc9" + ), + }, + ], + } + self.assertEqual(get_path_score(path), 0) + + def test_get_path_score_one_up_returns_one_up_penaltiy(self): + path = { + "start": cre_defs.CRE( + name="bob", id="029f7cd7-ef2f-4f25-b0d2-3227cde4b34b" + ), + "end": cre_defs.CRE(name="bob", id="7d030730-14cc-4c43-8927-f2d0f5fbcf5d"), + "path": [ + { + "end": cre_defs.CRE( + name="bob", id="029f7cd7-ef2f-4f25-b0d2-3227cde4b34b" + ), + "relationship": "LINKED_TO", + "start": cre_defs.CRE( + name="bob", id="07bc9f6f-5387-4dc6-b277-0022ed76049f" + ), + }, + { + "end": cre_defs.CRE(name="bob", id="123"), + "relationship": "CONTAINS", + "start": cre_defs.CRE( + name="bob", id="07bc9f6f-5387-4dc6-b277-0022ed76049f" + ), + }, + { + "end": cre_defs.CRE( + name="bob", id="7d030730-14cc-4c43-8927-f2d0f5fbcf5d" + ), + "relationship": "LINKED_TO", + "start": cre_defs.CRE(name="bob", id="123"), + }, + ], + } + self.assertEqual(get_path_score(path), PENALTIES["CONTAINS_UP"]) + self.assertEqual(path["path"][1]["score"], PENALTIES["CONTAINS_UP"]) + + def test_get_path_score_one_down_one_returns_one_down_penaltiy(self): + path = { + "start": cre_defs.CRE( + name="bob", id="029f7cd7-ef2f-4f25-b0d2-3227cde4b34b" + ), + "end": cre_defs.CRE(name="bob", id="7d030730-14cc-4c43-8927-f2d0f5fbcf5d"), + "path": [ + { + "end": cre_defs.CRE( + name="bob", id="029f7cd7-ef2f-4f25-b0d2-3227cde4b34b" + ), + "relationship": "LINKED_TO", + "start": cre_defs.CRE( + name="bob", id="07bc9f6f-5387-4dc6-b277-0022ed76049f" + ), + }, + { + "end": 
cre_defs.CRE( + name="bob", id="07bc9f6f-5387-4dc6-b277-0022ed76049f" + ), + "relationship": "CONTAINS", + "start": cre_defs.CRE(name="bob", id="123"), + }, + { + "end": cre_defs.CRE( + name="bob", id="7d030730-14cc-4c43-8927-f2d0f5fbcf5d" + ), + "relationship": "LINKED_TO", + "start": cre_defs.CRE(name="bob", id="123"), + }, + ], + } + self.assertEqual(get_path_score(path), PENALTIES["CONTAINS_DOWN"]) + self.assertEqual(path["path"][1]["score"], PENALTIES["CONTAINS_DOWN"]) + + def test_get_path_score_related_returns_related_penalty(self): + path = { + "start": cre_defs.CRE( + name="bob", id="029f7cd7-ef2f-4f25-b0d2-3227cde4b34b" + ), + "end": cre_defs.CRE(name="bob", id="7d030730-14cc-4c43-8927-f2d0f5fbcf5d"), + "path": [ + { + "end": cre_defs.CRE( + name="bob", id="029f7cd7-ef2f-4f25-b0d2-3227cde4b34b" + ), + "relationship": "LINKED_TO", + "start": cre_defs.CRE( + name="bob", id="07bc9f6f-5387-4dc6-b277-0022ed76049f" + ), + }, + { + "end": cre_defs.CRE( + name="bob", id="07bc9f6f-5387-4dc6-b277-0022ed76049f" + ), + "relationship": "RELATED", + "start": cre_defs.CRE(name="bob", id="123"), + }, + { + "end": cre_defs.CRE( + name="bob", id="7d030730-14cc-4c43-8927-f2d0f5fbcf5d" + ), + "relationship": "LINKED_TO", + "start": cre_defs.CRE(name="bob", id="123"), + }, + ], + } + self.assertEqual(get_path_score(path), PENALTIES["RELATED"]) + self.assertEqual(path["path"][1]["score"], PENALTIES["RELATED"]) + + def test_get_path_score_one_of_each_returns_penalty(self): + path = { + "start": cre_defs.CRE( + name="bob", id="029f7cd7-ef2f-4f25-b0d2-3227cde4b34b" + ), + "end": cre_defs.CRE(name="bob", id="7d030730-14cc-4c43-8927-f2d0f5fbcf5d"), + "path": [ + { + "end": cre_defs.CRE( + name="bob", id="029f7cd7-ef2f-4f25-b0d2-3227cde4b34b" + ), + "relationship": "LINKED_TO", + "start": cre_defs.CRE( + name="bob", id="07bc9f6f-5387-4dc6-b277-0022ed76049f" + ), + }, + { + "end": cre_defs.CRE( + name="bob", id="07bc9f6f-5387-4dc6-b277-0022ed76049f" + ), + "relationship": "CONTAINS", 
+ "start": cre_defs.CRE(name="bob", id="123"), + }, + { + "end": cre_defs.CRE(name="bob", id="456"), + "relationship": "RELATED", + "start": cre_defs.CRE(name="bob", id="123"), + }, + { + "end": cre_defs.CRE( + name="bob", id="7d030730-14cc-4c43-8927-f2d0f5fbcf5d" + ), + "relationship": "CONTAINS", + "start": cre_defs.CRE(name="bob", id="456"), + }, + { + "end": cre_defs.CRE( + name="bob", id="7d030730-14cc-4c43-8927-f2d0f5fbcf5d" + ), + "relationship": "LINKED_TO", + "start": cre_defs.CRE(name="bob", id="456"), + }, + ], + } + self.assertEqual( + get_path_score(path), + PENALTIES["RELATED"] + + PENALTIES["CONTAINS_UP"] + + PENALTIES["CONTAINS_DOWN"], + ) + self.assertEqual(path["path"][1]["score"], PENALTIES["CONTAINS_DOWN"]) + self.assertEqual(path["path"][2]["score"], PENALTIES["RELATED"]) + self.assertEqual(path["path"][3]["score"], PENALTIES["CONTAINS_UP"]) diff --git a/application/utils/external_project_parsers/ccmv3.py b/application/utils/external_project_parsers/ccmv3.py deleted file mode 100644 index c4838f9ed..000000000 --- a/application/utils/external_project_parsers/ccmv3.py +++ /dev/null @@ -1,69 +0,0 @@ -import logging -import os -from pprint import pprint -from typing import Dict, Any -from application.database import db -from application.defs import cre_defs as defs - -from application.database.db import dbCREfromCRE -import re - -logging.basicConfig() -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -def make_nist_map(cache: db.Node_collection): - nist_map = {} - re_id = re.compile("(?P\w+-\d+)") - - nist = cache.get_nodes(name="NIST 800-53 v5") - if not nist: - logger.fatal("This CRE DB does not contain NIST, this is fatal") - return - - for nst in nist: - ri = re_id.search(nst.section) - if ri: - nist_map[ri.group("id")] = nst - return nist_map - - -def parse_ccm(ccmFile: Dict[str, Any], cache: db.Node_collection): - nist_map = make_nist_map(cache) - - for ccm_mapping in ccmFile.get("0. 
ccmv3"): - # cre: defs.CRE - # linked_standard: defs.Standard - if "CCM V3.0 Control ID" not in ccm_mapping: - logger.error("string 'CCM V3.0 Control ID' was not found in mapping line") - continue - - ccm = defs.Standard( - name="Cloud Controls Matrix v3.0", - section=ccm_mapping.pop("CCM V3.0 Control ID"), - subsection="", - version="v3", - hyperlink="", - ) - dbccm = cache.add_node(ccm) - logger.debug(f"Registered CCM with id {ccm.section}") - - if ccm_mapping.get("NIST SP800-53 R3"): - nist_links = ccm_mapping.pop("NIST SP800-53 R3").split("\n") - - for nl in nist_links: - if nl.strip() not in nist_map.keys(): - logger.error(f"could not find NIST '{nl}' in the database") - continue - relevant_cres = [ - el.document - for el in nist_map.get(nl.strip()).links - if el.document.doctype == defs.Credoctypes.CRE - ] - - for c in relevant_cres: - cache.add_link(cre=dbCREfromCRE(cre=c), node=dbccm) - logger.debug( - f"Added link between CRE {c.id} and CCM v3.0 {dbccm.section}" - ) diff --git a/application/utils/external_project_parsers/cheatsheets_parser.py b/application/utils/external_project_parsers/cheatsheets_parser.py index 383b39f4b..52678be36 100644 --- a/application/utils/external_project_parsers/cheatsheets_parser.py +++ b/application/utils/external_project_parsers/cheatsheets_parser.py @@ -9,7 +9,7 @@ def cheatsheet(section: str, hyperlink: str, tags: List[str]) -> defs.Standard: return defs.Standard( - name=f"Cheat_sheets", + name=f"OWASP Cheat Sheets", section=section, tags=tags, hyperlink=hyperlink, diff --git a/application/utils/gap_analysis.py b/application/utils/gap_analysis.py new file mode 100644 index 000000000..f5f49902a --- /dev/null +++ b/application/utils/gap_analysis.py @@ -0,0 +1,34 @@ +PENALTIES = { + "RELATED": 2, + "CONTAINS_UP": 2, + "CONTAINS_DOWN": 1, + "LINKED_TO": 0, + "SAME": 0, +} + + +def get_path_score(path): + score = 0 + previous_id = path["start"].id + for step in path["path"]: + penalty_type = step["relationship"] + + if 
step["relationship"] == "CONTAINS": + penalty_type = f"CONTAINS_{get_relation_direction(step, previous_id)}" + pentalty = PENALTIES[penalty_type] + score += pentalty + step["score"] = pentalty + previous_id = get_next_id(step, previous_id) + return score + + +def get_relation_direction(step, previous_id): + if step["start"].id == previous_id: + return "UP" + return "DOWN" + + +def get_next_id(step, previous_id): + if step["start"].id == previous_id: + return step["end"].id + return step["start"].id diff --git a/application/web/web_main.py b/application/web/web_main.py index a3671797d..a69f9c90d 100644 --- a/application/web/web_main.py +++ b/application/web/web_main.py @@ -2,12 +2,15 @@ # silence mypy for the routes file from functools import wraps import json +import hashlib import logging import os import pathlib import urllib.parse from typing import Any from application.utils import oscal_utils +import redis +from rq import Worker, Queue, Connection, job, exceptions from application import cache from application.database import db @@ -17,6 +20,7 @@ from application.utils import mdutils, redirectors from application.prompt_client import prompt_client as prompt_client from enum import Enum +from flask import json as flask_json from flask import ( Blueprint, abort, @@ -65,6 +69,18 @@ def extend_cre_with_tag_links( return cre +def neo4j_not_running_rejection(): + logger.info("Neo4j is disabled") + return ( + jsonify( + { + "message": "Backend services connected to this feature are not running at the moment." 
+ } + ), + 500, + ) + + +@app.route("/rest/v1/id/", methods=["GET"]) @app.route("/rest/v1/name/", methods=["GET"]) @cache.cached(timeout=50) @@ -203,15 +219,96 @@ def find_document_by_tag() -> Any: abort(404) - -@app.route("/rest/v1/gap_analysis", methods=["GET"]) -@cache.cached(timeout=50) -def gap_analysis() -> Any: # TODO (spyros): add export result to spreadsheet +def make_standards_hash(standards: list): + return hashlib.md5(":".join(standards).encode("utf-8")).hexdigest() + + +@app.route("/rest/v1/map_analysis", methods=["GET"]) +@cache.cached(timeout=50, query_string=True) +def gap_analysis() -> Any: database = db.Node_collection() standards = request.args.getlist("standard") - documents = database.gap_analysis(standards=standards) - if documents: - res = [doc.todict() for doc in documents] - return jsonify(res) + redis_url = os.getenv("REDIS_URL", "redis://localhost:6379") + conn = redis.from_url(redis_url) + standards_hash = make_standards_hash(standards) + ga = conn.get(standards_hash) + if ga: + ga_obj = json.loads(ga) + return jsonify({"result": ga_obj}) + q = Queue(connection=conn) + ga_job = q.enqueue_call(db.gap_analysis, [database.neo_db, standards]) + + conn.set(standards_hash, "") + return jsonify({"job_id": ga_job.id}) + + +@app.route("/rest/v1/ma_job_results", methods=["GET"]) +def fetch_job() -> Any: + logger.info("fetching job results") + jobid = request.args.get("id") + redis_url = os.getenv("REDIS_URL", "redis://localhost:6379") + conn = redis.from_url(redis_url) + try: + res = job.Job.fetch(id=jobid, connection=conn) + except exceptions.NoSuchJobError as nje: + abort(404, "No such job") + + logger.info("job exists") + if res.get_status() == job.JobStatus.FAILED: + abort(500, "background job failed") + elif res.get_status() == job.JobStatus.STOPPED: + abort(500, "background job stopped") + elif res.get_status() == job.JobStatus.CANCELED: + abort(500, "background job
canceled") + elif res.get_status() == job.JobStatus.STARTED or res.get_status() == job.JobStatus.QUEUED: + logger.info("but hasn't finished") + return jsonify({"status": res.get_status()}) + + result = res.latest_result() + logger.info("and has finished") + + if res.latest_result().type == result.Type.SUCCESSFUL: + ga_result = result.return_value + logger.info("and has results") + + if len(ga_result) == 2: + standards = ga_result[0] + standards_hash = make_standards_hash(standards=standards) + + if conn.exists(standards_hash): + logger.info("and hash is already in cache") + ga = conn.get(standards_hash) + if ga != "": + logger.info("and results already in cache") + + logger.warning( + f"there was already a gap analysis for standards {standards}, this could be a bug" + ) + return jsonify({"result":ga_result[1]}) + elif res.latest_result().type == result.Type.FAILED: + logger.error(res.latest_result().exc_string) + abort(500) + else: + logger.warning(f"job stopped? {res.latest_result().type}") + abort(500) + return jsonify({}) + + +@app.route("/rest/v1/standards", methods=["GET"]) +@cache.cached(timeout=50) +def standards() -> Any: + redis_url = os.getenv("REDIS_URL", "redis://localhost:6379") + conn = redis.from_url(redis_url) + standards = conn.get("NodeNames") + if standards: + return standards + else: + database = db.Node_collection() + standards = database.standards() + if standards is None: + neo4j_not_running_rejection() + conn.set("NodeNames", flask_json.dumps(standards)) + return standards @app.route("/rest/v1/text_search", methods=["GET"]) diff --git a/application/worker.py b/application/worker.py new file mode 100644 index 000000000..2fbdf1f64 --- /dev/null +++ b/application/worker.py @@ -0,0 +1,22 @@ +import os +import redis +from rq import Worker, Queue, Connection +from application.database import db +import logging +from application.cmd.cre_main import db_connect + +logging.basicConfig() +logger = logging.getLogger(__name__) 
+logger.setLevel(logging.INFO) + +listen = ["high", "default", "low"] + + +def start_worker(cache: str): + redis_url = os.getenv("REDIS_URL", "redis://localhost:6379") + conn = redis.from_url(redis_url) + logger.info(f"Worker Starting") + database = db_connect(path=cache) + with Connection(conn): + worker = Worker(map(Queue, listen)) + worker.work() diff --git a/cre.py b/cre.py index 0feb76a62..ac072821f 100644 --- a/cre.py +++ b/cre.py @@ -187,7 +187,16 @@ def main() -> None: action="store_true", help="for every node, download the text pointed to by the hyperlink and generate embeddings for the content of the specific node", ) - + parser.add_argument( + "--populate_neo4j_db", + action="store_true", + help="populate the neo4j db", + ) + parser.add_argument( + "--start_worker", + action="store_true", + help="start redis queue worker", + ) args = parser.parse_args() from application.cmd import cre_main diff --git a/requirements.txt b/requirements.txt index 7eb93e3db..3927281eb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,11 +2,12 @@ compliance-trestle coverage dacite docx==0.2.4 +Flask==2.3.2 Flask_Caching==2.0.2 flask_compress==1.13 Flask_Cors==4.0.0 Flask_Migrate==4.0.4 -Flask-SQLAlchemy==3.0.5 +Flask-SQLAlchemy gitpython google-api-core google_auth_oauthlib @@ -23,11 +24,91 @@ playwright psycopg2-binary pygithub python_markdown_maker==1.0 -scikit_learn==1.3.0 +scikit_learn scipy==1.11.2 semver -setuptools==66.1.1 +setuptools==68.2.2 simplify_docx==0.1.2 -SQLAlchemy==2.0.20 +SQLAlchemy +compliance-trestle +nose==1.3.7 +mypy +numpy==1.23.0 +neo4j +neomodel==5.1.2 +openapi-schema-validator==0.3.4 +openapi-spec-validator==0.5.1 +openpyxl==3.1.0 +orderedmultidict==1.0.1 +orjson==3.8.5 +packaging +paramiko==3.0.0 +pathable==0.4.3 +pathspec==0.9.0 +pbr==5.8.0 +pep517==0.8.2 +Pillow +pip-autoremove==0.9.1 +platformdirs==2.2.0 +playwright==1.33.0 +pluggy==1.0.0 +prance +prompt-toolkit==3.0.19 +proto-plus==1.22.2 +protobuf==4.23.1 +psycopg2-binary 
+pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycodestyle==2.7.0 +pycparser==2.21 +pydantic==1.10.4 +pyee==9.0.4 +pyflakes==2.3.1 +PyGithub==1.53 +PyJWT==1.7.1 +PyNaCl==1.5.0 +pyparsing==2.4.6 +pyrsistent==0.17.3 +PySnooper==1.1.1 +pytest==7.3.1 +pytest-base-url==2.0.0 +pytest-playwright==0.3.3 +python-dateutil==2.8.1 +python-docx==0.8.11 +python-dotenv==0.21.1 +python-frontmatter==1.0.0 +python-markdown-maker==1.0 +python-slugify==8.0.1 +PyYAML==5.3.1 +regex==2021.11.10 +requests==2.27.1 +requests-oauthlib==1.3.1 +rfc3986==1.5.0 +rsa==4.7 +rq==1.15.1 +redis==5.0.1 +ruamel.yaml==0.17.21 +ruamel.yaml.clib==0.2.7 +scikit-learn==1.2.2 +Shapely==1.8.5.post1 +simplify-docx==0.1.2 +six +smmap==3.0.4 +sniffio==1.3.0 +soupsieve==2.4.1 +SQLAlchemy==1.3.23 +sqlalchemy-stubs==0.4 +testresources==2.0.1 +text-unidecode==1.3 +threadpoolctl==3.1.0 +toml==0.10.2 +tomli==1.2.2 +tqdm==4.65.0 +typed-ast==1.5.4 +types-PyYAML==5.4.8 +typing-inspect==0.7.1 +typing_extensions==4.4.0 +untangle==1.1.1 +urllib3==1.26.8 vertexai==0.0.1 xmltodict==0.13.0