Skip to content

Commit

Permalink
Precompute map analysis (#433)
Browse files Browse the repository at this point in the history
* add time metrics

* progress

* Revert "Revert "Use allShortestPaths over shortestPath (#431)""

This reverts commit 167828c.

* map analysis through redis

* new reqs

* add worker actually

* abort if background job fails

* lint

* minor cleanup

* move hash to utils and make db method write to redis optionally

* make ga job results shared among clients

* lint
  • Loading branch information
northdpole authored Oct 23, 2023
1 parent 167828c commit f508bca
Show file tree
Hide file tree
Showing 15 changed files with 338 additions and 168 deletions.
32 changes: 24 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,23 +1,38 @@

.ONESHELL:

.PHONY: dev-run run test covers install-deps dev docker lint frontend clean all
.PHONY: run test covers install-deps dev docker lint frontend clean all

prod-run:
cp cres/db.sqlite standards_cache.sqlite; gunicorn cre:app --log-file=-

dev-run:
. ./venv/bin/activate && FLASK_APP=cre.py FLASK_CONFIG=development flask run
docker-neo4j:
docker start cre-neo4j 2>/dev/null || docker run -d --name cre-neo4j --env NEO4J_PLUGINS='["apoc"]' --env NEO4J_AUTH=neo4j/password --volume=`pwd`/.neo4j/data:/data --volume=`pwd`/.neo4j/logs:/logs --workdir=/var/lib/neo4j -p 7474:7474 -p 7687:7687 neo4j

docker-redis:
docker start redis-stack 2>/dev/null || docker run -d --name redis-stack -p 6379:6379 -p 8001:8001 redis/redis-stack:latest

start-containers: docker-neo4j docker-redis

start-worker:
. ./venv/bin/activate
FLASK_APP=`pwd`/cre.py python cre.py --start_worker

dev-flask:
. ./venv/bin/activate
FLASK_APP=`pwd`/cre.py FLASK_CONFIG=development flask run

e2e:
yarn build
[ -d "./venv" ] && . ./venv/bin/activate
export FLASK_APP=$(CURDIR)/cre.py
export FLASK_CONFIG=development
FLASK_CONFIG=development flask run&
flask run&

yarn test:e2e
killall yarn
killall flask

test:
[ -d "./venv" ] && . ./venv/bin/activate
export FLASK_APP=$(CURDIR)/cre.py
Expand Down Expand Up @@ -45,9 +60,6 @@ docker:
docker-run:
docker run -it -p 5000:5000 opencre:$(shell git rev-parse HEAD)

docker-neo4j:
docker run --env NEO4J_PLUGINS='["apoc"]' --volume=./neo4j/data:/data --volume=/data --volume=/logs --workdir=/var/lib/neo4j -p 7474:7474 -p 7687:7687 -d neo4j

lint:
[ -d "./venv" ] && . ./venv/bin/activate && black . && yarn lint

Expand Down Expand Up @@ -82,4 +94,8 @@ import-all:
[ -d "./venv" ] && . ./venv/bin/activate
rm -rf standards_cache.sqlite && make migrate-upgrade && export FLASK_APP=$(CURDIR)/cre.py && python cre.py --add --from_spreadsheet https://docs.google.com/spreadsheets/d/1eZOEYgts7d_-Dr-1oAbogPfzBLh6511b58pX3b59kvg && python cre.py --generate_embeddings && python cre.py --zap_in --cheatsheets_in --github_tools_in --capec_in --owasp_secure_headers_in --pci_dss_4_in --juiceshop_in && python cre.py --generate_embeddings

import-neo4j:
[ -d "./venv" ] && . ./venv/bin/activate
export FLASK_APP=$(CURDIR)/cre.py && python cre.py --populate_neo4j_db

all: clean lint test dev dev-run
3 changes: 2 additions & 1 deletion Procfile
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
web: gunicorn cre:app --log-file=-
web: gunicorn cre:app --log-file=-
worker: FLASK_APP=`pwd`/cre.py python cre.py --start_worker
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,18 @@ To add a remote spreadsheet to your local database you can run
<pre>python cre.py --add --from_spreadsheet <google sheets url></pre>

To run the web application for development you can run
<pre>make dev-run</pre>
<pre>
$ make start-containers
$ make start-worker

# in a separate shell
$ make dev-flask
</pre>

Alternatively, you can use the dockerfile with
<pre>make docker && make docker-run</pre>

Some features like Gap Analysis require a neo4j DB running you can start this with
Some features, like Gap Analysis, require a running Neo4j DB; you can start one with
<pre>make docker-neo4j</pre>
Environment variables for the app to connect to the Neo4j DB (with defaults):
- NEO4J_URL (neo4j//neo4j:password@localhost:7687)
Expand Down
13 changes: 4 additions & 9 deletions application/cmd/cre_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from application.utils.external_project_parsers import (
capec_parser,
cwe,
ccmv3,
ccmv4,
cheatsheets_parser,
misc_tools_parser,
Expand Down Expand Up @@ -375,14 +374,6 @@ def run(args: argparse.Namespace) -> None: # pragma: no cover
if args.export:
cache = db_connect(args.cache_file)
cache.export(args.export)
if args.csa_ccm_v3_in:
ccmv3.parse_ccm(
ccmFile=sheet_utils.readSpreadsheet(
alias="",
url="https://docs.google.com/spreadsheets/d/1b5i8OV919aiqW2KcYWOQvkLorL1bRPqjthJxLH0QpD8",
),
cache=db_connect(args.cache_file),
)
if args.csa_ccm_v4_in:
ccmv4.parse_ccm(
ccmFile=sheet_utils.readSpreadsheet(
Expand Down Expand Up @@ -428,6 +419,10 @@ def run(args: argparse.Namespace) -> None: # pragma: no cover
owasp_metadata_to_cre(args.owasp_proj_meta)
if args.populate_neo4j_db:
populate_neo4j_db(args.cache_file)
if args.start_worker:
from application.worker import start_worker

start_worker(args.cache_file)


def db_connect(path: str):
Expand Down
101 changes: 68 additions & 33 deletions application/database/db.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from flask import json as flask_json
import json
import redis
from neomodel import (
config,
StructuredNode,
Expand All @@ -16,7 +19,6 @@
from collections import Counter
from itertools import permutations
from typing import Any, Dict, List, Optional, Tuple, cast

import networkx as nx
import yaml
from application.defs import cre_defs
Expand All @@ -26,6 +28,8 @@
import uuid

from application.utils.gap_analysis import get_path_score
from application.utils.hash import make_array_hash


from .. import sqla # type: ignore

Expand Down Expand Up @@ -294,8 +298,7 @@ def __init__(sel):
raise ValueError("NEO_DB is a singleton, please call instance() instead")

@classmethod
def populate_DB(self, session) -> nx.Graph:
graph = nx.DiGraph()
def populate_DB(self, session):
for il in session.query(InternalLinks).all():
group = session.query(CRE).filter(CRE.id == il.group).first()
if not group:
Expand All @@ -319,7 +322,6 @@ def populate_DB(self, session) -> nx.Graph:
self.add_cre(cre)

self.link_CRE_to_Node(lnk.cre, lnk.node, lnk.type)
return graph

@classmethod
def add_cre(self, dbcre: CRE):
Expand Down Expand Up @@ -423,31 +425,38 @@ def link_CRE_to_Node(self, CRE_id, node_id, link_type):
def gap_analysis(self, name_1, name_2):
base_standard = NeoStandard.nodes.filter(name=name_1)
denylist = ["Cross-cutting concerns"]
from pprint import pprint
from datetime import datetime

t1 = datetime.now()
path_records_all, _ = db.cypher_query(
"""
OPTIONAL MATCH (BaseStandard:NeoStandard {name: $name1})
OPTIONAL MATCH (CompareStandard:NeoStandard {name: $name2})
OPTIONAL MATCH p = shortestPath((BaseStandard)-[*..20]-(CompareStandard))
OPTIONAL MATCH p = allShortestPaths((BaseStandard)-[*..20]-(CompareStandard))
WITH p
WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE (n:NeoCRE or n = BaseStandard or n = CompareStandard) AND NOT n.name in $denylist)
RETURN p
""",
{"name1": name_1, "name2": name_2, "denylist": denylist},
resolve_objects=True,
)

t2 = datetime.now()
pprint(f"path records all took {t2-t1}")
pprint(path_records_all.__len__())
path_records, _ = db.cypher_query(
"""
OPTIONAL MATCH (BaseStandard:NeoStandard {name: $name1})
OPTIONAL MATCH (CompareStandard:NeoStandard {name: $name2})
OPTIONAL MATCH p = shortestPath((BaseStandard)-[:(LINKED_TO|CONTAINS)*..20]-(CompareStandard))
OPTIONAL MATCH p = allShortestPaths((BaseStandard)-[:(LINKED_TO|CONTAINS)*..20]-(CompareStandard))
WITH p
WHERE length(p) > 1 AND ALL(n in NODES(p) WHERE (n:NeoCRE or n = BaseStandard or n = CompareStandard) AND NOT n.name in $denylist)
RETURN p
""",
{"name1": name_1, "name2": name_2, "denylist": denylist},
resolve_objects=True,
)
t3 = datetime.now()

def format_segment(seg: StructuredRel, nodes):
relation_map = {
Expand Down Expand Up @@ -476,16 +485,24 @@ def format_path_record(rec):
"path": [format_segment(seg, rec.nodes) for seg in rec.relationships],
}

pprint(
f"path records all took {t2-t1} path records took {t3 - t2}, total: {t3 - t1}"
)
return [NEO_DB.parse_node(rec) for rec in base_standard], [
format_path_record(rec[0]) for rec in (path_records + path_records_all)
]

@classmethod
def standards(self) -> List[str]:
tools = NeoTool.nodes.all()
standards = NeoStandard.nodes.all()

return list(set([x.name for x in tools] + [x.name for x in standards]))
tools = []
for x in db.cypher_query("""MATCH (n:NeoTool) RETURN DISTINCT n.name""")[0]:
tools.extend(x)
standards = []
for x in db.cypher_query("""MATCH (n:NeoStandard) RETURN DISTINCT n.name""")[
0
]: # 0 is the results, 1 is the "n.name" param
standards.extend(x)
return list(set([x for x in tools] + [x for x in standards]))

@staticmethod
def parse_node(node: NeoDocument) -> cre_defs.Document:
Expand Down Expand Up @@ -1399,28 +1416,6 @@ def find_path_between_nodes(

return res

def gap_analysis(self, node_names: List[str]):
base_standard, paths = self.neo_db.gap_analysis(node_names[0], node_names[1])
if base_standard is None:
return None
grouped_paths = {}
for node in base_standard:
key = node.id
if key not in grouped_paths:
grouped_paths[key] = {"start": node, "paths": {}}

for path in paths:
key = path["start"].id
end_key = path["end"].id
path["score"] = get_path_score(path)
del path["start"]
if end_key in grouped_paths[key]["paths"]:
if grouped_paths[key]["paths"][end_key]["score"] > path["score"]:
grouped_paths[key]["paths"][end_key] = path
else:
grouped_paths[key]["paths"][end_key] = path
return grouped_paths

def standards(self) -> List[str]:
return self.neo_db.standards()

Expand Down Expand Up @@ -1767,3 +1762,43 @@ def dbCREfromCRE(cre: cre_defs.CRE) -> CRE:
external_id=cre.id,
tags=",".join(tags),
)


def gap_analysis(
    neo_db: NEO_DB,
    node_names: List[str],
    store_in_cache: bool = False,
    cache_key: str = "",
):
    """Run a gap analysis between the first two standards in *node_names*.

    Fetches base-standard nodes and candidate paths from Neo4j, then groups
    the paths by the id of their starting node, keeping only the
    lowest-scoring path per end node.

    When *store_in_cache* is True the grouped result is written to Redis
    (under *cache_key*, or a hash derived from *node_names* when empty) and
    an empty mapping is returned instead — this keeps a potentially huge
    object out of the caller's memory when invoked from a worker.

    Returns None when the base standard cannot be found, otherwise a tuple
    of (node_names, grouped_paths) where grouped_paths maps start-node id
    to {"start": node, "paths": {end_id: path}}.
    """
    base_standard, paths = neo_db.gap_analysis(node_names[0], node_names[1])
    if base_standard is None:
        return None

    grouped_paths = {}
    for node in base_standard:
        if node.id not in grouped_paths:
            grouped_paths[node.id] = {"start": node, "paths": {}}

    for path in paths:
        start_id = path["start"].id
        end_id = path["end"].id
        path["score"] = get_path_score(path)
        del path["start"]
        # Keep only the best (lowest) scoring path per end node.
        current = grouped_paths[start_id]["paths"].get(end_id)
        if current is None or current["score"] > path["score"]:
            grouped_paths[start_id]["paths"][end_id] = path

    if store_in_cache:
        # Lightweight memory option: when called via a worker, stash the
        # (potentially huge) result in Redis instead of returning it, so
        # neither this process nor the caller holds a duplicate in memory.
        conn = redis.from_url(os.getenv("REDIS_URL", "redis://localhost:6379"))
        if cache_key == "":
            cache_key = make_array_hash(node_names)
        conn.set(cache_key, flask_json.dumps({"result": grouped_paths}))
        return (node_names, {})

    return (node_names, grouped_paths)
57 changes: 54 additions & 3 deletions application/frontend/src/pages/GapAnalysis/GapAnalysis.tsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import axios from 'axios';
import React, { useEffect, useState } from 'react';
import React, { useEffect, useRef, useState } from 'react';
import { useLocation } from 'react-router-dom';
import {
Accordion,
Expand Down Expand Up @@ -125,12 +125,14 @@ export const GapAnalysis = () => {
const [CompareStandard, setCompareStandard] = useState<string | undefined>(
searchParams.get('compare') ?? ''
);
const [gaJob, setgaJob] = useState<string>('');
const [gapAnalysis, setGapAnalysis] = useState<Record<string, GapAnalysisPathStart>>();
const [activeIndex, SetActiveIndex] = useState<string>();
const [loadingStandards, setLoadingStandards] = useState<boolean>(false);
const [loadingGA, setLoadingGA] = useState<boolean>(false);
const [error, setError] = useState<string | null | object>(null);
const { apiUrl } = useEnvironment();
const timerIdRef = useRef<NodeJS.Timer>();

const GetStrongPathsCount = (paths) =>
Math.max(
Expand All @@ -156,13 +158,62 @@ export const GapAnalysis = () => {
});
}, [setStandardOptions, setLoadingStandards, setError]);

useEffect(() => {
console.log('gajob changed, polling');
const pollingCallback = () => {
const fetchData = async () => {
const result = await axios.get(`${apiUrl}/ma_job_results?id=` + gaJob, {
headers: {
'Cache-Control': 'no-cache',
Pragma: 'no-cache',
Expires: '0',
},
});
if (result.data.result) {
setLoadingGA(false);
setGapAnalysis(result.data.result);
setgaJob('');
}
};
if (!gaJob) return;
fetchData().catch((e) => {
setLoadingGA(false);
setError(e.response.data.message ?? e.message);
});
};

const startPolling = () => {
// Polling every 10 seconds
timerIdRef.current = setInterval(pollingCallback, 10000);
};
const stopPolling = () => {
clearInterval(timerIdRef.current);
};

if (gaJob) {
console.log('started polling');
startPolling();
} else {
console.log('stoped polling');
stopPolling();
}

return () => {
stopPolling();
};
}, [gaJob]);

useEffect(() => {
const fetchData = async () => {
const result = await axios.get(
`${apiUrl}/map_analysis?standard=${BaseStandard}&standard=${CompareStandard}`
);
setLoadingGA(false);
setGapAnalysis(result.data);
if (result.data.result) {
setLoadingGA(false);
setGapAnalysis(result.data.result);
} else if (result.data.job_id) {
setgaJob(result.data.job_id);
}
};

if (!BaseStandard || !CompareStandard || BaseStandard === CompareStandard) return;
Expand Down
Loading

0 comments on commit f508bca

Please sign in to comment.